Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
## [UNRELEASED] - YYYY-MM-DD

### Fixed
- Improve detection and truncation of anomalous samples causing clock synchronization corruption ([#158](https://github.com/xdf-modules/pyxdf/pull/158) by [Mathieu Scheltienne](https://github.com/mscheltienne))

## [1.17.2] - 2026-01-07
### Added
- Add new `case_sensitive` parameter to `match_streaminfos`, defaulting to `True` to maintain previous behavior; when `False`, stream properties are matched more leniently ([#134](https://github.com/xdf-modules/pyxdf/pull/134) by [Stefan Appelhoff](https://github.com/sappelhoff))
Expand Down
69 changes: 43 additions & 26 deletions src/pyxdf/pyxdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,18 +654,26 @@ def _detect_corrupted_clock_offset(


def _truncate_corrupted_offsets(temp, streams):
"""Truncate extra samples and corrupted clock offsets (pylsl#67 bug).
"""Truncate corrupted clock offsets and associated extra samples.

The pylsl#67 bug occurs when an LSL outlet is destroyed while an inlet is
still connected, causing an extra sample with potentially garbage clock
The pylsl#67, liblsl#246 bug occurs when an LSL outlet is destroyed while an inlet
is still connected, causing an extra sample with potentially garbage clock
offset values to be recorded.

This function:
This function requires BOTH conditions to confirm the pylsl#67, liblsl#246 bug:

1. Always truncates samples to match the footer sample_count (the footer
is authoritative).
2. Only truncates the last clock offset if it's
statistically anomalous (see ``_detect_corrupted_clock_offset``).
1. Extra samples exist (sample count exceeds footer sample_count).
2. The last clock offset is statistically anomalous (see
``_detect_corrupted_clock_offset``).

If both conditions are met, both the extra samples and the corrupted clock
offset are truncated.

Note: Extra samples alone (without corrupted clock offset) are NOT truncated,
as the footer sample_count may be incorrect due to race conditions in
liblsl/LabRecorder where final samples arrive after the footer is written.
Similarly, anomalous clock offsets alone (without extra samples) are NOT
truncated, as they may be due to legitimate clock resets.

Args:
temp : dict of StreamData objects indexed by stream ID.
Expand All @@ -676,19 +684,42 @@ def _truncate_corrupted_offsets(temp, streams):

See Also:
https://github.com/labstreaminglayer/pylsl/issues/67
https://github.com/sccn/liblsl/issues/246
"""
for stream_id, stream in temp.items():
# First check if there are extra samples - this is the primary evidence
# of a potential pylsl#67, liblsl#246 bug.
footer = streams.get(stream_id, {}).get("footer", {}).get("info", {})
sample_count_str = footer.get("sample_count", [None])[0]
if sample_count_str is None:
continue
footer_count = int(sample_count_str)

# Only proceed if there's evidence of the pylsl#67 bug (extra sample).
# If sample count matches, don't touch clock offsets - large jumps from
# disconnect/reconnect are legitimate.
if len(stream.time_stamps) <= footer_count:
continue
continue # No extra samples → no evidence of pylsl#67, liblsl#246

# Extra samples exist - now check if clock offset is also corrupted.
# We require BOTH conditions because:
# - Extra samples alone could be valid "late" data (footer off-by-1)
# - Anomalous clock offsets alone could be legitimate clock resets
# - Both together strongly indicate the pylsl#67, liblsl#246 bug
clock_corrupted = False
if len(stream.clock_times) >= 3:
clock_corrupted = _detect_corrupted_clock_offset(
stream.clock_times, stream.clock_values
)

if not clock_corrupted:
continue # Extra samples but normal clock offset → keep everything

# Both conditions met → truncate both
logger.warning(
"Stream %s: last clock offset is statistically anomalous, "
"truncating (see pylsl#67, liblsl#246).",
stream_id,
)
stream.clock_times = stream.clock_times[:-1]
stream.clock_values = stream.clock_values[:-1]

logger.warning(
"Stream %s: sample count (%d) exceeds footer sample_count (%d), "
Expand All @@ -700,20 +731,6 @@ def _truncate_corrupted_offsets(temp, streams):
stream.time_stamps = stream.time_stamps[:footer_count]
stream.time_series = stream.time_series[:footer_count]

# Truncate clock offset only if statistically anomalous
if len(stream.clock_times) >= 3:
is_corrupted = _detect_corrupted_clock_offset(
stream.clock_times, stream.clock_values
)
if is_corrupted:
logger.warning(
"Stream %s: last clock offset is statistically anomalous, "
"truncating (see pylsl#67).",
stream_id,
)
stream.clock_times = stream.clock_times[:-1]
stream.clock_values = stream.clock_values[:-1]

return temp


Expand Down
68 changes: 62 additions & 6 deletions test/test_truncate_corrupted_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@ def test_no_truncation_when_sample_count_matches():
assert temp[1].clock_values == orig_clock_values


def test_sample_truncation_but_clock_offsets_preserved():
"""Extra sample should be truncated, but normal clock offsets preserved."""
def test_no_truncation_when_clock_offsets_normal():
"""Extra samples should NOT be truncated when clock offsets are normal.

This tests the case where footer sample_count is off-by-1 due to race
conditions (e.g., final sample arrives after footer is written), but the
data is valid. Without evidence of corruption (corrupted clock offset),
the extra samples should be preserved.
"""
n_samples = 100
n_offsets = 20
clock_tdiff = 5
Expand All @@ -69,17 +75,67 @@ def test_sample_truncation_but_clock_offsets_preserved():
# Footer says n_samples, but we have n_samples + 1
streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}}

# Store original clock offset values
# Store original values
orig_timestamps = temp[1].time_stamps.copy()
orig_clock_times = temp[1].clock_times.copy()
orig_clock_values = temp[1].clock_values.copy()

# Apply truncation
temp = _truncate_corrupted_offsets(temp, streams)

# Verify samples were truncated to footer count
assert len(temp[1].time_stamps) == n_samples
# Verify samples were NOT truncated (clock offsets are normal, no evidence
# of pylsl#67, liblsl#246 bug, so we keep the extra samples)
np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps)
assert len(temp[1].time_stamps) == n_samples + 1

# Verify clock offsets were NOT truncated
assert temp[1].clock_times == orig_clock_times
assert temp[1].clock_values == orig_clock_values


def test_no_truncation_when_no_extra_samples():
    """Anomalous clock offset should NOT be truncated without extra samples.

    This tests the case where clock offsets look anomalous (e.g., due to
    legitimate clock resets) but there are no extra samples. Without extra
    samples, we have no evidence of the pylsl#67 bug, so we should not
    truncate the clock offset.
    """
    n_samples = 100
    n_offsets = 20
    clock_tdiff = 5
    clock_offset_value = -0.001  # Normal offset: -1ms

    # Create stream with matching sample count but "anomalous" last clock offset
    # This could be a legitimate clock reset, not pylsl#67 - liblsl#246 corruption
    time_stamps = np.linspace(0, 100, n_samples)
    clock_times = [i * clock_tdiff for i in range(n_offsets)]
    clock_values = [clock_offset_value] * n_offsets

    # Add an "anomalous" clock offset (could be legitimate clock reset)
    clock_times.append(clock_times[-1] + 800000)  # Huge time jump
    clock_values.append(-750000)  # Huge value change

    temp = {
        1: MockStreamData(
            time_stamps=time_stamps,
            clock_times=clock_times,
            clock_values=clock_values,
        )
    }
    # Footer matches sample count (no extra samples)
    streams = {1: {"footer": {"info": {"sample_count": [str(n_samples)]}}}}

    # Store original values so we can assert nothing was modified in place
    orig_timestamps = temp[1].time_stamps.copy()
    orig_clock_times = temp[1].clock_times.copy()
    orig_clock_values = temp[1].clock_values.copy()

    # Apply truncation
    temp = _truncate_corrupted_offsets(temp, streams)

    # Verify NOTHING was truncated (no extra samples = no evidence of pylsl#67)
    np.testing.assert_array_equal(temp[1].time_stamps, orig_timestamps)
    assert temp[1].clock_times == orig_clock_times
    assert temp[1].clock_values == orig_clock_values

Expand Down