Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions pytrendy/detect_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .io.plot_pytrendy import plot_pytrendy
from .io.results_pytrendy import PyTrendyResults

def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, method_params:dict=None) -> PyTrendyResults:
def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, method_params: dict=None, debug: bool=False ) -> PyTrendyResults:
"""
This is the main function that runs trend detection end-to-end.

Expand Down Expand Up @@ -41,7 +41,10 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth

- **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`.
- **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`.

debug (bool, optional):
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy.
Defaults to `False`.

Returns:
PyTrendyResults:
An object encapsulating the detected segments and associated metadata.
Expand All @@ -62,7 +65,7 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth
}

# Core 5-step pipeline
df = process_signals(df, value_col)
df = process_signals(df, value_col, debug=debug)
segments = get_segments(df)
segments = refine_segments(df, value_col, segments, method_params)
segments = analyse_segments(df, value_col, segments)
Expand Down
85 changes: 51 additions & 34 deletions pytrendy/process_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
from scipy.stats import iqr
from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE

def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame:
"""
Applies signal processing techniques to classify regions of a time series.

Expand All @@ -31,6 +32,8 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
Input time series data with a datetime index and signal column.
value_col (str):
Name of the column containing the signal to process.
debug (bool, optional):
If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`.

Returns:
`pd.DataFrame`: Modified DataFrame with additional columns.
Expand All @@ -43,7 +46,7 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
WINDOW_NOISE = int(WINDOW_SMOOTH*0.5)

THRESHOLD_NOISE = 2.5 # Sensitivity to detecting noise (recommended 0-10)
THRESHOLD_SMOOTH = 0.25 # Sensitivity to detecting trends (recommended 0-0.5)
THRESHOLD_SMOOTH = 0.001 # Sensitivity to detecting trends as fraction of iqr

# 1. Noise detection via SNR.
# 1.1 Compute the SNR
Expand Down Expand Up @@ -173,38 +176,52 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame:
df['trend_flag'] = 0
df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2
df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3
df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
df.loc[(df['smoothed_deriv'] >= THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
df.loc[(df['smoothed_deriv'] < -THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1

# import matplotlib.pyplot as plt

# ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'smoothed_std']].plot(figsize=(20,3), secondary_y='smoothed_std')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'flat_flag']].plot(figsize=(20,3), secondary_y='flat_flag')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()

# ax = df[[value_col, 'trend_flag']].plot(figsize=(20,3), secondary_y='trend_flag')
# ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
# plt.show()
derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH
df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1)
df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1

if debug:
import matplotlib.pyplot as plt

#df['smoothed_deriv'].hist()
#plt.show()

ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
plt.title("Signal-Noise Ratio (SNR)")
plt.show()

ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag')
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
plt.title("Noise Flag")
plt.show()

ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed')
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
plt.title("Smoothed")
plt.show()

ax = df[[value_col, 'smoothed_std']].plot(figsize=(20,3), secondary_y='smoothed_std')
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
plt.title("Smoothed Std")
plt.show()

ax = df[[value_col, 'flat_flag']].plot(figsize=(20,3), secondary_y='flat_flag')
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
plt.title("Flat Flag")
plt.show()

ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
ax.right_ax.axhline(y=THRESHOLD_SMOOTH, color='gray', linestyle='--', linewidth=2)
ax.right_ax.axhline(y=-THRESHOLD_SMOOTH, color='gray', linestyle=':', linewidth=2)
plt.title("Smoothed Derivative")
plt.show()

ax = df[[value_col, 'trend_flag']].plot(figsize=(20,3), secondary_y='trend_flag')
ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
plt.title("Trend Flag")
plt.show()

return df
101 changes: 101 additions & 0 deletions tests/test_debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Tests for debug functionality.

These tests verify that the debug process works as expected.
Comment thread
ChrisMarsden833 marked this conversation as resolved.

This is work in progress.
"""

import pytest
import numpy as np
import pandas as pd
import pytrendy as pt
from pytrendy.io.results_pytrendy import PyTrendyResults
from conftest import assert_segments_match
import matplotlib.pyplot as plt
import matplotlib

def compare_debug(monkeypatch: pytest.MonkeyPatch, df: pd.DataFrame, date_col: str='date', value_col: str='gradual') -> tuple[PyTrendyResults, PyTrendyResults]:
    """Run `detect_trends` on the same dataframe in debug and normal mode.

    Helper that guarantees both runs use identical arguments apart from the
    `debug` flag, so the two results can be compared afterwards.

    Args:
        monkeypatch (pytest.MonkeyPatch):
            Monkeypatch fixture used to suppress plotting.
        df (pd.DataFrame):
            Pandas dataframe containing the trend we are testing. Must contain date_col and value_col.
        date_col (str, optional):
            The date column, forwarded to `detect_trends`.
            Defaults to 'date'.
        value_col (str, optional):
            The value column, forwarded to `detect_trends`.
            Defaults to 'gradual'.

    Returns:
        tuple[PyTrendyResults, PyTrendyResults]: The PyTrendy results with and without debug mode activated, respectively.
    """

    # Debug mode calls plt.show() for each diagnostic figure; swap it for a stub
    # that just closes the figures so the run neither blocks nor leaks figures.
    monkeypatch.setattr(plt, "show", lambda *a, **k: plt.close("all"))

    def run(debug: bool) -> PyTrendyResults:
        # Single call site so that the two runs can only differ by `debug`.
        return pt.detect_trends(
            df,
            date_col=date_col,
            value_col=value_col,
            plot=False,
            debug=debug,
            method_params=dict(is_abrupt_padded=False)
        )

    # Same order as before: debug run first, then the normal run.
    results_debug = run(True)
    results_no_debug = run(False)

    return results_debug, results_no_debug


class TestDebug:
    """Test cases for the debug mode of `detect_trends`."""

    # Expected number of plt.show() calls for one debug run with plot=False.
    # The debug branch of process_signals shows one figure each for: SNR,
    # noise flag, smoothed, smoothed std, flat flag, smoothed derivative and
    # trend flag.
    EXPECTED_DEBUG_PLOTS = 7

    @pytest.mark.core
    def test_debug_mode_equivalency(self, monkeypatch):
        """Test that the series_synthetic data produces identical segments in debug mode and in normal mode."""
        df = pt.load_data('series_synthetic')
        results_debug, results_no_debug = compare_debug(monkeypatch, df)
        assert_segments_match(results_debug.segments, results_no_debug.segments)

    @pytest.mark.core
    def test_debug_mode_equivalency_noise(self, monkeypatch):
        """Test that series_synthetic data with added Gaussian noise produces identical segments in debug mode and in normal mode."""
        df = pt.load_data('series_synthetic')
        np.random.seed(42)  # Deterministic Testing
        df['gradual'] += np.random.normal(0, 10, size=len(df))
        results_debug, results_no_debug = compare_debug(monkeypatch, df)
        assert_segments_match(results_debug.segments, results_no_debug.segments)

    @pytest.mark.core
    def test_debug_mode_plots(self, monkeypatch):
        """
        Test that the correct number of plots are created.
        We use monkeypatch to replace plt.show with a fake function that records calls.
        """

        show_calls = []

        def fake_show(*args, **kwargs):
            # Record the call, then close the figures so nothing is displayed or leaked.
            show_calls.append((args, kwargs))
            plt.close("all")

        monkeypatch.setattr(plt, 'show', fake_show)

        df = pt.load_data('series_synthetic')
        _ = pt.detect_trends(
            df,
            date_col='date',
            value_col='gradual',
            plot=False,
            debug=True,
            method_params=dict(is_abrupt_padded=False)
        )
        assert len(show_calls) == self.EXPECTED_DEBUG_PLOTS, (
            f"expected {self.EXPECTED_DEBUG_PLOTS} debug plots, got {len(show_calls)}"
        )
7 changes: 4 additions & 3 deletions tests/tests_crashes_edgecases/data/TESTDATA.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
This is test data that were initially derived from random noise generation for quick manual testing,
however they were found to crash or break logic of the algorithm in new and interesting ways.
This folder contains test data that was found to break the logic of the algorithm in new and interesting ways.
Some of it came from random noise generation for quick manual testing, some from synthetic 'hand-drawn' data.
As such, these datasets were backed up here and immortalised for a never-ending cycle of automated testing.

- noisy_crashes.csv: edge cases that were found to crash pytrendy (either by hanging up, or throwing execution errors)
- noisy_edgecases.csv: would make trends get detected in weird unexpected way, or plot visualisation bugs, etc...
- noisy_edgecases.csv: edge cases that caused trends to be detected in weird, unexpected ways, or triggered plot visualisation bugs, etc.
- low_value_series.csv: a hand-drawn series whose values lie in the range [0, 1], which was found to break the algorithm.
Loading
Loading