diff --git a/pytrendy/detect_trends.py b/pytrendy/detect_trends.py index c0a7f35..589f919 100644 --- a/pytrendy/detect_trends.py +++ b/pytrendy/detect_trends.py @@ -8,7 +8,7 @@ from .io.plot_pytrendy import plot_pytrendy from .io.results_pytrendy import PyTrendyResults -def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, method_params:dict=None) -> PyTrendyResults: +def detect_trends(df: pd.DataFrame, date_col: str, value_col: str, plot=True, method_params: dict=None, debug: bool=False ) -> PyTrendyResults: """ This is the main function that runs trend detection end-to-end. @@ -41,7 +41,10 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth - **is_abrupt_padded** (`bool`): Whether to pad abrupt transitions between segments. Defaults to `False`. - **abrupt_padding** (`int`): Number of days to pad around abrupt transitions. Only referenced when `is_abrupt_padded` is `True`. Defaults to `28`. - + debug (bool, optional): + If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. + Defaults to `False`. + Returns: PyTrendyResults: An object encapsulating the detected segments and associated metadata. 
@@ -62,7 +65,7 @@ def detect_trends(df:pd.DataFrame, date_col:str, value_col: str, plot=True, meth } # Core 5-step pipeline - df = process_signals(df, value_col) + df = process_signals(df, value_col, debug=debug) segments = get_segments(df) segments = refine_segments(df, value_col, segments, method_params) segments = analyse_segments(df, value_col, segments) diff --git a/pytrendy/process_signals.py b/pytrendy/process_signals.py index 4c119e0..8db4654 100644 --- a/pytrendy/process_signals.py +++ b/pytrendy/process_signals.py @@ -3,9 +3,10 @@ import pandas as pd import numpy as np from scipy.signal import savgol_filter +from scipy.stats import iqr from .post_processing.segments_refine.segment_grouping import GROUPING_DISTANCE -def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame: +def process_signals(df: pd.DataFrame, value_col: str, debug: bool=False) -> pd.DataFrame: """ Applies signal processing techniques to classify regions of a time series. @@ -31,6 +32,8 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame: Input time series data with a datetime index and signal column. value_col (str): Name of the column containing the signal to process. + debug (bool, optional): + If `True` will run in debug mode, outputting various additional plots and print statements. Only recommended for developers of pytrendy. Defaults to `False`. Returns: `pd.DataFrame`: Modified DataFrame with additional columns. @@ -43,7 +46,7 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame: WINDOW_NOISE = int(WINDOW_SMOOTH*0.5) THRESHOLD_NOISE = 2.5 # Sensitivity to detecting noise (recommended 0-10) - THRESHOLD_SMOOTH = 0.25 # Sensitivity to detecting trends (recommended 0-0.5) + THRESHOLD_SMOOTH = 0.001 # Sensitivity to detecting trends as fraction of iqr # 1. Noise detection via SNR. 
# 1.1 Compute the SNR @@ -173,38 +176,52 @@ def process_signals(df: pd.DataFrame, value_col: str) -> pd.DataFrame: df['trend_flag'] = 0 df.loc[df['flat_flag'] == 1, 'trend_flag'] = -2 df.loc[df['noise_flag'] == 1, 'trend_flag'] = -3 - df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, deriv=1) - df.loc[(df['smoothed_deriv'] >= THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1 - df.loc[(df['smoothed_deriv'] < -THRESHOLD_SMOOTH) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1 - - # import matplotlib.pyplot as plt - - # ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - - # ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - - # ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - # ax = df[[value_col, 'smoothed_std']].plot(figsize=(20,3), secondary_y='smoothed_std') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - - # ax = df[[value_col, 'flat_flag']].plot(figsize=(20,3), secondary_y='flat_flag') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - - # ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() - - # ax = df[[value_col, 'trend_flag']].plot(figsize=(20,3), secondary_y='trend_flag') - # ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2) - # plt.show() + derivative_limit = abs(iqr(df[value_col])) * THRESHOLD_SMOOTH + df['smoothed_deriv'] = savgol_filter(df[value_col], window_length=WINDOW_SMOOTH, polyorder=1, 
deriv=1)
+    df.loc[(df['smoothed_deriv'] >= derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = 1
+    df.loc[(df['smoothed_deriv'] < -derivative_limit) & (df['flat_flag'] == 0) & (df['noise_flag'] == 0), 'trend_flag'] = -1
+
+    if debug:  # developer-only diagnostics: one figure per intermediate signal
+        import matplotlib.pyplot as plt  # imported lazily; only executed when debug is on
+
+        #df['smoothed_deriv'].hist()
+        #plt.show()
+
+        ax = df[[value_col, 'snr']].plot(figsize=(20,3), secondary_y='snr')
+        ax.right_ax.axhline(y=THRESHOLD_NOISE, color='gray', linestyle='--', linewidth=2)
+        plt.title("Signal-Noise Ratio (SNR)")
+        plt.show()
+
+        ax = df[[value_col, 'noise_flag']].plot(figsize=(20,3), secondary_y='noise_flag')
+        ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
+        plt.title("Noise Flag")
+        plt.show()
+
+        ax = df[[value_col, 'smoothed']].plot(figsize=(20,3), secondary_y='smoothed')
+        ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
+        plt.title("Smoothed")
+        plt.show()
+
+        ax = df[[value_col, 'smoothed_std']].plot(figsize=(20,3), secondary_y='smoothed_std')
+        ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
+        plt.title("Smoothed Std")
+        plt.show()
+
+        ax = df[[value_col, 'flat_flag']].plot(figsize=(20,3), secondary_y='flat_flag')
+        ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
+        plt.title("Flat Flag")
+        plt.show()
+
+        ax = df[[value_col, 'smoothed_deriv']].plot(figsize=(20,3), secondary_y='smoothed_deriv')
+        ax.right_ax.axhline(y=derivative_limit, color='gray', linestyle='--', linewidth=2)  # draw the IQR-scaled limit that trend_flag is classified against, not the raw fraction
+        ax.right_ax.axhline(y=-derivative_limit, color='gray', linestyle=':', linewidth=2)
+        plt.title("Smoothed Derivative")
+        plt.show()
+
+        ax = df[[value_col, 'trend_flag']].plot(figsize=(20,3), secondary_y='trend_flag')
+        ax.right_ax.axhline(y=0, color='gray', linestyle='--', linewidth=2)
+        plt.title("Trend Flag")
+        plt.show()
 
 
     return df
\ No newline at end of file
diff --git a/tests/test_debug.py b/tests/test_debug.py
new file mode 100644
index 0000000..b5fce4f
--- /dev/null
+++ 
b/tests/test_debug.py
@@ -0,0 +1,101 @@
+"""
+Tests for debug functionality.
+
+These tests verify that the debug process works as expected.
+
+This is work in progress.
+"""
+
+import pytest
+import numpy as np
+import pandas as pd
+import pytrendy as pt
+from pytrendy.io.results_pytrendy import PyTrendyResults
+from conftest import assert_segments_match
+import matplotlib.pyplot as plt
+import matplotlib
+
+def compare_debug(monkeypatch: pytest.MonkeyPatch, df: pd.DataFrame, date_col: str='date', value_col: str='gradual') -> tuple[PyTrendyResults, PyTrendyResults]:
+    """Run detect_trends on the supplied dataframe in both debug and normal mode and return both results for later comparison.
+
+    Args:
+        monkeypatch (pytest.MonkeyPatch):
+            Monkeypatch for suppressing plotting.
+        df (pd.DataFrame):
+            Pandas dataframe containing the trend we are testing. Must contain date_col and value_col.
+        date_col (str, optional):
+            The date column, forwarded to detect_trends.
+            Defaults to 'date'.
+        value_col (str, optional):
+            The value column, forwarded to detect_trends.
+            Defaults to 'gradual'.
+
+    Returns:
+        tuple[PyTrendyResults, PyTrendyResults]: The Pytrendy results with and without debug mode activated, respectively.
+    """
+
+    monkeypatch.setattr(plt, "show", lambda *a, **k: plt.close("all"))  # close figures instead of displaying them
+
+    results_debug = pt.detect_trends(
+        df,
+        date_col=date_col,
+        value_col=value_col,
+        plot=False,
+        debug=True,
+        method_params=dict(is_abrupt_padded=False)
+    )
+
+    results_no_debug = pt.detect_trends(
+        df,
+        date_col=date_col,
+        value_col=value_col,
+        plot=False,
+        debug=False,
+        method_params=dict(is_abrupt_padded=False)
+    )
+
+    return results_debug, results_no_debug
+
+
+class TestDebug:
+    """Test cases for the debug mode of detect_trends."""
+
+    @pytest.mark.core
+    def test_debug_mode_equivalency(self, monkeypatch):
+        """Test that the series_synthetic data produces identical outputs when in debug mode vs when not in debug mode."""
+        df = pt.load_data('series_synthetic')
+        results_debug, results_no_debug = compare_debug(monkeypatch, df)
+        assert_segments_match(results_debug.segments, results_no_debug.segments)
+
+    @pytest.mark.core
+    def test_debug_mode_equivalency_noise(self, monkeypatch):
+        """Test that the series_synthetic data with added Gaussian noise produces identical outputs when in debug mode vs when not in debug mode."""
+        df = pt.load_data('series_synthetic')
+        np.random.seed(42) # Deterministic Testing
+        df['gradual'] += np.random.normal(0, 10, size=len(df))
+        results_debug, results_no_debug = compare_debug(monkeypatch, df)
+        assert_segments_match(results_debug.segments, results_no_debug.segments)
+
+    @pytest.mark.core
+    def test_debug_mode_plots(self, monkeypatch):
+        """
+        Test that the correct number of plots are created.
+        We use monkeypatch to replace plt.show with a fake function that records calls.
+        """
+
+        show_calls = []
+        def fake_show(*args, **kwargs):
+            show_calls.append((args, kwargs))
+            plt.close("all")
+        monkeypatch.setattr(plt, 'show', fake_show)
+
+        df = pt.load_data('series_synthetic')
+        _ = pt.detect_trends(
+            df,
+            date_col='date',
+            value_col='gradual',
+            plot=False,
+            debug=True,
+            method_params=dict(is_abrupt_padded=False)
+        )
+        assert len(show_calls) == 7  # one plt.show() per debug figure drawn by process_signals
\ No newline at end of file
diff --git a/tests/tests_crashes_edgecases/data/TESTDATA.md b/tests/tests_crashes_edgecases/data/TESTDATA.md
index 4c23e6f..8b8cfed 100644
--- a/tests/tests_crashes_edgecases/data/TESTDATA.md
+++ b/tests/tests_crashes_edgecases/data/TESTDATA.md
@@ -1,6 +1,7 @@
-This is test data that were initially derived from random noise generation for quick manual testing,
-however they were found to crash or break logic of the algorithm in new and interesting ways.
+This folder contains test data that was found to break the logic of the algorithm in new and interesting ways.
+Some of this came from random noise generation for quick manual testing, some from synthetic 'hand drawn' data.
 As such, they were backed up here and immortalised for a never ending cycle of automated testing.
 
 - noisy_crashes.csv: edge cases that were found to crash pytrendy (either by hanging up, or throwing execution errors)
-- noisy_edgecases.csv: would make trends get detected in weird unexpected way, or plot visualisation bugs, etc...
\ No newline at end of file
+- noisy_edgecases.csv: would make trends get detected in weird unexpected way, or plot visualisation bugs, etc...
+- low_value_series.csv: a hand drawn series that is in the domain [0, 1], which was found to break the algorithm.
\ No newline at end of file diff --git a/tests/tests_crashes_edgecases/data/low_value_series.csv b/tests/tests_crashes_edgecases/data/low_value_series.csv new file mode 100644 index 0000000..531b5a8 --- /dev/null +++ b/tests/tests_crashes_edgecases/data/low_value_series.csv @@ -0,0 +1,366 @@ +,date,trend +0,2000-01-01,0.85836195432759 +1,2000-01-02,0.8799735953390033 +2,2000-01-03,0.8980311307126388 +3,2000-01-04,0.9063256383518135 +4,2000-01-05,0.9114487165995389 +5,2000-01-06,0.9243783902723698 +6,2000-01-07,0.9356864733513829 +7,2000-01-08,0.9405225444143619 +8,2000-01-09,0.9475924880652846 +9,2000-01-10,0.9518401943995332 +10,2000-01-11,0.9534857022587465 +11,2000-01-12,0.9550211906971404 +12,2000-01-13,0.9566666985563539 +13,2000-01-14,0.9592019286882815 +14,2000-01-15,0.9592019286882815 +15,2000-01-16,0.9592019286882815 +16,2000-01-17,0.9592019286882815 +17,2000-01-18,0.9592019286882815 +18,2000-01-19,0.9575659877352263 +19,2000-01-20,0.9575659877352263 +20,2000-01-21,0.9567384503525405 +21,2000-01-22,0.9550929424933271 +22,2000-01-23,0.9544089087030145 +23,2000-01-24,0.9529069044361744 +24,2000-01-25,0.9492619131898935 +25,2000-01-26,0.9441914529260382 +26,2000-01-27,0.9407808508806337 +27,2000-01-28,0.9371502099935902 +28,2000-01-29,0.9280664325963627 +29,2000-01-30,0.9221636514967425 +30,2000-01-31,0.9124293244807562 +31,2000-02-01,0.9028576348694596 +32,2000-02-02,0.893285945258163 +33,2000-02-03,0.8790695227070517 +34,2000-02-04,0.8685746266514872 +35,2000-02-05,0.8656567202732308 +36,2000-02-06,0.8514402977221196 +37,2000-02-07,0.8476039683526744 +38,2000-02-08,0.8211323390128866 +39,2000-02-09,0.8097046696068958 +40,2000-02-10,0.8077099696729074 +41,2000-02-11,0.8000086102155424 +42,2000-02-12,0.7895280645192151 +43,2000-02-13,0.7855386646512384 +44,2000-02-14,0.7778373051938734 +45,2000-02-15,0.7692127392922403 +46,2000-02-16,0.760578606484449 +47,2000-02-17,0.7556660001722043 +48,2000-02-18,0.7508921139992538 +49,2000-02-19,0.7469027141312771 
+50,2000-02-20,0.7452667731782219 +51,2000-02-21,0.7412104049671376 +52,2000-02-22,0.7406459575038028 +53,2000-02-23,0.7371827374745281 +54,2000-02-24,0.7364652195126618 +55,2000-02-25,0.7313995427018857 +56,2000-02-26,0.7258603040362777 +57,2000-02-27,0.7192160877093957 +58,2000-02-28,0.7107015412285821 +59,2000-02-29,0.709419575803381 +60,2000-03-01,0.701679948721383 +61,2000-03-02,0.693940321639385 +62,2000-03-03,0.6819625551292967 +63,2000-03-04,0.6751413510384877 +64,2000-03-05,0.6575765113320003 +65,2000-03-06,0.648320529623925 +66,2000-03-07,0.6336161948587447 +67,2000-03-08,0.6252786361418581 +68,2000-03-09,0.6100385546318177 +69,2000-03-10,0.6019832196465985 +70,2000-03-11,0.5996584614501517 +71,2000-03-12,0.5961234896246903 +72,2000-03-13,0.5933251695734116 +73,2000-03-14,0.5918088149473342 +74,2000-03-15,0.5890104948960556 +75,2000-03-16,0.5859825690969798 +76,2000-03-17,0.5843466281439246 +77,2000-03-18,0.5843466281439246 +78,2000-03-19,0.5843466281439246 +79,2000-03-20,0.5851311144488984 +80,2000-03-21,0.5859251676600304 +81,2000-03-22,0.5875467582538483 +82,2000-03-23,0.5883742956365341 +83,2000-03-24,0.589206616472299 +84,2000-03-25,0.5908664746907497 +85,2000-03-26,0.5933251695734116 +86,2000-03-27,0.5941287896907019 +87,2000-03-28,0.5947649889502233 +88,2000-03-29,0.5970753967874329 +89,2000-03-30,0.6002994441627522 +90,2000-03-31,0.6040496713767735 +91,2000-04-01,0.6060874223884738 +92,2000-04-02,0.613267385460216 +93,2000-04-03,0.6194141226668708 +94,2000-04-04,0.6232408851301577 +95,2000-04-05,0.6252834195949372 +96,2000-04-06,0.6318176165009998 +97,2000-04-07,0.6385192342648311 +98,2000-04-08,0.6475360433189511 +99,2000-04-09,0.6500904072631951 +100,2000-04-10,0.6566915725123652 +101,2000-04-11,0.6663876319037186 +102,2000-04-12,0.675241803553149 +103,2000-04-13,0.6980684416466559 +104,2000-04-14,0.7047030910673797 +105,2000-04-15,0.716106843207975 +106,2000-04-16,0.7387421431783175 +107,2000-04-17,0.751078668669339 
+108,2000-04-18,0.7643527509638658 +109,2000-04-19,0.7757565031044611 +110,2000-04-20,0.7795880490208271 +111,2000-04-21,0.7891501717259656 +112,2000-04-22,0.7996211505161346 +113,2000-04-23,0.8100921293063036 +114,2000-04-24,0.8148612320261751 +115,2000-04-25,0.8211227721067285 +116,2000-04-26,0.8240358950319057 +117,2000-04-27,0.8326365436681432 +118,2000-04-28,0.8442029332134281 +119,2000-04-29,0.8502587848115798 +120,2000-04-30,0.8602370679346006 +121,2000-05-01,0.8730949898112449 +122,2000-05-02,0.8795765687334373 +123,2000-05-03,0.885154075023678 +124,2000-05-04,0.8894304820764013 +125,2000-05-05,0.8975432184985698 +126,2000-05-06,0.9069044361743855 +127,2000-05-07,0.9115013345834091 +128,2000-05-08,0.9183895070173257 +129,2000-05-09,0.9223789068853023 +130,2000-05-10,0.9293196973030892 +131,2000-05-11,0.9355381863059306 +132,2000-05-12,0.9426176968630114 +133,2000-05-13,0.9496972074200924 +134,2000-05-14,0.9518354109464541 +135,2000-05-15,0.9589866732997216 +136,2000-05-16,0.9652434299271958 +137,2000-05-17,0.9688071024711319 +138,2000-05-18,0.9742841562467114 +139,2000-05-19,0.9757096252642857 +140,2000-05-20,0.9811101437905996 +141,2000-05-21,0.9840089163565394 +142,2000-05-22,0.9880030996775953 +143,2000-05-23,0.9894764032259608 +144,2000-05-24,0.9894764032259608 +145,2000-05-25,0.9901891377347479 +146,2000-05-26,0.9901891377347479 +147,2000-05-27,0.9901891377347479 +148,2000-05-28,0.9901891377347479 +149,2000-05-29,0.9896868751614415 +150,2000-05-30,0.9891846125881352 +151,2000-05-31,0.9884670946262688 +152,2000-06-01,0.9854343853741139 +153,2000-06-02,0.9814689027715328 +154,2000-06-03,0.9786131812833048 +155,2000-06-04,0.9752312799563749 +156,2000-06-05,0.9678264945899145 +157,2000-06-06,0.9602542883656854 +158,2000-06-07,0.9561692194361265 +159,2000-06-08,0.9499028959024941 +160,2000-06-09,0.9449568054186956 +161,2000-06-10,0.9335291360127048 +162,2000-06-11,0.9214030824571642 +163,2000-06-12,0.9147588661302821 +164,2000-06-13,0.9060960326040162 
+165,2000-06-14,0.9023075377653621 +166,2000-06-15,0.8967587321935959 +167,2000-06-16,0.8937499402068365 +168,2000-06-17,0.8882154849943077 +169,2000-06-18,0.8823844556908741 +170,2000-06-19,0.8794665493126178 +171,2000-06-20,0.876266419202694 +172,2000-06-21,0.8704353898992605 +173,2000-06-22,0.8655227835870158 +174,2000-06-23,0.8606101772747711 +175,2000-06-24,0.8562094004419911 +176,2000-06-25,0.8503735876854784 +177,2000-06-26,0.847455681307222 +178,2000-06-27,0.8397830225683316 +179,2000-06-28,0.834875199709166 +180,2000-06-29,0.8301013135362155 +181,2000-06-30,0.8196207678398882 +182,2000-07-01,0.8119194083825232 +183,2000-07-02,0.8071455222095727 +184,2000-07-03,0.7956030499296832 +185,2000-07-04,0.7888248969165862 +186,2000-07-05,0.7831182373932094 +187,2000-07-06,0.7782056310809647 +188,2000-07-07,0.7677107350254001 +189,2000-07-08,0.7590766022176089 +190,2000-07-09,0.7513752427602438 +191,2000-07-10,0.7466013565872932 +192,2000-07-11,0.7416935337281276 +193,2000-07-12,0.7365321878557693 +194,2000-07-13,0.7256211313823223 +195,2000-07-14,0.7203688999014609 +196,2000-07-15,0.7054110421230878 +197,2000-07-16,0.6939164043739895 +198,2000-07-17,0.6822639126732806 +199,2000-07-18,0.678853310627876 +200,2000-07-19,0.667808317468214 +201,2000-07-20,0.6567107063246816 +202,2000-07-21,0.6462779951591455 +203,2000-07-22,0.6412792866914768 +204,2000-07-23,0.6326068862590527 +205,2000-07-24,0.627608177791384 +206,2000-07-25,0.619557626259244 +207,2000-07-26,0.6173428874836167 +208,2000-07-27,0.6108804423737407 +209,2000-07-28,0.6070249791919791 +210,2000-07-29,0.6045710677623963 +211,2000-07-30,0.6025237498445378 +212,2000-07-31,0.5987639557243583 +213,2000-08-01,0.595759947190678 +214,2000-08-02,0.5922297588182958 +215,2000-08-03,0.5908616912376706 +216,2000-08-04,0.5877524467362499 +217,2000-08-05,0.5854181216336449 +218,2000-08-06,0.5845905842509591 +219,2000-08-07,0.5838730662890927 +220,2000-08-08,0.5830550958125652 +221,2000-08-09,0.5830550958125652 
+222,2000-08-10,0.5830550958125652 +223,2000-08-11,0.5830550958125652 +224,2000-08-12,0.5830550958125652 +225,2000-08-13,0.5830550958125652 +226,2000-08-14,0.5847245209371741 +227,2000-08-15,0.5854803065236733 +228,2000-08-16,0.5879150841409396 +229,2000-08-17,0.589426655313938 +230,2000-08-18,0.591842299118888 +231,2000-08-19,0.5932916854018578 +232,2000-08-20,0.5964726816994652 +233,2000-08-21,0.5996201938255188 +234,2000-08-22,0.6009786944999856 +235,2000-08-23,0.6038296325351344 +236,2000-08-24,0.6110000287007185 +237,2000-08-25,0.6169219436126551 +238,2000-08-26,0.6228151578061171 +239,2000-08-27,0.629449807226841 +240,2000-08-28,0.635261702717958 +241,2000-08-29,0.6402508442794684 +242,2000-08-30,0.6452399858409789 +243,2000-08-31,0.6480909238761277 +244,2000-09-01,0.6537927999464254 +245,2000-09-02,0.6645460024682618 +246,2000-09-03,0.6722282281133104 +247,2000-09-04,0.6813024386043798 +248,2000-09-05,0.6862915801658902 +249,2000-09-06,0.703617247218422 +250,2000-09-07,0.7083863499382934 +251,2000-09-08,0.7282998651066231 +252,2000-09-09,0.7462473810594392 +253,2000-09-10,0.7585839065504606 +254,2000-09-11,0.7700020090502933 +255,2000-09-12,0.7738335549666593 +256,2000-09-13,0.789915524218623 +257,2000-09-14,0.8134740306332335 +258,2000-09-15,0.8230122360729764 +259,2000-09-16,0.8268437819893425 +260,2000-09-17,0.8419881944378007 +261,2000-09-18,0.8552622767323276 +262,2000-09-19,0.8668286662776125 +263,2000-09-20,0.8735733351191558 +264,2000-09-21,0.8846374620911344 +265,2000-09-22,0.8959120609985937 +266,2000-09-23,0.9025132262477638 +267,2000-09-24,0.9071723095468156 +268,2000-09-25,0.912333655419174 +269,2000-09-26,0.9161556344293819 +270,2000-09-27,0.9191405091507457 +271,2000-09-28,0.9250576406096033 +272,2000-09-29,0.9276120045538473 +273,2000-09-30,0.9356529891798291 +274,2000-10-01,0.9393123307853474 +275,2000-10-02,0.9476881571268667 +276,2000-10-03,0.9546289475446535 +277,2000-10-04,0.9586661819434213 +278,2000-10-05,0.9622298544873573 
+279,2000-10-06,0.968591847082572 +280,2000-10-07,0.9779434978522296 +281,2000-10-08,0.9829039386952654 +282,2000-10-09,0.9844489940398174 +283,2000-10-10,0.9885101457039808 +284,2000-10-11,0.9925904311804605 +285,2000-10-12,0.9959579821481531 +286,2000-10-13,0.9967137677346523 +287,2000-10-14,0.9991868129765515 +288,2000-10-15,1.0 +289,2000-10-16,1.0 +290,2000-10-17,1.0 +291,2000-10-18,0.9993590172873994 +292,2000-10-19,0.9987180345747989 +293,2000-10-20,0.9980340007844863 +294,2000-10-21,0.9980340007844863 +295,2000-10-22,0.9974504195088351 +296,2000-10-23,0.9968094367962345 +297,2000-10-24,0.9961254030059219 +298,2000-10-25,0.9961254030059219 +299,2000-10-26,0.99538396777866 +300,2000-10-27,0.9938676131525825 +301,2000-10-28,0.9914806700661073 +302,2000-10-29,0.9876921752274532 +303,2000-10-30,0.9846116314445071 +304,2000-10-31,0.9822151214518737 +305,2000-11-01,0.977527337434347 +306,2000-11-02,0.975193012331742 +307,2000-11-03,0.9711988290106862 +308,2000-11-04,0.9653869335195691 +309,2000-11-05,0.9554230007557856 +310,2000-11-06,0.9511131095315086 +311,2000-11-07,0.9435552536665168 +312,2000-11-08,0.9383891243410794 +313,2000-11-09,0.9318979785127288 +314,2000-11-10,0.9275689534761353 +315,2000-11-11,0.925574253542147 +316,2000-11-12,0.9176767725085385 +317,2000-11-13,0.9097649411156926 +318,2000-11-14,0.8991074076554383 +319,2000-11-15,0.8952710782859931 +320,2000-11-16,0.8885168425382916 +321,2000-11-17,0.8750992566513915 +322,2000-11-18,0.8703253704784409 +323,2000-11-19,0.8617151549360452 +324,2000-11-20,0.8493499287265491 +325,2000-11-21,0.8425956929788476 +326,2000-11-22,0.8331483731476078 +327,2000-11-23,0.8245381576052121 +328,2000-11-24,0.8103217350541009 +329,2000-11-25,0.7961005290499106 +330,2000-11-26,0.7884278703110201 +331,2000-11-27,0.7732738909564036 +332,2000-11-28,0.7656012322175132 +333,2000-11-29,0.7552211390358472 +334,2000-11-30,0.7466109234934515 +335,2000-12-01,0.7350014828704545 +336,2000-12-02,0.7164512518296708 
+337,2000-12-03,0.7032776220498053 +338,2000-12-04,0.6998670200044008 +339,2000-12-05,0.6895825958843169 +340,2000-12-06,0.68382331837707 +341,2000-12-07,0.6749643632745607 +342,2000-12-08,0.6661054081720512 +343,2000-12-09,0.6609871133774049 +344,2000-12-10,0.6516689467793011 +345,2000-12-11,0.6488132252910731 +346,2000-12-12,0.6410544643967587 +347,2000-12-13,0.6377730155844902 +348,2000-12-14,0.6345728854745664 +349,2000-12-15,0.6300573057678878 +350,2000-12-16,0.6274120562151406 +351,2000-12-17,0.6247668066623935 +352,2000-12-18,0.6226860045729812 +353,2000-12-19,0.6213227204454351 +354,2000-12-20,0.6181321572416696 +355,2000-12-21,0.6167306054894908 +356,2000-12-22,0.6160130875276244 +357,2000-12-23,0.6146115357754456 +358,2000-12-24,0.6138940178135792 +359,2000-12-25,0.613176499851713 +360,2000-12-26,0.6124350646244511 +361,2000-12-27,0.6115979603356071 +362,2000-12-28,0.6115979603356071 +363,2000-12-29,0.6115979603356071 +364,2000-12-30,0.6115979603356071 diff --git a/tests/tests_crashes_edgecases/test_uncommon_values.py b/tests/tests_crashes_edgecases/test_uncommon_values.py new file mode 100644 index 0000000..439a9ea --- /dev/null +++ b/tests/tests_crashes_edgecases/test_uncommon_values.py @@ -0,0 +1,33 @@ +""" +Tests for edge case scenarios caused by unusual data in trend detection algorithm. + +These tests verify that the trend detection algorithm handles situations where the data is doing something +we wouldn't typically expect. 
+
+Reference: tests/tests_crashes_edgecases/data/TESTDATA.md
+"""
+
+from pathlib import Path
+
+import pandas as pd
+import pytrendy as pt
+from conftest import assert_segments_in_a_haystack
+
+# Resolve fixtures relative to this file so the test passes from any working directory.
+DATA_DIR = Path(__file__).parent / 'data'
+
+
+class TestUncommonValues:
+    """Test cases for Scenarios with Uncommon or Unusual Values"""
+
+    def test_low_value_series(self):
+        """Test that algorithm handles a low/normalised value series (data in range [0, 1]) reasonably."""
+        df = pd.read_csv(DATA_DIR / 'low_value_series.csv')
+        results = pt.detect_trends(df, date_col='date', value_col='trend', plot=False)
+
+        expected_segments = [
+            {'direction': 'Up', 'start': '2000-01-02', 'end': '2000-01-14'},
+            {'direction': 'Flat', 'start': '2000-01-15', 'end': '2000-01-18'},
+            {'direction': 'Down', 'start': '2000-01-19', 'end': '2000-03-17'},
+        ]
+        assert_segments_in_a_haystack(results.segments, expected_segments)
\ No newline at end of file