diff --git a/docs/conf.py b/docs/conf.py index cfd0e1b..85e3ee9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,7 +6,7 @@ project = "profinder" author = "Jesse Cusack" -release = "0.2.1" +release = "0.2.2" extensions = [ "myst_nb", "sphinx.ext.autodoc", diff --git a/docs/index.md b/docs/index.md index d905b1f..fbfd559 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,41 +32,43 @@ Currently the package contains just one algorithm for identifying profiles, call from profinder import get_example_data, find_profiles ``` -The function operates on pressure or depth time series data with no explicit need for time information (it assumes uniformly spaced data) e.g. +The function operates on pressure or depth time series data with no explicit need for time information. It assumes uniformly spaced data, but will still mostly work in the case that data are not uniform. The pressure data in the examples come from a 32 Hz RBR Concerto profiling in an icy Alaskan fjord. You can get the example data using: ```{code-cell} pressure = get_example_data() print(pressure[:10]) ``` -It will identify up and down pairs in the data. Each up or down part in a profile is defined by start and end indexes, `(down_start, down_end, up_start, up_end)`. If you are not applying a speed threshold, then down_end and up_start will be identical. +The find_profiles function will identify up and down pairs in the data. Each up or down part in a profile is defined by start and end indexes, `(down_start, down_end, up_start, up_end)`. The down_end and up_start will sometimes be identical for sharp profiles, or if speed thresholding options are not selected. ```{code-cell} segments = find_profiles(pressure) print(segments) ``` -The default parameters may need to be changed to find the profiles properly. Note that some smaller profiles are not identified in the following example. +The default parameters may need to be changed to find the profiles properly. Note that some smaller profiles are not identified in the following example. Zoom in to see the different between down end and up start locations. ```{code-cell} :tags: [hide-input] segments = np.asarray(segments) start = segments[:, 0] -peak = segments[:, 1] +down_end = segments[:, 1] +up_start = segments[:, 2] end = segments[:, 3] x = np.arange(0, pressure.size) cut = slice(0, None, 8) # reduce data for plotting fig = go.Figure() fig.add_trace(go.Scatter(x=x[cut], y=pressure[cut], mode="lines", name="pressure")) -fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="start", marker_color="green")) -fig.add_trace(go.Scatter(x=x[peak], y=pressure[peak], mode="markers", name="peak", marker_color="blue")) -fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="end", marker_color="red")) +fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="down start", marker_color="green")) +fig.add_trace(go.Scatter(x=x[down_end], y=pressure[down_end], mode="markers", name="down end", marker_color="blue")) +fig.add_trace(go.Scatter(x=x[up_start], y=pressure[up_start], mode="markers", name="up start", marker_color="darkviolet")) +fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="up end", marker_color="red")) fig.update_layout(yaxis_title="Pressure (dbar)", xaxis_title="Data index (-)", yaxis_autorange="reversed") HTML(fig.to_html(include_plotlyjs='cdn')) ``` -`find_profiles` accepts a number of arguments for fine-tuning profile identification. Underneath the hood it is applying `scipy` functions [`find_peaks`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html) and [`savgol_filter`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html). Consequently, most of the arguments to `find_profiles` alter the behaviour of these functions and it is helpful to be familiar with their operation. +`find_profiles` accepts a number of arguments for fine-tuning profile identification. Underneath the hood it is applying `scipy` functions [`find_peaks`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.find_peaks.html) and [`savgol_filter`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html). Consequently, a lot of the arguments to `find_profiles` alter the behaviour of these functions and it is helpful to be familiar with their operation. The profile finding algorithm roughly follows the steps below. The action of each step is modified by a set of arguments. @@ -88,19 +90,23 @@ segments = find_profiles(pressure, apply_smoothing=True, peaks_kwargs=peaks_kwar :tags: [hide-input] segments = np.asarray(segments) start = segments[:, 0] -peak = segments[:, 1] +down_end = segments[:, 1] +up_start = segments[:, 2] end = segments[:, 3] fig = go.Figure() fig.add_trace(go.Scatter(x=x[cut], y=pressure[cut], mode="lines", name="pressure")) -fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="start", marker_color="green")) -fig.add_trace(go.Scatter(x=x[peak], y=pressure[peak], mode="markers", name="peak", marker_color="blue")) -fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="end", marker_color="red")) +fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="down start", marker_color="green")) +fig.add_trace(go.Scatter(x=x[down_end], y=pressure[down_end], mode="markers", name="down end", marker_color="blue")) +fig.add_trace(go.Scatter(x=x[up_start], y=pressure[up_start], mode="markers", name="up start", marker_color="darkviolet")) +fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="up end", marker_color="red")) fig.update_layout(yaxis_title="Pressure (dbar)", xaxis_title="Data index (-)", yaxis_autorange="reversed") HTML(fig.to_html(include_plotlyjs='cdn')) ``` -We can also set a minimum pressure threshold for profiles to start and end. Note that start and end pressures may not match this minimum exactly when applying smoothing. If it is important that the restriction be met exactly, add some buffer into the value (e.g. choose 3.5 dbar instead of 3 dbar). You can also increase the `run_length` to make sure that the instrument is going up/down for a sufficient number of points to consider the profile started. This removes some of the flatter regions inbetween profiles. +We can also set a minimum pressure threshold for profiles to start and end. Note that start and end pressures may not match this minimum exactly when applying smoothing. If it is important that the restriction be met exactly while smoothing, add some buffer into the value (e.g. choose 3.5 dbar instead of 3 dbar) or don't smooth. + +You can also increase the `run_length` to make sure that the instrument is going up/down for a sufficient number of points to consider the profile started and ended. This argument is coupled with `min_pressure_change`, which defines the minimum pressure changes between each point in the run. For example, if your profile starts with pressure values [1, 2, 5, 8, ...] and you have a run length of 2 and minimum pressure change of 2, the profile would start at the value 2, because the although the pressure is always increasing, the change between 1 and 2 does not exceed the minimum. This removes some of the flatter regions in data. ```{code-cell} segments = find_profiles( @@ -117,14 +123,16 @@ segments = find_profiles( :tags: [hide-input] segments = np.asarray(segments) start = segments[:, 0] -peak = segments[:, 1] +down_end = segments[:, 1] +up_start = segments[:, 2] end = segments[:, 3] fig = go.Figure() fig.add_trace(go.Scatter(x=x[cut], y=pressure[cut], mode="lines", name="pressure")) -fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="start", marker_color="green")) -fig.add_trace(go.Scatter(x=x[peak], y=pressure[peak], mode="markers", name="peak", marker_color="blue")) -fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="end", marker_color="red")) +fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="down start", marker_color="green")) +fig.add_trace(go.Scatter(x=x[down_end], y=pressure[down_end], mode="markers", name="down end", marker_color="blue")) +fig.add_trace(go.Scatter(x=x[up_start], y=pressure[up_start], mode="markers", name="up start", marker_color="darkviolet")) +fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="up end", marker_color="red")) fig.update_layout(yaxis_title="Pressure (dbar)", xaxis_title="Data index (-)", yaxis_autorange="reversed") HTML(fig.to_html(include_plotlyjs='cdn')) ``` @@ -145,15 +153,17 @@ segments = find_profiles(pressure, peaks_kwargs=peaks_kwargs) :tags: [hide-input] segments = np.asarray(segments) start = segments[:, 0] -peak = segments[:, 1] +down_end = segments[:, 1] +up_start = segments[:, 2] end = segments[:, 3] x = np.arange(0, pressure.size) fig = go.Figure() fig.add_trace(go.Scatter(x=x, y=pressure, mode="markers", name="pressure")) -fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="start", marker_color="green")) -fig.add_trace(go.Scatter(x=x[peak], y=pressure[peak], mode="markers", name="peak", marker_color="blue")) -fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="end", marker_color="red")) +fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="down start", marker_color="green")) +fig.add_trace(go.Scatter(x=x[down_end], y=pressure[down_end], mode="markers", name="down end", marker_color="blue")) +fig.add_trace(go.Scatter(x=x[up_start], y=pressure[up_start], mode="markers", name="up start", marker_color="darkviolet")) +fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="up end", marker_color="red")) fig.update_layout(yaxis_title="Pressure (dbar)", xaxis_title="Data index (-)", yaxis_autorange="reversed") HTML(fig.to_html(include_plotlyjs='cdn')) ``` @@ -296,15 +306,17 @@ segments = find_profiles(pressure, peaks_kwargs=peaks_kwargs, missing="drop") :tags: [hide-input] segments = np.asarray(segments) start = segments[:, 0] -peak = segments[:, 1] +down_end = segments[:, 1] +up_start = segments[:, 2] end = segments[:, 3] x = np.arange(0, pressure.size) fig = go.Figure() fig.add_trace(go.Scatter(x=x, y=pressure, mode="markers", name="pressure")) -fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="start", marker_color="green")) -fig.add_trace(go.Scatter(x=x[peak], y=pressure[peak], mode="markers", name="peak", marker_color="blue")) -fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="end", marker_color="red")) +fig.add_trace(go.Scatter(x=x[start], y=pressure[start], mode="markers", name="down start", marker_color="green")) +fig.add_trace(go.Scatter(x=x[down_end], y=pressure[down_end], mode="markers", name="down end", marker_color="blue")) +fig.add_trace(go.Scatter(x=x[up_start], y=pressure[up_start], mode="markers", name="up start", marker_color="darkviolet")) +fig.add_trace(go.Scatter(x=x[end], y=pressure[end], mode="markers", name="up end", marker_color="red")) fig.update_layout(yaxis_title="Pressure (dbar)", xaxis_title="Data index (-)", yaxis_autorange="reversed") HTML(fig.to_html(include_plotlyjs='cdn')) ``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8612e40..0fc2df8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "profinder" -version = "0.2.1" +version = "0.2.2" description = "Functions to find profiles in ocean pressure data" readme = "README.md" authors = [ diff --git a/src/profinder/__init__.py b/src/profinder/__init__.py index 48df3f2..51ea295 100644 --- a/src/profinder/__init__.py +++ b/src/profinder/__init__.py @@ -1,8 +1,8 @@ from .example_data import get_example_data, synthetic_glider_pressure -from .profinder import find_profiles, find_yo +from .profinder import find_profiles, find_segment __all__ = [ - "find_yo", + "find_segment", "find_profiles", "get_example_data", "synthetic_glider_pressure", diff --git a/src/profinder/profinder.py b/src/profinder/profinder.py index 6562e8d..63bff04 100644 --- a/src/profinder/profinder.py +++ b/src/profinder/profinder.py @@ -11,7 +11,7 @@ FloatArray = NDArray[np.floating] ProfileTuple = Tuple[int, int, int, int] -__all__ = ["find_profiles", "find_yo"] +__all__ = ["find_profiles", "find_segment"] _default_peaks_kwargs = {"height": 25, "distance": 200, "width": 200, "prominence": 25} @@ -95,7 +95,7 @@ def _validate_pressure( return pressure -def _find_yo( +def _find_segment( pressure: ArrayLike, apply_min_pressure: bool = True, min_pressure: float = -1.0, @@ -180,7 +180,7 @@ def _find_profiles( troughs_kwargs: dict[str, Any], min_pressure: float = -1.0, run_length: int = 4, - min_pressure_change: float = 0.01, + min_pressure_change: float = 0.0, apply_speed_threshold: bool = False, time: Optional[ArrayLike] = None, velocity: Optional[ArrayLike] = None, @@ -221,12 +221,6 @@ def _find_profiles( List of (down_start, down_end, up_start, up_end) index tuples for each detected profile. If no speed threshold is applied the middle two indices will be identical (peak). - Notes - ----- - - The function can use Savitzky-Golay smoothing to reduce noise if smoothing=True. - - Profile start and end indices are refined to avoid long periods near the surface. - - Troughs are detected as local minima in the (optionally smoothed) pressure signal. - - Additional arguments for peak/trough detection can be passed via peaks_kwargs and troughs_kwargs. """ ndata = pressure.size @@ -266,19 +260,40 @@ def _find_profiles( while end - 1 > peak and pressure[end - 1] < min_pressure: end -= 1 + # Ensure a robust run_length of increasing pressure toward the peak + de = int(peak) + for i in range(max(int(start), 0), max(int(peak - run_length), int(start)) + 1): + # Find the latest window that ends at or before the peak + if ( + np.all(diffs[i : i + run_length] > min_pressure_change) + and (i + run_length) <= peak + ): + de = int(i + run_length) + # Ensure de is within [start+1, peak] + de = max(min(de, int(peak)), int(start + 1)) + + # Ensure a robust run_length of decreasing pressure away from the peak + us = int(peak) + for j in range(int(peak + run_length), int(end) + 1): + if np.all(diffs[j - run_length : j] < -min_pressure_change): + us = int(j - run_length) + break + # Ensure us within [peak, end-1] + us = min(max(int(us), int(peak)), int(max(int(peak), int(end - 1)))) + # (down_start, down_end, up_start, up_end) - profiles.append((int(start), int(peak), int(peak), int(end))) + profiles.append((int(start), int(de), int(us), int(end))) if not apply_speed_threshold: return profiles - def _refine_segment( + def refine_segment( segment_pressure: FloatArray, seg_time: Optional[ArrayLike], seg_velocity: Optional[ArrayLike], seg_direction: Literal["up", "down"], ) -> Tuple[int, int]: - return _find_yo( + return _find_segment( segment_pressure, apply_min_pressure=False, apply_speed_threshold=True, @@ -292,7 +307,7 @@ def _refine_segment( for idx, (ds, de, us, ue) in enumerate(profiles): # Down portion refinement if direction in ("down", "both"): - ds_off, de_off = _refine_segment( + ds_off, de_off = refine_segment( pressure[ds:de], None if time is None else np.asarray(time)[ds:de], None if velocity is None else np.asarray(velocity)[ds:de], @@ -304,7 +319,7 @@ def _refine_segment( # Up portion refinement if direction in ("up", "both"): - us_off, ue_off = _refine_segment( + us_off, ue_off = refine_segment( pressure[us:ue], None if time is None else np.asarray(time)[us:ue], None if velocity is None else np.asarray(velocity)[us:ue], @@ -319,7 +334,7 @@ def _refine_segment( return profiles -def find_yo( +def find_segment( pressure: ArrayLike, apply_smoothing: bool = False, window_length: int = 9, @@ -374,7 +389,7 @@ def find_yo( if not apply_min_pressure and not apply_speed_threshold: return 0, p.size - 1 - start, end = _find_yo( + start, end = _find_segment( p, apply_min_pressure=apply_min_pressure, min_pressure=min_pressure, @@ -450,12 +465,6 @@ def find_profiles( List of (down_start, down_end, up_start, up_end) index tuples for each detected profile. If no speed threshold is applied the middle two indices will be identical (peak). - Notes - ----- - - The function can use Savitzky-Golay smoothing to reduce noise if smoothing=True. - - Profile start and end indices are refined to avoid long periods near the surface. - - Troughs are detected as local minima in the (optionally smoothed) pressure signal. - - Additional arguments for peak/trough detection can be passed via peaks_kwargs and troughs_kwargs. """ if apply_speed_threshold and (time is None and velocity is None): diff --git a/uv.lock b/uv.lock index a3a04a2..4fbad54 100644 --- a/uv.lock +++ b/uv.lock @@ -207,7 +207,7 @@ wheels = [ [[package]] name = "profinder" -version = "0.2.1" +version = "0.2.2" source = { editable = "." } dependencies = [ { name = "numpy" },