diff --git a/docs/algorithms/spline.rst b/docs/algorithms/spline.rst index 714dbc69..1c22691e 100644 --- a/docs/algorithms/spline.rst +++ b/docs/algorithms/spline.rst @@ -582,16 +582,15 @@ Weighting: w_i = \frac {1} {1 + \exp{\left(\frac - {k (r_i - \sigma^-)} + {k (r_i - \sigma^- + offset)} {\sigma^-} \right)}} where :math:`r_i = y_i - v_i`, :math:`\sigma^-` is the standard deviation -of the negative values in the residual vector :math:`\mathbf r`, and :math:`k` -is the asymmetric coefficient (Note that the default value of :math:`k` is 0.5 in -pybaselines rather than 2 in the published version of the asPLS. pybaselines -uses the factor of 0.5 since it matches the results in Table 2 and Figure 5 -of the asPLS paper closer than the factor of 2 and fits noisy data much better). +of the negative values in the residual vector :math:`\mathbf r`, :math:`k` +is the asymmetric coefficient, and `offset` is the mean of the negative values +in the residual vector, :math:`\mu^-`, if ``alternate_weighting`` is True and +0 if ``alternate_weighting`` is False. .. plot:: :align: center @@ -844,13 +843,15 @@ Weighting: w_i = \frac{1}{2}\left( 1 - \frac - {10^t (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-} - {1 + \text{abs}[10^t (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-]} + {f(t) (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-} + {1 + \text{abs}[f(t) (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-]} \right) -where :math:`r_i = y_i - v_i`, :math:`t` is the iteration number, and +where :math:`r_i = y_i - v_i`, :math:`t` is the iteration number, :math:`\mu^-` and :math:`\sigma^-` are the mean and standard deviation, -respectively, of the negative values in the residual vector :math:`\mathbf r`. +respectively, of the negative values in the residual vector :math:`\mathbf r`, +and :math:`f(t)` is :math:`10^t` if ``alternate_weighting`` is False and :math:`\exp(t)` +if ``alternate_weighting`` is True. .. 
plot:: :align: center diff --git a/docs/algorithms/whittaker.rst b/docs/algorithms/whittaker.rst index 78cd3ff0..586b03b2 100644 --- a/docs/algorithms/whittaker.rst +++ b/docs/algorithms/whittaker.rst @@ -505,16 +505,15 @@ Weighting: w_i = \frac {1} {1 + \exp{\left(\frac - {k (r_i - \sigma^-)} + {k (r_i - \sigma^- + offset)} {\sigma^-} \right)}} where :math:`r_i = y_i - v_i`, :math:`\sigma^-` is the standard deviation -of the negative values in the residual vector :math:`\mathbf r`, and :math:`k` -is the asymmetric coefficient (Note that the default value of :math:`k` is 0.5 in -pybaselines rather than 2 in the published version of the asPLS. pybaselines -uses the factor of 0.5 since it matches the results in Table 2 and Figure 5 -of the asPLS paper closer than the factor of 2 and fits noisy data much better). +of the negative values in the residual vector :math:`\mathbf r`, :math:`k` +is the asymmetric coefficient, and `offset` is the mean of the negative values +in the residual vector, :math:`\mu^-`, if ``alternate_weighting`` is True and +0 if ``alternate_weighting`` is False. .. plot:: :align: center @@ -722,13 +721,15 @@ Weighting: w_i = \frac{1}{2}\left( 1 - \frac - {10^t (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-} - {1 + \text{abs}[10^t (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-]} + {f(t) (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-} + {1 + \text{abs}[f(t) (r_i - (-\mu^- + 2 \sigma^-))/\sigma^-]} \right) -where :math:`r_i = y_i - v_i`, :math:`t` is the iteration number, and +where :math:`r_i = y_i - v_i`, :math:`t` is the iteration number, :math:`\mu^-` and :math:`\sigma^-` are the mean and standard deviation, -respectively, of the negative values in the residual vector :math:`\mathbf r`. +respectively, of the negative values in the residual vector :math:`\mathbf r`, +and :math:`f(t)` is :math:`10^t` if ``alternate_weighting`` is False and :math:`\exp(t)` +if ``alternate_weighting`` is True. .. 
plot:: :align: center diff --git a/pybaselines/_weighting.py b/pybaselines/_weighting.py index ea97940f..da0496d8 100644 --- a/pybaselines/_weighting.py +++ b/pybaselines/_weighting.py @@ -312,7 +312,7 @@ def _iarpls(y, baseline, iteration): return weights, exit_early -def _aspls(y, baseline, asymmetric_coef): +def _aspls(y, baseline, asymmetric_coef=2., alternate_weighting=True): """ Weighting for the adaptive smoothness penalized least squares smoothing (aspls). @@ -322,9 +322,12 @@ def _aspls(y, baseline, asymmetric_coef): The measured data. baseline : numpy.ndarray, shape (N,) The calculated baseline. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). + weighting curve (ie. more step-like). Default is 2. + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the aspls paper. Returns ------- @@ -363,8 +366,9 @@ def _aspls(y, baseline, asymmetric_coef): exit_early = False std = _safe_std(neg_residual, ddof=1) # use dof=1 since sampling subset + shifted_residual = residual + neg_residual.mean() if alternate_weighting else residual # add a negative sign since expit performs 1/(1+exp(-input)) - weights = expit(-(asymmetric_coef / std) * (residual - std)) + weights = expit(-(asymmetric_coef / std) * (shifted_residual - std)) return weights, residual, exit_early @@ -582,7 +586,7 @@ def _brpls(y, baseline, beta): return weights, exit_early -def _lsrpls(y, baseline, iteration): +def _lsrpls(y, baseline, iteration, alternate_weighting=False): """ The weighting for the locally symmetric reweighted penalized least squares (lsrpls). @@ -595,6 +599,10 @@ def _lsrpls(y, baseline, iteration): iteration : int The iteration number. Should be 1-based, such that the first iteration is 1 instead of 0. 
+ alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. Returns ------- weights : numpy.ndarray, shape (N,) The calculated weights. exit_early : bool Designates if there is a potential error with the calculation such that no further iterations should be performed. + Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to False to use ``10^t`` and True to use ``exp(t)``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. 
""" + if alternate_weighting: + return _drpls(y, baseline, iteration) + residual = y - baseline neg_residual = residual[residual < 0] if neg_residual.size < 2: diff --git a/pybaselines/spline.py b/pybaselines/spline.py index 23b38911..df61f53f 100644 --- a/pybaselines/spline.py +++ b/pybaselines/spline.py @@ -916,7 +916,8 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord @_Algorithm._register(sort_keys=('weights', 'alpha')) def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2, - max_iter=100, tol=1e-3, weights=None, alpha=None, asymmetric_coef=0.5): + max_iter=100, tol=1e-3, weights=None, alpha=None, asymmetric_coef=2., + alternate_weighting=True): """ A penalized spline version of the asPLS algorithm. @@ -946,9 +947,21 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde An array of values that control the local value of `lam` to better fit peak and non-peak regions. If None (default), then the initial values will be an array with size equal to N and all values set to 1. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 0.5. + weighting curve (ie. more step-like). Default is 2, as used in the asPLS paper [1]_. + Note that a value of 4 results in the weighting scheme used in the NasPLS + (Non-sensitive-areas adaptive smoothness penalized least squares smoothing) algorithm + [2]_. + + .. versionadded:: 1.2.0 + + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the asPLS paper [1]_. + See the Notes section below for more details. + + .. 
versionadded:: 1.3.0 Returns ------- @@ -979,19 +992,24 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde Notes ----- - The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead - of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it - matches the results in Table 2 and Figure 5 of the asPLS paper closer than the - factor of 2 and fits noisy data much better. + The weighting scheme as written in the asPLS paper [1]_ does not reproduce the paper's + results for noisy data. By subtracting the mean of negative residuals (ie. the negative + values of ``data - baseline``) within the weighting scheme, the asPLS paper's results can + be correctly replicated (see https://github.com/derb12/pybaselines/issues/40 for more + details). Given this discrepancy, the default for ``pspline_aspls`` is to also subtract the + negative residuals within the weighting. To use the weighting scheme as it is written in + the asPLS paper, set ``alternate_weighting`` to False. References ---------- - Zhang, F., et al. Baseline correction for infrared spectra using - adaptive smoothness parameter penalized least squares method. - Spectroscopy Letters, 2020, 53(3), 222-233. - - Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary - Reviews: Computational Statistics, 2010, 2(6), 637-653. + .. [1] Zhang, F., et al. Baseline correction for infrared spectra using + adaptive smoothness parameter penalized least squares method. + Spectroscopy Letters, 2020, 53(3), 222-233. + .. [2] Zhang, F., et al. An automatic baseline correction method based on reweighted + penalized least squares method for non-sensitive areas. Vibrational Spectroscopy, + 2025, 138, 103806. + .. [3] Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary + Reviews: Computational Statistics, 2010, 2(6), 637-653. 
""" y, weight_array, pspline = self._setup_spline( @@ -1014,7 +1032,9 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde pspline.num_bands, pspline.num_bands ) baseline = pspline.solve_pspline(y, weight_array, alpha_penalty) - new_weights, residual, exit_early = _weighting._aspls(y, baseline, asymmetric_coef) + new_weights, residual, exit_early = _weighting._aspls( + y, baseline, asymmetric_coef, alternate_weighting + ) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break @@ -1507,7 +1527,7 @@ def pspline_brpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_orde @_Algorithm._register(sort_keys=('weights',)) def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_order=2, - max_iter=50, tol=1e-3, weights=None): + max_iter=50, tol=1e-3, weights=None, alternate_weighting=False): """ A penalized spline version of the LSRPLS algorithm. @@ -1533,6 +1553,12 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord weights : array-like, shape (N,), optional The weighting array. If None (default), then the initial weights will be an array with size equal to N and all values set to 1. + alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -1553,10 +1579,24 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord -------- Baseline.lsrpls + Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. 
Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to False to use ``10^t`` and True to use ``exp(t)``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. """ y, weight_array, pspline = self._setup_spline( @@ -1565,7 +1605,7 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord tol_history = np.empty(max_iter + 1) for i in range(1, max_iter + 2): baseline = pspline.solve_pspline(y, weight_array) - new_weights, exit_early = _weighting._lsrpls(y, baseline, i) + new_weights, exit_early = _weighting._lsrpls(y, baseline, i, alternate_weighting) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break @@ -2329,7 +2369,7 @@ def pspline_iarpls(data, lam=1e3, num_knots=100, spline_degree=3, diff_order=2, @_spline_wrapper def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2, max_iter=100, tol=1e-3, weights=None, alpha=None, x_data=None, - asymmetric_coef=0.5): + asymmetric_coef=2., alternate_weighting=True): """ A penalized spline version of the asPLS algorithm. 
@@ -2362,9 +2402,16 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2, x_data : array-like, shape (N,), optional The x-values of the measured data. Default is None, which will create an array from -1 to 1 with N points. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 0.5. + weighting curve (ie. more step-like). Default is 2, as used in the asPLS paper [1]_. + Note that a value of 4 results in the weighting scheme used in the NasPLS + (Non-sensitive-areas adaptive smoothness penalized least squares smoothing) algorithm + [2]_. + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the asPLS paper [1]_. + See the Notes section below for more details. Returns ------- @@ -2395,19 +2442,25 @@ def pspline_aspls(data, lam=1e4, num_knots=100, spline_degree=3, diff_order=2, Notes ----- - The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead - of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it - matches the results in Table 2 and Figure 5 of the asPLS paper closer than the - factor of 2 and fits noisy data much better. + The weighting scheme as written in the asPLS paper [1]_ does not reproduce the paper's + results for noisy data. By subtracting the mean of negative residuals (``data-baseline``) + within the weighting scheme, as used by other algorithms such as ``arPLS`` and ``drPLS``, + the asPLS paper's results can be correctly replicated (see + https://github.com/derb12/pybaselines/issues/40 for more details). Given this discrepancy, + the default for ``aspls`` is to also subtract the negative residuals within the weighting. 
+ To use the weighting scheme as it is written in the asPLS paper, simply set + ``alternate_weighting`` to False. References ---------- - Zhang, F., et al. Baseline correction for infrared spectra using - adaptive smoothness parameter penalized least squares method. - Spectroscopy Letters, 2020, 53(3), 222-233. - - Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary - Reviews: Computational Statistics, 2010, 2(6), 637-653. + .. [1] Zhang, F., et al. Baseline correction for infrared spectra using + adaptive smoothness parameter penalized least squares method. + Spectroscopy Letters, 2020, 53(3), 222-233. + .. [2] Zhang, F., et al. An automatic baseline correction method based on reweighted + penalized least squares method for non-sensitive areas. Vibrational Spectroscopy, + 2025, 138, 103806. + .. [3] Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary + Reviews: Computational Statistics, 2010, 2(6), 637-653. """ @@ -2742,7 +2795,7 @@ def pspline_brpls(data, x_data=None, lam=1e3, num_knots=100, spline_degree=3, di @_spline_wrapper def pspline_lsrpls(data, x_data=None, lam=1e3, num_knots=100, spline_degree=3, diff_order=2, - max_iter=50, tol=1e-3, weights=None): + max_iter=50, tol=1e-3, weights=None, alternate_weighting=False): """ A penalized spline version of the LSRPLS algorithm. @@ -2771,6 +2824,10 @@ def pspline_lsrpls(data, x_data=None, lam=1e3, num_knots=100, spline_degree=3, d weights : array-like, shape (N,), optional The weighting array. If None (default), then the initial weights will be an array with size equal to N and all values set to 1. + alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. 
Returns ------- @@ -2791,9 +2848,23 @@ def pspline_lsrpls(data, x_data=None, lam=1e3, num_knots=100, spline_degree=3, d -------- pybaselines.whittaker.lsrpls + Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to False to use ``10^t`` and True to use ``exp(t)``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. """ diff --git a/pybaselines/two_d/spline.py b/pybaselines/two_d/spline.py index eaaedc7d..b79bd6cf 100644 --- a/pybaselines/two_d/spline.py +++ b/pybaselines/two_d/spline.py @@ -947,7 +947,7 @@ def pspline_brpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order @_Algorithm2D._register(sort_keys=('weights',)) def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order=2, - max_iter=50, tol=1e-3, weights=None): + max_iter=50, tol=1e-3, weights=None, alternate_weighting=False): """ A penalized spline version of the LSRPLS algorithm. 
@@ -976,6 +976,12 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde weights : array-like, shape (M, N), optional The weighting array. If None (default), then the initial weights will be an array with size equal to N and all values set to 1. + alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -996,13 +1002,26 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde -------- Baseline2D.lsrpls + Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to False to use ``10^t`` and True to use ``exp(t)``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. - - Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary - Reviews: Computational Statistics, 2010, 2(6), 637-653. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. 
Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. + .. [3] Eilers, P., et al. Splines, knots, and penalties. Wiley Interdisciplinary + Reviews: Computational Statistics, 2010, 2(6), 637-653. """ y, weight_array, pspline = self._setup_spline( @@ -1011,7 +1030,7 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde tol_history = np.empty(max_iter + 1) for i in range(1, max_iter + 2): baseline = pspline.solve(y, weight_array) - new_weights, exit_early = _weighting._lsrpls(y, baseline, i) + new_weights, exit_early = _weighting._lsrpls(y, baseline, i, alternate_weighting) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break diff --git a/pybaselines/two_d/whittaker.py b/pybaselines/two_d/whittaker.py index f33e196f..4cca99cd 100644 --- a/pybaselines/two_d/whittaker.py +++ b/pybaselines/two_d/whittaker.py @@ -591,7 +591,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non sort_keys=('weights', 'alpha'), reshape_keys=('weights', 'alpha'), reshape_baseline=True ) def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, - weights=None, alpha=None, asymmetric_coef=0.5): + weights=None, alpha=None, asymmetric_coef=2., alternate_weighting=True): """ Adaptive smoothness penalized least squares smoothing (asPLS). @@ -618,9 +618,21 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, An array of values that control the local value of `lam` to better fit peak and non-peak regions. If None (default), then the initial values will be an array with shape equal to (M, N) and all values set to 1. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 2, as used in the asPLS paper [1]_. 
+ Note that a value of 4 results in the weighting scheme used in the NasPLS + (Non-sensitive-areas adaptive smoothness penalized least squares smoothing) algorithm + [2]_. + + .. versionadded:: 1.2.0 + + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the asPLS paper [1]_. + See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -647,16 +659,22 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, Notes ----- - The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead - of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it - matches the results in Table 2 and Figure 5 of the asPLS paper closer than the - factor of 2 and fits noisy data much better. + The weighting scheme as written in the asPLS paper [1]_ does not reproduce the paper's + results for noisy data. By subtracting the mean of negative residuals (ie. the negative + values of ``data - baseline``) within the weighting scheme, the asPLS paper's results can + be correctly replicated (see https://github.com/derb12/pybaselines/issues/40 for more + details). Given this discrepancy, the default for ``aspls`` is to also subtract the + negative residuals within the weighting. To use the weighting scheme as it is written in + the asPLS paper, set ``alternate_weighting`` to False. References ---------- - Zhang, F., et al. Baseline correction for infrared spectra using - adaptive smoothness parameter penalized least squares method. - Spectroscopy Letters, 2020, 53(3), 222-233. + .. [1] Zhang, F., et al. Baseline correction for infrared spectra using + adaptive smoothness parameter penalized least squares method. + Spectroscopy Letters, 2020, 53(3), 222-233. + .. [2] Zhang, F., et al. 
An automatic baseline correction method based on reweighted + penalized least squares method for non-sensitive areas. Vibrational Spectroscopy, + 2025, 138, 103806. """ y, weight_array, whittaker_system = self._setup_whittaker(data, lam, diff_order, weights) @@ -676,7 +694,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, for i in range(max_iter + 1): penalty = alpha_matrix @ whittaker_system.penalty baseline = whittaker_system.solve(y, weight_array, penalty=penalty) - new_weights, residual, exit_early = _weighting._aspls(y, baseline, asymmetric_coef) + new_weights, residual, exit_early = _weighting._aspls( + y, baseline, asymmetric_coef, alternate_weighting + ) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break @@ -950,7 +970,7 @@ def brpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 @_Algorithm2D._register(sort_keys=('weights',)) def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=None, - num_eigens=(10, 10), return_dof=False): + num_eigens=(10, 10), return_dof=False, alternate_weighting=False): """ Locally Symmetric Reweighted Penalized Least Squares (LSRPLS). @@ -983,6 +1003,12 @@ def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=Non If True and `num_eigens` is not None, then the effective degrees of freedom for each eigenvector will be calculated and returned in the parameter dictionary. Default is False since the calculation takes time. + alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -1003,13 +1029,26 @@ def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=Non with each eigenvector. 
Lower values signify that the eigenvector was less important for the fit. + Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to False to use ``10^t`` and True to use ``exp(t)``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. - - Biessy, G. Revisiting Whittaker-Henderson Smoothing. https://hal.science/hal-04124043 - (Preprint), 2023. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. + .. [3] Biessy, G. Revisiting Whittaker-Henderson Smoothing. https://hal.science/hal-04124043 + (Preprint), 2023. 
""" y, weight_array, whittaker_system = self._setup_whittaker( @@ -1018,7 +1057,7 @@ def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=Non tol_history = np.empty(max_iter + 1) for i in range(1, max_iter + 2): baseline = whittaker_system.solve(y, weight_array) - new_weights, exit_early = _weighting._lsrpls(y, baseline, i) + new_weights, exit_early = _weighting._lsrpls(y, baseline, i, alternate_weighting) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break diff --git a/pybaselines/whittaker.py b/pybaselines/whittaker.py index 04dd3b6c..ab878694 100644 --- a/pybaselines/whittaker.py +++ b/pybaselines/whittaker.py @@ -503,7 +503,7 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non @_Algorithm._register(sort_keys=('weights', 'alpha')) def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, - weights=None, alpha=None, asymmetric_coef=0.5): + weights=None, alpha=None, asymmetric_coef=2., alternate_weighting=True): """ Adaptive smoothness penalized least squares smoothing (asPLS). @@ -529,9 +529,21 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, An array of values that control the local value of `lam` to better fit peak and non-peak regions. If None (default), then the initial values will be an array with size equal to N and all values set to 1. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 0.5. + weighting curve (ie. more step-like). Default is 2, as used in the asPLS paper [1]_. + Note that a value of 4 results in the weighting scheme used in the NasPLS + (Non-sensitive-areas adaptive smoothness penalized least squares smoothing) algorithm + [2]_. + + .. 
versionadded:: 1.2.0 + + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the asPLS paper [1]_. + See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -558,16 +570,22 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, Notes ----- - The default asymmetric coefficient (`k` in the asPLS paper) is 0.5 instead - of the 2 listed in the asPLS paper. pybaselines uses the factor of 0.5 since it - matches the results in Table 2 and Figure 5 of the asPLS paper closer than the - factor of 2 and fits noisy data much better. + The weighting scheme as written in the asPLS paper [1]_ does not reproduce the paper's + results for noisy data. By subtracting the mean of negative residuals (ie. the negative + values of ``data - baseline``) within the weighting scheme, the asPLS paper's results can + be correctly replicated (see https://github.com/derb12/pybaselines/issues/40 for more + details). Given this discrepancy, the default for ``aspls`` is to also subtract the + negative residuals within the weighting. To use the weighting scheme as it is written in + the asPLS paper, set ``alternate_weighting`` to False. References ---------- - Zhang, F., et al. Baseline correction for infrared spectra using - adaptive smoothness parameter penalized least squares method. - Spectroscopy Letters, 2020, 53(3), 222-233. + .. [1] Zhang, F., et al. Baseline correction for infrared spectra using + adaptive smoothness parameter penalized least squares method. + Spectroscopy Letters, 2020, 53(3), 222-233. + .. [2] Zhang, F., et al. An automatic baseline correction method based on reweighted + penalized least squares method for non-sensitive areas. Vibrational Spectroscopy, + 2025, 138, 103806. 
""" y, weight_array, whittaker_system = self._setup_whittaker( @@ -592,7 +610,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, lhs, weight_array * y, overwrite_ab=True, overwrite_b=True, l_and_u=lower_upper_bands ) - new_weights, residual, exit_early = _weighting._aspls(y, baseline, asymmetric_coef) + new_weights, residual, exit_early = _weighting._aspls( + y, baseline, asymmetric_coef, alternate_weighting + ) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break @@ -939,7 +959,8 @@ def brpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 return baseline, params @_Algorithm._register(sort_keys=('weights',)) - def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None): + def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None, + alternate_weighting=False): """ Locally Symmetric Reweighted Penalized Least Squares (LSRPLS). @@ -961,6 +982,12 @@ def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non weights : array-like, shape (N,), optional The weighting array. If None (default), then the initial weights will be an array with size equal to N and all values set to 1. + alternate_weighting : bool, optional + If False (default), the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. + + .. versionadded:: 1.3.0 Returns ------- @@ -977,10 +1004,24 @@ def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. 
+ Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to True to use ``exp(t)`` and False to use ``10^t``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. 
""" y, weight_array, whittaker_system = self._setup_whittaker(data, lam, diff_order, weights) @@ -990,7 +1031,7 @@ def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non whittaker_system.add_diagonal(weight_array), weight_array * y, overwrite_b=True ) - new_weights, exit_early = _weighting._lsrpls(y, baseline, i) + new_weights, exit_early = _weighting._lsrpls(y, baseline, i, alternate_weighting) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct break @@ -1352,7 +1393,7 @@ def iarpls(data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None, x_d @_whittaker_wrapper def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None, - alpha=None, x_data=None, asymmetric_coef=0.5): + alpha=None, x_data=None, asymmetric_coef=2., alternate_weighting=True): """ Adaptive smoothness penalized least squares smoothing (asPLS). @@ -1381,9 +1422,16 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None, x_data : array-like, optional The x-values. Not used by this function, but input is allowed for consistency with other functions. - asymmetric_coef : float + asymmetric_coef : float, optional The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 0.5. + weighting curve (ie. more step-like). Default is 2, as used in the asPLS paper [1]_. + Note that a value of 4 results in the weighting scheme used in the NasPLS + (Non-sensitive-areas adaptive smoothness penalized least squares smoothing) algorithm + [2]_. + alternate_weighting : bool, optional + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the asPLS paper [1]_. + See the Notes section below for more details. 
Returns ------- @@ -1417,9 +1465,12 @@ def aspls(data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, weights=None, References ---------- - Zhang, F., et al. Baseline correction for infrared spectra using - adaptive smoothness parameter penalized least squares method. - Spectroscopy Letters, 2020, 53(3), 222-233. + .. [1] Zhang, F., et al. Baseline correction for infrared spectra using + adaptive smoothness parameter penalized least squares method. + Spectroscopy Letters, 2020, 53(3), 222-233. + .. [2] Zhang, F., et al. An automatic baseline correction method based on reweighted + penalized least squares method for non-sensitive areas. Vibrational Spectroscopy, + 2025, 138, 103806. """ @@ -1648,7 +1699,8 @@ def brpls(data, x_data=None, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, max_i @_whittaker_wrapper -def lsrpls(data, x_data=None, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None): +def lsrpls(data, x_data=None, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None, + alternate_weighting=False): """ Locally Symmetric Reweighted Penalized Least Squares (LSRPLS). diff --git a/tests/test_spline.py b/tests/test_spline.py index d6b1326a..f54c2a0e 100644 --- a/tests/test_spline.py +++ b/tests/test_spline.py @@ -472,7 +472,8 @@ def test_avoid_overflow_warning(self, no_noise_data_fixture): @pytest.mark.parametrize('lam', (1e1, 1e5)) @pytest.mark.parametrize('diff_order', (1, 2, 3)) - def test_whittaker_comparison(self, lam, diff_order): + @pytest.mark.parametrize('alternate_weighting', (True, False)) + def test_whittaker_comparison(self, lam, diff_order, alternate_weighting): """ Ensures the P-spline version is the same as the Whittaker version. @@ -480,10 +481,17 @@ def test_whittaker_comparison(self, lam, diff_order): get the alpha values at the coefficients' x-values. 
""" if diff_order == 2: - rtol = 2e-3 + rtol = 3e-3 else: rtol = 5e-2 - super().test_whittaker_comparison(lam=lam, diff_order=diff_order, test_rtol=rtol) + if alternate_weighting: + asymmetric_coef = 2. + else: + asymmetric_coef = 0.5 + super().test_whittaker_comparison( + lam=lam, diff_order=diff_order, alternate_weighting=alternate_weighting, + asymmetric_coef=asymmetric_coef, test_rtol=rtol + ) @pytest.mark.parametrize('asymmetric_coef', (0, -1)) def test_outside_asymmetric_coef_fails(self, asymmetric_coef): @@ -498,13 +506,15 @@ def test_numba_implementation(self, diff_order, spline_degree): """ Runs the numba vs sparse comparison using a non-default asymmetric_coef value. - The weighting for aspls when asymmetric_coef=0.5 seems to be a bit sensitive, so likely + The weighting for aspls when asymmetric_coef=2 seems to be a bit sensitive, so likely any small floating point differences in the B.T @ W @ B and B.T @ W @ y calculation after several iterations leads to slightly different results (the test needs an rtol of ~1e-5 to pass). Increasing asymmetric_coef or setting max_iter to ~20 both fix this, so the - divergence arises from the aspls weighting and not within the pspline solver. + divergence arises from the aspls weighting and not within the pspline solver. To avoid + this, the tolerance for the method is set to 1e-2 to exit early enough that these floating + point issues do not influence the output. """ - super().test_numba_implementation(diff_order, spline_degree, asymmetric_coef=1.) 
+ super().test_numba_implementation(diff_order, spline_degree, tol=1e-2) class TestPsplinePsalsa(IterativeSplineTester, WhittakerComparisonMixin): diff --git a/tests/test_weighting.py b/tests/test_weighting.py index 94841520..f013f5a4 100644 --- a/tests/test_weighting.py +++ b/tests/test_weighting.py @@ -722,7 +722,7 @@ def test_iarpls_overflow(one_d): assert not exit_early -def expected_aspls(y, baseline, asymmetric_coef=0.5): +def expected_aspls(y, baseline, asymmetric_coef, alternate_weighting): """ The weighting for adaptive smoothness penalized least squares smoothing (aspls). @@ -736,7 +736,10 @@ def expected_aspls(y, baseline, asymmetric_coef=0.5): The calculated baseline. asymmetric_coef : float The asymmetric coefficient for the weighting. Higher values leads to a steeper - weighting curve (ie. more step-like). Default is 0.5. + weighting curve (ie. more step-like). + alternate_weighting : bool + If True (default), subtracts the mean of the negative residuals within the weighting + equation. If False, uses the weighting equation as stated within the aspls paper. 
Returns ------- @@ -750,24 +753,34 @@ def expected_aspls(y, baseline, asymmetric_coef=0.5): """ residual = y - baseline - std = np.std(residual[residual < 0], ddof=1) # use dof=1 since sampling subset + neg_residual = residual[residual < 0] + std = np.std(neg_residual, ddof=1) # use dof=1 since sampling subset - weights = 1 / (1 + np.exp(asymmetric_coef * (residual - std) / std)) + if alternate_weighting: + shifted_residual = residual + neg_residual.mean() + else: + shifted_residual = residual + weights = 1 / (1 + np.exp(asymmetric_coef * (shifted_residual - std) / std)) return weights, residual @pytest.mark.parametrize('one_d', (True, False)) -@pytest.mark.parametrize('asymmetric_coef', (0.5, 2)) -def test_aspls_normal(one_d, asymmetric_coef): +@pytest.mark.parametrize('asymmetric_coef', (0.5, 2, 4)) +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_aspls_normal(one_d, asymmetric_coef, alternate_weighting): """Ensures aspls weighting works as intented for a normal baseline.""" if one_d: y_data, baseline = baseline_1d_normal() else: y_data, baseline = baseline_2d_normal() - weights, residual, exit_early = _weighting._aspls(y_data, baseline, asymmetric_coef) - expected_weights, expected_residual = expected_aspls(y_data, baseline, asymmetric_coef) + weights, residual, exit_early = _weighting._aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) + expected_weights, expected_residual = expected_aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) assert isinstance(weights, np.ndarray) assert weights.shape == y_data.shape @@ -779,16 +792,21 @@ def test_aspls_normal(one_d, asymmetric_coef): @pytest.mark.parametrize('one_d', (True, False)) -@pytest.mark.parametrize('asymmetric_coef', (0.5, 2)) -def test_aspls_all_above(one_d, asymmetric_coef): +@pytest.mark.parametrize('asymmetric_coef', (0.5, 2, 4)) +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_aspls_all_above(one_d, asymmetric_coef, 
alternate_weighting): """Ensures aspls weighting works as intented for a baseline with all points above the data.""" if one_d: y_data, baseline = baseline_1d_all_above() else: y_data, baseline = baseline_2d_all_above() - weights, residual, exit_early = _weighting._aspls(y_data, baseline, asymmetric_coef) - expected_weights, expected_residual = expected_aspls(y_data, baseline, asymmetric_coef) + weights, residual, exit_early = _weighting._aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) + expected_weights, expected_residual = expected_aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) assert isinstance(weights, np.ndarray) assert weights.shape == y_data.shape @@ -798,8 +816,9 @@ def test_aspls_all_above(one_d, asymmetric_coef): @pytest.mark.parametrize('one_d', (True, False)) -@pytest.mark.parametrize('asymmetric_coef', (0.5, 2)) -def test_aspls_all_below(one_d, asymmetric_coef): +@pytest.mark.parametrize('asymmetric_coef', (0.5, 2, 4)) +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_aspls_all_below(one_d, asymmetric_coef, alternate_weighting): """Ensures aspls weighting works as intented for a baseline with all points below the data.""" if one_d: y_data, baseline = baseline_1d_all_below() @@ -807,7 +826,9 @@ def test_aspls_all_below(one_d, asymmetric_coef): y_data, baseline = baseline_2d_all_below() with pytest.warns(utils.ParameterWarning): - weights, residual, exit_early = _weighting._aspls(y_data, baseline, asymmetric_coef) + weights, residual, exit_early = _weighting._aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) expected_weights = np.zeros_like(y_data) assert isinstance(weights, np.ndarray) @@ -818,8 +839,9 @@ def test_aspls_all_below(one_d, asymmetric_coef): @pytest.mark.parametrize('one_d', (True, False)) -@pytest.mark.parametrize('asymmetric_coef', (0.5, 2)) -def test_aspls_overflow(one_d, asymmetric_coef): +@pytest.mark.parametrize('asymmetric_coef', (0.5, 2, 4)) 
+@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_aspls_overflow(one_d, asymmetric_coef, alternate_weighting): """Ensures exponential overflow does not occur from aspls weighting.""" if one_d: y_data, baseline = baseline_1d_normal() @@ -843,12 +865,16 @@ def test_aspls_overflow(one_d, asymmetric_coef): # sanity check to ensure overflow actually should occur with pytest.warns(RuntimeWarning): - expected_weights, expected_residual = expected_aspls(y_data, baseline, asymmetric_coef) + expected_weights, expected_residual = expected_aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) # the resulting weights should still be finite since 1 / (1 + inf) == 0 assert np.isfinite(expected_weights).all() with np.errstate(over='raise'): - weights, residual, exit_early = _weighting._aspls(y_data, baseline, asymmetric_coef) + weights, residual, exit_early = _weighting._aspls( + y_data, baseline, asymmetric_coef, alternate_weighting + ) assert np.isfinite(weights).all() assert not exit_early @@ -1327,7 +1353,7 @@ def test_brpls_beta_extremes(one_d, beta): assert_allclose(weights, expected_weights, rtol=1e-10, atol=1e-10) -def expected_lsrpls(y, baseline, iteration): +def expected_lsrpls(y, baseline, iteration, alternate_weighting): """ The weighting for the locally symmetric reweighted penalized least squares (lsrpls). @@ -1342,38 +1368,67 @@ def expected_lsrpls(y, baseline, iteration): iteration : int The iteration number. Should be 1-based, such that the first iteration is 1 instead of 0. + alternate_weighting : bool + If False, the weighting uses a prefactor term of ``10^t``, where ``t`` is + the iteration number, which is equation 8 within the LSRPLS paper [1]_. If True, uses + a prefactor term of ``exp(t)``. See the Notes section below for more details. Returns ------- weights : numpy.ndarray, shape (N,) The calculated weights. 
+ Notes + ----- + In the LSRPLS paper [1]_, the weighting equation is written with a prefactor term + of ``10^t``, where ``t`` is the iteration number, but the plotted weighting curve in + Figure 1 of the paper shows a prefactor term of ``exp(t)`` instead. Since it is ambiguous + which prefactor term is actually used for the algorithm, both are permitted by setting + `alternate_weighting` to True to use ``exp(t)`` and False to use ``10^t``. In practice, + the prefactor determines how quickly the weighting curve converts from a sigmoidal curve + to a step curve, and does not heavily influence the result. + + If ``alternate_weighting`` is True, the weighting is the same as the drPLS algorithm [2]_. + + References ---------- - Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric - Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [1] Heng, Z., et al. Baseline correction for Raman Spectra Based on Locally Symmetric + Reweighted Penalized Least Squares. Chinese Journal of Lasers, 2018, 45(12), 1211001. + .. [2] Xu, D. et al. Baseline correction method based on doubly reweighted + penalized least squares, Applied Optics, 2019, 58, 3913-3920. 
+ """ residual = y - baseline neg_residual = residual[residual < 0] std = _weighting._safe_std(neg_residual, ddof=1) # use dof=1 since sampling subset - inner = ((10**iteration) / std) * (residual - (2 * std - np.mean(neg_residual))) + if alternate_weighting: + prefactor = np.exp(iteration) + else: + prefactor = 10**iteration + inner = (prefactor / std) * (residual - (2 * std - np.mean(neg_residual))) weights = 0.5 * (1 - (inner / (1 + np.abs(inner)))) return weights @pytest.mark.parametrize('iteration', (1, 10)) @pytest.mark.parametrize('one_d', (True, False)) -def test_lsrpls_normal(iteration, one_d): +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_lsrpls_normal(iteration, one_d, alternate_weighting): """Ensures lsrpls weighting works as intented for a normal baseline.""" if one_d: y_data, baseline = baseline_1d_normal() else: y_data, baseline = baseline_2d_normal() - weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration) - expected_weights = expected_lsrpls(y_data, baseline, iteration) + weights, exit_early = _weighting._lsrpls( + y_data, baseline, iteration, alternate_weighting + ) + expected_weights = expected_lsrpls( + y_data, baseline, iteration, alternate_weighting + ) assert isinstance(weights, np.ndarray) assert weights.shape == y_data.shape @@ -1385,14 +1440,15 @@ def test_lsrpls_normal(iteration, one_d): @pytest.mark.parametrize('iteration', (1, 10)) @pytest.mark.parametrize('one_d', (True, False)) -def test_lsrpls_all_above(iteration, one_d): +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_lsrpls_all_above(iteration, one_d, alternate_weighting): """Ensures lsrpls weighting works as intented for a baseline with all points above the data.""" if one_d: y_data, baseline = baseline_1d_all_above() else: y_data, baseline = baseline_2d_all_above() - weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration) - expected_weights = expected_lsrpls(y_data, baseline, iteration) + weights, 
exit_early = _weighting._lsrpls(y_data, baseline, iteration, alternate_weighting) + expected_weights = expected_lsrpls(y_data, baseline, iteration, alternate_weighting) assert isinstance(weights, np.ndarray) assert weights.shape == y_data.shape @@ -1402,7 +1458,8 @@ def test_lsrpls_all_above(iteration, one_d): @pytest.mark.parametrize('iteration', (1, 10)) @pytest.mark.parametrize('one_d', (True, False)) -def test_lsrpls_all_below(iteration, one_d): +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_lsrpls_all_below(iteration, one_d, alternate_weighting): """Ensures lsrpls weighting works as intented for a baseline with all points below the data.""" if one_d: y_data, baseline = baseline_1d_all_below() @@ -1410,7 +1467,7 @@ def test_lsrpls_all_below(iteration, one_d): y_data, baseline = baseline_2d_all_below() with pytest.warns(utils.ParameterWarning): - weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration) + weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration, alternate_weighting) expected_weights = np.zeros_like(y_data) assert isinstance(weights, np.ndarray) @@ -1420,7 +1477,8 @@ def test_lsrpls_all_below(iteration, one_d): @pytest.mark.parametrize('one_d', (True, False)) -def test_lsrpls_overflow(one_d): +@pytest.mark.parametrize('alternate_weighting', (True, False)) +def test_lsrpls_overflow(one_d, alternate_weighting): """Ensures exponential overflow does not occur from lsrpls weighting.""" if one_d: y_data, baseline = baseline_1d_normal() @@ -1429,11 +1487,17 @@ def test_lsrpls_overflow(one_d): iteration = 10000 # sanity check to ensure overflow actually should occur - with pytest.raises(OverflowError): - expected_lsrpls(y_data, baseline, iteration) + if alternate_weighting: + # exponential overflow only warns, so ensure that it produces nan weights + with pytest.warns(RuntimeWarning): + expected_weights = expected_drpls(y_data, baseline, iteration) + assert (~np.isfinite(expected_weights)).any() + else: 
+ with pytest.raises(OverflowError): + expected_lsrpls(y_data, baseline, iteration, alternate_weighting) with np.errstate(over='raise'): - weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration) + weights, exit_early = _weighting._lsrpls(y_data, baseline, iteration, alternate_weighting) assert np.isfinite(weights).all() assert not exit_early diff --git a/tests/test_whittaker.py b/tests/test_whittaker.py index e77f1e82..1f722752 100644 --- a/tests/test_whittaker.py +++ b/tests/test_whittaker.py @@ -62,7 +62,8 @@ def sparse_drpls(data, lam, eta=0.5, diff_order=2, tol=1e-3, max_iter=50): return baseline -def sparse_aspls(data, lam, diff_order=2, tol=1e-3, max_iter=100, asymmetric_coef=2): +def sparse_aspls(data, lam, diff_order=2, tol=1e-3, max_iter=100, asymmetric_coef=2., + alternate_weighting=True): """A sparse version of aspls for testing that the banded version is implemented correctly.""" y = np.asarray(data) num_y = len(y) @@ -74,7 +75,9 @@ def sparse_aspls(data, lam, diff_order=2, tol=1e-3, max_iter=100, asymmetric_coe for _ in range(max_iter + 1): lhs = weight_matrix + alpha_matrix @ penalty_matrix baseline = spsolve(lhs, weight_array * y) - new_weights, residual, _ = _weighting._aspls(y, baseline, asymmetric_coef) + new_weights, residual, _ = _weighting._aspls( + y, baseline, asymmetric_coef, alternate_weighting + ) if relative_difference(weight_array, new_weights) < tol: break weight_array = new_weights @@ -424,9 +427,10 @@ def test_outside_asymmetric_coef_fails(self, asymmetric_coef): with pytest.raises(ValueError): self.class_func(self.y, asymmetric_coef=asymmetric_coef) - @pytest.mark.parametrize('asymmetric_coef', (1, 2)) + @pytest.mark.parametrize('asymmetric_coef', (0.5, 2, 4)) + @pytest.mark.parametrize('alternate_weighting', (True, False)) @pytest.mark.parametrize('diff_order', (2, 3)) - def test_sparse_comparison(self, diff_order, asymmetric_coef): + def test_sparse_comparison(self, diff_order, asymmetric_coef, alternate_weighting): 
""" Ensures the banded version of the implementation is correct. @@ -438,14 +442,14 @@ def test_sparse_comparison(self, diff_order, asymmetric_coef): lam = {2: 1e7, 3: 1e10}[diff_order] sparse_output = sparse_aspls( self.y, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, - asymmetric_coef=asymmetric_coef + asymmetric_coef=asymmetric_coef, alternate_weighting=alternate_weighting ) banded_output = self.class_func( self.y, lam=lam, diff_order=diff_order, max_iter=max_iter, tol=tol, - asymmetric_coef=asymmetric_coef + asymmetric_coef=asymmetric_coef, alternate_weighting=alternate_weighting )[0] - rtol = {2: 1e-4, 3: 2e-4}[diff_order] + rtol = {2: 1e-4, 3: 5e-4}[diff_order] assert_allclose(banded_output, sparse_output, rtol=rtol, atol=1e-8)