diff --git a/docs/_templates/autosummary/class.rst b/docs/_templates/autosummary/class.rst new file mode 100644 index 00000000..d9a71f2a --- /dev/null +++ b/docs/_templates/autosummary/class.rst @@ -0,0 +1,33 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + :toctree: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + :template: autosummary/method.rst + :toctree: + :nosignatures: + {% for item in methods %} + {%- if not item.startswith('_') or not item in ['__init__'] %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/_templates/autosummary/module.rst b/docs/_templates/autosummary/module.rst index e2c3c4da..846b30d8 100644 --- a/docs/_templates/autosummary/module.rst +++ b/docs/_templates/autosummary/module.rst @@ -19,6 +19,7 @@ .. rubric:: {{ _('Classes') }} .. autosummary:: + :template: autosummary/class.rst :toctree: :nosignatures: {% for item in classes %} diff --git a/docs/algorithms/algorithms_1d/misc.rst b/docs/algorithms/algorithms_1d/misc.rst index 08b8f3f3..1da18950 100644 --- a/docs/algorithms/algorithms_1d/misc.rst +++ b/docs/algorithms/algorithms_1d/misc.rst @@ -78,7 +78,29 @@ beads (Baseline Estimation And Denoising with Sparsity) :meth:`~.Baseline.beads` decomposes the input data into baseline and pure, noise-free signal by modeling the baseline as a low pass filter and by considering the signal and its derivatives -as sparse. +as sparse. The minimized equation for calculating the pure signal is given as: + +.. 
math:: + + \frac{1}{2} ||H(y - s)||_2^2 + + \lambda_0 \sum\limits_{i}^{N} \theta(s_i, r) + + \lambda_1 \sum\limits_{i}^{N - 1} \phi(\Delta^1 s_i) + + \lambda_2 \sum\limits_{i}^{N - 2} \phi(\Delta^2 s_i) + +where :math:`y` is the measured data, :math:`s` is the calculated pure signal, +:math:`H` is a high pass filter, :math:`\theta()` is a differentiable, symmetric +or asymmetric penalty function on the calculated signal, :math:`r` is the asymmetry +of :math:`\theta()` by which negative values are penalized more, :math:`\Delta^1` and :math:`\Delta^2` +are :ref:`finite-difference operators ` of order 1 +and 2, respectively, and :math:`\phi()` is a differentiable, symmetric penalty function +approximating the L1 loss (mean absolute error) applied to the first and second derivatives +of the calculated signal. + +The calculated baseline, :math:`v`, upon convergence of calculating the pure signal is given by: + +.. math:: + + v = y - s - H(y - s) .. plot:: :align: center @@ -189,22 +211,23 @@ as sparse. x, data, baselines = create_data() baseline_fitter = Baseline(x, check_finite=False) - fit_params = [(3, 3), (0.15, 8), (0.1, 6), (0.25, 8), (0.1, 0.6)] - + fit_params = [ + (500, 6, 0.01), + (0.01, 8, 0.08), + (80, 8, 0.01), + (0.2, 6, 0.04), + (100, 1, 0.01) + ] figure, axes, handles = create_plots(data, baselines) for i, (ax, y) in enumerate(zip(axes, data)): - if i == 0: - freq_cutoff = 0.002 - else: - freq_cutoff = 0.005 - lam_0, asymmetry = fit_params[i] + alpha, asymmetry, freq_cutoff = fit_params[i] baseline, params = baseline_fitter.beads( - y, freq_cutoff=freq_cutoff, lam_0=lam_0, lam_1=0.05, lam_2=0.2, asymmetry=asymmetry + y, freq_cutoff=freq_cutoff, alpha=alpha, asymmetry=asymmetry, tol=1e-3 ) ax.plot(baseline, 'g--') The signal with both noise and baseline removed can also be obtained from the output -of the beads function. +of the beads method by accessing the 'signal' key in the output parameters. .. 
plot:: :align: center @@ -214,15 +237,10 @@ of the beads function. # to see contents of create_data function, look at the second-to-top-most algorithm's code figure, axes, handles = create_plots(data, baselines) - fit_params = [(3, 3), (0.15, 8), (0.1, 6), (0.25, 8), (0.1, 0.6)] for i, (ax, y) in enumerate(zip(axes, data)): - if i == 0: - freq_cutoff = 0.002 - else: - freq_cutoff = 0.005 - lam_0, asymmetry = fit_params[i] + alpha, asymmetry, freq_cutoff = fit_params[i] baseline, params = baseline_fitter.beads( - y, freq_cutoff=freq_cutoff, lam_0=lam_0, lam_1=0.05, lam_2=0.2, asymmetry=asymmetry + y, freq_cutoff=freq_cutoff, alpha=alpha, asymmetry=asymmetry, tol=1e-3 ) ax.clear() # remove the old plots in the axis @@ -234,3 +252,86 @@ of the beads function. (data_handle[0], signal_handle[0]), ('data', 'signal from beads'), loc='center', frameon=False ) + +pybaselines version 1.3.0 introduced an optional simplification of the :math:`\lambda_0`, +:math:`\lambda_1`, :math:`\lambda_2` regularization parameter selection using the procedure +recommended by the BEADS manuscript through the addition of the parameter :math:`\alpha`. +In detail, it is assumed that each :math:`\lambda_d` value is approximately proportional to some +constant :math:`\alpha` divided by the L1 norm of the d'th derivative of the input data +such that: + +.. math:: + :label: lam_eq + + \lambda_0 = \frac{\alpha}{||y||_1}, + \lambda_1 = \frac{\alpha}{||y'||_1}, + \lambda_2 = \frac{\alpha}{||y''||_1} + +Such a parametrization allows varying just :math:`\alpha`, which simplifies basic usage and +allows for easier integration within optimization frameworks to find the best regularization +parameter, as demonstrated by `Bosten, et al. `_ + +At first glance, Eq. 
:eq:`lam_eq` seems to have an issue in that the penalties within the BEADS +algorithm are applied to the calculated signal, while the estimated :math:`\lambda_d` values +will be based on the input data, which is composed of the signal, baseline, and noise. +Due to this, the estimate for :math:`\lambda_0` is affected by an overestimation of the +signal's L1 norm, while also not accounting for the asymmetry of the penalty function +:math:`\theta()`. Further, while the estimates for :math:`\lambda_1` and :math:`\lambda_2` +are less affected by the baseline since taking the derivatives eliminates some +contributions from the baseline, the influence of noise is amplified, as demonstrated +in the figure below, also resulting in an overestimation of their L1 norms. In practice, +however, this systematic norm overestimation can be accounted for by simply selecting a larger +:math:`\alpha`, and Eq. :eq:`lam_eq` ends up being a fairly good first approximation to +allow one value to determine all three regularization terms. + +.. 
plot:: + :align: center + :context: close-figs + :include-source: False + :show-source-link: True + + from pybaselines.utils import make_data + + x, y_no_noise, known_baseline = make_data( + return_baseline=True, noise_std=0.001, bkg_type='sine' + ) + true_y = y_no_noise - known_baseline + x, y = make_data(noise_std=0.1, bkg_type='sine') + # subtract the minimum so it plots nicer with the pure signal + y -= y.min() + + _, (ax1, ax2, ax3) = plt.subplots(nrows=3, sharex=True) + dy = np.gradient(y) + d2y = np.gradient(dy) + true_dy = np.gradient(true_y) + true_d2y = np.gradient(true_dy) + ax1.plot(x, y, label='data') + ax1.plot(x, true_y, '--', label='true signal') + ax2.plot(x, dy) + ax2.plot(x, true_dy, '--') + ax3.plot(x, d2y, label='data') + ax3.plot(x, true_d2y, '--', label='true signal') + + ax1.legend() + ax1.set_ylabel('0th Derivative') + ax2.set_ylabel('1st Derivative') + ax3.set_ylabel('2nd Derivative') + + plt.figure() + deriv_values = np.arange(3, dtype=int) + plt.plot( + deriv_values, + [abs(y).sum(), abs(dy).sum(), abs(d2y).sum()], + 'o-', label='data' + ) + plt.plot( + deriv_values, + [abs(true_y).sum(), abs(true_dy).sum(), abs(true_d2y).sum()], + 'o--', label='true signal' + ) + + plt.xlabel('Derivative Order') + plt.xticks(deriv_values) + plt.ylabel('L1 Norm') + plt.semilogy() + plt.legend() diff --git a/docs/algorithms/algorithms_1d/optimizers.rst b/docs/algorithms/algorithms_1d/optimizers.rst index 215a591b..ea626d4c 100644 --- a/docs/algorithms/algorithms_1d/optimizers.rst +++ b/docs/algorithms/algorithms_1d/optimizers.rst @@ -13,13 +13,13 @@ Algorithms optimize_extended_range ~~~~~~~~~~~~~~~~~~~~~~~ -The :meth:`~.Baseline.optimize_extended_range` function is based on the `Extended Range +The :meth:`~.Baseline.optimize_extended_range` method is based on the `Extended Range Penalized Least Squares (erPLS) method `_, but extends its usage to all Whittaker-smoothing-based, polynomial, and spline algorithms. 
In this algorithm, a linear baseline is extrapolated from the left and/or right edges, Gaussian peaks are added to these baselines, and then the original -data plus the extensions are input into the indicated Whittaker or polynomial function. +data plus the extensions are input into the indicated Whittaker or polynomial method. An example of data with added baseline and Gaussian peaks is shown below. .. _extending-data-explanation: @@ -313,3 +313,135 @@ reducing the number of data points in regions where higher stiffness is required. There is no figure showing the fits for various baseline types for this method since it is more suited for hard-to-fit data; however, :ref:`an example ` showcases its use. + + +optimize_pls (Optimize Penalized Least Squares) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~.Baseline.optimize_pls` method is based on the `method developed by the authors +of the arPLS algorithm `_, +but extends its usage to all Whittaker-smoothing and penalized spline algorithms. + +The method works by considering the general equation of penalized least squares given by + +.. math:: + F + \lambda P + +where :math:`F` is the fidelity of the fit and :math:`P` is the penalty term whose contribution +is controlled by the regularization parameter :math:`\lambda`. In general, both Whittaker +smoothing and penalized splines have a fidelity given by: + +.. math:: + + F = \sum\limits_{i}^N w_i (y_i - v_i)^2 + +where :math:`y_i` is the measured data, :math:`v_i` is the calculated baseline, +and :math:`w_i` is the weight. The penalty for Whittaker smoothing is generally: + +.. math:: + + P = \sum\limits_{i}^{N - d} (\Delta^d v_i)^2 + +where :math:`\Delta^d` is the finite-difference operator of order d. For penalized +splines the penalty is generally: + +.. math:: + + + P = \sum\limits_{i}^{M - d} (\Delta^d c_i)^2 + +where :math:`c` are the calculated spline coefficients. 
+ +In either case, a range of ``lam`` values are tested, with the fidelity and penalty +values calculated for each fit. Then the array of fidelity and penalty values are +normalized by their minimum and maximum values: + +.. math:: + + F_{norm} = \frac{F - \min(F)}{\max(F) - \min(F)} + +.. math:: + + P_{norm} = \frac{P - \min(P)}{\max(P) - \min(P)} + + +The ``lam`` value that produces a minimum of the sum of normalized penalty and fidelity +values is then selected as the optimal value. An example demonstrating this process is shown below. + +.. note:: + Fun fact: this method was the reason that all baseline correction methods in pybaselines + output a parameter dictionary. Since the conception of pybaselines, the author had tried to + implement this method and only realized after ~5 years that the original publication had a + typo that prevented being able to replicate the publication's results. + + +.. plot:: + :align: center + :context: close-figs + :include-source: False + :show-source-link: True + + def normalize(values): + return (values - values.min()) / (values.max() - values.min()) + + x = np.linspace(1, 1000, 500) + signal = ( + gaussian(x, 6, 180, 5) + + gaussian(x, 6, 550, 5) + + gaussian(x, 9, 800, 10) + + gaussian(x, 9, 100, 12) + + gaussian(x, 15, 400, 8) + + gaussian(x, 13, 700, 12) + + gaussian(x, 9, 880, 8) + ) + baseline = 5 + 15 * np.exp(-x / 800) + gaussian(x, 5, 700, 300) + noise = np.random.default_rng(1).normal(0, 0.2, x.size) + y = signal * 0.5 + baseline + noise + + min_value = 2 + max_value = 10 + step = 0.25 + lam_range = np.arange(min_value, max_value, step) + + fit, params = baseline_fitter.optimize_pls( + y, opt_method='u-curve', min_value=min_value, max_value=max_value, step=step + ) + + plt.figure() + plt.plot(lam_range, normalize(params['penalty']), label='Penalty, $P_{norm}$') + plt.plot(lam_range, normalize(params['fidelity']), label='Fidelity, $F_{norm}$') + plt.plot(lam_range, params['metric'], '--', label='$F_{norm} + 
P_{norm}$') + index = np.argmin(abs(lam_range - np.log10(params['optimal_parameter']))) + plt.plot(lam_range[index], params['metric'][index], 'o', label='Optimal Value') + plt.xlabel('log$_{10}$(lam)') + plt.ylabel('Normalized Value') + plt.legend() + + plt.figure() + plt.plot(x, y) + plt.plot( + x, baseline_fitter.arpls(y, lam=10**min_value)[0], label=f'lam=10$^{{{min_value}}}$' + ) + plt.plot( + x, baseline_fitter.arpls(y, lam=10**max_value)[0], label=f'lam=10$^{{{max_value}}}$' + ) + plt.plot(x, fit, label=f'lam=10$^{{{np.log10(params['optimal_parameter']):.1f}}}$') + plt.legend() + + +In general, this method is more sensitive to the minimum and maximum ``lam`` values used for +the fits compared to :meth:`~.Baseline.optimize_extended_range`. + +.. plot:: + :align: center + :context: close-figs + :include-source: False + :show-source-link: True + + # to see contents of create_data function, look at the top-most algorithm's code + figure, axes, handles = create_plots(data, baselines) + for i, (ax, y) in enumerate(zip(axes, data)): + baseline, params = baseline_fitter.optimize_pls( + y, method='arpls', opt_method='u-curve', min_value=2.5, max_value=5 + ) + ax.plot(baseline, 'g--') diff --git a/docs/algorithms/algorithms_1d/spline.rst b/docs/algorithms/algorithms_1d/spline.rst index 9ec59c33..0cacb08f 100644 --- a/docs/algorithms/algorithms_1d/spline.rst +++ b/docs/algorithms/algorithms_1d/spline.rst @@ -319,7 +319,7 @@ Minimized function: .. math:: - \sum\limits_{i}^N (w_i (y_i - v(x_i)))^2 + \sum\limits_{i}^N w_i (y_i - v(x_i))^2 + \lambda \sum\limits_{i}^{M - d} (\Delta^d c_i)^2 + \lambda_1 \sum\limits_{i}^{N - 1} (\Delta^1 (y_i - v(x_i)))^2 @@ -327,19 +327,44 @@ Linear system: .. 
math:: - (B^{\mathsf{T}} W^{\mathsf{T}} W B + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1 B + \lambda D_d^{\mathsf{T}} D_d) c - = (B^{\mathsf{T}} W^{\mathsf{T}} W B + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1) y + (B^{\mathsf{T}} W B + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1 B + \lambda D_d^{\mathsf{T}} D_d) c + = (B^{\mathsf{T}} W + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1) y Weighting: .. math:: w_i = \left\{\begin{array}{cr} - p & y_i > v_i \\ - 1 - p & y_i \le v_i + p^2 & y_i > v_i \\ + (1 - p)^2 & y_i \le v_i \end{array}\right. +.. note:: + + Using the literature implementation of IAsLs, its equivalent P-Spline linear equation would + be + + .. math:: + + (B^{\mathsf{T}} W^{\mathsf{T}} W B + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1 B + \lambda D_d^{\mathsf{T}} D_d) c + = (B^{\mathsf{T}} W^{\mathsf{T}} W + \lambda_1 B^{\mathsf{T}} D_1^{\mathsf{T}} D_1) y + + with the weighting scheme + + .. math:: + + w_i = \left\{\begin{array}{cr} + p & y_i > v_i \\ + 1 - p & y_i \le v_i + \end{array}\right. + + These are equivalent to the linear equation and weighting scheme listed above when + incorporating the squaring of the weights directly within the weighting scheme. The + simplified functional form using squared weights is used in pybaselines for consistency + with all other algorithms. + + .. plot:: :align: center :context: close-figs diff --git a/docs/algorithms/algorithms_1d/whittaker.rst b/docs/algorithms/algorithms_1d/whittaker.rst index e1ca4804..700e34af 100644 --- a/docs/algorithms/algorithms_1d/whittaker.rst +++ b/docs/algorithms/algorithms_1d/whittaker.rst @@ -343,7 +343,7 @@ Minimized function: .. math:: - \sum\limits_{i}^N (w_i (y_i - v_i))^2 + \sum\limits_{i}^N w_i (y_i - v_i)^2 + \lambda \sum\limits_{i}^{N - d} (\Delta^d v_i)^2 + \lambda_1 \sum\limits_{i}^{N - 1} (\Delta^1 (y_i - v_i))^2 @@ -351,18 +351,41 @@ Linear system: .. 
math:: - (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1 + \lambda D_d^{\mathsf{T}} D_d) v - = (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1) y + (W + \lambda_1 D_1^{\mathsf{T}} D_1 + \lambda D_d^{\mathsf{T}} D_d) v + = (W + \lambda_1 D_1^{\mathsf{T}} D_1) y Weighting: .. math:: w_i = \left\{\begin{array}{cr} - p & y_i > v_i \\ - 1 - p & y_i \le v_i + p^2 & y_i > v_i \\ + (1 - p)^2 & y_i \le v_i \end{array}\right. +.. note:: + + Within literature, IAsLs uses the linear equation + + .. math:: + + (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1 + \lambda D_d^{\mathsf{T}} D_d) v + = (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1) y + + with the weighting scheme + + .. math:: + + w_i = \left\{\begin{array}{cr} + p & y_i > v_i \\ + 1 - p & y_i \le v_i + \end{array}\right. + + These are equivalent to the linear equation and weighting scheme listed above when + incorporating the squaring of the weights directly within the weighting scheme. The + simplified functional form using squared weights is used in pybaselines for consistency + with all other algorithms. + .. plot:: :align: center diff --git a/docs/api/Baseline.rst b/docs/api/Baseline.rst index 4738c3e3..f7783ef6 100644 --- a/docs/api/Baseline.rst +++ b/docs/api/Baseline.rst @@ -127,6 +127,7 @@ Optimizing Algorithms Baseline.optimize_extended_range Baseline.adaptive_minmax Baseline.custom_bc + Baseline.optimize_pls Miscellaneous Algorithms ------------------------ diff --git a/docs/api/index.rst b/docs/api/index.rst index a6dc6264..2307b239 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -7,4 +7,5 @@ API Reference Baseline Baseline2D utils + results functional_interface \ No newline at end of file diff --git a/docs/api/results.rst b/docs/api/results.rst new file mode 100644 index 00000000..9b2c03d7 --- /dev/null +++ b/docs/api/results.rst @@ -0,0 +1,12 @@ +Objects for Examining Results +============================= + +.. currentmodule:: pybaselines + +.. 
autosummary:: + :toctree: ../generated/api/ + :template: autosummary/module.rst + :nosignatures: + :recursive: + + results diff --git a/docs/conf.py b/docs/conf.py index 09a23f7d..99a4be18 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,6 +52,7 @@ 'sphinx_gallery.gen_gallery', 'viewcode_inherit_methods', # custom extension to allow viewcode with inherited methods 'modify_module_docstring', # custom extension to modify module docstrings + 'xref_param_dict', # custom extension to add cross references for parameter dictionary typings ] autosummary_generate = True # enables autosummary extension @@ -173,12 +174,28 @@ # creates cross references for types in docstrings numpydoc_xref_param_type = True -numpydoc_xref_ignore = {'optional', 'shape', 'K', 'L', 'M', 'N', 'P', 'Q', 'or', 'deprecated'} +numpydoc_xref_ignore = { + 'optional', + 'shape', + 'J', + 'K', + 'L', + 'M', + 'N', + 'P', + 'Q', + 'or', + 'deprecated', +} numpydoc_xref_aliases = { 'Sequence': ':term:`python:sequence`', 'Callable': ':term:`python:callable`', 'Iterable': ':term:`python:iterable`', - 'ParameterWarning': ':class:`pybaselines.utils.ParameterWarning`', + 'ParameterWarning': ':class:`~pybaselines.utils.ParameterWarning`', + 'WhittakerResult': ':class:`~pybaselines.results.WhittakerResult`', + 'WhittakerResult2D': ':class:`~pybaselines.results.WhittakerResult2D`', + 'PSplineResult': ':class:`~pybaselines.results.PSplineResult`', + 'PSplineResult2D': ':class:`~pybaselines.results.PSplineResult2D`', } # have to set numpydoc_class_members_toctree to False to work well with autosummary; # otherwise, duplicate objects are added to the toctree diff --git a/docs/viewcode_inherit_methods.py b/docs/viewcode_inherit_methods.py index 329d1eab..ad768ec2 100644 --- a/docs/viewcode_inherit_methods.py +++ b/docs/viewcode_inherit_methods.py @@ -35,6 +35,11 @@ def find_super_method(app, modname): have a value resembling `('def', 90, 140))`, and the key "Baseline" would have a value resembling `('class', 14, 
80)`. + Raises + ------ + RuntimeError + Raised if an issue occurred when retrieving the tags from `modname`. + Notes ----- The "viewcode_follow_imported_members" toggle for sphinx.ext.viewcode can find the @@ -74,8 +79,8 @@ def find_super_method(app, modname): analyzer.find_tags() code = analyzer.code tags = analyzer.tags - except Exception: - return + except Exception as e: + raise RuntimeError('Error within custom viewcode-inherit-methods extension') from e if 'two_d' in modname: new_obj = 'Baseline2D' diff --git a/docs/xref_param_dict.py b/docs/xref_param_dict.py new file mode 100644 index 00000000..4283b804 --- /dev/null +++ b/docs/xref_param_dict.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +"""Plugin to add cross references for types within returned parameter dictionaries. + +Created on January 25, 2026 +@author: Donald Erb + +""" + +from numpydoc.xref import make_xref + + +def xref_parameter_dict(app, what, name, obj, options, lines): + """ + Adds cross references for items within the parameter dictionaries for baseline methods. + + Makes it so that the types for items within the parameter dictionary are also cross + referenced just like the inputs and returns are. See + https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#event-autodoc-process-docstring + for more details on the Sphinx event. This should be ran after numpydoc has ran since it + expects numpydoc formatting within the docstrings. + + Parameters + ---------- + app : sphinx.application.Sphinx + The Sphinx application object. + what : str + The type of object; can be 'module', 'class', etc. + name : str + The full name of the object being documented. + obj : object + The actual object, eg. the class, function, module, etc. being documented. + options : sphinx.ext.autodoc.Options + The options for the current object's directive. + lines : list[str] + The list of strings for the docstring. Must be modified in-place in order + to change the value. 
+ + Raises + ------ + ValueError + Raised if the formatting for a parameter item is incorrect. + + Notes + ----- + Relies on numpydoc formatting within the sections, so this may break for future numpydoc + releases. For reference, tested on numpydoc versions 1.8.0, 1.9.0, and 1.10.0. + + """ + # parameter dictionary items are only within Baseline[2D] methods or the functional + # interface + if what not in ('method', 'function'): + return + + # if using napoleon instead of numpydoc, this should raise an error, which is desired behavior + # since docstring formatting will not be the same otherwise + xref_aliases = app.config.numpydoc_xref_aliases_complete + xref_ignore = app.config.numpydoc_xref_ignore + + key_type_separator = ': ' + in_return_section = False + for i, line in enumerate(lines): + # NOTE this could be made more robust using regular expressions, but the formatting + # should be kept simple since this is just for internal formatting + # + # Assumes formatting within the Returns sections is like: + # + # params : dict + # * 'key' : value_typing + # Description of value. + if in_return_section and line.startswith(' *'): + key, *value_typing = line.split(key_type_separator) + if not value_typing: + # in case something is incorrectly formatted as "name type" rather than + # "name : type" + raise ValueError( + f'Incorrect parameter dictionary format for {name} at line: "{line}"' + ) + # could split value_typing[0] using ',' to separate things like + # "numpy.ndarray, shape (N,)", but that fails for others like "dict[str, list]", + # so just pass the full typing reference to numpydoc and let it process accordingly + xref = make_xref(value_typing[0], xref_aliases=xref_aliases, xref_ignore=xref_ignore) + lines[i] = key_type_separator.join([key, xref]) + elif in_return_section and (line.startswith(':') or line.startswith('..')): + # other docstring sections after Returns start with things like '.. 
rubric:: References' + # or ':Raises:' after numpydoc formatting + break + elif line == ':Returns:': + in_return_section = True + + +def setup(app): + """Connects the xref_parameter_dict to the autodoc-process-docstring event. + + Returns + ------- + dict + Relevant information about the extension to pass to Sphinx. See + https://www.sphinx-doc.org/en/master/extdev/index.html for metadata fields. + + """ + # Add a high priority so that numpydoc should process the docstrings first + app.connect('autodoc-process-docstring', xref_parameter_dict, priority=10000) + # according to https://www.sphinx-doc.org/en/master/extdev/index.html, since this + # extension does not store data in the Sphinx environment, and it is not doing anything + # that is not parallel safe, then parallel_read_safe can be set to True; plus + # numpydoc is set as True for parallel_read_safe, so the same logic should apply + return { + 'version': '0.0.1', + 'parallel_read_safe': True, + } diff --git a/pybaselines/_banded_utils.py b/pybaselines/_banded_utils.py index d119b82d..e9d8946f 100644 --- a/pybaselines/_banded_utils.py +++ b/pybaselines/_banded_utils.py @@ -7,9 +7,13 @@ """ import numpy as np -from scipy.linalg import solve_banded, solveh_banded +from scipy.linalg import ( + cholesky_banded, cho_solve_banded, get_blas_funcs, solve_banded, solveh_banded +) -from ._banded_solvers import solve_banded_penta +from scipy.sparse.linalg import factorized + +from ._banded_solvers import penta_factorize, penta_factorize_solve, solve_banded_penta from ._compat import _HAS_NUMBA, dia_object, diags, identity from ._validation import _check_lam @@ -83,6 +87,7 @@ def _lower_to_full(ab): The full, symmetric banded array. 
""" + ab = np.atleast_2d(ab) ab_rows, ab_columns = ab.shape ab_full = np.concatenate((np.zeros((ab_rows - 1, ab_columns)), ab)) ab_full[:ab_rows - 1] = ab[1:][::-1] @@ -224,7 +229,7 @@ def _sparse_to_banded(matrix): ): # offsets are typically negative to positive, but can get flipped during conversion # between sparse formats; LAPACK's banded format matches SciPy's diagonal data format - # when offsets are from positve to negative + # when offsets are from positive to negative if upper == diag_matrix.offsets[0]: ab = diag_matrix.data else: @@ -236,6 +241,170 @@ def _sparse_to_banded(matrix): return ab, (abs(lower), upper) +def _banded_to_sparse(ab, lower=True, sparse_format='csc'): + """ + Converts a banded matrix into its square sparse version. + + Parameters + ---------- + ab : numpy.ndarray, shape (M, N) + The banded matrix in LAPACK format, either with its full band structure (ie. + ``M == 2 * num_bands + 1``) or only the lower bands (``M == num_bands + 1``). + lower : bool, optional + False indicates that `ab` represents the full band structure, + while True (default) indicates `ab` is in lower format. + sparse_format : str, optional + The format for the output sparse array or matrix. Default is 'csc'. + + Returns + ------- + matrix : scipy.sparse.spmatrix or scipy.sparse.sparray, shape (N, N) + The sparse, banded matrix. + + Notes + ----- + The input `ab` banded matrix is assumed to have equal numbers of upper + and lower diagonals. + + """ + ab = np.atleast_2d(ab) + rows, columns = ab.shape + if lower: + ab_full = _lower_to_full(ab) + bands = rows - 1 + else: + ab_full = ab + bands = rows // 2 + + matrix = dia_object( + (ab_full, np.arange(bands, -bands - 1, -1)), shape=(columns, columns), + ).asformat(sparse_format) + + return matrix + + +def _banded_dot_vector_full(ab, x, ab_lu, a_full_shape): + """ + Computes the dot product of the matrix `a` in banded format (`ab`) with the vector `x`. 
+ + Parameters + ---------- + ab : array-like, shape (`n_lower` + `n_upper` + 1, N) + The banded matrix. + x : array-like, shape (N,) + The vector. + ab_lu : Container[int, int] + The number of lower (`n_lower`) and upper (`n_upper`) diagonals in `ab`. + a_full_shape : Container[int, int] + The number of rows and columns in the full `a` matrix. + + Returns + ------- + output : numpy.ndarray, shape (N,) + The dot product of `ab` and `x`. + + Notes + ----- + The function is faster if the input `ab` matrix is Fortran-ordered (has the + F_CONTIGUOUS numpy flag), since the underlying 'gbmv' BLAS function is + implemented in Fortran. + + """ + matrix = np.asarray(ab) + vector = np.asarray(x) + + gbmv = get_blas_funcs(['gbmv'], (matrix, vector))[0] + # gbmv computes y = alpha * a * x + beta * y where a is the banded matrix + # (in compressed form), x is the input vector, y is the output vector, and alpha + # and beta are scalar multipliers + output = gbmv( + m=a_full_shape[0], # number of rows of `a` matrix in full form + n=a_full_shape[1], # number of columns of `a` matrix in full form + kl=ab_lu[0], # sub-diagonals + ku=ab_lu[1], # super-diagonals + alpha=1.0, # alpha, required + a=matrix, # `a` matrix in compressed form + x=vector, # `x` vector + # trans=False, # transpose a, optional; may allow later + ) + + return output + + +def _banded_dot_vector_lower(ab, x): + """ + Computes the dot product of the matrix `a` in lower banded format (`ab`) with the vector `x`. + + Parameters + ---------- + ab : array-like, shape (`n_lower` + 1, N) + The banded matrix. + x : array-like, shape (N,) + The vector. + + Returns + ------- + output : numpy.ndarray, shape (N,) + The dot product of `ab` and `x`. + + Notes + ----- + The function is faster if the input `ab` matrix is Fortran-ordered (has the + F_CONTIGUOUS numpy flag), since the underlying 'sbmv' BLAS function is + implemented in Fortran. 
+ + """ + matrix = np.asarray(ab) + vector = np.asarray(x) + + sbmv = get_blas_funcs(['sbmv'], (matrix, vector))[0] + # sbmv computes y = alpha * a * x + beta * y where a is the symmetric banded matrix + # (in compressed lower form), x is the input vector, y is the output vector, and alpha + # and beta are scalar multipliers + output = sbmv( + k=matrix.shape[0] - 1, # number of bands + alpha=1.0, # alpha, required + a=matrix, # `a` matrix in compressed form + x=vector, # `x` vector + lower=1, # using lower banded format + ) + + return output + + +def _banded_dot_vector(ab, x, lower=True): + """ + Computes ``A @ x`` given `A` in banded format (`ab`) with the vector `x`. + + Parameters + ---------- + ab : array-like, shape (`n_lower` + `n_upper` + 1, N) or (`n_lower` + 1, N) + The banded matrix. Can either be in lower format or full. If given a full + banded matrix, assumes the number of lower (`n_lower`) and upper (`n_upper`) + diagonals are the same. If that assumption does not hold true, then must use + ``_banded_dot_vector_full`` directly. + x : array-like, shape (N,) + The vector. + lower : bool, optional + False indicates that `ab` represents the full band structure, + while True (default) indicates `ab` is in lower format. + + Returns + ------- + output : numpy.ndarray, shape (N,) + The dot product of `ab` and `x`. + + """ + if lower: + output = _banded_dot_vector_lower(ab, x) + else: + rows, columns = ab.shape + bands = rows // 2 + output = _banded_dot_vector_full(ab, x, (bands, bands), (columns, columns)) + + return output + + def difference_matrix(data_size, diff_order=2, diff_format=None): """ Creates an n-th order finite-difference matrix. @@ -567,7 +736,7 @@ def diff_penalty_matrix(data_size, diff_order=2, diff_format='csr'): Raises ------ ValueError - Raised if `diff_order` is greater or equal to `data_size`. + Raised if `diff_order` is not greater than `data_size`. 
Notes ----- @@ -582,7 +751,7 @@ def diff_penalty_matrix(data_size, diff_order=2, diff_format='csr'): """ if data_size <= diff_order: - raise ValueError('data size must be greater than or equal to the difference order.') + raise ValueError('data size must be greater than the difference order.') penalty_bands = diff_penalty_diagonals(data_size, diff_order, lower_only=False) penalty_matrix = dia_object( (penalty_bands, np.arange(diff_order, -diff_order - 1, -1)), shape=(data_size, data_size), @@ -862,3 +1031,86 @@ def reverse_penalty(self): self.penalty = self.penalty[::-1] self.original_diagonals = self.original_diagonals[::-1] self.reversed = not self.reversed + + def update_lam(self, lam): + """ + Updates the penalty with a new regularization parameter. + + Parameters + ---------- + lam : float + The new regularization parameter to apply to the penalty. + + """ + new_lam = _check_lam(lam, allow_zero=False) + self.penalty *= (new_lam / self.lam) + self.main_diagonal = self.penalty[self.main_diagonal_index].copy() + self.lam = new_lam + + def factorize(self, lhs, overwrite_ab=False, check_finite=False): + """ + Calculates the factorization of ``A`` for the linear equation ``A x = b``. + + Parameters + ---------- + lhs : array-like, shape (M, N) + The left-hand side of the equation, in banded format. `lhs` is assumed to be + some slight modification of `self.penalty` in the same format (reversed, lower, + number of bands, etc. are all the same). + overwrite_ab : bool, optional + Whether to overwrite `lhs` during factorization. Default is False. + check_finite : bool, optional + Whether to check if the inputs are finite. Default is False. + + Returns + ------- + factorization : numpy.ndarray or Callable + The factorization of `lhs`. 
+ + """ + if self.using_penta: + factorization = penta_factorize( + lhs, solver=self.penta_solver, overwrite_ab=overwrite_ab + ) + elif self.lower: + factorization = cholesky_banded( + lhs, lower=True, overwrite_ab=overwrite_ab, check_finite=check_finite + ) + else: + factorization = factorized(_banded_to_sparse(lhs, self.lower, sparse_format='csc')) + + return factorization + + def factorized_solve(self, factorization, rhs, overwrite_b=False, check_finite=False): + """ + Solves ``A x = b`` given the factorization of ``A``. + + Parameters + ---------- + factorization : numpy.ndarray or Callable + The factorization of ``A``, output by :meth:`PenalizedSystem.factorize`. + rhs : array-like, shape (N,) or (N, M) + The right-hand side of the equation. + overwrite_b : bool, optional + Whether to overwrite `rhs` when using any of the solvers. Default is False. + check_finite : bool, optional + Whether to check if the inputs are finite. Default is False. + + Returns + ------- + output : numpy.ndarray, shape (N,) or (N, M) + The solution to the linear system, `x`. + + """ + if self.using_penta: + output = penta_factorize_solve( + factorization, rhs, solver=self.penta_solver, overwrite_b=overwrite_b + ) + elif self.lower: + output = cho_solve_banded( + (factorization, True), rhs, overwrite_b=overwrite_b, check_finite=check_finite + ) + else: + output = factorization(rhs) + + return output diff --git a/pybaselines/_compat.py b/pybaselines/_compat.py index 206edd36..8b256566 100644 --- a/pybaselines/_compat.py +++ b/pybaselines/_compat.py @@ -52,7 +52,7 @@ def wrapper(*args, **kwargs): trapezoid = integrate.trapz -@lru_cache(maxsize=1) +@lru_cache(maxsize=None) def _use_sparse_arrays(): """ Checks that the installed SciPy version is new enough to use sparse arrays. 
@@ -84,7 +84,7 @@ def _use_sparse_arrays(): return _scipy_version[0] > 1 or (_scipy_version[0] == 1 and _scipy_version[1] >= 12) -@lru_cache(maxsize=1) +@lru_cache(maxsize=None) def _np_ge_2(): """ Checks that the installed NumPy version is version 2.0 or later. @@ -222,3 +222,60 @@ def diags(data, offsets=0, dtype=None, **kwargs): return sparse.diags_array(data, offsets=offsets, dtype=dtype, **kwargs) else: return sparse.diags(data, offsets=offsets, dtype=dtype, **kwargs) + + +@lru_cache(maxsize=None) +def _allows_1d_slice(): + """ + Determines whether 1d slicing is allowed for SciPy's sparse arrays. + + Returns + ------- + can_1d_slice : bool + Whether 1d slicing is allowed for the SciPy version being used. + + Notes + ----- + An equivalent function would be checking that the SciPy version is at least 1.15.0 or + that the output of `diags` is a sparse matrix. + + """ + try: + diags([1]).tocsc()[:, 0] + can_1d_slice = True + except NotImplementedError: + can_1d_slice = False + + return can_1d_slice + + +def _sparse_col_index(matrix, index): + """ + Indexes a column within a sparse matrix or array. + + Parameters + ---------- + matrix : scipy.sparse.spmatrix or scipy.sparse.sparray, shape (M, N) + The sparse object to index. + index : int + The column to select. + + Returns + ------- + output : numpy.ndarray, shape (M,) + The selected column from the sparse object. 
+ + """ + if not matrix.format == 'csc': + matrix = matrix.tocsc() + + if sparse.isspmatrix(matrix) or not _allows_1d_slice(): + # for sparse matrices, both matrix[:, [i]] and matrix[:, i] produce the + # same 2d output + output = matrix[:, [index]].toarray().ravel() + else: + # when allowed, want to use non-fancy indexing since it should be faster + # for sparse arrays + output = matrix[:, index].toarray() + + return output diff --git a/pybaselines/_spline_utils.py b/pybaselines/_spline_utils.py index 017765cf..d2473707 100644 --- a/pybaselines/_spline_utils.py +++ b/pybaselines/_spline_utils.py @@ -48,7 +48,9 @@ import numpy as np from scipy.interpolate import BSpline -from ._banded_utils import PenalizedSystem, _add_diagonals, _lower_to_full, _sparse_to_banded +from ._banded_utils import ( + PenalizedSystem, _add_diagonals, _lower_to_full, _sparse_to_banded +) from ._compat import _HAS_NUMBA, csr_object, dia_object, jit from ._validation import _check_array @@ -296,6 +298,11 @@ def _spline_knots(x, num_knots=10, spline_degree=3, penalized=True): knots : numpy.ndarray, shape (``num_knots + 2 * spline_degree``,) The array of knots for the spline, properly padded on each side. + Raises + ------ + ValueError + Raised if `num_knots` is less than 2. + Notes ----- If `penalized` is True, makes the knots uniformly spaced to create penalized @@ -305,11 +312,6 @@ def _spline_knots(x, num_knots=10, spline_degree=3, penalized=True): The knots are padded on each end with `spline_degree` extra knots to provide proper support for the outermost inner knots. - Raises - ------ - ValueError - Raised if `num_knots` is less than 2. - References ---------- Eilers, P., et al. Twenty years of P-splines. 
SORT: Statistics and Operations Research @@ -346,7 +348,7 @@ def _spline_knots(x, num_knots=10, spline_degree=3, penalized=True): return knots -@lru_cache(maxsize=1) +@lru_cache(maxsize=None) def _bspline_has_extrapolate(): """ Checks if ``scipy.interpolate.BSpline.design_matrix`` has the `extrapolate` keyword. @@ -501,6 +503,53 @@ def _numba_btb_bty(x, knots, spline_degree, y, weights, ab, rhs, basis_data): rhs[initial_idx + j] += work_val * y_val # B.T @ W @ y +# adapted from scipy (scipy/interpolate/_bspl.pyx/_norm_eq_lsq); see license above +@jit(nopython=True, cache=True) +def _numba_btb(x, knots, spline_degree, weights, ab, basis_data): + """ + Computes ``B.T @ W @ B`` for a spline. + + The result of ``B.T @ W @ B`` is stored in LAPACK's lower banded format (see + :func:`scipy.linalg.solveh_banded`). + + Parameters + ---------- + x : numpy.ndarray, shape (N,) + The x-values for the spline. + knots : numpy.ndarray, shape (K,) + The array of knots for the spline. Should be padded on each end with + `spline_degree` extra knots. + spline_degree : int + The degree of the spline. + weights : numpy.ndarray, shape (N,) + The weights for each y-value. + ab : numpy.ndarray, shape (`spline_degree` + 1, N) + An array of zeros that will be modified inplace to contain ``B.T @ W @ B`` in + lower banded format. + basis_data : numpy.ndarray, shape (``N * (spline_degree + 1)``,) + The data for all of the basis functions. The basis for each `x[i]` value is represented + by ``basis_data[i * (spline_degree + 1):(i + 1) * (spline_degree + 1)]``. If the basis, + `B` is a sparse matrix, then `basis_data` can be gotten using `B.tocsr().data`.
+ + """ + spline_order = spline_degree + 1 + num_bases = len(knots) - spline_order + + left_knot_idx = spline_degree + idx = 0 + for i in range(len(x)): + weight_val = weights[i] + left_knot_idx = _find_interval(knots, spline_degree, x[i], left_knot_idx, num_bases) + next_idx = idx + spline_order + work = basis_data[idx:next_idx] + idx = next_idx + initial_idx = left_knot_idx - spline_degree + for j in range(spline_order): + work_val = work[j] * weight_val # B.T @ W + for k in range(j + 1): + ab[j - k, initial_idx + k] += work_val * work[k] # B.T @ W @ B + + def _basis_midpoints(knots, spline_degree): """ Calculates the midpoint x-values of spline basis functions assuming evenly spaced knots. @@ -610,7 +659,18 @@ def same_basis(self, num_knots=100, spline_degree=3): @property def tk(self): - """The knots and spline degree for the spline.""" + """ + The knots and spline degree for the spline. + + Returns + ------- + knots : numpy.ndarray, shape (K,) + The knots for the spline. Has a shape of `K`, which is equal to + ``num_knots + 2 * spline_degree``. + spline_degree : int + The degree of the spline. + + """ return self.knots, self.spline_degree @@ -708,9 +768,20 @@ def tck(self): The knots, spline coefficients, and spline degree to reconstruct the spline. Convenience function for easily reconstructing the last solved spline with outside - modules, such as with SciPy's `BSpline`, to allow for other usages such as evaulating + modules, such as with SciPy's `BSpline`, to allow for other usages such as evaluating with different x-values. + Returns + ------- + knots : numpy.ndarray, shape (K,) + The knots for the spline. Has a shape of `K`, which is equal to + ``num_knots + 2 * spline_degree``. + coef : numpy.ndarray, shape (M,) + The spline coeffieicnts. Has a shape of `M`, which is the number of basis functions + (equal to ``K - spline_degree - 1`` or equivalently ``num_knots + spline_degree - 1``). + spline_degree : int + The degree of the spline. 
+ Raises ------ ValueError @@ -751,7 +822,7 @@ def reset_penalty_diagonals(self, lam=1, diff_order=2, allow_lower=True, reverse common setup, cubic splines, have a bandwidth of 3 and would not use the pentadiagonal solvers anyway. - Adds padding to the penalty diagonals to accomodate the different shapes of the spline + Adds padding to the penalty diagonals to accommodate the different shapes of the spline basis and the penalty to speed up calculations when the two are added. """ @@ -777,10 +848,11 @@ def solve_pspline(self, y, weights, penalty=None, rhs_extra=None): The y-values for fitting the spline. weights : numpy.ndarray, shape (N,) The weights for each y-value. - penalty : numpy.ndarray, shape (D, N) + penalty : numpy.ndarray, shape (M, N), optional The finite difference penalty matrix, in LAPACK's lower banded format (see - :func:`scipy.linalg.solveh_banded`) if `lower_only` is True or the full banded - format (see :func:`scipy.linalg.solve_banded`) if `lower_only` is False. + :func:`scipy.linalg.solveh_banded`) if `self.lower` is True or the full banded + format (see :func:`scipy.linalg.solve_banded`) if `self.lower` is False. Default + is None, which uses the object's penalty. rhs_extra : float or numpy.ndarray, shape (N,), optional If supplied, `rhs_extra` will be added to the right hand side (``B.T @ W @ y``) of the equation before solving. Default is None, which adds nothing. @@ -842,3 +914,56 @@ def solve_pspline(self, y, weights, penalty=None, rhs_extra=None): ) return self.basis.basis @ self.coef + + def _make_btwb(self, weights): + """ + Calculates the result of ``B.T @ W @ B``. + + Parameters + ---------- + weights : numpy.ndarray, shape (N,) + The weights for each y-value. + + Returns + ------- + btwb : numpy.ndarray, shape (L, N) + The calculation of ``B.T @ W @ B`` in the same banded format as the PSpline object. 
+ + """ + use_backup = True + # prefer numba version since it directly uses the basis + if self._use_numba: + basis_data = self.basis.basis.tocsr().data + # TODO if using the numba version, does fortran ordering speed up the calc? or + # can btwb just be c ordered? + + # create btwb array outside of numba function since numba's implementation + # of np.zeros is slower than numpy's (https://github.com/numba/numba/issues/7259) + btwb = np.zeros((self.basis.spline_degree + 1, self.basis._num_bases), order='F') + _numba_btb( + self.basis.x, self.basis.knots, self.basis.spline_degree, weights, btwb, + basis_data + ) + # TODO can probably make the full matrix directly within the numba + # btb calculation + if not self.lower: + btwb = _lower_to_full(btwb) + use_backup = False + + if use_backup: + # worst case scenario; have to convert weights to a sparse diagonal matrix, + # do B.T @ W @ B, and convert back to lower banded + full_matrix = ( + self.basis.basis.T + @ dia_object((weights, 0), shape=(self.basis._x_len, self.basis._x_len)).tocsr() + @ self.basis.basis + ) + btwb = _sparse_to_banded(full_matrix)[0] + + # take only the lower diagonals of the symmetric ab; cannot just do + # ab[spline_degree:] since some diagonals become fully 0 and are truncated from + # the data attribute, so have to calculate the number of bands first + if self.lower: + btwb = btwb[len(btwb) // 2:] + + return btwb diff --git a/pybaselines/_weighting.py b/pybaselines/_weighting.py index db0c8fcb..17ffa496 100644 --- a/pybaselines/_weighting.py +++ b/pybaselines/_weighting.py @@ -76,16 +76,16 @@ def _airpls(y, baseline, iteration, normalize_weights=False): Designates if there is a potential error with the calculation such that no further iterations should be performed. - References - ---------- - Zhang, Z.M., et al. Baseline correction using adaptive iteratively - reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146. 
- Notes ----- Equation 9 in the original algorithm was misprinted according to the author (https://github.com/zmzhang/airPLS/issues/8), so the correct weighting is used here. + References + ---------- + Zhang, Z.M., et al. Baseline correction using adaptive iteratively + reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146. + """ residual = y - baseline neg_mask = residual < 0 diff --git a/pybaselines/classification.py b/pybaselines/classification.py index fc589fb4..93d578d6 100644 --- a/pybaselines/classification.py +++ b/pybaselines/classification.py @@ -57,6 +57,7 @@ from ._compat import jit, trapezoid from ._validation import _check_scalar, _check_scalar_variable from ._weighting import _safe_std_mean +from .results import WhittakerResult from .utils import ( _MIN_FLOAT, ParameterWarning, _convert_coef, _interp_inplace, gaussian, estimate_window, pad_edges, relative_difference @@ -794,6 +795,9 @@ def fabc(self, data, lam=1e6, scale=None, num_std=3.0, diff_order=2, min_length= as False. * 'weights': numpy.ndarray, shape (N,) The weight array used for fitting the data. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Notes ----- @@ -839,7 +843,10 @@ def fabc(self, data, lam=1e6, scale=None, num_std=3.0, diff_order=2, min_length= whittaker_system.add_diagonal(whittaker_weights), whittaker_weights * y, overwrite_b=True, overwrite_ab=True ) - params = {'mask': mask, 'weights': whittaker_weights} + params = { + 'mask': mask, 'weights': whittaker_weights, + 'result': WhittakerResult(whittaker_system, whittaker_weights) + } return baseline, params @@ -890,12 +897,18 @@ def rubberband(self, data, segments=1, lam=None, diff_order=2, weights=None, ------- baseline : numpy.ndarray, shape (N,) The calculated baseline. - dict + params : dict A dictionary with the following items: * 'mask': numpy.ndarray, shape (N,) The boolean array designating baseline points as True and peak points as False. 
+ * 'weights': numpy.ndarray, shape (N,) + The weight array used for fitting the data. Only returned if `lam` is a + positive value. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Only returned if `lam` is a positive value. Raises ------ @@ -948,16 +961,24 @@ def rubberband(self, data, segments=1, lam=None, diff_order=2, weights=None, mask = np.zeros(self._shape, dtype=bool) mask[np.unique(total_vertices)] = True np.logical_and(mask, weight_array, out=mask) + + params = {'mask': mask} if lam is not None and lam != 0: - _, _, whittaker_system = self._setup_whittaker(y, lam, diff_order, mask) + _, whittaker_weights, whittaker_system = self._setup_whittaker( + y, lam, diff_order, mask + ) baseline = whittaker_system.solve( - whittaker_system.add_diagonal(mask), mask * y, + whittaker_system.add_diagonal(whittaker_weights), whittaker_weights * y, overwrite_b=True, overwrite_ab=True ) + params.update({ + 'weights': whittaker_weights, + 'result': WhittakerResult(whittaker_system, whittaker_weights) + }) else: baseline = np.interp(self.x, self.x[mask], y[mask]) - return baseline, {'mask': mask} + return baseline, params _classification_wrapper = _class_wrapper(_Classification) @@ -1683,6 +1704,11 @@ def _haar(num_points, scale=2): wavelet : numpy.ndarray The Haar wavelet. + Raises + ------ + TypeError + Raised if `scale` is not an integer. + Notes ----- This implementation is only designed to work for integer scales. @@ -1690,11 +1716,6 @@ def _haar(num_points, scale=2): Matches pywavelets's Haar implementation after applying patches from pywavelets issue #365 and pywavelets pull request #580. - Raises - ------ - TypeError - Raised if `scale` is not an integer. 
- References ---------- https://wikipedia.org/wiki/Haar_wavelet diff --git a/pybaselines/misc.py b/pybaselines/misc.py index 493fb827..799aac41 100644 --- a/pybaselines/misc.py +++ b/pybaselines/misc.py @@ -157,7 +157,25 @@ def beads(self, data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asy Decomposes the input data into baseline and pure, noise-free signal by modeling the baseline as a low pass filter and by considering the signal and its derivatives - as sparse [1]_. + as sparse. The minimized equation for calculating the pure signal is given as: + + .. math:: + + 0.5 * ||H(y - s)||_2^2 + + \lambda_0 \sum\limits_{i}^{N} \theta(s_i) + + \lambda_1 \sum\limits_{i}^{N - 1} \phi(\Delta^1 s_i) + + \lambda_2 \sum\limits_{i}^{N - 2} \phi(\Delta^2 s_i) + + where y is the measured data, s is the calculated pure signal, + :math:`H` is a high pass filter, :math:`\theta()` is a differentiable, symmetric + or asymmetric penalty function on the calculated signal, :math:`\Delta^1` and + :math:`\Delta^2` are finite-difference operators of order 1 and 2, respectively, + and :math:`\phi()` is a differentiable, symmetric penalty function approximating the + L1 loss (mean absolute error) applied to the first and second derivatives of the + calculated signal. + + The calculated baseline, v, upon convergence of calculating the pure signal is + given by :math:`v = y - s - H(y - s)`. Parameters ---------- @@ -200,10 +218,10 @@ def beads(self, data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asy An integer describing the high pass filter type. The order of the high pass filter is ``2 * filter_type``. Default is 1 (second order filter). cost_function : {2, 1, "l1_v1", "l1_v2"}, optional - An integer or string indicating which approximation of the l1 (absolute value) - penalty to use. 1 or "l1_v1" will use :math:`l(x) = \sqrt{x^2 + \text{eps_1}}` + An integer or string indicating which approximation of the L1 (absolute value) + penalty to use. 
1 or "l1_v1" will use :math:`\phi(x) = \sqrt{x^2 + \text{eps_1}}` and 2 (default) or "l1_v2" will use - :math:`l(x) = |x| - \text{eps_1}\log{(|x| + \text{eps_1})}`. + :math:`\phi(x) = |x| - \text{eps_1}\log{(|x| + \text{eps_1})}`. max_iter : int, optional The maximum number of iterations. Default is 50. tol : float, optional @@ -246,13 +264,30 @@ def beads(self, data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asy each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'fidelity': float + The fidelity term of the final fit, given as :math:`0.5 * ||H(y - s)||_2^2`. + + .. versionadded:: 1.3.0 + + * 'penalty' : tuple[float, float, float] + The penalty terms of the final fit before multiplication with the `lam_d` + terms. These correspond to :math:`\sum\limits_{i}^{N} \theta(s_i)`, + :math:`\sum\limits_{i}^{N - 1} \phi(\Delta^1 s_i)`, and + :math:`\sum\limits_{i}^{N - 2} \phi(\Delta^2 s_i)`, respectively. + + .. versionadded:: 1.3.0 + + Raises + ------ + ValueError + Raised if `asymmetry`, `lam_0`, `lam_1`, `lam_2`, or `alpha` is less than 0. Notes ----- If any of `lam_0`, `lam_1`, or `lam_2` are None, uses the proceedure recommended in [1]_ to base the `lam_d` values on the inverse of the L1 norm values of the `d'th` derivative - of `data`. In detail, it is assumed that `lam_0`, `lam_1`, and `lam_2` are related by - some constant `alpha` such that ``lam_0 = alpha / ||data||_1``, + of `data`. In detail, it is assumed that `lam_0`, `lam_1`, and `lam_2` are approximately + related by some constant `alpha` such that ``lam_0 = alpha / ||data||_1``, ``lam_1 = alpha / ||data'||_1``, and ``lam_2 = alpha / ||data''||_1``. 
When finding the best parameters for fitting, it is usually best to find the optimal @@ -265,12 +300,7 @@ def beads(self, data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asy any of the regularization parameters within beads seems to increase the curvature of the baseline, the actual effect is due to the increased penalty on the signal. This can be readily observed by looking at the 'signal' key within the output parameter dictionary - with varying `lam_0`, `lam_1`, are `lam_2` values. - - Raises - ------ - ValueError - Raised if `asymmetry`, `lam_0`, `lam_1`, or `lam_2` is less than 0. + with varying `lam_0`, `lam_1`, `lam_2`, or `alpha` values. References ---------- @@ -1036,11 +1066,14 @@ def _sparse_beads(y, freq_cutoff=0.005, lam_0=1.0, lam_1=1.0, lam_2=1.0, asymmet h = B.dot(A_factor.solve(y - x)) d1_x, d2_x = _abs_diff(x, smooth_half_window) abs_x, big_x, theta = _beads_theta(x, asymmetry, eps_0) + fidelity = 0.5 * (h @ h) + d1_loss = _beads_loss(d1_x, use_v2_loss, eps_1).sum() + d2_loss = _beads_loss(d2_x, use_v2_loss, eps_1).sum() cost = ( - 0.5 * h.dot(h) + fidelity + lam_0 * theta - + lam_1 * _beads_loss(d1_x, use_v2_loss, eps_1).sum() - + lam_2 * _beads_loss(d2_x, use_v2_loss, eps_1).sum() + + lam_1 * d1_loss + + lam_2 * d2_loss ) cost_difference = relative_difference(cost_old, cost) tol_history[i] = cost_difference @@ -1051,7 +1084,12 @@ def _sparse_beads(y, freq_cutoff=0.005, lam_0=1.0, lam_1=1.0, lam_2=1.0, asymmet diff = y - x baseline = diff - B.dot(A_factor.solve(diff)) - return baseline, {'signal': x, 'tol_history': tol_history[:i + 1]} + params = { + 'signal': x, 'tol_history': tol_history[:i + 1], 'fidelity': fidelity, + 'penalty': (theta, d1_loss, d2_loss) + } + + return baseline, params def _process_lams(y, alpha, lam_0, lam_1, lam_2): @@ -1076,6 +1114,12 @@ def _process_lams(y, alpha, lam_0, lam_1, lam_2): output_lams : list[float, float, float] The `lam_0`, `lam_1`, and `lam_2` values. 
+ Raises + ------ + ValueError + Raised if `alpha` is not positive or if all three of `lam_0`, `lam_1`, and `lam_2` + are 0. + Notes ----- Follows the proceedure recommended in [1]_ to base the `lam_d` values on the inverse of @@ -1086,12 +1130,6 @@ def _process_lams(y, alpha, lam_0, lam_1, lam_2): values are calculated assuming `alpha` is 1. If only `lam_0` is not None, the corresponding `alpha` value is calculated and `lam_1` and `lam_2` are set accordingly. - Raises - ------ - ValueError - Raised if `alpha` is not positive or if all three of `lam_0`, `lam_1`, and `lam_2` - are 0. - References ---------- .. [1] Ning, X., et al. Chromatogram baseline estimation and denoising using sparsity @@ -1273,11 +1311,14 @@ def _banded_beads(y, freq_cutoff=0.005, lam_0=1.0, lam_1=1.0, lam_2=1.0, asymmet solveh_banded(A_lower, y - x, check_finite=False, overwrite_b=True, lower=True), ab_lu, full_shape ) + fidelity = 0.5 * (h @ h) + d1_loss = _beads_loss(d1_x, use_v2_loss, eps_1).sum() + d2_loss = _beads_loss(d2_x, use_v2_loss, eps_1).sum() cost = ( - 0.5 * h.dot(h) + fidelity + lam_0 * theta - + lam_1 * _beads_loss(d1_x, use_v2_loss, eps_1).sum() - + lam_2 * _beads_loss(d2_x, use_v2_loss, eps_1).sum() + + lam_1 * d1_loss + + lam_2 * d2_loss ) cost_difference = relative_difference(cost_old, cost) tol_history[i] = cost_difference @@ -1295,7 +1336,12 @@ def _banded_beads(y, freq_cutoff=0.005, lam_0=1.0, lam_1=1.0, lam_2=1.0, asymmet ) ) - return baseline, {'signal': x, 'tol_history': tol_history[:i + 1]} + params = { + 'signal': x, 'tol_history': tol_history[:i + 1], 'fidelity': fidelity, + 'penalty': (theta, d1_loss, d2_loss) + } + + return baseline, params @_misc_wrapper @@ -1388,6 +1434,11 @@ def beads(data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asymmetry completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + Raises + ------ + ValueError + Raised if `asymmetry` is less than 0. 
+ Notes ----- If any of `lam_0`, `lam_1`, or `lam_2` are None, uses the proceedure recommended in [4]_ @@ -1400,11 +1451,6 @@ def beads(data, freq_cutoff=0.005, lam_0=None, lam_1=None, lam_2=None, asymmetry `freq_cutoff` for the noise in the data before adjusting any other parameters since it has the largest effect [5]_. - Raises - ------ - ValueError - Raised if `asymmetry` is less than 0. - References ---------- .. [4] Ning, X., et al. Chromatogram baseline estimation and denoising using sparsity diff --git a/pybaselines/morphological.py b/pybaselines/morphological.py index 0ca86f5b..83e5873b 100644 --- a/pybaselines/morphological.py +++ b/pybaselines/morphological.py @@ -13,6 +13,7 @@ from ._algorithm_setup import _Algorithm, _class_wrapper from ._validation import _check_lam, _check_half_window +from .results import PSplineResult, WhittakerResult from .utils import _mollifier_kernel, _sort_array, pad_edges, padded_convolve, relative_difference @@ -88,6 +89,9 @@ def mpls(self, data, half_window=None, lam=1e6, p=0.0, diff_order=2, tol=None, m The weight array used for fitting the data. * 'half_window': int The half window used for the morphological calculations. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -179,7 +183,10 @@ def mpls(self, data, half_window=None, lam=1e6, p=0.0, diff_order=2, tol=None, m overwrite_ab=True, overwrite_b=True ) - params = {'weights': weight_array, 'half_window': half_wind} + params = { + 'weights': weight_array, 'half_window': half_wind, + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @_Algorithm._register @@ -725,6 +732,9 @@ def mpspline(self, data, half_window=None, lam=1e4, lam_smooth=1e-2, p=0.0, The weight array used for fitting the data. * 'half_window': int The half window used for the morphological calculations. 
+ * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -787,10 +797,14 @@ def mpspline(self, data, half_window=None, lam=1e4, lam_smooth=1e-2, p=0.0, # TODO should this use np.isclose instead? weight_array = np.where(spline_fit == optimal_opening, 1 - p, p) - pspline.penalty = (_check_lam(lam) / lam_smooth) * pspline.penalty + pspline.update_lam(lam) baseline = pspline.solve_pspline(spline_fit, weight_array) + params = { + 'half_window': half_window, 'weights': weight_array, + 'result': PSplineResult(pspline, weight_array) + } - return baseline, {'half_window': half_window, 'weights': weight_array} + return baseline, params @_Algorithm._register(sort_keys=('signal',)) def jbcd(self, data, half_window=None, alpha=0.1, beta=1e1, gamma=1., beta_mult=1.1, diff --git a/pybaselines/optimizers.py b/pybaselines/optimizers.py index 3db648c5..c0e50d98 100644 --- a/pybaselines/optimizers.py +++ b/pybaselines/optimizers.py @@ -10,15 +10,17 @@ """ from collections import defaultdict +import inspect import itertools from math import ceil +import warnings import numpy as np from . import classification, misc, morphological, polynomial, smooth, spline, whittaker from ._algorithm_setup import _Algorithm, _class_wrapper from ._validation import _check_optional_array -from .utils import _check_scalar, _get_edges, _sort_array, gaussian +from .utils import ParameterWarning, _check_scalar, _get_edges, _sort_array, gaussian class _Optimizers(_Algorithm): @@ -63,7 +65,7 @@ def collab_pls(self, data, average_dataset=True, method='asls', method_kwargs=No :meth:`~.Baseline.aspls` or :meth:`~.Baseline.pspline_aspls` methods. * 'method_params': dict[str, list] A dictionary containing the output parameters for each individual fit. - Keys will depend on the selected method and will have a list of values, + Keys will depend on the selected `method` and will have a list of values, with each item corresponding to a fit. 
Raises @@ -141,7 +143,7 @@ def collab_pls(self, data, average_dataset=True, method='asls', method_kwargs=No @_Algorithm._register(skip_sorting=True) def optimize_extended_range(self, data, method='asls', side='both', width_scale=0.1, - height_scale=1., sigma_scale=1 / 12, min_value=2, max_value=8, + height_scale=1., sigma_scale=1 / 12, min_value=2, max_value=9, step=1, pad_kwargs=None, method_kwargs=None): """ Extends data and finds the best parameter value for the given baseline method. @@ -177,11 +179,11 @@ def optimize_extended_range(self, data, method='asls', side='both', width_scale= be the exponent to raise to the power of 10 (eg. a `min_value` value of 2 designates a `lam` value of 10**2). Default is 2. max_value : int or float, optional - The maximum value for the `lam` or `poly_order` value to use with the + The maximum value for the `lam` or `poly_order` value to potentially use with the indicated method. If using a polynomial method, `max_value` must be an integer. If using a Whittaker-smoothing-based method, `max_value` should be the exponent to raise to the power of 10 (eg. a `max_value` value of 3 - designates a `lam` value of 10**3). Default is 8. + designates a `lam` value of 10**3). Default is 9. step : int or float, optional The step size for iterating the parameter value from `min_value` to `max_value`. If using a polynomial method, `step` must be an integer. If using a @@ -200,36 +202,44 @@ def optimize_extended_range(self, data, method='asls', side='both', width_scale= ------- baseline : numpy.ndarray, shape (N,) The baseline calculated with the optimum parameter. - method_params : dict + params : dict A dictionary with the following items: - * 'optimal_parameter': int or float + * 'optimal_parameter': float or int The `lam` or `poly_order` value that produced the lowest root-mean-squared-error. * 'min_rmse': float + The minimum root-mean-squared-error obtained when using + the optimal parameter. .. 
deprecated:: 1.2.0 - The 'min_rmse' key will be removed from the ``method_params`` + The 'min_rmse' key will be removed from the `params` dictionary in pybaselines version 1.4.0 in favor of the new 'rmse' key which returns all root-mean-squared-error values. - * 'rmse' : numpy.ndarray + * 'rmse': numpy.ndarray, shape (P,) The array of the calculated root-mean-squared-error for each of the fits. + + .. versionadded:: 1.2.0 + * 'method_params': dict A dictionary containing the output parameters for the optimal fit. - Items will depend on the selected method. + Items will depend on the selected `method`. Raises ------ ValueError - Raised if `side` is not 'left', 'right', or 'both'. + Raised if `side` is not 'left', 'right', or 'both'. Also raised if using a + non-polynomial method and `min_value`, `max_value`, or `step` is + greater than 15. TypeError Raised if using a polynomial method and `min_value`, `max_value`, or `step` is not an integer. - ValueError - Raised if using a Whittaker-smoothing-based method and `min_value`, - `max_value`, or `step` is greater than 100. + + See Also + -------- + Baseline.optimize_pls Notes ----- @@ -247,6 +257,10 @@ def optimize_extended_range(self, data, method='asls', side='both', width_scale= value for fitting non-padded data will be slightly lower than the optimal value obtained from :meth:`~.Baseline.optimize_extended_range`. + The range of values to test is generated using + ``numpy.arange(min_value, max_value, step)``, so `max_value` is likely not included in + the range of tested values. + References ---------- .. [1] Zhang, F., et al. 
An Automatic Baseline Correction Method Based on @@ -266,26 +280,12 @@ def optimize_extended_range(self, data, method='asls', side='both', width_scale= ) method = method.lower() if func_module == 'polynomial' or method in ('dietrich', 'cwt_br'): - if any(not isinstance(val, int) for val in (min_value, max_value, step)): - raise TypeError(( - 'min_value, max_value, and step must all be integers when' - ' using a polynomial method' - )) param_name = 'poly_order' else: - if any(val > 15 for val in (min_value, max_value, step)): - raise ValueError(( - 'min_value, max_value, and step should be the power of 10 to use ' - '(eg. min_value=2 denotes 10**2), not the actual "lam" value, and ' - 'thus should not be greater than 15' - )) param_name = 'lam' - if step == 0 or min_value == max_value: - do_optimization = False - else: - do_optimization = True - if max_value < min_value and step > 0: - step = -step + variables = _param_grid( + min_value, max_value, step, polynomial_fit=param_name == 'poly_order' + ) added_window = int(self._size * width_scale) for key in ('weights', 'alpha'): @@ -352,22 +352,7 @@ def optimize_extended_range(self, data, method='asls', side='both', width_scale= new_fitter = fit_object._override_x(fit_x_data, new_sort_order=new_sort_order) baseline_func = getattr(new_fitter, method) - if do_optimization: - if param_name == 'poly_order': - variables = np.arange(min_value, max_value + step, step) - else: - # use linspace for floats since it ensures endpoints are included; use - # logspace to skip having to do 10.0**linspace(...) 
- variables = np.logspace( - min_value, max_value, ceil((max_value - min_value) / step), base=10.0 - ) - # double check that variables has at least one item; otherwise skip the optimization - if variables.size == 0: - do_optimization = False - if not do_optimization: - variables = np.array([min_value]) - if param_name == 'lam': - variables = 10.0**variables + upper_idx = len(fit_data) - upper_bound min_sum_squares = np.inf best_idx = 0 @@ -462,7 +447,7 @@ def adaptive_minmax(self, data, poly_order=None, method='modpoly', weights=None, An array of the two polynomial orders used for the fitting. * 'method_params': dict[str, list] A dictionary containing the output parameters for each individual fit. - Keys will depend on the selected method and will have a list of values, + Keys will depend on the selected `method` and will have a list of values, with each item corresponding to a fit. Raises @@ -584,7 +569,7 @@ def custom_bc(self, data, method='asls', regions=((None, None),), sampling=1, la The truncated baseline before interpolating from `P` points to `N` points. * 'method_params': dict A dictionary containing the output parameters for the fit using the selected - method. + `method`. Raises ------ @@ -686,10 +671,465 @@ def custom_bc(self, data, method='asls', regions=((None, None),), sampling=1, la return baseline, params + @_Algorithm._register(skip_sorting=True) + def optimize_pls(self, data, method='arpls', opt_method='U-Curve', min_value=4, max_value=7, + step=0.5, method_kwargs=None, euclidean=False, rho=None, n_samples=0): + """ + Optimizes the regularization parameter for penalized least squares methods. + + Parameters + ---------- + data : array-like, shape (N,) + The y-values of the measured data, with N data points. + method : str, optional + A string indicating the Whittaker-smoothing or spline method + to use for fitting the baseline. Default is 'arpls'. + opt_method : {'U-Curve', 'GCV', 'BIC'}, optional + The optimization method used to optimize `lam`. 
Supported methods are: + + * 'U-Curve' + * 'GCV' + * 'BIC' + + Details on each optimization method are in the Notes section below. + min_value : int or float, optional + The minimum value for the `lam` value to use with the indicated method. Should + be the exponent to raise to the power of 10 (eg. a `min_value` value of 2 + designates a `lam` value of 10**2). Default is 4. + max_value : int or float, optional + The maximum value for the `lam` value to use with the indicated method. Should + be the exponent to raise to the power of 10 (eg. a `max_value` value of 3 + designates a `lam` value of 10**3). Default is 7. + step : int or float, optional + The step size for iterating the parameter value from `min_value` to `max_value`. + Should be the exponent to raise to the power of 10 (eg. a `step` value of 1 + designates a `lam` value of 10**1). Default is 0.5. + method_kwargs : dict, optional + A dictionary of keyword arguments to pass to the selected `method` function. + Default is None, which will use an empty dictionary. + euclidean : bool, optional + Only used if `opt_method` is 'U-curve'. If False (default), the optimization metric + is the minimum of the sum of the normalized fidelity and penalty values [1]_, which is + equivalent to the minimum graph distance from the origin. If True, the metric is the + euclidean distance from the origin, similar to [2]_ and [3]_. + rho : float, optional + Only used if `opt_method` is 'GCV'. The stabilization parameter for the modified + generalized cross validation (mGCV) criteria. A value of 1 defines normal GCV, while + higher values of `rho` stabilize the scores to make a single, global minima value + more likely (when applied to smoothing). If None (default), the value of `rho` will + be selected following [4]_, with the value being 1.3 if ``len(data)`` is less than + 100, otherwise 2. + n_samples : int, optional + Only used if `opt_method` is 'GCV' or 'BIC'. If 0 (default), will calculate the + analytical trace. 
Otherwise, will use stochastic trace estimation with a matrix of + (N, `n_samples`) Rademacher random variables (ie. either -1 or 1). + + Returns + ------- + baseline : numpy.ndarray, shape (N,) + The baseline calculated with the optimum parameter. + params : dict + A dictionary with the following items: + + * 'optimal_parameter': float + The `lam` value that minimized the computed metric. + * 'metric': numpy.ndarray, shape (P,) + The computed metric for each `lam` value tested. + * 'method_params': dict + A dictionary containing the output parameters for the optimal fit. + Items will depend on the selected `method`. + * 'fidelity': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'U-curve'. The computed non-normalized + fidelity term for each `lam` value tested. For + most algorithms within pybaselines, this is equivalent to the weighted residual + sum of squares (eg. ``sum(weights * (data - baseline)**2)``) + * 'penalty': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'U-curve'. The computed non-normalized penalty + values for each `lam` value tested. + * 'wrss': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'GCV' or 'BIC'. The weighted residual sum of + squares (eg. ``sum(weights * (data - baseline)**2)``) for each `lam` value tested. + * 'trace': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'GCV' or 'BIC. The computed trace of the smoother + matrix for each `lam` value tested, which signifies the effective dimension + for the system. + + Raises + ------ + ValueError + Raised if `opt_method` is 'GCV' and the input `rho` is less than 1. + NotImplementedError + Raised if `method` is 'beads' and `opt_method` is 'GCV' or 'BIC'. 
+ + See Also + -------- + Baseline.optimize_extended_range + + Notes + ----- + `opt_method` 'U-Curve' requires that the sum of the normalized penalty and fidelity values + is roughly 'U' shaped (see Figure 5 in [1]_), which depends on appropriate selection of + `min_value` and `max_value` such that penalty continually decreases and fidelity + continually increases as `lam` increases. + + For `opt_method` 'U-Curve', the multipliers on `lam` used in methods `drpls` or `aspls`, + ``(1 - eta * weights)`` and ``alpha``, respectively, are omitted from the penalty term. + Otherwise, the penalty term shows little change with varying `lam` and gives bad results. + Likewise, for method='iasls', the penalty term from `lam_1` is omitted since its gradient + with respect to `lam` is assumed to be 0. More advanced optimization varying both `lam` + and `lam_1` is possible, but not supported within this method. + + Uses a grid search for optimization since the objective functions for all supported + `opt_method` inputs are highly non-smooth (ie. many local minima) when performing + baseline correction, due to the reliance of calculated weights on the input `lam`. + Scalar minimization using :func:`scipy.optimize.minimize_scalar` was found to + perform okay in most cases, but it would also not allow some methods like 'U-Curve' + which requires calculating with all `lam` values before computing the objective. + + The range of values to test is generated using + ``numpy.arange(min_value, max_value, step)``, so `max_value` is likely not included in + the range of tested values. + + References + ---------- + .. [1] Park, A., et al. Automatic Selection of Optimal Parameter for Baseline Correction + using Asymmetrically Reweighted Penalized Least Squares. Journal of the Institute + of Electronics and Information Engineers, 2016, 53(3), 124-131. + .. [2] Belge, M., et al. Efficient determination of multiple regularization parameters in + a generalized L-curve framework. 
Inverse Problems, 2002, 18, 1161-1183. + .. [3] Andriyana, Y., et al. P-splines quantile regression estimation in varying + coefficient models. TEST, 2014, 23, 153-194. + .. [4] Lukas, M., et al. Practical use of robust GCV and modified GCV for spline + smoothing. Computational Statistics, 2016, 31, 269-289. + + """ + y, baseline_func, _, method_kws, fitting_object = self._setup_optimizer( + data, method, (whittaker, morphological, spline, classification, misc), + method_kwargs, copy_kwargs=False + ) + method = method.lower() + if 'lam' in method_kws: + # TODO maybe just warn and pop out instead? Would need to copy input kwargs in that + # case so that the original input is not modified + raise ValueError('lam must not be specified within method_kwargs') + + lam_range = _param_grid(min_value, max_value, step, polynomial_fit=False) + selected_method = opt_method.lower().replace('-', '_').replace('_', '') + if selected_method in ('ucurve',): + baseline, params = _optimize_ucurve( + y, selected_method, method, method_kws, baseline_func, fitting_object, lam_range, + euclidean + ) + elif selected_method in ('gcv', 'bic'): + baseline, params = _optimize_ed( + y, selected_method, method, method_kws, baseline_func, fitting_object, lam_range, + rho, n_samples + ) + else: + raise ValueError(f'{opt_method} is not a supported opt_method input') + + return baseline, params + _optimizers_wrapper = _class_wrapper(_Optimizers) +def _param_grid(min_value, max_value, step, polynomial_fit=False): + """ + Creates a range of parameters to use for grid optimization. + + Parameters + ---------- + min_value : int or float + The minimum parameter value. If `polynomial_fit` is True, `min_value` must be an + integer. Otherwise, `min_value` should be the exponent to raise to the power of + 10 (eg. a `min_value` value of 2 designates a `lam` value of 10**2). + max_value : int or float, optional + The maximum parameter value for the range. 
If `polynomial_fit` is True, `max_value` + must be an integer. Otherwise, `min_value` should be the exponent to raise to the + power of 10 (eg. a `max_value` value of 5 designates a `lam` value of 10**5). + step : int or float + If `polynomial_fit` is True, `step` must be an integer. Otherwise, `step` should + be the exponent to raise to the power of 10 (eg. a `step` value of 1 + designates a `lam` value of 10**1). + polynomial_fit : bool, optional + Whether the parameters define polynomial degrees. Default is False. + + Returns + ------- + values : numpy.ndarray + The range of parameters. + + Raises + ------ + TypeError + Raised if using a polynomial method and `min_value`, `max_value`, or + `step` is not an integer. + ValueError + Raised if using a Whittaker-smoothing-based method and `min_value`, + `max_value`, or `step` is greater than 15. + + Notes + ----- + The complete range of values for the grid is generated using + ``numpy.arange(min_value, max_value, step)``, so `max_value` is likely not included. + + """ + if polynomial_fit: + if any(not isinstance(val, int) for val in (min_value, max_value, step)): + raise TypeError(( + 'min_value, max_value, and step must all be integers when' + ' using a polynomial method' + )) + else: + if any(val > 15 for val in (min_value, max_value, step)): + raise ValueError(( + 'min_value, max_value, and step should be the power of 10 to use ' + '(eg. 
min_value=2 denotes 10**2), not the actual "lam" value, and ' + 'thus should not be greater than 15' + )) + + if step == 0 or min_value == max_value: + do_optimization = False + else: + do_optimization = True + if polynomial_fit: + values = np.arange(min_value, max_value, step) + else: + # explicitly set float dtype so that input dtypes are uninportant for arange step size + values = 10.0**np.arange(min_value, max_value, step, dtype=float) + # double check that values has at least two items; otherwise skip the optimization + if values.size < 2: + do_optimization = False + + if not do_optimization: + warnings.warn( + ('min_value, max_value, and step were set such that only a single value ' + 'was fit'), ParameterWarning, stacklevel=2 + ) + values = np.array([min_value]) + if not polynomial_fit: + values = 10.0**values + + return values + + +def _optimize_ucurve(y, opt_method, method, method_kws, baseline_func, baseline_obj, + lam_range, euclidean): + """ + Performs U-curve optimization based on the fit fidelity and penalty. + + Parameters + ---------- + y : _type_ + _description_ + opt_method : _type_ + _description_ + method : _type_ + _description_ + method_kws : _type_ + _description_ + baseline_func : _type_ + _description_ + baseline_obj : _Algorithm + _description_ + lam_range : _type_ + _description_ + euclidean : bool, optional + _description_. Default is False. + + Returns + ------- + _type_ + _description_ + + References + ---------- + .. [1] Park, A., et al. Automatic Selection of Optimal Parameter for Baseline Correction using + Asymmetrically Reweighted Penalized Least Squares. Journal of the Institute of + Electronics and Information Engineers, 2016, 53(3), 124-131. + .. [2] Andriyana, Y., et al. P-splines quantile regression estimation in varying coefficient + models. TEST, 2014, 23(1), 153-194. 
+ + """ + if 'pspline' in method or method in ('mixture_model', 'irsqr'): + spline_fit = True + else: + spline_fit = False + + using_drpls = 'drpls' in method + using_beads = method == 'beads' + if using_beads: + param_key = 'alpha' + else: + param_key = 'lam' + # some methods have different defaults, so have to inspect them + method_signature = inspect.signature(baseline_func).parameters + diff_order = method_kws.get( + 'diff_order', method_signature['diff_order'].default + ) + + n_lams = len(lam_range) + penalty = np.empty(n_lams) + fidelity = np.empty(n_lams) + for i, lam in enumerate(lam_range): + fit_baseline, fit_params = baseline_func(y, **{param_key: lam}, **method_kws) + if using_beads: + fit_penalty = sum(fit_params['penalty']) + fit_fidelity = fit_params['fidelity'] + else: + if spline_fit: + penalized_object = fit_params['result'].tck[1] # the spline coefficients + else: + # have to ensure sort order of the fit baseline since + # diff(y_ordered) != diff(y_disordered); spline coefficients are always + # sorted since they correspond to sorted x-values + penalized_object = _sort_array(fit_baseline, baseline_obj._sort_order) + + # Park, et al. multiplied the penalty by lam (Equation 8), but I think that may have + # been a typo since it otherwise favors low lam values and does not produce a + # penalty plot shown in Figure 4 in the Park, et al. 
reference + partial_penalty = np.diff(penalized_object, diff_order) + fit_penalty = partial_penalty @ partial_penalty + + residual = y - fit_baseline + if 'weights' in fit_params: + fit_fidelity = fit_params['weights'] @ residual**2 + else: + fit_fidelity = residual @ residual + + if using_drpls: + if spline_fit: # still need to sort the baseline + additional_fidelity = np.diff( + _sort_array(fit_baseline, baseline_obj._sort_order), 1 + ) + else: + additional_fidelity = np.diff(penalized_object, 1) + fit_fidelity += additional_fidelity @ additional_fidelity + + penalty[i] = fit_penalty + fidelity[i] = fit_fidelity + + # add fidelity and penalty to params before potentially normalizing + params = {'fidelity': fidelity, 'penalty': penalty} + if lam_range.size > 1: + penalty = (penalty - penalty.min()) / (penalty.max() - penalty.min()) + fidelity = (fidelity - fidelity.min()) / (fidelity.max() - fidelity.min()) + if euclidean: + metric = np.sqrt(fidelity**2 + penalty**2) + else: # graph distance from the origin, ie. only travelling along x and y axes + metric = fidelity + penalty + + best_lam = lam_range[np.argmin(metric)] + baseline, best_params = baseline_func(y, **{param_key: best_lam}, **method_kws) + params.update({'optimal_parameter': best_lam, 'metric': metric, 'method_params': best_params}) + + return baseline, params + + +def _optimize_ed(y, opt_method, method, method_kws, baseline_func, baseline_obj, + lam_range, rho, n_samples): + """ + Optimizes the regularization coefficient using criteria based on the effective dimension. + + Parameters + ---------- + y : _type_ + _description_ + opt_method : {'gcv', 'bic'} + _description_ + method : _type_ + _description_ + method_kws : _type_ + _description_ + baseline_func : _type_ + _description_ + baseline_obj : _Algorithm + _description_ + lam_range : _type_ + _description_ + rho : _type_, optional + _description_. Default is None. + n_samples : _type_, optional + _description_. 
+ + Returns + ------- + _type_ + _description_ + + Raises + ------ + ValueError + _description_ + NotImplementedError + _description_ + + """ + use_gcv = opt_method == 'gcv' + if use_gcv: + if rho is None: + # selection of rho based on https://doi.org/10.1007/s00180-015-0577-7 + rho = 1.3 if baseline_obj._size < 100 else 2. + else: + if rho < 1: + raise ValueError('rho must be >= 1') + + if method == 'beads': # only supported for L-curve-based optimization options + raise NotImplementedError( + 'optimize_pls does not support the beads method for GCV or BIC opt_method inputs' + ) + + n_lams = len(lam_range) + min_metric = np.inf + metrics = np.empty(n_lams) + traces = np.empty(n_lams) + wrss = np.empty(n_lams) + for i, lam in enumerate(lam_range): + fit_baseline, fit_params = baseline_func(y, lam=lam, **method_kws) + + trace = fit_params['result'].effective_dimension(n_samples) + # TODO should fidelity calc be directly added to the result objects so that this + # can be handled directly? + fit_wrss = fit_params['weights'] @ (y - fit_baseline)**2 + if use_gcv: + # GCV = (1/N) * RSS / (1 - rho * trace / N)**2 == RSS * N / (N - rho * trace)**2 + # Note that some papers use different terms for fidelity (eg. RSS / N vs just RSS), + # within the actual minimized equation, but both Woltring + # (https://doi.org/10.1016/0141-1195(86)90098-7) and Eilers + # (https://doi.org/10.1021/ac034173t) use the same GCV score + # formulation for penalized splines and Whittaker smoothing, respectively (using a + # fidelity term of just RSS), so this should be correct + metric = fit_wrss * baseline_obj._size / (baseline_obj._size - rho * trace)**2 + else: + # BIC = -2 * l + ln(N) * ED, where l == log likelihood and + # ED == effective dimension ~ trace + # log likelhood of Whittaker/P-Spline smoothing can be approximated from the result + # of fitting with lam=0, ie. 
RSS / N (see Eilers's original 1996 P-Spline paper) + # For Gaussian errors: BIC ~ N * ln(RSS / N) + ln(N) * trace + metric = ( + baseline_obj._size * np.log(fit_wrss / baseline_obj._size) + + np.log(baseline_obj._size) * trace + ) + + if metric < min_metric: + min_metric = metric + best_lam = lam + baseline = fit_baseline + best_params = fit_params + + metrics[i] = metric + traces[i] = trace + wrss[i] = fit_wrss + + params = { + 'optimal_parameter': best_lam, 'metric': metrics, 'trace': traces, + 'wrss': wrss, 'method_params': best_params + } + + return baseline, params + + @_optimizers_wrapper def collab_pls(data, average_dataset=True, method='asls', method_kwargs=None, x_data=None): """ @@ -750,7 +1190,7 @@ def collab_pls(data, average_dataset=True, method='asls', method_kwargs=None, x_ @_optimizers_wrapper def optimize_extended_range(data, x_data=None, method='asls', side='both', width_scale=0.1, - height_scale=1., sigma_scale=1. / 12., min_value=2, max_value=8, + height_scale=1., sigma_scale=1. / 12., min_value=2, max_value=9, step=1, pad_kwargs=None, method_kwargs=None): """ Extends data and finds the best parameter value for the given baseline method. @@ -994,6 +1434,9 @@ def custom_bc(data, x_data=None, method='asls', regions=((None, None),), samplin ---------- data : array-like, shape (N,) The y-values of the measured data, with N data points. + x_data : array-like, shape (N,), optional + The x-values of the measured data. Default is None, which will create an + array from -1 to 1 with N points. method : str, optional A string indicating the algorithm to use for fitting the baseline; can be any non-optimizer algorithm in pybaselines. Default is 'asls'. @@ -1056,3 +1499,140 @@ def custom_bc(data, x_data=None, method='asls', regions=((None, None),), samplin Intelligent Laboratory Systems, 2011, 109(1), 51-56. 
""" + + +@_optimizers_wrapper +def optimize_pls(data, method='arpls', opt_method='U-Curve', min_value=4, max_value=7, step=0.5, + method_kwargs=None, euclidean=False, rho=None, n_samples=0, x_data=None): + """ + Optimizes the regularization parameter for penalized least squares methods. + + Parameters + ---------- + data : array-like, shape (N,) + The y-values of the measured data, with N data points. + method : str, optional + A string indicating the Whittaker-smoothing or spline method + to use for fitting the baseline. Default is 'arpls'. + opt_method : {'U-Curve', 'GCV', 'BIC'}, optional + The optimization method used to optimize `lam`. Supported methods are: + + * 'U-Curve' + * 'GCV' + * 'BIC' + + Details on each optimization method are in the Notes section below. + min_value : int or float, optional + The minimum value for the `lam` value to use with the indicated method. Should + be the exponent to raise to the power of 10 (eg. a `min_value` value of 2 + designates a `lam` value of 10**2). Default is 4. + max_value : int or float, optional + The maximum value for the `lam` value to use with the indicated method. Should + be the exponent to raise to the power of 10 (eg. a `max_value` value of 3 + designates a `lam` value of 10**3). Default is 7. + step : int or float, optional + The step size for iterating the parameter value from `min_value` to `max_value`. + Should be the exponent to raise to the power of 10 (eg. a `step` value of 1 + designates a `lam` value of 10**1). Default is 0.5. + method_kwargs : dict, optional + A dictionary of keyword arguments to pass to the selected `method` function. + Default is None, which will use an empty dictionary. + euclidean : bool, optional + Only used if `opt_method` is 'U-curve'. If False (default), the optimization metric + is the minimum of the sum of the normalized fidelity and penalty values [1]_, which is + equivalent to the minimum graph distance from the origin. 
If True, the metric is the + euclidean distance from the origin, similar to [2]_ and [3]_. + rho : float, optional + Only used if `opt_method` is 'GCV'. The stabilization parameter for the modified + generalized cross validation (mGCV) criteria. A value of 1 defines normal GCV, while + higher values of `rho` stabilize the scores to make a single, global minima value + more likely (when applied to smoothing). If None (default), the value of `rho` will + be selected following [4]_, with the value being 1.3 if ``len(data)`` is less than + 100, otherwise 2. + n_samples : int, optional + Only used if `opt_method` is 'GCV' or 'BIC'. If 0 (default), will calculate the + analytical trace. Otherwise, will use stochastic trace estimation with a matrix of + (N, `n_samples`) Rademacher random variables (ie. either -1 or 1). + x_data : array-like, shape (N,), optional + The x-values of the measured data. Default is None, which will create an + array from -1 to 1 with N points. + + Returns + ------- + baseline : numpy.ndarray, shape (N,) + The baseline calculated with the optimum parameter. + params : dict + A dictionary with the following items: + + * 'optimal_parameter': float + The `lam` value that minimized the computed metric. + * 'metric': numpy.ndarray, shape (P,) + The computed metric for each `lam` value tested. + * 'method_params': dict + A dictionary containing the output parameters for the optimal fit. + Items will depend on the selected `method`. + * 'fidelity': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'U-curve'. The computed non-normalized + fidelity term for each `lam` value tested. For + most algorithms within pybaselines, this is equivalent to the weighted residual + sum of squares (eg. ``sum(weights * (data - baseline)**2)``) + * 'penalty': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'U-curve'. The computed non-normalized penalty + values for each `lam` value tested. 
+ * 'wrss': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'GCV' or 'BIC'. The weighted residual sum of + squares (eg. ``sum(weights * (data - baseline)**2)``) for each `lam` value tested. + * 'trace': numpy.ndarray, shape (P,) + Only returned if `opt_method` is 'GCV' or 'BIC. The computed trace of the smoother + matrix for each `lam` value tested, which signifies the effective dimension + for the system. + + Raises + ------ + ValueError + Raised if `opt_method` is 'GCV' and the input `rho` is less than 1. + NotImplementedError + Raised if `method` is 'beads' and `opt_method` is 'GCV' or 'BIC'. + + See Also + -------- + pybaselines.optimizers.optimize_extended_range + + Notes + ----- + `opt_method` 'U-Curve' requires that the sum of the normalized penalty and fidelity values + is roughly 'U' shaped (see Figure 5 in [1]_), which depends on appropriate selection of + `min_value` and `max_value` such that penalty continually decreases and fidelity + continually increases as `lam` increases. + + For `opt_method` 'U-Curve', the multipliers on `lam` used in methods `drpls` or `aspls`, + ``(1 - eta * weights)`` and ``alpha``, respectively, are omitted from the penalty term. + Otherwise, the penalty term shows little change with varying `lam` and gives bad results. + Likewise, for method='iasls', the penalty term from `lam_1` is omitted since its gradient + with respect to `lam` is assumed to be 0. More advanced optimization varying both `lam` + and `lam_1` is possible, but not supported within this method. + + Uses a grid search for optimization since the objective functions for all supported + `opt_method` inputs are highly non-smooth (ie. many local minima) when performing + baseline correction, due to the reliance of calculated weights on the input `lam`. 
+ Scalar minimization using :func:`scipy.optimize.minimize_scalar` was found to + perform okay in most cases, but it would also not allow some methods like 'U-Curve' + which requires calculating with all `lam` values before computing the objective. + + The range of values to test is generated using + ``numpy.arange(min_value, max_value, step)``, so `max_value` is likely not included in + the range of tested values. + + References + ---------- + .. [1] Park, A., et al. Automatic Selection of Optimal Parameter for Baseline Correction + using Asymmetrically Reweighted Penalized Least Squares. Journal of the Institute + of Electronics and Information Engineers, 2016, 53(3), 124-131. + .. [2] Belge, M., et al. Efficient determination of multiple regularization parameters in + a generalized L-curve framework. Inverse Problems, 2002, 18, 1161-1183. + .. [3] Andriyana, Y., et al. P-splines quantile regression estimation in varying + coefficient models. TEST, 2014, 23, 153-194. + .. [4] Lukas, M., et al. Practical use of robust GCV and modified GCV for spline + smoothing. Computational Statistics, 2016, 31, 269-289. + + """ diff --git a/pybaselines/polynomial.py b/pybaselines/polynomial.py index d9d7ff4a..979bf3d6 100644 --- a/pybaselines/polynomial.py +++ b/pybaselines/polynomial.py @@ -115,7 +115,7 @@ def poly(self, data, poly_order=2, weights=None, return_coef=False): * 'weights': numpy.ndarray, shape (N,) The weight array used for fitting the data. - * 'coef': numpy.ndarray, shape (poly_order,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -186,7 +186,7 @@ def modpoly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None, each iteration. The length of the array is the number of iterations completed. 
If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -288,7 +288,7 @@ def imodpoly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -421,7 +421,7 @@ def penalized_poly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=Non each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -573,7 +573,7 @@ def loess(self, data, fraction=0.2, total_points=None, poly_order=1, scale=3.0, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. 
- * 'coef': numpy.ndarray, shape (N, poly_order + 1) + * 'coef': numpy.ndarray, shape (N, ``poly_order + 1``) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. If `delta` is > 0, @@ -751,7 +751,7 @@ def quant_reg(self, data, poly_order=2, quantile=0.05, tol=1e-6, max_iter=250, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -881,7 +881,7 @@ def goldindec(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None, * 'threshold' : float The optimal threshold value. Could be used in :meth:`~.Baseline.penalized_poly` for fitting other similar data. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -1030,7 +1030,7 @@ def poly(data, x_data=None, poly_order=2, weights=None, return_coef=False): * 'weights': numpy.ndarray, shape (N,) The weight array used for fitting the data. - * 'coef': numpy.ndarray, shape (poly_order,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. 
@@ -1091,7 +1091,7 @@ def modpoly(data, x_data=None, poly_order=2, tol=1e-3, max_iter=250, weights=Non each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -1166,7 +1166,7 @@ def imodpoly(data, x_data=None, poly_order=2, tol=1e-3, max_iter=250, weights=No each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -1464,7 +1464,7 @@ def penalized_poly(data, x_data=None, poly_order=2, tol=1e-3, max_iter=250, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -2036,7 +2036,7 @@ def loess(data, x_data=None, fraction=0.2, total_points=None, poly_order=1, scal each iteration. The length of the array is the number of iterations completed. 
If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (N, poly_order + 1) + * 'coef': numpy.ndarray, shape (N, ``poly_order + 1``) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. If `delta` is > 0, @@ -2131,7 +2131,7 @@ def quant_reg(data, x_data=None, poly_order=2, quantile=0.05, tol=1e-6, max_iter each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. @@ -2237,7 +2237,7 @@ def goldindec(data, x_data=None, poly_order=2, tol=1e-3, max_iter=250, weights=N * 'threshold' : float The optimal threshold value. Could be used in :meth:`~.Baseline.penalized_poly` for fitting other similar data. - * 'coef': numpy.ndarray, shape (poly_order + 1,) + * 'coef': numpy.ndarray, shape (``poly_order + 1``,) Only if `return_coef` is True. The array of polynomial parameters for the baseline, in increasing order. Can be used to create a polynomial using :class:`numpy.polynomial.polynomial.Polynomial`. diff --git a/pybaselines/results.py b/pybaselines/results.py new file mode 100644 index 00000000..85cac65c --- /dev/null +++ b/pybaselines/results.py @@ -0,0 +1,988 @@ +# -*- coding: utf-8 -*- +"""Objects for calculating additional terms from results of analytical baseline correction methods. 
+ +Created on November 15, 2025 +@author: Donald Erb + +""" + +import numpy as np +from scipy.linalg import solve +from scipy.sparse import issparse +from scipy.sparse.linalg import factorized + +from ._banded_utils import _banded_to_sparse, _add_diagonals +from ._compat import diags, _sparse_col_index +from .utils import _get_rng + + +class WhittakerResult: + """ + Represents the result of Whittaker smoothing. + + Provides methods for extending the solution obtained from baseline algorithms that use + Whittaker smoothing. This class should not be initialized by external users. + + """ + + def __init__(self, penalized_object, weights=None, lhs=None, rhs_extra=None): + """ + Initializes the result object. + + In the most basic formulation, Whittaker smoothing solves ``(W + P) @ v = W @ y``. + Then the hat matrix would be ``(W + P)^-1 @ W``. For more complex usages, the + equation can be expressed as ``lhs @ v = (W + rhs_extra) @ y`` with a corresponding + hat matrix of ``lhs^-1 @ (W + rhs_extra)``. + + Parameters + ---------- + penalized_object : pybaselines._banded_utils.PenalizedSystem + The penalized system object used for solving. + weights : numpy.ndarray, shape (N,) optional + The weights used to solve the system. Default is None, which will set + all weights to 1. + lhs : numpy.ndarray, optional + The left hand side of the normal equation. Default is None, which will assume that + `lhs` is the addition of ``diags(weights)`` and ``pentalized_object.penalty``. + rhs_extra : numpy.ndarray or scipy.sparse.sparray or scipy.sparse.spmatrix, optional + Additional terms besides the weights within the right hand side of the hat matrix. + Default is None. + + """ + self._penalized_object = penalized_object + self._hat_lhs = lhs + self._hat_rhs = None + self._rhs_extra = rhs_extra + self._trace = None + if weights is None: + weights = np.ones(self._shape) + self._weights = weights + + @property + def _shape(self): + """The shape of the penalized system. 
+ + Returns + ------- + tuple[int, int] + The penalized system's shape. + + """ + # TODO need to add an attribute to join 1D and 2D PenalizedSystem and PSpline objects + # so that this can just access that attribute rather than having to modify for each + # subclass + return self._basis_shape + + @property + def _size(self): + """The total size of the penalized system. + + Returns + ------- + int + The penalized system's size. + + """ + return np.prod(self._shape) + + @property + def _basis_shape(self): + """The shape of the system's basis matrix. + + Returns + ------- + tuple[int, int] + The penalized system's basis shape. + + """ + return self._penalized_object._num_bases + + @property + def _basis_size(self): + """The total size of the system's basis matrix. + + Returns + ------- + int + The system's basis matrix size. + + """ + return np.prod(self._basis_shape) + + @property + def _lhs(self): + """ + The left hand side of the hat matrix in banded format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + numpy.ndarray + The array representing the left hand side of the hat matrix. + + """ + if self._hat_lhs is None: + self._hat_lhs = self._penalized_object.add_diagonal(self._weights) + return self._hat_lhs + + @property + def _rhs(self): + """ + The right hand side of the hat matrix in sparse format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + scipy.sparse.sparray or scipy.sparse.spmatrix + The sparse object representing the right hand side of the hat matrix. 
+ + """ + if self._hat_rhs is None: + if self._rhs_extra is None: + self._hat_rhs = diags(self._weights) + else: + if not issparse(self._rhs_extra): + self._rhs_extra = _banded_to_sparse( + self._rhs_extra, lower=self._penalized_object.lower + ) + self._rhs_extra.setdiag(self._rhs_extra.diagonal() + self._weights) + self._hat_rhs = self._rhs_extra + return self._hat_rhs + + def effective_dimension(self, n_samples=0, rng=1234): + """ + Calculates the effective dimension from the trace of the hat matrix. + + For typical Whittaker smoothing, the linear equation would be + ``(W + P) v = W @ y``. Then the hat matrix would be ``(W + P)^-1 @ W``. + The effective dimension for the system can be estimated as the trace + of the hat matrix. + + Parameters + ---------- + n_samples : int, optional + If 0 (default), will calculate the analytical trace. Otherwise, will use stochastic + trace estimation with a matrix of (N, `n_samples`) Rademacher random variables + (ie. either -1 or 1). + rng : int or numpy.random.Generator or numpy.random.RandomState + The integer for the seed of the random number generator or an existing generating + object to use for the stochastic trace estimation. + + Returns + ------- + trace : float + The trace of the hat matrix, denoting the effective dimension for + the system. + + Raises + ------ + TypeError + Raised if `n_samples` is not an integer greater than or equal to 0. + + Notes + ----- + For systems larger than ~1000 data points, it is heavily suggested to use stochastic + trace estimation since the time required for the analytical solution calculation scales + poorly with size. + + References + ---------- + Eilers, P. A Perfect Smoother. Analytical Chemistry, 2003, 75(14), 3631-3636. + + Hutchinson, M. A stochastic estimator of the trace of the influence matrix for laplacian + smoothing splines. Communications in Statistics - Simulation and Computation, (1990), + 19(2), 433-450. + + Meyer, R., et al. 
Hutch++: Optimal Stochastic Trace Estimation. 2021 Symposium on + Simplicity in Algorithms (SOSA), (2021), 142-155. + + """ + # NOTE if diff_order is 2 and matrix is symmetric, could use the fast trace calculation from + # Frasso G, Eilers PH. L- and V-curves for optimal smoothing. Statistical Modelling. + # (2014), 15(1), 91-111. https://doi.org/10.1177/1471082X14549288, which is in turn based on + # Hutchinson, M, et al. Smoothing noisy data with spline functions. Numerische Mathematik. + # (1985), 47, 99-106. https://doi.org/10.1007/BF01389878 + # For non-symmetric matrices, can use the slightly more involved algorithm from: + # Erisman, A., et al. On Computing Certain Elements of the Inverse of a Sparse Matrix. + # Communication of the ACM. (1975) 18(3), 177-179. https://doi.org/10.1145/360680.360704 + # -> worth the effort? -> maybe...? For diff_order=2 and symmetric lhs, the timing is + # much faster than even the stochastic calculation and does not increase much with data + # size, and it provides the exact trace rather than an estimate -> however, this is only + # useful for GCV/BIC calculations atm, which are going to be very very rarely used -> could + # allow calculating the full inverse hat diagonal to allow calculating the baseline fit + # errors, but that's still incredibly niche... + # Also note that doing so would require performing inv(lhs) @ rhs, which is typically less + # numerically stable than solve(lhs, rhs) and would be complicated for non diagonal rhs; + # as such, I'd rather not implement it and just leave the above for reference. + + # TODO could maybe make default n_samples to None and decide to use analytical or + # stochastic trace based on data size; data size > 1000 use stochastic with default + # n_samples = 100? 
+ if n_samples == 0: + if self._trace is not None: + return self._trace + use_analytic = True + else: + if n_samples < 0 or not isinstance(n_samples, int): + raise TypeError('n_samples must be a non-negative integer') + use_analytic = False + + if use_analytic: + # compute each diagonal of the hat matrix separately so that the full + # hat matrix does not need to be stored in memory + # note to self: sparse factorization is the worst case scenario (non-symmetric lhs and + # diff_order != 2), but it is still much faster than individual solves through + # solve_banded + factorization = self._penalized_object.factorize(self._lhs) + trace = 0 + if self._rhs_extra is None: + # note: about an order of magnitude faster to omit the sparse rhs for the simple + # case of lhs @ v = w * y + eye = np.zeros(self._size) + for i in range(self._size): + eye[i] = self._weights[i] + trace += self._penalized_object.factorized_solve(factorization, eye)[i] + eye[i] = 0 + else: + rhs = self._rhs.tocsc() + for i in range(self._basis_size): + trace += self._penalized_object.factorized_solve( + factorization, _sparse_col_index(rhs, i) + )[i] + + # prevent needing to calculate analytical solution again + self._trace = trace + else: + rng_samples = _get_rng(rng).choice([-1., 1.], size=(self._basis_size, n_samples)) + if self._rhs_extra is None: + rhs_u = self._weights[:, None] * rng_samples + else: + rhs_u = self._rhs.tocsr() @ rng_samples + # H @ u == (W + P)^-1 @ (W @ u) + hat_u = self._penalized_object.solve(self._lhs, rhs_u, overwrite_b=True) + # stochastic trace is the average of the trace of u.T @ H @ u; + # trace(A.T @ B) == (A * B).sum() (see + # https://en.wikipedia.org/wiki/Trace_(linear_algebra)#Trace_of_a_product ), + # with the latter using less memory and being much faster to compute; for future + # reference: einsum('ij,ij->', A, B) == (A * B).sum(), but is typically faster + trace = np.einsum('ij,ij->', rng_samples, hat_u) / n_samples + + return trace + + +class 
PSplineResult(WhittakerResult): + """ + Represents the result of penalized spline (P-Spline) smoothing. + + Provides methods for extending the solution obtained from baseline algorithms that use + P-Spline smoothing. This class should not be initialized by external users. + + """ + + def __init__(self, penalized_object, weights=None, rhs_extra=None, penalty=None): + """ + Initializes the result object. + + In the most basic formulation, the linear equation for P-spline smoothing + is ``(B.T @ W @ B + P) c = B.T @ W @ y`` and ``v = B @ c``. + ``(W + P) @ v = W @ y``. Then the hat matrix would be + ``B @ (B.T @ W @ B + P)^-1 @ (B.T @ W)`` or, equivalently + ``(B.T @ W @ B + P)^-1 @ (B.T @ W @ B)``. The latter expression is preferred + since it reduces the dimensionality of intermediate calculations. + + For more complex usages, the equation can be expressed as: + ``(B.T @ W @ B + P) @ c = (B.T @ W + rhs_partial) @ y``, such that the hat is given as: + ``B @ (B.T @ W @ B + P)^-1 @ (B.T @ W + rhs_partial)``, or equivalently + ``(B.T @ W @ B + P)^-1 @ (B.T @ W + rhs_partial) @ B``. Simplifying leads to + ``(B.T @ W @ B + P)^-1 @ (B.T @ W @ B + rhs_extra)``. + + Parameters + ---------- + penalized_object : pybaselines._spline_utils.PSpline + The penalized system object used for solving. + weights : numpy.ndarray, shape (N,) optional + The weights used to solve the system. Default is None, which will set + all weights to 1. + rhs_extra : numpy.ndarray or scipy.sparse.sparray or scipy.sparse.spmatrix, optional + Additional terms besides ``B.T @ W @ B`` within the right hand side of the hat + matrix. Default is None. + penalty : numpy.ndarray, optional + The penalty `P` for the system, in the same banded format as used by + `penalized_object`. If None (default), will use ``penalized_object.penalty``. + If given, will overwrite ``penalized_object.penalty`` with the given penalty. 
+ + """ + super().__init__(penalized_object, weights=weights, rhs_extra=rhs_extra) + self._btwb_ = None + if penalty is not None: + self._penalized_object.penalty = penalty + + @property + def _shape(self): + """The shape of the penalized system. + + Returns + ------- + tuple[int, int] + The penalized system's shape. + + """ + return (len(self._penalized_object.basis.x),) + + @property + def _lhs(self): + """ + The left hand side of the hat matrix in banded format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + numpy.ndarray + The array representing the left hand side of the hat matrix. + + """ + if self._hat_lhs is None: + self._hat_lhs = _add_diagonals( + self._btwb, self._penalized_object.penalty, self._penalized_object.lower + ) + return self._hat_lhs + + @property + def _rhs(self): + """ + The right hand side of the hat matrix in sparse format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + scipy.sparse.sparray or scipy.sparse.spmatrix + The sparse object representing the right hand side of the hat matrix. + + """ + if self._hat_rhs is None: + btwb = _banded_to_sparse(self._btwb, lower=self._penalized_object.lower) + if self._rhs_extra is None: + self._hat_rhs = btwb + else: + if not issparse(self._rhs_extra): + self._rhs_extra = _banded_to_sparse( + self._rhs_extra, lower=self._penalized_object.lower + ) + self._hat_rhs = self._rhs_extra + btwb + return self._hat_rhs + + @property + def _btwb(self): + """ + The matrix multiplication of ``B.T @ W @ B`` in banded format. + + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + numpy.ndarray + The array representing the matrix multiplication of ``B.T @ W @ B``. 
+ + """ + if self._btwb_ is None: + self._btwb_ = self._penalized_object._make_btwb(self._weights) + return self._btwb_ + + @property + def tck(self): + """ + The knots, spline coefficients, and spline degree to reconstruct the fit baseline. + + Can be used with SciPy's :class:`scipy.interpolate.BSpline`, to allow for reconstructing + the fit baseline to allow for other usages such as evaluating with different x-values. + + Returns + ------- + numpy.ndarray, shape (K,) + The knots for the spline. Has a shape of `K`, which is equal to + ``num_knots + 2 * spline_degree``. + numpy.ndarray, shape (M,) + The spline coeffieicnts. Has a shape of `M`, which is the number of basis functions + (equal to ``K - spline_degree - 1`` or equivalently ``num_knots + spline_degree - 1``). + int + The degree of the spline. + + """ + return self._penalized_object.tck + + def effective_dimension(self, n_samples=0, rng=1234): + """ + Calculates the effective dimension from the trace of the hat matrix. + + For typical P-spline smoothing, the linear equation would be + ``(B.T @ W @ B + lam * P) c = B.T @ W @ y`` and ``v = B @ c``. Then the hat matrix + would be ``B @ (B.T @ W @ B + lam * P)^-1 @ (B.T @ W)`` or, equivalently + ``(B.T @ W @ B + lam * P)^-1 @ (B.T @ W @ B)``. The latter expression is preferred + since it reduces the dimensionality. The effective dimension for the system + can be estimated as the trace of the hat matrix. + + Parameters + ---------- + n_samples : int, optional + If 0 (default), will calculate the analytical trace. Otherwise, will use stochastic + trace estimation with a matrix of (N, `n_samples`) Rademacher random variables + (ie. either -1 or 1). + + Returns + ------- + trace : float + The trace of the hat matrix, denoting the effective dimension for + the system. + + Raises + ------ + TypeError + Raised if `n_samples` is not an integer greater than or equal to 0. + + References + ---------- + Eilers, P., et al. Flexible Smoothing with B-splines and Penalties. 
Statistical Science, + 1996, 11(2), 89-121. + + Hutchinson, M. A stochastic estimator of the trace of the influence matrix for laplacian + smoothing splines. Communications in Statistics - Simulation and Computation, (1990), + 19(2), 433-450. + + Meyer, R., et al. Hutch++: Optimal Stochastic Trace Estimation. 2021 Symposium on + Simplicity in Algorithms (SOSA), (2021), 142-155. + + """ + # TODO could maybe make default n_samples to None and decide to use analytical or + # stochastic trace based on data size; data size > 1000 use stochastic with default + # n_samples = 100? + if n_samples == 0: + if self._trace is not None: + return self._trace + use_analytic = True + rhs_format = 'csc' + else: + if n_samples < 0 or not isinstance(n_samples, int): + raise TypeError('n_samples must be a non-negative integer') + use_analytic = False + rhs_format = 'csr' + + rhs = self._rhs.asformat(rhs_format) + if use_analytic: + # compute each diagonal of the hat matrix separately so that the full + # hat matrix does not need to be stored in memory + trace = 0 + factorization = self._penalized_object.factorize(self._lhs) + for i in range(self._basis_size): + trace += self._penalized_object.factorized_solve( + factorization, _sparse_col_index(rhs, i) + )[i] + # prevent needing to calculate analytical solution again + self._trace = trace + else: + rng_samples = _get_rng(rng).choice([-1., 1.], size=(self._basis_size, n_samples)) + # H @ u == (B.T @ W @ B + P)^-1 @ (B.T @ W @ B) @ u + hat_u = self._penalized_object.solve(self._lhs, rhs @ rng_samples, overwrite_b=True) + # stochastic trace is the average of the trace of u.T @ H @ u; + # trace(u.T @ H @ u) == sum(u * (H @ u)) + trace = np.einsum('ij,ij->', rng_samples, hat_u) / n_samples + + return trace + + +class PSplineResult2D(PSplineResult): + """ + Represents the result of 2D penalized spline (P-Spline) smoothing. + + Provides methods for extending the solution obtained from baseline algorithms that use + P-Spline smoothing. 
This class should not be initialized by external users. + + """ + + def __init__(self, penalized_object, weights=None, rhs_extra=None, penalty=None): + """ + Initializes the result object. + + In the most basic formulation, the linear equation for P-spline smoothing + is ``(B.T @ W @ B + P) c = B.T @ W @ y`` and ``v = B @ c``. + ``(W + P) @ v = W @ y``. Then the hat matrix would be + ``B @ (B.T @ W @ B + P)^-1 @ (B.T @ W)`` or, equivalently + ``(B.T @ W @ B + P)^-1 @ (B.T @ W @ B)``. The latter expression is preferred + since it reduces the dimensionality of intermediate calculations. + + For more complex usages, the equation can be expressed as: + ``(B.T @ W @ B + P) @ c = (B.T @ W + rhs_partial) @ y``, such that the hat is given as: + ``B @ (B.T @ W @ B + P)^-1 @ (B.T @ W + rhs_partial)``, or equivalently + ``(B.T @ W @ B + P)^-1 @ (B.T @ W + rhs_partial) @ B``. Simplifying leads to + ``(B.T @ W @ B + P)^-1 @ (B.T @ W @ B + rhs_extra)``. + + Parameters + ---------- + penalized_object : pybaselines.two_d._spline_utils.PSpline2D + The penalized system object used for solving. + weights : numpy.ndarray, shape (M, N) or shape (``M * N``,) optional + The weights used to solve the system. Default is None, which will set + all weights to 1. + rhs_extra : numpy.ndarray or scipy.sparse.sparray or scipy.sparse.spmatrix, optional + Additional terms besides ``B.T @ W @ B`` within the right hand side of the hat + matrix. Default is None. + penalty : scipy.sparse.sparray or scipy.sparse.spmatrix, optional + The penalty `P` for the system in full, sparse format. If None (default), will use + ``penalized_object.penalty``. If given, will overwrite ``penalized_object.penalty`` + with the given penalty. + + """ + super().__init__(penalized_object, weights=weights, rhs_extra=rhs_extra, penalty=penalty) + if self._weights.ndim == 1: + self._weights = self._weights.reshape(self._shape) + + @property + def _shape(self): + """The shape of the penalized system. 
+ + Returns + ------- + tuple[int, int] + The penalized system's shape. + + """ + return (len(self._penalized_object.basis.x), len(self._penalized_object.basis.z)) + + @property + def _lhs(self): + """ + The left hand side of the hat matrix in banded format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + scipy.sparse.csc_array or scipy.sparse.csc_matrix + The left hand side of the hat matrix. + + """ + if self._hat_lhs is None: + self._hat_lhs = (self._btwb + self._penalized_object.penalty).tocsc() + return self._hat_lhs + + @property + def _rhs(self): + """ + The right hand side of the hat matrix in sparse format. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + scipy.sparse.sparray or scipy.sparse.spmatrix + The sparse object representing the right hand side of the hat matrix. + + """ + if self._hat_rhs is None: + if self._rhs_extra is None: + self._hat_rhs = self._btwb + else: + self._hat_rhs = self._rhs_extra + self._btwb + return self._hat_rhs + + @property + def _btwb(self): + """ + The matrix multiplication of ``B.T @ W @ B`` in full, sparse format. + + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + scipy.sparse.sparray or scipy.sparse.spmatrix + The sparse object representing the matrix multiplication of ``B.T @ W @ B``. + + """ + # TODO can remove once PSpline and PSpline2D unify their btwb method calls; or + # just keep the docstring since the types are different + if self._btwb_ is None: + self._btwb_ = self._penalized_object.basis._make_btwb(self._weights) + return self._btwb_ + + @property + def tck(self): + """ + The knots, spline coefficients, and spline degree to reconstruct the fit baseline. 
+ + Can be used with SciPy's :class:`scipy.interpolate.NdBSpline`, to allow for reconstructing + the fit baseline to allow for other usages such as evaluating with different x- and + z-values. + + Returns + ------- + tuple[numpy.ndarray, numpy.ndarray] + The knots for the spline along the rows and columns. + numpy.ndarray, shape (M, N) + The spline coeffieicnts. Has a shape of (`M`, `N`), corresponding to the number + of basis functions along the rows and columns. + numpy.ndarray([int, int]) + The degree of the spline for the rows and columns. + + """ + # method only added to document differing output types compared to PSplineResult.tck + return super().tck + + def effective_dimension(self, n_samples=0, rng=1234): + """ + Calculates the effective dimension from the trace of the hat matrix. + + For typical P-spline smoothing, the linear equation would be + ``(B.T @ W @ B + lam * P) c = B.T @ W @ y`` and ``v = B @ c``. Then the hat matrix + would be ``B @ (B.T @ W @ B + lam * P)^-1 @ (B.T @ W)`` or, equivalently + ``(B.T @ W @ B + lam * P)^-1 @ (B.T @ W @ B)``. The latter expression is preferred + since it reduces the dimensionality. The effective dimension for the system + can be estimated as the trace of the hat matrix. + + Parameters + ---------- + n_samples : int, optional + If 0 (default), will calculate the analytical trace. Otherwise, will use stochastic + trace estimation with a matrix of (``M * N``, `n_samples`) Rademacher random variables + (eg. either -1 or 1). + + Returns + ------- + trace : float + The trace of the hat matrix, denoting the effective dimension for + the system. + + Raises + ------ + TypeError + Raised if `n_samples` is not an integer greater than or equal to 0. + + References + ---------- + Eilers, P., et al. Fast and compact smoothing on large multidimensional grids. Computational + Statistics and Data Analysis, 2006, 50(1), 61-76. + + Hutchinson, M. 
A stochastic estimator of the trace of the influence matrix for laplacian + smoothing splines. Communications in Statistics - Simulation and Computation, (1990), + 19(2), 433-450. + + Meyer, R., et al. Hutch++: Optimal Stochastic Trace Estimation. 2021 Symposium on + Simplicity in Algorithms (SOSA), (2021), 142-155. + + """ + # TODO unify the PSpline and PSpline2D method namings and availability for factorization + # and solving so that this can be directly inherited from the PSplineResult object + if n_samples == 0: + if self._trace is not None: + return self._trace + use_analytic = True + rhs_format = 'csc' + else: + if n_samples < 0 or not isinstance(n_samples, int): + raise TypeError('n_samples must be a non-negative integer') + use_analytic = False + rhs_format = 'csr' + + rhs = self._rhs.asformat(rhs_format) + if use_analytic: + # compute each diagonal of the hat matrix separately so that the full + # hat matrix does not need to be stored in memory + trace = 0 + factorization = factorized(self._lhs) + for i in range(self._basis_size): + trace += factorization(_sparse_col_index(rhs, i))[i] + # prevent needing to calculate analytical solution again + self._trace = trace + else: + rng_samples = _get_rng(rng).choice([-1., 1.], size=(self._basis_size, n_samples)) + # H @ u == (B.T @ W @ B + P)^-1 @ (B.T @ W @ B) @ u + hat_u = self._penalized_object.direct_solve(self._lhs, rhs @ rng_samples) + # stochastic trace is the average of the trace of u.T @ H @ u; + # trace(u.T @ H @ u) == sum(u * (H @ u)) + trace = np.einsum('ij,ij->', rng_samples, hat_u) / n_samples + + return trace + + +class WhittakerResult2D(WhittakerResult): + """ + Represents the result of 2D Whittaker smoothing. + + Provides methods for extending the solution obtained from baseline algorithms that use + Whittaker smoothing. This class should not be initialized by external users. 
+ + """ + + def __init__(self, penalized_object, weights=None, lhs=None, rhs_extra=None, penalty=None): + """ + Initializes the result object. + + In the most basic formulation, Whittaker smoothing solves ``(W + P) @ v = W @ y``. + Then the hat matrix would be ``(W + P)^-1 @ W``. For more complex usages, the + equation can be expressed as ``lhs @ v = (W + rhs_extra) @ y`` with a corresponding + hat matrix of ``lhs^-1 @ (W + rhs_extra)``. + + Parameters + ---------- + penalized_object : pybaselines.two_d._whittaker_utils.WhittakerSystem2D + The penalized system object used for solving. + weights : numpy.ndarray, shape (M, N) or shape (``M * N``,) optional + The weights used to solve the system. Default is None, which will set + all weights to 1. + lhs : scipy.sparse.sparray or scipy.sparse.spmatrix, optional + The left hand side of the hat matrix. Default is None, which will assume that + `lhs` is the addition of ``diags(weights)`` and ``pentalized_object.penalty``. + rhs_extra : scipy.sparse.sparray or scipy.sparse.spmatrix, optional + Additional terms besides the weights within the right hand side of the hat matrix. + Default is None. + penalty : scipy.sparse.sparray or scipy.sparse.spmatrix, optional + The penalty `P` for the system in full, sparse format. If None (default), will use + ``penalized_object.penalty``. If given, will overwrite ``penalized_object.penalty`` + with the given penalty. + + Raises + ------ + ValueError + Raised if both `penalty` and `lhs` are not None. 
+ + """ + super().__init__(penalized_object, weights=weights, lhs=lhs, rhs_extra=rhs_extra) + self._btwb_ = None + if penalty is not None: + if lhs is not None: + raise ValueError('both `lhs` and `penalty` cannot be supplied') + self._penalized_object.penalty = penalty + + if self._penalized_object._using_svd and self._weights.ndim == 1: + self._weights = self._weights.reshape(self._shape) + elif not self._penalized_object._using_svd and self._weights.ndim == 2: + self._weights = self._weights.ravel() + + @property + def _shape(self): + """The shape of the penalized system. + + Returns + ------- + tuple[int, int] + The penalized system's shape. + + """ + # TODO replace/remove once PenalizedSystem2D and WhittakerSystem2D are unified + if hasattr(self._penalized_object, '_num_points'): + shape = self._penalized_object._num_points + else: + shape = self._penalized_object._num_bases + return shape + + @property + def _btwb(self): + """ + The matrix multiplication of ``B.T @ W @ B`` in full, dense format. + + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + numpy.ndarray + The array representing the matrix multiplication of ``B.T @ W @ B``. + + """ + if self._btwb_ is None: + self._btwb_ = self._penalized_object._make_btwb(self._weights) + return self._btwb_ + + @property + def _lhs(self): + """ + The left hand side of the hat matrix. + + Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs. + Lazy implementation so that the calculation is only performed when needed. + + Returns + ------- + numpy.ndarray or scipy.sparse.csc_array or scipy.sparse.csc_matrix + The left hand side of the hat matrix. If using SVD, then the output is a numpy + array; otherwise, it is a sparse object wit CSC format. 
+
+        """
+        if self._hat_lhs is None:
+            if self._penalized_object._using_svd:
+                lhs = self._btwb.copy()
+                np.fill_diagonal(lhs, lhs.diagonal() + self._penalized_object.penalty)
+                self._hat_lhs = lhs
+            else:
+                return super()._lhs.tocsc()
+
+        return self._hat_lhs
+
+    @property
+    def _rhs(self):
+        """
+        The right hand side of the hat matrix.
+
+        Given the linear system ``lhs @ v = rhs @ y``, the hat matrix is given as ``lhs^-1 @ rhs``.
+        Lazy implementation so that the calculation is only performed when needed.
+
+        Returns
+        -------
+        scipy.sparse.sparray or scipy.sparse.spmatrix
+            The sparse object representing the right hand side of the hat matrix.
+
+        """
+        if self._hat_rhs is None:
+            if self._penalized_object._using_svd:
+                self._hat_rhs = self._btwb
+            else:
+                return super()._rhs
+
+        return self._hat_rhs
+
+    def relative_dof(self):
+        """
+        Calculates the relative effective degrees of freedom for each eigenvector.
+
+        Returns
+        -------
+        dof : numpy.ndarray, shape (P, Q)
+            The relative effective degrees of freedom associated with each eigenvector
+            used for the fit. Each individual effective degree of freedom value is between
+            0 and 1, with lower values signifying that the eigenvector was less important
+            for the fit.
+
+        Raises
+        ------
+        ValueError
+            Raised if the system was solved analytically rather than using eigendecomposition,
+            i.e. ``num_eigens`` was set to None.
+
+        """
+        if not self._penalized_object._using_svd:
+            raise ValueError(
+                'Cannot calculate degrees of freedom when not using eigendecomposition'
+            )
+        dof = solve(self._lhs, self._btwb, check_finite=False, assume_a='pos')
+        return dof.diagonal().reshape(self._basis_shape)
+
+    def effective_dimension(self, n_samples=0, rng=1234):
+        """
+        Calculates the effective dimension from the trace of the hat matrix.
+
+        For typical Whittaker smoothing, the linear equation would be
+        ``(W + lam * P) v = W @ y``. Then the hat matrix would be ``(W + lam * P)^-1 @ W``.
+        If using SVD, the linear equation is ``(B.T @ W @ B + lam * P) c = B.T @ W @ y`` and
+        ``v = B @ c``. Then the hat matrix would be ``B @ (B.T @ W @ B + lam * P)^-1 @ (B.T @ W)``
+        or, equivalently ``(B.T @ W @ B + lam * P)^-1 @ (B.T @ W @ B)``. The latter expression
+        is preferred since it reduces the dimensionality. The effective dimension for the system
+        can be estimated as the trace of the hat matrix.
+
+        Parameters
+        ----------
+        n_samples : int, optional
+            If 0 (default), will calculate the analytical trace. Otherwise, will use stochastic
+            trace estimation with a matrix of (``M * N``, `n_samples`) Rademacher random variables
+            (e.g. either -1 or 1).
+
+        Returns
+        -------
+        trace : float
+            The trace of the hat matrix, denoting the effective dimension for
+            the system.
+
+        Raises
+        ------
+        TypeError
+            Raised if `n_samples` is negative or is not an integer.
+
+        Notes
+        -----
+        If using SVD, the trace will be lower than the actual analytical trace. The relative
+        difference is reduced as the number of eigenvalues selected approaches the data
+        size.
+
+        References
+        ----------
+        Biessy, G. Whittaker-Henderson smoothing revisited: A modern statistical framework for
+        practical use. ASTIN Bulletin, 2025, 1-31.
+
+        Hutchinson, M. A stochastic estimator of the trace of the influence matrix for laplacian
+        smoothing splines. Communications in Statistics - Simulation and Computation, (1990),
+        19(2), 433-450.
+
+        Meyer, R., et al. Hutch++: Optimal Stochastic Trace Estimation. 2021 Symposium on
+        Simplicity in Algorithms (SOSA), (2021), 142-155.
+ + """ + if n_samples == 0: + if self._trace is not None: + return self._trace + use_analytic = True + else: + if n_samples < 0 or not isinstance(n_samples, int): + raise TypeError('n_samples must be a non-negative integer') + use_analytic = False + rng_samples = _get_rng(rng).choice([-1., 1.], size=(self._basis_size, n_samples)) + + if self._penalized_object._using_svd: + # NOTE the only Whittaker-based algorithms that allow performing SVD for solving + # all use the simple (W + P) v = w * y formulation, so no need to implement for + # rhs_extra + if self._rhs_extra is not None: + raise NotImplementedError( + 'rhs_extra is not supported when using eigendecomposition' + ) + if use_analytic: + trace = self.relative_dof().sum() + self._trace = trace + else: + # H @ u == (B.T @ W @ B + P)^-1 @ (B.T @ W @ B) @ u + hat_u = solve( + self._lhs, self._rhs @ rng_samples, overwrite_b=True, + check_finite=False, assume_a='pos' + ) + # stochastic trace is the average of the trace of u.T @ H @ u; + # trace(u.T @ H @ u) == sum(u * (H @ u)) + trace = np.einsum('ij,ij->', rng_samples, hat_u) / n_samples + else: + # TODO unify PenalizedSystem and PenalizedSystem2D methods so that this can be + # directly inherited from WhittakerResult + if use_analytic: + # compute each diagonal of the hat matrix separately so that the full + # hat matrix does not need to be stored in memory + trace = 0 + factorization = factorized(self._lhs) + if self._rhs_extra is None: + # note: about an order of magnitude faster to omit the sparse rhs for the simple + # case of lhs @ v = w * y + eye = np.zeros(self._size) + for i in range(self._size): + eye[i] = self._weights[i] + trace += factorization(eye)[i] + eye[i] = 0 + else: + rhs = self._rhs.tocsc() + for i in range(self._basis_size): + trace += factorization(_sparse_col_index(rhs, i))[i] + self._trace = trace + + else: + if self._rhs_extra is None: + rhs_u = self._weights[:, None] * rng_samples + else: + rhs_u = self._rhs.tocsr() @ rng_samples + # H @ 
u == (W + P)^-1 @ (W @ u) + hat_u = self._penalized_object.direct_solve(self._lhs, rhs_u) + # stochastic trace is the average of the trace of u.T @ H @ u; + # trace(u.T @ H @ u) == sum(u * (H @ u)) + trace = np.einsum('ij,ij->', rng_samples, hat_u) / n_samples + + return trace diff --git a/pybaselines/spline.py b/pybaselines/spline.py index 6f1c80c4..4960bd87 100644 --- a/pybaselines/spline.py +++ b/pybaselines/spline.py @@ -17,6 +17,7 @@ from ._compat import dia_object, jit, trapezoid from ._spline_utils import _basis_midpoints from ._validation import _check_lam, _check_optional_array, _check_scalar_variable +from .results import PSplineResult from .utils import ( ParameterWarning, _mollifier_kernel, _sort_array, gaussian, pad_edges, padded_convolve, relative_difference, _MIN_FLOAT @@ -90,6 +91,9 @@ def mixture_model(self, data, lam=1e5, p=1e-2, num_knots=100, spline_degree=3, d each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -195,7 +199,8 @@ def mixture_model(self, data, lam=1e5, p=1e-2, num_knots=100, spline_degree=3, d residual = y - baseline params = { - 'weights': weight_array, 'tol_history': tol_history[:i + 1] + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) } baseline = np.polynomial.polyutils.mapdomain(baseline, np.array([-1., 1.]), y_domain) @@ -253,6 +258,9 @@ def irsqr(self, data, lam=100, quantile=0.05, num_knots=100, spline_degree=3, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. 
+ * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -283,7 +291,10 @@ def irsqr(self, data, lam=100, quantile=0.05, num_knots=100, spline_degree=3, old_coef = pspline.coef weight_array = _weighting._quantile(y, baseline, quantile, eps) - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -412,6 +423,9 @@ def pspline_asls(self, data, lam=1e3, p=1e-2, num_knots=100, spline_degree=3, di each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -451,7 +465,10 @@ def pspline_asls(self, data, lam=1e3, p=1e-2, num_knots=100, spline_degree=3, di break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -471,8 +488,8 @@ def pspline_iasls(self, data, lam=1e1, p=1e-2, lam_1=1e-4, num_knots=100, Default is 1e1. p : float, optional The penalizing weighting factor. Must be between 0 and 1. Values greater - than the baseline will be given `p` weight, and values less than the baseline - will be given `1 - p` weight. Default is 1e-2. + than the baseline will be given ``p**2`` weight, and values less than the baseline + will be given ``(1 - p)**2`` weight. Default is 1e-2. lam_1 : float, optional The smoothing parameter for the first derivative of the residual. Default is 1e-4. 
num_knots : int, optional @@ -499,11 +516,19 @@ def pspline_iasls(self, data, lam=1e1, p=1e-2, lam_1=1e-4, num_knots=100, * 'weights': numpy.ndarray, shape (N,) The weight array used for fitting the data. + + .. versionchanged:: 1.3.0 + Prior to version 1.3.0, the returned weights were the non-squared + values (ie. ``p`` or ``1 - p``). + * 'tol_history': numpy.ndarray An array containing the calculated tolerance values for each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -514,6 +539,13 @@ def pspline_iasls(self, data, lam=1e1, p=1e-2, lam_1=1e-4, num_knots=100, -------- Baseline.iasls + Notes + ----- + Although both ``pspline_iasls`` and :meth:`~.Baseline.pspline_asls` use `p` for defining + the weights, the appropriate `p` value for ``pspline_iasls`` will be approximately equal + to the square root of the value used for ``pspline_asls`` when `p` is small since + ``pspline_iasls`` uses squared weights. + References ---------- He, S., et al. 
Baseline correction for raman spectra using an improved @@ -556,15 +588,18 @@ def pspline_iasls(self, data, lam=1e1, p=1e-2, lam_1=1e-4, num_knots=100, tol_history = np.empty(max_iter + 1) for i in range(max_iter + 1): - baseline = pspline.solve_pspline(y, weight_array**2, rhs_extra=partial_rhs) - new_weights = _weighting._asls(y, baseline, p) + baseline = pspline.solve_pspline(y, weight_array, rhs_extra=partial_rhs) + new_weights = _weighting._asls(y, baseline, p)**2 calc_difference = relative_difference(weight_array, new_weights) tol_history[i] = calc_difference if calc_difference < tol: break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array, rhs_extra=d1_penalty) + } return baseline, params @@ -615,6 +650,9 @@ def pspline_airpls(self, data, lam=1e3, num_knots=100, spline_degree=3, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -648,7 +686,10 @@ def pspline_airpls(self, data, lam=1e3, num_knots=100, spline_degree=3, break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -695,6 +736,9 @@ def pspline_arpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_orde each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. 
+ * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -725,7 +769,10 @@ def pspline_arpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_orde break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -776,6 +823,9 @@ def pspline_drpls(self, data, lam=1e3, eta=0.5, num_knots=100, spline_degree=3, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -817,10 +867,8 @@ def pspline_drpls(self, data, lam=1e3, eta=0.5, num_knots=100, spline_degree=3, diff_n_diagonals * np.interp(interp_pts, self.x, weight_array), pspline.num_bands, pspline.num_bands ) - baseline = pspline.solve_pspline( - y, weight_array, - penalty=_add_diagonals(pspline.penalty, diff_n_w_diagonals, lower_only=False) - ) + penalty = _add_diagonals(pspline.penalty, diff_n_w_diagonals, lower_only=False) + baseline = pspline.solve_pspline(y, weight_array, penalty=penalty) new_weights, exit_early = _weighting._drpls(y, baseline, i) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct @@ -832,7 +880,10 @@ def pspline_drpls(self, data, lam=1e3, eta=0.5, num_knots=100, spline_degree=3, break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult(pspline, weight_array, penalty=penalty) + } return baseline, params @@ -879,6 +930,9 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=100, 
spline_degree=3, diff_ord each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -910,7 +964,10 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -979,6 +1036,9 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. 
Raises ------ @@ -1031,7 +1091,7 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde pspline.penalty * np.interp(interp_pts, self.x, alpha_array), pspline.num_bands, pspline.num_bands ) - baseline = pspline.solve_pspline(y, weight_array, alpha_penalty) + baseline = pspline.solve_pspline(y, weight_array, penalty=alpha_penalty) new_weights, residual, exit_early = _weighting._aspls( y, baseline, asymmetric_coef, alternate_weighting ) @@ -1047,7 +1107,8 @@ def pspline_aspls(self, data, lam=1e4, num_knots=100, spline_degree=3, diff_orde alpha_array = abs_d / abs_d.max() params = { - 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1] + 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array, penalty=alpha_penalty) } return baseline, params @@ -1105,6 +1166,9 @@ def pspline_psalsa(self, data, lam=1e3, p=0.5, k=None, num_knots=100, spline_deg each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -1146,7 +1210,10 @@ def pspline_psalsa(self, data, lam=1e3, p=0.5, k=None, num_knots=100, spline_deg break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -1218,6 +1285,9 @@ def pspline_derpsalsa(self, data, lam=1e2, p=1e-2, k=None, num_knots=100, spline each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. 
+ * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -1281,7 +1351,10 @@ def pspline_derpsalsa(self, data, lam=1e2, p=1e-2, k=None, num_knots=100, spline break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @@ -1354,6 +1427,9 @@ def pspline_mpls(self, data, half_window=None, lam=1e3, p=0.0, num_knots=100, sp The weight array used for fitting the data. * 'half_window': int The half window used for the morphological calculations. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -1417,7 +1493,10 @@ def pspline_mpls(self, data, half_window=None, lam=1e3, p=0.0, num_knots=100, sp ) baseline = pspline.solve_pspline(y, weight_array) - params = {'weights': weight_array, 'half_window': half_wind} + params = { + 'weights': weight_array, 'half_window': half_wind, + 'result': PSplineResult(pspline, weight_array) + } return baseline, params @_Algorithm._register(sort_keys=('weights',)) @@ -1473,6 +1552,9 @@ def pspline_brpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_orde `max_iter_2`, `tol_2`), and shape K is the maximum of the number of iterations for the threshold and the maximum number of iterations for all of the fits of the various threshold values (related to `max_iter` and `tol`). + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. 
See Also -------- @@ -1530,7 +1612,8 @@ def pspline_brpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_orde beta = 1 - weight_mean params = { - 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1] + 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1], + 'result': PSplineResult(pspline, baseline_weights) } return baseline, params @@ -1584,6 +1667,9 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -1625,7 +1711,10 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=100, spline_degree=3, diff_ord break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult(pspline, weight_array) + } return baseline, params diff --git a/pybaselines/two_d/_spline_utils.py b/pybaselines/two_d/_spline_utils.py index 3b3cc7e3..86097bcc 100644 --- a/pybaselines/two_d/_spline_utils.py +++ b/pybaselines/two_d/_spline_utils.py @@ -144,7 +144,7 @@ def basis(self): return self._basis def _make_btwb(self, weights): - """Computes ``Basis.T @ Weights @ Basis`` using a more efficient method. + """Computes ``Basis.T @ Weights @ Basis`` as a generalized linear array model. 
Returns ------- @@ -158,6 +158,11 @@ def _make_btwb(self, weights): """ # do not save intermediate results since they are memory intensive for high number of bases + # Note to self: F is fully dense, such that B.T @ W @ B + P is also fully dense; it is + # still kept as a sparse system since solving the dense system is slower and + # significantly more memory intensive, even with scipy.linalg.solve with assume_a='sym' or + # 'pos'; using solve[h]_banded also offers no significant speed up, although memory usage + # is comparable to spsolve F = csr_object( np.transpose( (self._G_r.T @ weights @ self._G_c).reshape(( @@ -174,6 +179,13 @@ def tk(self): """ The knots and spline degree for the spline. + Returns + ------- + knots : tuple[numpy.ndarray, numpy.ndarray] + The knots for the spline along the rows and columns. + spline_degree : numpy.ndarray([int, int]) + The degree of the spline for the rows and columns. + Notes ----- To compare with :class:`scipy.interpolate.NdBSpline`, the setup would look like: @@ -274,9 +286,7 @@ def solve(self, y, weights, penalty=None, rhs_extra=None): Solves the linear equation ``(B.T @ W @ B + P) c = B.T @ W @ y`` for the spline coefficients, `c`, given the spline basis, `B`, the weights (diagonal of `W`), the - penalty `P`, and `y`, and returns the resulting spline, ``B @ c``. Attempts to - calculate ``B.T @ W @ B`` and ``B.T @ W @ y`` as a banded system to speed up - the calculation. + penalty `P`, and `y`, and returns the resulting spline, ``B @ c``. Parameters ---------- @@ -284,10 +294,9 @@ def solve(self, y, weights, penalty=None, rhs_extra=None): The y-values for fitting the spline. weights : numpy.ndarray, shape (M, N) The weights for each y-value. 
- penalty : numpy.ndarray, shape (``M * N``, ``M * N``) - The finite difference penalty matrix, in LAPACK's lower banded format (see - :func:`scipy.linalg.solveh_banded`) if `lower_only` is True or the full banded - format (see :func:`scipy.linalg.solve_banded`) if `lower_only` is False. + penalty : scipy.sparse.spmatrix or scipy.sparse.sparray, shape (``P * Q``, ``P * Q``) + The finite difference penalty matrix. Default is None, which will use the + object's penalty. rhs_extra : float or numpy.ndarray, shape (``M * N``,), optional If supplied, `rhs_extra` will be added to the right hand side (``B.T @ W @ y``) of the equation before solving. Default is None, which adds nothing. @@ -300,10 +309,11 @@ def solve(self, y, weights, penalty=None, rhs_extra=None): Notes ----- - Uses the more efficient algorithm from Eilers's paper, although the memory usage - is higher than the straigtforward method when the number of knots is high; however, - it is significantly faster and memory efficient when the number of knots is lower, - which will be the more typical use case. + Uses the more efficient algorithm from Eilers's paper, as a generalized linear array + model, although the memory usage is higher than the straightforward method when the + number of knots is high since reshaping and transposing cannot be done in sparse format; + however, it is significantly faster and memory efficient when the number of knots is + lower, which will be the more typical use case. References ---------- @@ -331,9 +341,19 @@ def tck(self): The knots, spline coefficients, and spline degree to reconstruct the spline. Convenience function for easily reconstructing the last solved spline with outside - modules, such as with SciPy's `NdBSpline`, to allow for other usages such as evaulating + modules, such as with SciPy's `NdBSpline`, to allow for other usages such as evaluating with different x- and z-values. 
+
+        Returns
+        -------
+        knots : tuple[numpy.ndarray, numpy.ndarray]
+            The knots for the spline along the rows and columns.
+        coef : numpy.ndarray, shape (M, N)
+            The spline coefficients. Has a shape of (`M`, `N`), corresponding to the number
+            of basis functions along the rows and columns.
+        spline_degree : numpy.ndarray([int, int])
+            The degree of the spline for the rows and columns.
+
         Raises
         ------
         ValueError
@@ -348,7 +368,7 @@ def tck(self):
             pspline = Pspline2D(x, z, ...)
             pspline_fit = pspline.solve(...)
             XZ = np.array(np.meshgrid(x, z)).T  # same as zipping the meshgrid and rearranging
-            fit = NdBSpline(pspline.tck)(XZ)  # fit == pspline_fit
+            fit = NdBSpline(*pspline.tck)(XZ)  # fit == pspline_fit
 
         """
         if self.coef is None:
diff --git a/pybaselines/two_d/_whittaker_utils.py b/pybaselines/two_d/_whittaker_utils.py
index e555a23d..a5283c7e 100644
--- a/pybaselines/two_d/_whittaker_utils.py
+++ b/pybaselines/two_d/_whittaker_utils.py
@@ -173,7 +173,7 @@ def solve(self, y, weights, penalty=None, rhs_extra=None):
         weights : numpy.ndarray
             The weights for each y-value. Will also be added to the diagonal of the
             penalty.
-        penalty : numpy.ndarray
+        penalty : scipy.sparse.spmatrix or scipy.sparse.sparray
            The penalty to use for solving. Default is None which uses the
            object's penalty.
         rhs_extra : float or numpy.ndarray, optional
@@ -198,6 +198,22 @@ def solve(self, y, weights, penalty=None, rhs_extra=None):
         return self.direct_solve(lhs, rhs)
 
     def direct_solve(self, lhs, rhs):
+        """
+        Solves the linear system ``lhs @ x = rhs``.
+
+        Parameters
+        ----------
+        lhs : scipy.sparse.spmatrix or scipy.sparse.sparray
+            The left hand side of the equation.
+        rhs : numpy.ndarray or scipy.sparse.spmatrix or scipy.sparse.sparray
+            The right hand side of the equation.
+
+        Returns
+        -------
+        numpy.ndarray or scipy.sparse.spmatrix or scipy.sparse.sparray
+            The solution to the linear system, with the same shape as `rhs`.
+ + """ return spsolve(lhs, rhs) def add_diagonal(self, value): @@ -211,7 +227,7 @@ def add_diagonal(self, value): Returns ------- - scipy.sparse.spmatrix + scipy.sparse.spmatrix or scipy.sparse.sparray The penalty matrix with the main diagonal updated. """ @@ -561,10 +577,9 @@ def solve(self, y, weights, penalty=None, rhs_extra=None, assume_a='pos'): The y-values for fitting the spline. weights : numpy.ndarray, shape (M, N) The weights for each y-value. - penalty : numpy.ndarray, shape (``M * N``, ``M * N``) - The finite difference penalty matrix, in LAPACK's lower banded format (see - :func:`scipy.linalg.solveh_banded`) if `lower_only` is True or the full banded - format (see :func:`scipy.linalg.solve_banded`) if `lower_only` is False. + penalty : numpy.ndarray or scipy.sparse.spmatrix or scipy.sparse.sparray + The finite difference penalty matrix with shape (``M * N``, ``M * N``). Default + is None, which will use the object's penalty. rhs_extra : float or numpy.ndarray, shape (``M * N``,), optional If supplied, `rhs_extra` will be added to the right hand side (``B.T @ W @ y``) of the equation before solving. Default is None, which adds nothing. @@ -577,10 +592,10 @@ def solve(self, y, weights, penalty=None, rhs_extra=None, assume_a='pos'): Notes ----- - Uses the more efficient algorithm from Eilers's paper, although the memory usage - is higher than the straigtforward method when the number of eigenvalues is high; however, - it is significantly faster and memory efficient when the number of eigenvalues is lower, - which will be the more typical use case. + Uses the more efficient algorithm from Eilers's paper, as a generalized linear array + model, although the memory usage is higher than the straightforward method when the + number of eigenvalues is high; however, it is significantly faster and memory efficient + when the number of eigenvalues is lower, which will be the more typical use case. 
References ---------- @@ -667,7 +682,6 @@ def _calc_dof(self, weights, assume_a='pos'): """ if not self._using_svd: - # Could maybe just output a matrix of ones? raise ValueError( 'Cannot calculate degrees of freedom when not using eigendecomposition' ) diff --git a/pybaselines/two_d/spline.py b/pybaselines/two_d/spline.py index 0e22acd5..38d071bd 100644 --- a/pybaselines/two_d/spline.py +++ b/pybaselines/two_d/spline.py @@ -12,6 +12,7 @@ from .. import _weighting from .._validation import _check_scalar_variable +from ..results import PSplineResult2D from ..utils import ParameterWarning, gaussian, relative_difference, _MIN_FLOAT from ._algorithm_setup import _Algorithm2D from ._whittaker_utils import PenalizedSystem2D @@ -82,6 +83,9 @@ def mixture_model(self, data, lam=1e3, p=1e-2, num_knots=25, spline_degree=3, di each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -182,7 +186,8 @@ def mixture_model(self, data, lam=1e3, p=1e-2, num_knots=25, spline_degree=3, di residual = y - baseline params = { - 'weights': weight_array, 'tol_history': tol_history[:i + 1] + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array) } baseline = np.polynomial.polyutils.mapdomain(baseline, np.array([-1., 1.]), y_domain) @@ -243,6 +248,9 @@ def irsqr(self, data, lam=1e3, quantile=0.05, num_knots=25, spline_degree=3, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. 
Raises ------ @@ -273,7 +281,10 @@ def irsqr(self, data, lam=1e3, quantile=0.05, num_knots=25, spline_degree=3, old_coef = pspline.coef weight_array = _weighting._quantile(y, baseline, quantile, eps) - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -327,6 +338,9 @@ def pspline_asls(self, data, lam=1e3, p=1e-2, num_knots=25, spline_degree=3, dif each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -364,7 +378,10 @@ def pspline_asls(self, data, lam=1e3, p=1e-2, num_knots=25, spline_degree=3, dif break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -384,8 +401,8 @@ def pspline_iasls(self, data, lam=1e3, p=1e-2, lam_1=1e-4, num_knots=25, baselines. Default is 1e3. p : float, optional The penalizing weighting factor. Must be between 0 and 1. Values greater - than the baseline will be given `p` weight, and values less than the baseline - will be given `1 - p` weight. Default is 1e-2. + than the baseline will be given ``p**2`` weight, and values less than the baseline + will be given ``(1 - p)**2`` weight. Default is 1e-2. lam_1 : float or Sequence[float, float], optional The smoothing parameter for the rows and columns, respectively, of the first derivative of the residual. 
If a single value is given, both will use the same @@ -417,11 +434,19 @@ def pspline_iasls(self, data, lam=1e3, p=1e-2, lam_1=1e-4, num_knots=25, * 'weights': numpy.ndarray, shape (M, N) The weight array used for fitting the data. + + .. versionchanged:: 1.3.0 + Prior to version 1.3.0, the returned weights were the non-squared + values (ie. ``p`` or ``1 - p``). + * 'tol_history': numpy.ndarray An array containing the calculated tolerance values for each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -432,6 +457,13 @@ def pspline_iasls(self, data, lam=1e3, p=1e-2, lam_1=1e-4, num_knots=25, -------- Baseline2D.iasls + Notes + ----- + Although both ``pspline_iasls`` and :meth:`~.Baseline2D.pspline_asls` use `p` for defining + the weights, the appropriate `p` value for ``pspline_iasls`` will be approximately equal + to the square root of the value used for ``pspline_asls`` when `p` is small since + ``pspline_iasls`` uses squared weights. + References ---------- He, S., et al. 
Baseline correction for raman spectra using an improved @@ -459,22 +491,26 @@ def pspline_iasls(self, data, lam=1e3, p=1e-2, lam_1=1e-4, num_knots=25, # B.T @ P_1 @ B and B.T @ P_1 @ y penalized_system_1 = PenalizedSystem2D(self._shape, lam_1, diff_order=1) - p1_partial_penalty = pspline.basis.basis.T @ penalized_system_1.penalty + d1_penalty = pspline.basis.basis.T @ penalized_system_1.penalty - partial_rhs = p1_partial_penalty @ y.ravel() - pspline.add_penalty(p1_partial_penalty @ pspline.basis.basis) + partial_rhs = d1_penalty @ y.ravel() + d1_penalty = d1_penalty @ pspline.basis.basis + pspline.add_penalty(d1_penalty) tol_history = np.empty(max_iter + 1) for i in range(max_iter + 1): - baseline = pspline.solve(y, weight_array**2, rhs_extra=partial_rhs) - new_weights = _weighting._asls(y, baseline, p) + baseline = pspline.solve(y, weight_array, rhs_extra=partial_rhs) + new_weights = _weighting._asls(y, baseline, p)**2 calc_difference = relative_difference(weight_array, new_weights) tol_history[i] = calc_difference if calc_difference < tol: break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array, rhs_extra=d1_penalty) + } return baseline, params @@ -528,6 +564,9 @@ def pspline_airpls(self, data, lam=1e3, num_knots=25, spline_degree=3, each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. 
See Also -------- @@ -561,7 +600,10 @@ def pspline_airpls(self, data, lam=1e3, num_knots=25, spline_degree=3, break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -611,6 +653,9 @@ def pspline_arpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -641,7 +686,10 @@ def pspline_arpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -691,6 +739,9 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. 
See Also -------- @@ -722,7 +773,10 @@ def pspline_iarpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -782,6 +836,9 @@ def pspline_psalsa(self, data, lam=1e3, p=0.5, k=None, num_knots=25, spline_degr each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -823,7 +880,10 @@ def pspline_psalsa(self, data, lam=1e3, p=0.5, k=None, num_knots=25, spline_degr break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params @@ -883,6 +943,9 @@ def pspline_brpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order `max_iter_2`, `tol_2`), and shape K is the maximum of the number of iterations for the threshold and the maximum number of iterations for all of the fits of the various threshold values (related to `max_iter` and `tol`). + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. 
See Also -------- @@ -940,7 +1003,8 @@ def pspline_brpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_order beta = 1 - weight_mean params = { - 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1] + 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1], + 'result': PSplineResult2D(pspline, weight_array) } return baseline, params @@ -997,6 +1061,9 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': PSplineResult2D + An object that can use the results of the fit to perform additional + calculations. See Also -------- @@ -1040,6 +1107,9 @@ def pspline_lsrpls(self, data, lam=1e3, num_knots=25, spline_degree=3, diff_orde break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': PSplineResult2D(pspline, weight_array) + } return baseline, params diff --git a/pybaselines/two_d/whittaker.py b/pybaselines/two_d/whittaker.py index 5871cdf2..34fbd37e 100644 --- a/pybaselines/two_d/whittaker.py +++ b/pybaselines/two_d/whittaker.py @@ -11,6 +11,7 @@ from .. import _weighting from .._compat import diags from .._validation import _check_optional_array, _check_scalar_variable +from ..results import WhittakerResult2D from ..utils import _MIN_FLOAT, relative_difference from ._algorithm_setup import _Algorithm2D from ._whittaker_utils import PenalizedSystem2D @@ -77,6 +78,9 @@ def asls(self, data, lam=1e6, p=1e-2, diff_order=2, max_iter=50, tol=1e-3, weigh Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. 
+ * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -109,7 +113,10 @@ def asls(self, data, lam=1e6, p=1e-2, diff_order=2, max_iter=50, tol=1e-3, weigh break weight_array = new_weights - params = {'tol_history': tol_history[:i + 1]} + params = { + 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: @@ -140,8 +147,8 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, baselines. Default is 1e6. p : float, optional The penalizing weighting factor. Must be between 0 and 1. Values greater - than the baseline will be given `p` weight, and values less than the baseline - will be given `1 - p` weight. Default is 1e-2. + than the baseline will be given ``p**2`` weight, and values less than the baseline + will be given ``(1 - p)**2`` weight. Default is 1e-2. lam_1 : float or Sequence[float, float], optional The smoothing parameter for the rows and columns, respectively, of the first derivative of the residual. Default is 1e-4. @@ -166,17 +173,31 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, * 'weights': numpy.ndarray, shape (M, N) The weight array used for fitting the data. + + .. versionchanged:: 1.3.0 + Prior to version 1.3.0, the returned weights were the non-squared + values (ie. ``p`` or ``1 - p``). + * 'tol_history': numpy.ndarray An array containing the calculated tolerance values for each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ ValueError Raised if `p` is not between 0 and 1 or if `diff_order` is less than 2. 
+ Notes + ----- + Although both ``iasls`` and :meth:`~.Baseline2D.asls` use `p` for defining the weights, + the appropriate `p` value for ``iasls`` will be approximately equal to the square root + of the value used for ``asls`` when `p` is small since ``iasls`` uses squared weights. + References ---------- He, S., et al. Baseline correction for raman spectra using an improved @@ -203,15 +224,20 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, p1_y = penalized_system_1.penalty @ y tol_history = np.empty(max_iter + 1) for i in range(max_iter + 1): - baseline = whittaker_system.solve(y, weight_array**2, rhs_extra=p1_y) - new_weights = _weighting._asls(y, baseline, p) + baseline = whittaker_system.solve(y, weight_array, rhs_extra=p1_y) + new_weights = _weighting._asls(y, baseline, p)**2 calc_difference = relative_difference(weight_array, new_weights) tol_history[i] = calc_difference if calc_difference < tol: break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult2D( + whittaker_system, weight_array, rhs_extra=penalized_system_1.penalty + ) + } return baseline, params @@ -273,6 +299,9 @@ def airpls(self, data, lam=1e6, diff_order=2, max_iter=50, tol=1e-3, weights=Non Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. 
References ---------- @@ -303,7 +332,10 @@ def airpls(self, data, lam=1e6, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'tol_history': tol_history[:i]} + params = { + 'tol_history': tol_history[:i], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: @@ -368,6 +400,9 @@ def arpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=None Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. References ---------- @@ -394,7 +429,10 @@ def arpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=None break weight_array = new_weights - params = {'tol_history': tol_history[:i + 1]} + params = { + 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: @@ -450,6 +488,9 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. 
Raises ------ @@ -476,9 +517,8 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif weight_matrix = diags(weight_array, format='csr') tol_history = np.empty(max_iter + 1) for i in range(1, max_iter + 2): - baseline = whittaker_system.direct_solve( - partial_penalty + weight_matrix @ partial_penalty_2, weight_array * y - ) + lhs = partial_penalty + weight_matrix @ partial_penalty_2 + baseline = whittaker_system.direct_solve(lhs, weight_array * y) new_weights, exit_early = _weighting._drpls(y, baseline, i) if exit_early: i -= 1 # reduce i so that output tol_history indexing is correct @@ -491,7 +531,10 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif weight_array = new_weights weight_matrix.setdiag(weight_array) - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': WhittakerResult2D(whittaker_system, weight_array, lhs=lhs) + } return baseline, params @@ -549,6 +592,9 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. References ---------- @@ -576,7 +622,10 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'tol_history': tol_history[:i]} + params = { + 'tol_history': tol_history[:i], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: @@ -650,6 +699,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, each iteration. The length of the array is the number of iterations completed. 
If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -712,7 +764,8 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, alpha_matrix.setdiag(alpha_array) params = { - 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1] + 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult2D(whittaker_system, weight_array, penalty=penalty) } return baseline, params @@ -785,6 +838,9 @@ def psalsa(self, data, lam=1e5, p=0.5, k=None, diff_order=2, max_iter=50, tol=1e Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -830,7 +886,10 @@ def psalsa(self, data, lam=1e5, p=0.5, k=None, diff_order=2, max_iter=50, tol=1e break weight_array = new_weights - params = {'tol_history': tol_history[:i + 1]} + params = { + 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: @@ -905,6 +964,9 @@ def brpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. 
References ---------- @@ -957,7 +1019,10 @@ def brpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 break beta = 1 - weight_mean - params = {'tol_history': tol_history[:i + 2, :max(i, j_max) + 1]} + params = { + 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = baseline_weights if return_dof: @@ -1028,6 +1093,9 @@ def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=Non Only if `return_dof` is True. The effective degrees of freedom associated with each eigenvector. Lower values signify that the eigenvector was less important for the fit. + * 'result': WhittakerResult2D + An object that can use the results of the fit to perform additional + calculations. Notes ----- @@ -1067,7 +1135,10 @@ def lsrpls(self, data, lam=1e3, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'tol_history': tol_history[:i]} + params = { + 'tol_history': tol_history[:i], + 'result': WhittakerResult2D(whittaker_system, weight_array) + } if whittaker_system._using_svd: params['weights'] = weight_array if return_dof: diff --git a/pybaselines/utils.py b/pybaselines/utils.py index 4ae5c10a..c0c41af3 100644 --- a/pybaselines/utils.py +++ b/pybaselines/utils.py @@ -950,15 +950,15 @@ def _sort_array2d(array, sort_order=None): output : numpy.ndarray The input array after optionally sorting. - Notes - ----- - For all inputs, assumes the last 2 axes correspond to the data that needs sorted. - Raises ------ ValueError Raised if the input array is not two or three dimensional. + Notes + ----- + For all inputs, assumes the last 2 axes correspond to the data that needs sorted. + """ if sort_order is None: output = array @@ -997,15 +997,15 @@ def _sort_array(array, sort_order=None): output : numpy.ndarray The input array after optionally sorting. 
- Notes - ----- - For all inputs, assumes the last axis corresponds to the data that needs sorted. - Raises ------ ValueError Raised if the input array has more than two dimensions. + Notes + ----- + For all inputs, assumes the last axis corresponds to the data that needs sorted. + """ if sort_order is None: output = array @@ -1302,3 +1302,29 @@ def estimate_polyorder(data, x_data=None, min_value=1, max_value=10): max_skew = residual_skew return best_order + + +def _get_rng(rng): + """ + Generates or returns a random number generator from the given input. + + Parameters + ---------- + rng : int or numpy.random.Generator or numpy.random.RandomState + The integer for the seed of the random number generator or an existing generating + object. + + Returns + ------- + output : numpy.random.Generator or numpy.random.RandomState + The random number generator corresponding to the input `rng`. If `rng` was an existing + RandomState or Generator object, it is returned; otherwise, `output` is + a Generator object. + + """ + if isinstance(rng, (np.random.Generator, np.random.RandomState)): + output = rng + else: + output = np.random.default_rng(rng) + + return output diff --git a/pybaselines/whittaker.py b/pybaselines/whittaker.py index 610dd73c..621484fa 100644 --- a/pybaselines/whittaker.py +++ b/pybaselines/whittaker.py @@ -12,6 +12,7 @@ from ._algorithm_setup import _Algorithm, _class_wrapper from ._banded_utils import _shift_rows, diff_penalty_diagonals from ._validation import _check_lam, _check_optional_array, _check_scalar_variable +from .results import WhittakerResult from .utils import _mollifier_kernel, pad_edges, padded_convolve, relative_difference @@ -74,6 +75,9 @@ def asls(self, data, lam=1e6, p=1e-2, diff_order=2, max_iter=50, tol=1e-3, weigh each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input ``tol`` value, then the function did not converge. 
+ * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -152,7 +156,10 @@ def asls(self, data, lam=1e6, p=1e-2, diff_order=2, max_iter=50, tol=1e-3, weigh break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -168,8 +175,8 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, .. math:: - (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1 + \lambda D_d^{\mathsf{T}} D_d) v - = (W^{\mathsf{T}} W + \lambda_1 D_1^{\mathsf{T}} D_1) y + (W + \lambda_1 D_1^{\mathsf{T}} D_1 + \lambda D_d^{\mathsf{T}} D_d) v + = (W + \lambda_1 D_1^{\mathsf{T}} D_1) y where y is the input data, :math:`D_d` is the finite difference matrix of order d, :math:`D_1` is the first-order finite difference matrix, W is the diagonal matrix @@ -181,8 +188,8 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, .. math:: w_i = \left\{\begin{array}{cr} - p & y_i > v_i \\ - 1 - p & y_i \le v_i + p^2 & y_i > v_i \\ + (1 - p)^2 & y_i \le v_i \end{array}\right. Parameters @@ -194,8 +201,8 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, Default is 1e6. p : float, optional The penalizing weighting factor. Must be between 0 and 1. Values greater - than the baseline will be given `p` weight, and values less than the baseline - will be given `1 - p` weight. Default is 1e-2. + than the baseline will be given ``p**2`` weight, and values less than the baseline + will be given ``(1 - p)**2`` weight. Default is 1e-2. lam_1 : float, optional The smoothing parameter for the first derivative of the residual. Default is 1e-4. 
max_iter : int, optional @@ -218,11 +225,19 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, * 'weights': numpy.ndarray, shape (N,) The weight array used for fitting the data. + + .. versionchanged:: 1.3.0 + Prior to version 1.3.0, the returned weights were the non-squared + values (ie. ``p`` or ``1 - p``). + * 'tol_history': numpy.ndarray An array containing the calculated tolerance values for each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -231,10 +246,9 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, Notes ----- - Although both ``iasls`` and :meth:`~.Baseline.asls` use ``p`` for defining the weights, - the appropriate ``p`` value for ``iasls`` will be approximately equal to the square root - of the value used for ``asls`` since ``iasls`` squares the weights within its linear - equation. + Although both ``iasls`` and :meth:`~.Baseline.asls` use `p` for defining the weights, + the appropriate `p` value for ``iasls`` will be approximately equal to the square root + of the value used for ``asls`` when `p` is small since ``iasls`` uses squared weights. Omits the outer loop described by the reference implementation of the IAsLs algorithm, in which the baseline fitting is repeated after subtracting the baseline from the data. 
@@ -317,8 +331,10 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, y, weight_array, whittaker_system = self._setup_whittaker(data, lam, diff_order, weights) lambda_1 = _check_lam(lam_1) - diff_1_diags = diff_penalty_diagonals(self._size, 1, whittaker_system.lower, 1) - whittaker_system.add_penalty(lambda_1 * diff_1_diags) + residual_penalty = lambda_1 * diff_penalty_diagonals( + self._size, 1, whittaker_system.lower, padding=diff_order - 1 + ) + whittaker_system.add_penalty(residual_penalty) # fast calculation of lam_1 * (D_1.T @ D_1) @ y d1_y = y.copy() @@ -328,19 +344,23 @@ def iasls(self, data, lam=1e6, p=1e-2, lam_1=1e-4, max_iter=50, tol=1e-3, d1_y = lambda_1 * d1_y tol_history = np.empty(max_iter + 1) for i in range(max_iter + 1): - weight_squared = weight_array**2 baseline = whittaker_system.solve( - whittaker_system.add_diagonal(weight_squared), weight_squared * y + d1_y, + whittaker_system.add_diagonal(weight_array), weight_array * y + d1_y, overwrite_b=True ) - new_weights = _weighting._asls(y, baseline, p) + new_weights = _weighting._asls(y, baseline, p)**2 calc_difference = relative_difference(weight_array, new_weights) tol_history[i] = calc_difference if calc_difference < tol: break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult( + whittaker_system, weight_array, rhs_extra=residual_penalty + ) + } return baseline, params @@ -404,6 +424,9 @@ def airpls(self, data, lam=1e6, diff_order=2, max_iter=50, tol=1e-3, weights=Non each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. 
Notes ----- @@ -469,7 +492,10 @@ def airpls(self, data, lam=1e6, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -530,6 +556,9 @@ def arpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. References ---------- @@ -572,7 +601,10 @@ def arpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=None break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -617,6 +649,9 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. 
Raises ------ @@ -650,9 +685,9 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif penalty_with_weights = _shift_rows( diff_n_diagonals * weight_array, diff_order, diff_order ) + lhs = whittaker_system.penalty + penalty_with_weights baseline = whittaker_system.solve( - whittaker_system.penalty + penalty_with_weights, weight_array * y, - overwrite_ab=True, overwrite_b=True, l_and_u=lower_upper_bands + lhs, weight_array * y, overwrite_b=True, l_and_u=lower_upper_bands ) new_weights, exit_early = _weighting._drpls(y, baseline, i) if exit_early: @@ -665,7 +700,10 @@ def drpls(self, data, lam=1e5, eta=0.5, max_iter=50, tol=1e-3, weights=None, dif break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': WhittakerResult(whittaker_system, weight_array, lhs=lhs) + } return baseline, params @@ -706,6 +744,9 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. References ---------- @@ -731,7 +772,10 @@ def iarpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -794,6 +838,9 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, each iteration. The length of the array is the number of iterations completed. 
If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -837,7 +884,7 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, lhs = whittaker_system.penalty * alpha_array lhs[whittaker_system.main_diagonal_index] += weight_array baseline = whittaker_system.solve( - _shift_rows(lhs, diff_order, diff_order), weight_array * y, overwrite_ab=True, + _shift_rows(lhs, diff_order, diff_order), weight_array * y, overwrite_b=True, l_and_u=lower_upper_bands ) new_weights, residual, exit_early = _weighting._aspls( @@ -855,7 +902,8 @@ def aspls(self, data, lam=1e5, diff_order=2, max_iter=100, tol=1e-3, alpha_array = abs_d / abs_d.max() params = { - 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1] + 'weights': weight_array, 'alpha': alpha_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult(whittaker_system, weight_array, lhs=lhs) } return baseline, params @@ -912,6 +960,9 @@ def psalsa(self, data, lam=1e5, p=0.5, k=None, diff_order=2, max_iter=50, tol=1e each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. 
Raises ------ @@ -953,7 +1004,10 @@ def psalsa(self, data, lam=1e5, p=0.5, k=None, diff_order=2, max_iter=50, tol=1e break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -1020,6 +1074,9 @@ def derpsalsa(self, data, lam=1e6, p=0.01, k=None, diff_order=2, max_iter=50, to each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Raises ------ @@ -1077,7 +1134,10 @@ def derpsalsa(self, data, lam=1e6, p=0.01, k=None, diff_order=2, max_iter=50, to break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i + 1], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params @@ -1129,6 +1189,9 @@ def brpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 `max_iter_2`, `tol_2`), and shape K is the maximum of the number of iterations for the threshold and the maximum number of iterations for all of the fits of the various threshold values (related to `max_iter` and `tol`). + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. 
References ---------- @@ -1180,7 +1243,8 @@ def brpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, max_iter_2=5 beta = 1 - weight_mean params = { - 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1] + 'weights': baseline_weights, 'tol_history': tol_history[:i + 2, :max(i, j_max) + 1], + 'result': WhittakerResult(whittaker_system, weight_array) } return baseline, params @@ -1229,6 +1293,9 @@ def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non each iteration. The length of the array is the number of iterations completed. If the last value in the array is greater than the input `tol` value, then the function did not converge. + * 'result': WhittakerResult + An object that can use the results of the fit to perform additional + calculations. Notes ----- @@ -1267,7 +1334,10 @@ def lsrpls(self, data, lam=1e5, diff_order=2, max_iter=50, tol=1e-3, weights=Non break weight_array = new_weights - params = {'weights': weight_array, 'tol_history': tol_history[:i]} + params = { + 'weights': weight_array, 'tol_history': tol_history[:i], + 'result': WhittakerResult(whittaker_system, weight_array) + } return baseline, params diff --git a/tests/test_banded_solvers.py b/tests/test_banded_solvers.py index 06e4f36f..f856892d 100644 --- a/tests/test_banded_solvers.py +++ b/tests/test_banded_solvers.py @@ -53,7 +53,7 @@ def pentapy_ptrans1(mat_flat, rhs): Parameters ---------- - lhs : numpy.ndarray, shape (5, M) + mat_flat : numpy.ndarray, shape (5, M) The pentadiagonal matrix `A` in row-wise banded format (see https://geostat-framework.readthedocs.io/projects/pentapy/en/stable/examples/index.html). 
rhs : numpy.ndarray, shape (M,) or (M, N) @@ -148,7 +148,7 @@ def pentapy_ptrans2(mat_flat, rhs): Parameters ---------- - lhs : numpy.ndarray, shape (5, M) + mat_flat : numpy.ndarray, shape (5, M) The pentadiagonal matrix `A` in row-wise banded format (see https://geostat-framework.readthedocs.io/projects/pentapy/en/stable/examples/index.html). rhs : numpy.ndarray, shape (M,) or (M, N) diff --git a/tests/test_banded_utils.py b/tests/test_banded_utils.py index da3fed6f..383d2b17 100644 --- a/tests/test_banded_utils.py +++ b/tests/test_banded_utils.py @@ -11,9 +11,11 @@ import numpy as np from numpy.testing import assert_allclose, assert_array_equal import pytest +from scipy.linalg import cholesky_banded from scipy.sparse.linalg import spsolve from pybaselines import _banded_utils, _spline_utils +from pybaselines._banded_solvers import penta_factorize from pybaselines._compat import dia_object, diags, identity @@ -274,6 +276,22 @@ def test_lower_to_full(data_fixture, num_knots, spline_degree): assert_allclose(_banded_utils._lower_to_full(BTWB_lower), BTWB_full, 1e-10, 1e-14) +@pytest.mark.parametrize('size', (100, 1001)) +def test_lower_to_full_diagonal(size): + """Ensures correct usage for a matrix with only a diagonal.""" + # test both a 1d and 2d input + array = np.linspace(-1, 1, size) + array_2d = array.reshape((1, size)) + + expected = array_2d.copy() + + output = _banded_utils._lower_to_full(array) + output_2d = _banded_utils._lower_to_full(array_2d) + + assert_allclose(output, expected, rtol=1e-14, atol=1e-14) + assert_allclose(output_2d, expected, rtol=1e-14, atol=1e-14) + + @pytest.mark.parametrize('padding', (-1, 0, 1, 2)) @pytest.mark.parametrize('lower_only', (True, False)) def test_pad_diagonals(padding, lower_only): @@ -649,12 +667,15 @@ def test_penalized_system_solve(data_fixture, diff_order, allow_lower, allow_pen """ Tests the solve method of a PenalizedSystem object. 
- Solves the equation ``(I + lam * D.T @ D) x = y``, where `I` is the identity + Solves the equation ``(W + lam * D.T @ D) x = W @ y``, where `W` is the weight matrix, and ``D.T @ D`` is the penalty. """ x, y = data_fixture data_size = len(y) + weights = np.random.default_rng(0).normal(0.8, 0.05, x.size) + weights = np.clip(weights, 0, 1).astype(float) + lam = {1: 1e2, 2: 1e5, 3: 1e8}[diff_order] expected_penalty = _banded_utils.diff_penalty_diagonals( data_size, diff_order=diff_order, lower_only=False @@ -663,16 +684,16 @@ def test_penalized_system_solve(data_fixture, diff_order, allow_lower, allow_pen (lam * expected_penalty, np.arange(diff_order, -(diff_order + 1), -1)), shape=(data_size, data_size) ).tocsr() - expected_solution = spsolve(identity(data_size, format='csr') + sparse_penalty, y) + expected_solution = spsolve(diags(weights, format='csr') + sparse_penalty, weights * y) penalized_system = _banded_utils.PenalizedSystem( data_size, lam=lam, diff_order=diff_order, allow_lower=allow_lower, reverse_diags=False, allow_penta=allow_penta ) - penalized_system.penalty[penalized_system.main_diagonal_index] += 1 - output = penalized_system.solve(penalized_system.penalty, y) + penalized_system.add_diagonal(weights) + output = penalized_system.solve(penalized_system.penalty, weights * y) - assert_allclose(output, expected_solution, 1e-6, 1e-10) + assert_allclose(output, expected_solution, rtol=1e-6, atol=1e-10) @pytest.mark.parametrize('diff_order', (1, 2, 3)) @@ -898,6 +919,98 @@ def test_penalized_system_add_diagonal_after_penalty(data_size, diff_order, allo ) +@pytest.mark.parametrize('diff_order', (1, 2, 3)) +@pytest.mark.parametrize('allow_lower', (True, False)) +def test_penalized_system_update_lam(diff_order, allow_lower): + """Tests updating the lam value for PenalizedSystem.""" + data_size = 100 + lam_init = 5 + penalized_system = _banded_utils.PenalizedSystem( + data_size, lam=lam_init, diff_order=diff_order, allow_lower=allow_lower + ) + expected_penalty 
= lam_init * _banded_utils.diff_penalty_diagonals( + data_size, diff_order=diff_order, lower_only=penalized_system.lower + ) + diag_index = penalized_system.main_diagonal_index + + assert_allclose(penalized_system.penalty, expected_penalty, rtol=1e-14, atol=1e-14) + assert_allclose( + penalized_system.main_diagonal, expected_penalty[diag_index], rtol=1e-14, atol=1e-14 + ) + assert_allclose(penalized_system.lam, lam_init, rtol=1e-15, atol=1e-15) + for lam in (1e3, 5.2e1): + expected_penalty = lam * _banded_utils.diff_penalty_diagonals( + data_size, diff_order=diff_order, lower_only=penalized_system.lower + ) + penalized_system.update_lam(lam) + + assert_allclose(penalized_system.penalty, expected_penalty, rtol=1e-14, atol=1e-14) + assert_allclose( + penalized_system.main_diagonal, expected_penalty[diag_index], rtol=1e-14, atol=1e-14 + ) + assert_allclose(penalized_system.lam, lam, rtol=1e-15, atol=1e-15) + + +def test_penalized_system_update_lam_invalid_lam(): + """Ensures PenalizedSystem.update_lam throws an exception when given a non-positive lam.""" + penalized_system = _banded_utils.PenalizedSystem(100) + with pytest.raises(ValueError): + penalized_system.update_lam(-1.) + with pytest.raises(ValueError): + penalized_system.update_lam(0) + + +@pytest.mark.parametrize('diff_order', (1, 2, 3)) +@pytest.mark.parametrize('allow_lower', (True, False)) +@pytest.mark.parametrize('allow_penta', (True, False)) +def test_penalized_system_factorize_solve(data_fixture, diff_order, allow_lower, allow_penta): + """ + Tests the factorize and factorized_solve methods of a PenalizedSystem object. + + Solves the equation ``(W + lam * D.T @ D) x = W @ y``, where `W` is the weight + matrix, and ``D.T @ D`` is the penalty. 
+ + """ + x, y = data_fixture + data_size = len(y) + weights = np.random.default_rng(0).normal(0.8, 0.05, x.size) + weights = np.clip(weights, 0, 1).astype(float) + + lam = {1: 1e2, 2: 1e5, 3: 1e8}[diff_order] + expected_penalty = _banded_utils.diff_penalty_diagonals( + data_size, diff_order=diff_order, lower_only=False + ) + sparse_penalty = dia_object( + (lam * expected_penalty, np.arange(diff_order, -(diff_order + 1), -1)), + shape=(data_size, data_size) + ).tocsr() + expected_solution = spsolve(diags(weights, format='csr') + sparse_penalty, weights * y) + + penalized_system = _banded_utils.PenalizedSystem( + data_size, lam=lam, diff_order=diff_order, allow_lower=allow_lower, + reverse_diags=False, allow_penta=allow_penta + ) + penalized_system.add_diagonal(weights) + + output_factorization = penalized_system.factorize(penalized_system.penalty) + + using_penta = allow_penta and diff_order == 2 and _banded_utils._HAS_NUMBA + if allow_lower or using_penta: + if using_penta: + expected_factorization = penta_factorize( + penalized_system.penalty, solver=penalized_system.penta_solver + ) + else: + expected_factorization = cholesky_banded(penalized_system.penalty, lower=True) + + assert_allclose(output_factorization, expected_factorization, rtol=1e-14, atol=1e-14) + else: + assert callable(output_factorization) + + output = penalized_system.factorized_solve(output_factorization, weights * y) + assert_allclose(output, expected_solution, rtol=2e-7, atol=1e-10) + + @pytest.mark.parametrize('dtype', (float, np.float32)) def test_sparse_to_banded(dtype): """Tests basic functionality of _sparse_to_banded.""" @@ -1053,7 +1166,7 @@ def test_sparse_to_banded_truncation(): # sanity check assert_array_equal(matrix.toarray(), data) - # ensure that the first column isn't truncted by SciPy's sparse conversion + # ensure that the first column isn't truncated by SciPy's sparse conversion assert_array_equal(matrix.data[::-1], expected_data) assert_array_equal(banded_data, out) @@ 
-1223,3 +1336,107 @@ def test_sparse_to_banded_ragged_truncated(): assert_array_equal(banded_data, out2) assert lower2 == 3 assert upper2 == 1 + + +def test_banded_to_sparse_simple(): + """Basic test of functionality for _banded_to_sparse.""" + full_matrix = np.array([ + [1, 2, 3, 0, 0], + [2, 2, 3, 4, 0], + [3, 3, 3, 4, 5], + [0, 4, 4, 4, 5], + [0, 0, 5, 5, 5] + ]) + banded_matrix = np.array([ + [0, 0, 3, 4, 5], + [0, 2, 3, 4, 5], + [1, 2, 3, 4, 5], + [2, 3, 4, 5, 0], + [3, 4, 5, 0, 0] + ]) + + output_full = _banded_utils._banded_to_sparse(banded_matrix, lower=False) + assert_allclose(output_full.toarray(), full_matrix, rtol=1e-14, atol=1e-14) + + output_lower = _banded_utils._banded_to_sparse(banded_matrix[2:], lower=True) + assert_allclose(output_lower.toarray(), full_matrix, rtol=1e-14, atol=1e-14) + + +@pytest.mark.parametrize('lower', (True, False)) +@pytest.mark.parametrize('diff_order', (1, 2, 3)) +@pytest.mark.parametrize('size', (100, 1001)) +def test_banded_to_sparse_symmetric(lower, diff_order, size): + """Ensures proper functionality of _banded_to_sparse for symmetric matrices.""" + expected_matrix = _banded_utils.diff_penalty_matrix(size, diff_order=diff_order) + banded_matrix = _banded_utils.diff_penalty_diagonals( + size, diff_order=diff_order, lower_only=lower + ) + + output = _banded_utils._banded_to_sparse(banded_matrix, lower=lower) + assert_allclose(output.toarray(), expected_matrix.toarray(), rtol=1e-14, atol=1e-14) + + +@pytest.mark.parametrize('diff_order', (1, 2, 3)) +@pytest.mark.parametrize('size', (100, 1001)) +def test_banded_to_sparse_nonsymmetric(diff_order, size): + """Ensures proper functionality of _banded_to_sparse for non symmetric matrices.""" + multiplier = np.random.default_rng(123).uniform(0, 1, size) + multiplier_matrix = diags(multiplier) + penalty_matrix = _banded_utils.diff_penalty_matrix(size, diff_order=diff_order) + banded_penalty = _banded_utils.diff_penalty_diagonals( + size, diff_order=diff_order, lower_only=False + 
) + + expected_matrix = multiplier_matrix @ penalty_matrix + banded_matrix = _banded_utils._shift_rows( + banded_penalty[::-1] * multiplier, diff_order, diff_order + ) + # sanity check that the banded multiplication resulted in the correct LAPACK banded format + for i in range(diff_order): + for j in range(diff_order - i): + assert_allclose(banded_matrix[i, j], 0., rtol=1e-16, atol=1e-16) + assert_allclose(banded_matrix[-(i + 1), -(j + 1)], 0., rtol=1e-16, atol=1e-16) + + output = _banded_utils._banded_to_sparse(banded_matrix, lower=False) + assert_allclose(output.toarray(), expected_matrix.toarray(), rtol=1e-14, atol=1e-14) + + +@pytest.mark.parametrize('size', (100, 1001)) +@pytest.mark.parametrize('lower', (True, False)) +def test_banded_to_sparse_diagonal(size, lower): + """Ensures correct conversion for a matrix with only a diagonal.""" + # test both a 1d and 2d input + array = np.linspace(-1, 1, size) + array_2d = array.reshape((1, size)) + + expected_matrix = diags(array) + + output = _banded_utils._banded_to_sparse(array, lower=lower) + output_2d = _banded_utils._banded_to_sparse(array_2d, lower=lower) + + assert_allclose(output.toarray(), expected_matrix.toarray(), rtol=1e-14, atol=1e-14) + assert_allclose(output_2d.toarray(), expected_matrix.toarray(), rtol=1e-14, atol=1e-14) + + +@pytest.mark.parametrize('form', ('dia', 'csc', 'csr')) +@pytest.mark.parametrize('lower', (True, False)) +def test_banded_to_sparse_formats(form, lower): + """Ensures that the sparse format is correctly passed to the constructor.""" + banded_matrix = _banded_utils.diff_penalty_diagonals(200, diff_order=2, lower_only=lower) + + output = _banded_utils._banded_to_sparse(banded_matrix, lower=lower, sparse_format=form) + assert output.format == form + + +@pytest.mark.parametrize('lower', (True, False)) +@pytest.mark.parametrize('size', (50, 201)) +def test_banded_dot_vector(lower, size): + """Ensures correctness of the dot product of banded matrices with a vector.""" + matrix = 
_banded_utils.diff_penalty_matrix(size, diff_order=2) + banded_matrix = _banded_utils.diff_penalty_diagonals(size, diff_order=2, lower_only=lower) + vector = np.random.default_rng(123).normal(10, 5, size) + + expected = matrix @ vector + output = _banded_utils._banded_dot_vector(banded_matrix, vector, lower=lower) + + assert_allclose(output, expected, rtol=5e-14, atol=1e-14) diff --git a/tests/test_classification.py b/tests/test_classification.py index 514d21e8..896392ce 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -389,7 +389,7 @@ class TestFabc(ClassificationTester): """Class for testing fabc baseline.""" func_name = 'fabc' - checked_keys = ('mask', 'weights') + checked_keys = ('mask', 'weights', 'result') weight_keys = ('mask', 'weights') requires_unique_x = False @@ -573,3 +573,11 @@ def test_kwargs_deprecation(self, smooth_half_window): self.y, smooth_half_window=smooth_half_window, pad_kwargs={'mode': 'extrapolate'}, mode='extrapolate' ) + + @pytest.mark.parametrize('lam', (0, 1e4)) + def test_output(self, lam): + """Ensures that the output has the desired format.""" + additional_keys = [] + if lam > 0: + additional_keys.extend(['weights', 'result']) + super().test_output(additional_keys=additional_keys, lam=lam) diff --git a/tests/test_compat.py b/tests/test_compat.py index 276e3dd5..9199b647 100644 --- a/tests/test_compat.py +++ b/tests/test_compat.py @@ -381,3 +381,45 @@ def test_diags(sparse_format, dtype): assert sparse.isspmatrix(output) else: assert not sparse.isspmatrix(output) + + +def test_allow_1d_slice(): + """Uses version checking rather than brute force to ensure sparse slicing is available. + + The actual implementation in pybaselines directly checks if 1d slicing can be done on + sparse matrices/arrays, which should be slightly more robust than a simple version check, but + they should match regardless. 
+ + """ + try: + _scipy_version = [int(val) for val in scipy.__version__.lstrip('v').split('.')[:2]] + except Exception as e: + # raise the exception so that version parsing can be changed if needed + raise ValueError('Issue parsing SciPy version') from e + + # sparse matrices always supported making 1d slices, while sparse arrays didn't support + # 1d slicing until scipy version 1.15.0 + if sparse.isspmatrix(_compat.diags(np.ones(5))): + expected = True + else: + expected = (_scipy_version[0] > 1 or (_scipy_version[0] == 1 and _scipy_version[1] >= 15)) + + output = _compat._allows_1d_slice() + + assert expected == output + + +@pytest.mark.parametrize('sparse_format', ('csc', 'csr', 'dia')) +def test_sparse_col_index(sparse_format): + """Ensures sparse matrix column indexing works as expected.""" + matrix = np.arange(20, dtype=float).reshape(5, 4) + sparse_matrix = _compat.csr_object(matrix).asformat(sparse_format) + + expected_shape = (matrix.shape[0],) + for col_index in range(matrix.shape[1]): + expected_col = matrix[:, col_index] + output = _compat._sparse_col_index(sparse_matrix, col_index) + + assert_allclose(output, expected_col, rtol=1e-15, atol=1e-15) + assert output.shape == expected_shape + assert isinstance(output, np.ndarray) diff --git a/tests/test_misc.py b/tests/test_misc.py index bbbcd56e..32b82648 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -81,7 +81,7 @@ class TestBeads(MiscTester): """Class for testing beads baseline.""" func_name = 'beads' - checked_keys = ('signal', 'tol_history') + checked_keys = ('signal', 'tol_history', 'fidelity', 'penalty') @pytest.mark.parametrize('use_class', (True, False)) @pytest.mark.parametrize('cost_function', (1, 2, 'l1_v1', 'l1_v2', 'L1_V1')) diff --git a/tests/test_morphological.py b/tests/test_morphological.py index 76391042..9d1a8291 100644 --- a/tests/test_morphological.py +++ b/tests/test_morphological.py @@ -51,7 +51,7 @@ class TestMPLS(MorphologicalTester, InputWeightsMixin, 
RecreationMixin): """Class for testing mpls baseline.""" func_name = 'mpls' - checked_keys = ('half_window', 'weights') + checked_keys = ('half_window', 'weights', 'result') @pytest.mark.parametrize('diff_order', (1, 3)) def test_diff_orders(self, diff_order): @@ -99,6 +99,16 @@ def test_recreation(self): with pytest.warns(DeprecationWarning): super().test_recreation() + @pytest.mark.parametrize('p', (0, 0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestMor(MorphologicalTester): """Class for testing mor baseline.""" @@ -184,7 +194,7 @@ class TestMpspline(MorphologicalTester, InputWeightsMixin, RecreationMixin): """Class for testing mpspline baseline.""" func_name = 'mpspline' - checked_keys = ('half_window', 'weights') + checked_keys = ('half_window', 'weights', 'result') @pytest.mark.parametrize('diff_order', (1, 3)) def test_diff_orders(self, diff_order): @@ -203,6 +213,16 @@ def test_outside_p_fails(self, p): with pytest.raises(ValueError): self.class_func(self.y, p=p) + @pytest.mark.parametrize('p', (0, 0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestJBCD(MorphologicalTester): """Class for testing jbcd baseline.""" diff --git a/tests/test_optimizers.py b/tests/test_optimizers.py index 66585c36..bcae6c83 100644 --- a/tests/test_optimizers.py +++ b/tests/test_optimizers.py @@ -92,7 +92,7 @@ class TestCollabPLS(OptimizersTester, OptimizerInputWeightsMixin): func_name = "collab_pls" checked_keys = ('average_weights',) # will need to change 
checked_keys if default method is changed - checked_method_keys = ('weights', 'tol_history') + checked_method_keys = ('weights', 'tol_history', 'result') two_d = True weight_keys = ('average_weights', 'weights') @@ -146,12 +146,12 @@ def test_output_alpha(self, average_dataset): class TestOptimizeExtendedRange(OptimizersTester, OptimizerInputWeightsMixin): - """Class for testing collab_pls baseline.""" + """Class for testing optimize_extended_range baseline.""" func_name = "optimize_extended_range" checked_keys = ('optimal_parameter', 'min_rmse', 'rmse') # will need to change checked_keys if default method is changed - checked_method_keys = ('weights', 'tol_history') + checked_method_keys = ('weights', 'tol_history', 'result') required_kwargs = {'pad_kwargs': {'extrapolate_window': 100}} @pytest.mark.parametrize('use_class', (True, False)) @@ -172,16 +172,13 @@ def test_input_weights(self, side): 'poly', 'modpoly', 'imodpoly', 'penalized_poly', 'loess', 'quant_reg', 'goldindec', 'derpsalsa', 'mpspline', 'mixture_model', 'irsqr', 'dietrich', 'cwt_br', 'fabc', 'pspline_asls', 'pspline_iasls', 'pspline_airpls', 'pspline_arpls', 'pspline_drpls', - 'pspline_iarpls', 'pspline_aspls', 'pspline_psalsa', 'pspline_derpsalsa' + 'pspline_iarpls', 'pspline_aspls', 'pspline_psalsa', 'pspline_derpsalsa', 'rubberband', ) ) def test_all_methods(self, method): """Tests all methods that should work with optimize_extended_range.""" - if method == 'loess': - # reduce number of calculations for loess since it is much slower - kwargs = {'min_value': 1, 'max_value': 2} - else: - kwargs = {} + # reduce number of calculations since this is just checking that calling works + kwargs = {'min_value': 1, 'max_value': 3} # use height_scale=0.1 to avoid exponential overflow warning for arpls and aspls output = self.class_func( self.y, method=method, height_scale=0.1, **kwargs, **self.kwargs @@ -213,7 +210,7 @@ def test_whittaker_high_value_fails(self, key): Ensures function fails when using a 
Whittaker method and input lambda exponent is too high. Since the function uses 10**exponent, do not want to allow a high exponent to be used, - since the user probably thought the actual lam value had to be specficied rather than + since the user probably thought the actual lam value had to be specifiied rather than just the exponent. """ @@ -222,7 +219,7 @@ def test_whittaker_high_value_fails(self, key): @pytest.mark.parametrize('side', ('left', 'right', 'both')) def test_aspls_alpha_ordering(self, side): - """Ensures the `alpha` array for the aspls method is currectly processed.""" + """Ensures the `alpha` array for the aspls method is correctly processed.""" alpha = np.random.default_rng(0).normal(0.8, 0.05, len(self.x)) alpha = np.clip(alpha, 0, 1).astype(float, copy=False) @@ -279,9 +276,10 @@ def test_min_max_ordering(self, method): fit_1, params_1 = self.class_func( self.y, method=method, min_value=min_value, max_value=max_value ) - # should simply do the fittings in the reversed order + # should simply do the fittings in the reversed order; subtract 1 from + # min and max values since np.arange(min, max) == np.arange(min - 1, max - 1, -1)[::-1] fit_2, params_2 = self.class_func( - self.y, method=method, min_value=max_value, max_value=min_value + self.y, method=method, min_value=max_value - 1, max_value=min_value - 1, step=-1 ) # fits and optimal parameter should be the same @@ -297,22 +295,147 @@ def test_no_step(self, method): """Ensures a fit is still done if step is zero or min and max values are equal.""" min_value = 2 # case 1: step == 0 - fit_1, params_1 = self.class_func( - self.y, method=method, min_value=min_value, max_value=min_value + 5, step=0 - ) + with pytest.warns(utils.ParameterWarning): + fit_1, params_1 = self.class_func( + self.y, method=method, min_value=min_value, max_value=min_value + 5, step=0 + ) # case 2: min and max value are equal - fit_2, params_2 = self.class_func( - self.y, method=method, min_value=min_value, 
max_value=min_value - ) + with pytest.warns(utils.ParameterWarning): + fit_2, params_2 = self.class_func( + self.y, method=method, min_value=min_value, max_value=min_value + ) + # case 3: step is too large + with pytest.warns(utils.ParameterWarning): + fit_3, params_3 = self.class_func( + self.y, method=method, min_value=min_value, max_value=min_value + 1, step=5 + ) # fits, optimal parameter, and rmse should all be the same assert_allclose(fit_2, fit_1, rtol=1e-12, atol=1e-12) + assert_allclose(fit_3, fit_1, rtol=1e-12, atol=1e-12) assert_allclose( - params_1['optimal_parameter'], params_2['optimal_parameter'], rtol=1e-12, atol=1e-12 + params_2['optimal_parameter'], params_1['optimal_parameter'], rtol=1e-12, atol=1e-12 + ) + assert_allclose( + params_3['optimal_parameter'], params_1['optimal_parameter'], rtol=1e-12, atol=1e-12 ) - assert_allclose(params_1['rmse'], params_2['rmse'], rtol=1e-8, atol=1e-12) + assert_allclose(params_2['rmse'], params_1['rmse'], rtol=1e-8, atol=1e-12) + assert_allclose(params_3['rmse'], params_1['rmse'], rtol=1e-8, atol=1e-12) assert len(params_1['rmse']) == 1 assert len(params_2['rmse']) == 1 + assert len(params_3['rmse']) == 1 + + def test_value_range(self): + """Ensures the correct number of parameters to fit are generated.""" + min_value = 2 + max_value = 6 + for step in (1, 2, 3): + expected_tested_values = np.arange(min_value, max_value, step) + _, params_1 = self.class_func( + self.y, method='modpoly', min_value=min_value, max_value=max_value, step=step + ) + _, params_2 = self.class_func( + self.y, method='asls', min_value=min_value, max_value=max_value, step=step + ) + # both methods should have the same number of tested values + assert len(params_1['rmse']) == len(expected_tested_values) + assert len(params_2['rmse']) == len(expected_tested_values) + + # also test float inputs for lam-based methods + min_value = 1. 
+ max_value = 5.5 + step = 0.5 + expected_tested_values = [1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5] + _, params_3 = self.class_func( + self.y, method='asls', min_value=min_value, max_value=max_value, step=step + ) + # both methods should have the same number of tested values + assert len(params_3['rmse']) == len(expected_tested_values) + + +def test_param_grid(): + """Ensures basic functionality of _param_grid.""" + min_value = 1 + max_value = 5 + step = 1 + + expected_values = np.arange(min_value, max_value, step) + output = optimizers._param_grid(min_value, max_value, step, polynomial_fit=True) + + assert_array_equal(output, expected_values) + + output2 = optimizers._param_grid(min_value, max_value, step, polynomial_fit=False) + assert_allclose(output2, 10**expected_values, rtol=1e-15, atol=1e-15) + + # also ensure floats are properly handled + step = 0.5 + expected_values = 10**np.array([1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5]) + output3 = optimizers._param_grid(min_value, max_value, step, polynomial_fit=False) + assert_allclose(output3, expected_values, rtol=1e-15, atol=1e-15) + + +@pytest.mark.parametrize('polynomial_fit', (True, False)) +def test_param_grid_no_step(polynomial_fit): + """Ensures a single param is still output if step is zero or min and max values are equal.""" + min_value = 2 + expected_value = np.array([min_value]) + if not polynomial_fit: + expected_value = 10.0**expected_value + + # case 1: step == 0 + with pytest.warns(utils.ParameterWarning): + output1 = optimizers._param_grid( + min_value=min_value, max_value=min_value + 5, step=0, polynomial_fit=polynomial_fit + ) + # case 2: min and max value are equal + with pytest.warns(utils.ParameterWarning): + output2 = optimizers._param_grid( + min_value=min_value, max_value=min_value, step=1, polynomial_fit=polynomial_fit + ) + # case 3: step is too large + with pytest.warns(utils.ParameterWarning): + output3 = optimizers._param_grid( + min_value=min_value, max_value=min_value + 1, step=5, 
polynomial_fit=polynomial_fit + ) + + assert_allclose(output1, expected_value, rtol=1e-15, atol=1e-15) + assert_allclose(output2, expected_value, rtol=1e-15, atol=1e-15) + assert_allclose(output3, expected_value, rtol=1e-15, atol=1e-15) + + +@pytest.mark.parametrize('key', ('min_value', 'max_value', 'step')) +def test_param_grid_float_poly_fails(key): + """Ensures non-integer values raise an error for polynomial fits.""" + if key == 'min_value': + kwargs = {'max_value': 5, 'step': 1} + elif key == 'max_value': + kwargs = {'min_value': 1, 'step': 1} + else: + kwargs = {'min_value': 1, 'max_value': 5} + kwargs[key] = 2.5 + with pytest.raises(TypeError): + optimizers._param_grid(**kwargs, polynomial_fit=True) + + +@pytest.mark.parametrize('key', ('min_value', 'max_value', 'step')) +def test_param_grid_nonpoly_fails(key): + """ + Ensures function fails when using a Whittaker method and input lambda exponent is too high. + + Since the function uses 10**exponent, do not want to allow a high exponent to be used, + since the user probably thought the actual lam value had to be specifiied rather than + just the exponent. 
+ + """ + if key == 'min_value': + kwargs = {'max_value': 5, 'step': 1} + elif key == 'max_value': + kwargs = {'min_value': 1, 'step': 1} + else: + kwargs = {'min_value': 1, 'max_value': 5} + kwargs[key] = 16 + with pytest.raises(ValueError): + optimizers._param_grid(**kwargs, polynomial_fit=False) @pytest.mark.parametrize( @@ -455,7 +578,7 @@ class TestCustomBC(OptimizersTester): func_name = 'custom_bc' checked_keys = ('y_fit', 'x_fit', 'baseline_fit') # will need to change checked_keys if default method is changed - checked_method_keys = ('weights', 'tol_history') + checked_method_keys = ('weights', 'tol_history', 'result') required_kwargs = {'sampling': 5} @pytest.mark.parametrize( @@ -543,3 +666,63 @@ def test_overlapping_regions_fails(self): """Ensures an exception is raised if regions overlap.""" with pytest.raises(ValueError): self.class_func(self.y, regions=((0, 10), (9, 13))) + + +class TestOptimizePLS(OptimizersTester, OptimizerInputWeightsMixin): + """Class for testing optimize_pls baseline.""" + + func_name = "optimize_pls" + checked_keys = ('optimal_parameter', 'metric') + # will need to change checked_keys if default method is changed + checked_method_keys = ('weights', 'tol_history', 'result') + # by default only run a few optimization steps + required_kwargs = {'min_value': 2, 'max_value': 3} + + @pytest.mark.parametrize('opt_method', ('U-curve', 'GCV', 'BIC')) + def test_output(self, opt_method): + """Ensures correct output parameters for different optimization methods.""" + if opt_method in ('GCV', 'BIC'): + additional_keys = ['trace', 'wrss'] + else: + additional_keys = ['penalty', 'fidelity'] + super().test_output(additional_keys=additional_keys, opt_method=opt_method) + + @pytest.mark.parametrize( + 'method', + ( + 'asls', 'iasls', 'airpls', 'mpls', 'arpls', 'drpls', 'iarpls', 'aspls', 'psalsa', + 'derpsalsa', 'mpspline', 'mixture_model', 'irsqr', 'fabc', 'rubberband', + 'pspline_asls', 'pspline_iasls', 'pspline_airpls', 'pspline_arpls', 
'pspline_drpls', + 'pspline_iarpls', 'pspline_aspls', 'pspline_psalsa', 'pspline_derpsalsa' + ) + ) + @pytest.mark.parametrize('opt_method', ('U-Curve', 'GCV')) + def test_all_methods(self, method, opt_method): + """Tests most methods that should work with optimize_pls.""" + output = self.class_func(self.y, method=method, opt_method=opt_method, **self.kwargs) + if 'weights' in output[1]['method_params']: + assert self.y.shape == output[1]['method_params']['weights'].shape + elif 'alpha' in output[1]['method_params']: + assert self.y.shape == output[1]['method_params']['alpha'].shape + + @pytest.mark.parametrize('opt_method', ('U-Curve', 'GCV', 'BIC')) + def test_beads(self, opt_method): + """Ensures beads is also supported for L-curve based optimization methods.""" + if opt_method in ('GCV', 'BIC'): + with pytest.raises( + NotImplementedError, match='optimize_pls does not support the beads method' + ): + self.class_func(self.y, method='beads', opt_method=opt_method, **self.kwargs) + else: + # just ensure calling does not produce errors + self.class_func(self.y, method='beads', opt_method=opt_method, **self.kwargs) + + def test_unknown_method_fails(self): + """Ensures method fails when an unknown baseline method is given.""" + with pytest.raises(AttributeError): + self.class_func(self.y, method='aaaaa') + + def test_unknown_opt_method_fails(self): + """Ensures method fails when an unknown opt_method is given.""" + with pytest.raises(ValueError): + self.class_func(self.y, opt_method='aaaaa') diff --git a/tests/test_results.py b/tests/test_results.py new file mode 100644 index 00000000..d0c647b0 --- /dev/null +++ b/tests/test_results.py @@ -0,0 +1,698 @@ +# -*- coding: utf-8 -*- +"""Tests for pybaselines.results. 
+ +@author: Donald Erb +Created on January 8, 2026 + +""" + +import numpy as np +from numpy.testing import assert_allclose +import pytest +from scipy.sparse import kron +from scipy.sparse.linalg import factorized + +from pybaselines import _banded_utils, _spline_utils, results +from pybaselines.two_d._spline_utils import PSpline2D, SplineBasis2D +from pybaselines.two_d._whittaker_utils import WhittakerSystem2D +from pybaselines._compat import _sparse_col_index, dia_object, diags, identity + +from .base_tests import get_2dspline_inputs + + +@pytest.mark.parametrize('diff_order', (1, 2)) +@pytest.mark.parametrize('allow_lower', (True, False)) +@pytest.mark.parametrize('allow_penta', (True, False)) +@pytest.mark.parametrize('size', (100, 401)) +def test_whittaker_effective_dimension(diff_order, allow_lower, allow_penta, size): + """ + Tests the effective_dimension method of a WhittakerResult object. + + The effective dimension for Whittaker smoothing should be + ``trace((W + lam * D.T @ D)^-1 @ W)``, where `W` is the weight matrix, and + ``D.T @ D`` is the penalty. 
+ + """ + weights = np.random.default_rng(0).normal(0.8, 0.05, size) + weights = np.clip(weights, 0, 1).astype(float) + + lam = {1: 1e2, 2: 1e5, 3: 1e8}[diff_order] + expected_penalty = _banded_utils.diff_penalty_diagonals( + size, diff_order=diff_order, lower_only=False + ) + sparse_penalty = dia_object( + (lam * expected_penalty, np.arange(diff_order, -(diff_order + 1), -1)), + shape=(size, size) + ).tocsr() + weights_matrix = diags(weights, format='csc') + factorization = factorized(weights_matrix + sparse_penalty) + expected_ed = 0 + for i in range(size): + expected_ed += factorization(_sparse_col_index(weights_matrix, i))[i] + + penalized_system = _banded_utils.PenalizedSystem( + size, lam=lam, diff_order=diff_order, allow_lower=allow_lower, + reverse_diags=False, allow_penta=allow_penta + ) + result_obj = results.WhittakerResult(penalized_system, weights=weights) + output = result_obj.effective_dimension(n_samples=0) + + assert_allclose(output, expected_ed, rtol=1e-7, atol=1e-10) + + +@pytest.mark.parametrize('diff_order', (1, 2)) +@pytest.mark.parametrize('allow_lower', (True, False)) +@pytest.mark.parametrize('allow_penta', (True, False)) +@pytest.mark.parametrize('size', (100, 401)) +@pytest.mark.parametrize('n_samples', (100, 201)) +def test_whittaker_effective_dimension_stochastic(diff_order, allow_lower, allow_penta, size, + n_samples): + """ + Tests the stochastic effective_dimension calculation of a WhittakerResult object. + + The effective dimension for Whittaker smoothing should be + ``trace((W + lam * D.T @ D)^-1 @ W)``, where `W` is the weight matrix, and + ``D.T @ D`` is the penalty. 
+
+    """
+    weights = np.random.default_rng(0).normal(0.8, 0.05, size)
+    weights = np.clip(weights, 0, 1).astype(float)
+
+    lam = {1: 1e2, 2: 1e5, 3: 1e8}[diff_order]
+
+    penalized_system = _banded_utils.PenalizedSystem(
+        size, lam=lam, diff_order=diff_order, allow_lower=allow_lower,
+        reverse_diags=False, allow_penta=allow_penta
+    )
+    # true solution is already verified by other tests, so use that as "known" in
+    # this test to only examine the relative difference from using stochastic estimation
+    result_obj = results.WhittakerResult(penalized_system, weights=weights)
+    expected_ed = result_obj.effective_dimension(n_samples=0)
+
+    output = result_obj.effective_dimension(n_samples=n_samples)
+
+    assert_allclose(output, expected_ed, rtol=5e-1, atol=1e-5)
+
+
+@pytest.mark.parametrize('size', (100, 401))
+@pytest.mark.parametrize('diff_order', (1, 2, 3))
+@pytest.mark.parametrize('large_lam', (True, False))
+@pytest.mark.parametrize('solver', (0, 1, 2))
+def test_whittaker_effective_dimension_lam_extremes(size, diff_order, large_lam, solver):
+    """
+    Tests the effective dimension of Whittaker smoothing for high and low limits of ``lam``.
+
+    When `lam` is ~infinite, the fit should approximate a polynomial of degree
+    ``diff_order - 1``, so the effective dimension should be `diff_order` since ED for a
+    polynomial is ``degree + 1``. Likewise, as `lam` approaches 0, the solution should
+    be the same as an interpolating spline of the same spline degree, so its effective
+    dimension should be the number of basis functions, i.e. the number of data points
+    when using Whittaker smoothing.
+
+    References
+    ----------
+    Eilers, P., et al. Flexible Smoothing with B-splines and Penalties. Statistical
+    Science, 1996, 11(2), 89-121.
+
+    Eilers, P. A Perfect Smoother. Analytical Chemistry, 2003, 75(14), 3631-3636.
+
+    """
+    if solver == 0:  # use full banded with solve_banded
+        allow_lower = False
+        allow_penta = False
+    elif solver == 1:  # use lower banded with solveh_banded
+        allow_lower = True
+        allow_penta = False
+    else:  # use full banded with pentadiagonal solver if diff_order=2
+        allow_lower = False
+        allow_penta = True
+
+    if large_lam:
+        lam = 1e14
+        expected_ed = diff_order
+        # limited by how close to infinity lam can get before it causes numerical instability,
+        # and larger diff_orders need larger lam for it to be a polynomial, so have to reduce the
+        # relative tolerance as diff_order increases
+        rtol = {1: 8e-3, 2: 5e-3, 3: 3e-2}[diff_order]
+    else:
+        lam = 1e-16
+        expected_ed = size
+        rtol = 1e-15
+
+    penalized_system = _banded_utils.PenalizedSystem(
+        size, lam=lam, diff_order=diff_order, allow_lower=allow_lower,
+        reverse_diags=False, allow_penta=allow_penta
+    )
+    result_obj = results.WhittakerResult(penalized_system)
+    output = result_obj.effective_dimension(n_samples=0)
+    assert_allclose(output, expected_ed, rtol=rtol, atol=1e-11)
+
+
+@pytest.mark.parametrize('diff_order', (1, 2, 3))
+@pytest.mark.parametrize('spline_degree', (1, 2, 3))
+@pytest.mark.parametrize('num_knots', (20, 101))
+@pytest.mark.parametrize('large_lam', (True, False))
+@pytest.mark.parametrize('allow_lower', (True, False))
+def test_pspline_effective_dimension_lam_extremes(data_fixture, diff_order, spline_degree,
+                                                  num_knots, large_lam, allow_lower):
+    """
+    Tests the effective dimension of P-spline smoothing for high and low limits of ``lam``.
+
+    When `lam` is ~infinite, the spline fit should approximate a polynomial of degree
+    ``diff_order - 1``, so the effective dimension should be `diff_order` since ED for a
+    polynomial is ``degree + 1``. Likewise, as ``lam`` approaches 0, the solution should
+    be the same as an interpolating spline of the same spline degree, so its effective
+    dimension should be the number of basis functions.
+
+    References
+    ----------
+    Eilers, P., et al. 
Flexible Smoothing with B-splines and Penalties. Statistical + Science, 1996, 11(2), 89-121. + + """ + x, y = data_fixture + if large_lam: + lam = 1e14 + expected_ed = diff_order + # limited by how close to infinity lam can get before it causes numerical instability, + # and both larger num_knots and larger diff_orders need larger lam for it to be a + # polynomial, so have to reduce the relative tolerance; num_knots has a larger effect + # than diff_order, so base the rtol on it + if spline_degree >= diff_order: # can approximate a polynomial + rtol = {20: 6e-3, 101: 5e-2}[num_knots] + else: + # technically should skip since the spline cannot approximate the polynomial, + # but just increase rtol instead + rtol = {20: 1e-2, 101: 1e-1}[num_knots] + else: + lam = 1e-16 + expected_ed = num_knots + spline_degree - 1 + rtol = 1e-15 + + spline_basis = _spline_utils.SplineBasis( + x, num_knots=num_knots, spline_degree=spline_degree + ) + pspline = _spline_utils.PSpline( + spline_basis, lam=lam, diff_order=diff_order, allow_lower=allow_lower + ) + result_obj = results.PSplineResult(pspline) + output = result_obj.effective_dimension(n_samples=0) + assert_allclose(output, expected_ed, rtol=rtol, atol=1e-11) + + +@pytest.mark.parametrize('n_samples', (-1, 50.5)) +def test_whittaker_effective_dimension_stochastic_invalid_samples(data_fixture, n_samples): + """Ensures a non-zero, non-positive `n_samples` input raises an exception.""" + x, y = data_fixture + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1).astype(float) + + penalized_system = _banded_utils.PenalizedSystem(x.size) + result_obj = results.WhittakerResult(penalized_system, weights=weights) + with pytest.raises(TypeError): + result_obj.effective_dimension(n_samples=n_samples) + + +def test_whittaker_result_no_weights(data_fixture): + """Ensures weights are initialized as ones if not given to WhittakerResult.""" + x, y = data_fixture + + penalized_system = 
_banded_utils.PenalizedSystem(x.size) + result_obj = results.WhittakerResult(penalized_system) + + assert_allclose(result_obj._weights, np.ones(y.shape), rtol=1e-16, atol=0) + + +@pytest.mark.parametrize('num_knots', (20, 51)) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, 3)) +@pytest.mark.parametrize('diff_order', (1, 2)) +@pytest.mark.parametrize('lower_only', (True, False)) +def test_pspline_effective_dimension(data_fixture, num_knots, spline_degree, diff_order, + lower_only): + """ + Tests the effective_dimension method of a PSplineResult object. + + The effective dimension for penalized spline smoothing should be + ``trace((B.T @ W @ B + lam * D.T @ D)^-1 @ B.T @ W @ B)``, where `W` is the weight matrix, + ``D.T @ D`` is the penalty, and `B` is the spline basis. + + """ + x, y = data_fixture + x = x.astype(float) + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1).astype(float) + + knots = _spline_utils._spline_knots(x, num_knots, spline_degree, True) + basis = _spline_utils._spline_basis(x, knots, spline_degree) + num_bases = basis.shape[1] + penalty_matrix = _banded_utils.diff_penalty_matrix(num_bases, diff_order=diff_order) + + btwb = basis.T @ diags(weights, format='csr') @ basis + factorization = factorized(btwb + penalty_matrix) + expected_ed = 0 + for i in range(num_bases): + expected_ed += factorization(_sparse_col_index(btwb, i))[i] + + spline_basis = _spline_utils.SplineBasis( + x, num_knots=num_knots, spline_degree=spline_degree + ) + pspline = _spline_utils.PSpline( + spline_basis, lam=1, diff_order=diff_order, allow_lower=lower_only + ) + result_obj = results.PSplineResult(pspline, weights) + output = result_obj.effective_dimension(n_samples=0) + assert_allclose(output, expected_ed, rtol=1e-10, atol=1e-12) + + +@pytest.mark.parametrize('num_knots', (20, 51)) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, 3)) +@pytest.mark.parametrize('diff_order', (1, 2)) 
+@pytest.mark.parametrize('lower_only', (True, False)) +@pytest.mark.parametrize('n_samples', (100, 201)) +def test_pspline_stochastic_effective_dimension(data_fixture, num_knots, spline_degree, diff_order, + lower_only, n_samples): + """ + Tests the effective_dimension method of a PSplineResult object. + + The effective dimension for penalized spline smoothing should be + ``trace((B.T @ W @ B + lam * D.T @ D)^-1 @ B.T @ W @ B)``, where `W` is the weight matrix, + ``D.T @ D`` is the penalty, and `B` is the spline basis. + + """ + x, y = data_fixture + x = x.astype(float) + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1).astype(float) + + spline_basis = _spline_utils.SplineBasis( + x, num_knots=num_knots, spline_degree=spline_degree + ) + pspline = _spline_utils.PSpline( + spline_basis, lam=1, diff_order=diff_order, allow_lower=lower_only + ) + # true solution is already verified by other tests, so use that as "known" in + # this test to only examine the relative difference from using stochastic estimation + result_obj = results.PSplineResult(pspline, weights) + expected_ed = result_obj.effective_dimension(n_samples=0) + + output = result_obj.effective_dimension(n_samples=n_samples) + assert_allclose(output, expected_ed, rtol=5e-2, atol=1e-5) + + +@pytest.mark.parametrize('n_samples', (-1, 50.5)) +def test_pspline_stochastic_effective_dimension_invalid_samples(data_fixture, n_samples): + """Ensures a non-zero, non-positive `n_samples` input raises an exception.""" + x, y = data_fixture + x = x.astype(float) + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1).astype(float) + + spline_basis = _spline_utils.SplineBasis(x) + pspline = _spline_utils.PSpline(spline_basis) + result_obj = results.PSplineResult(pspline, weights) + with pytest.raises(TypeError): + result_obj.effective_dimension(n_samples=n_samples) + + +def test_pspline_result_no_weights(data_fixture): + """Ensures 
weights are initialized as ones if not given to PSplineResult.""" + x, y = data_fixture + + spline_basis = _spline_utils.SplineBasis(x) + pspline = _spline_utils.PSpline(spline_basis) + result_obj = results.PSplineResult(pspline) + + assert_allclose(result_obj._weights, np.ones(y.shape), rtol=1e-16, atol=0) + + +@pytest.mark.parametrize('num_knots', (10, (11, 20))) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, (2, 3))) +@pytest.mark.parametrize('diff_order', (1, 2, (2, 3))) +@pytest.mark.parametrize('lam', (1e-2, (1e1, 1e2))) +def test_pspline_two_d_effective_dimension(data_fixture2d, num_knots, spline_degree, diff_order, + lam): + """ + Tests the effective_dimension method of a PSpline object. + + The effective dimension for penalized spline smoothing should be + ``trace((B.T @ W @ B + lam * D.T @ D)^-1 @ B.T @ W @ B)``, where `W` is the weight matrix, + ``D.T @ D`` is the penalty, and `B` is the spline basis. + + """ + x, z, y = data_fixture2d + ( + num_knots_r, num_knots_c, spline_degree_r, spline_degree_c, + lam_r, lam_c, diff_order_r, diff_order_c + ) = get_2dspline_inputs(num_knots, spline_degree, lam, diff_order) + + knots_r = _spline_utils._spline_knots(x, num_knots_r, spline_degree_r, True) + basis_r = _spline_utils._spline_basis(x, knots_r, spline_degree_r) + + knots_c = _spline_utils._spline_knots(z, num_knots_c, spline_degree_c, True) + basis_c = _spline_utils._spline_basis(z, knots_c, spline_degree_c) + + num_bases = (basis_r.shape[1], basis_c.shape[1]) + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + spline_basis = SplineBasis2D( + x, z, num_knots=num_knots, spline_degree=spline_degree, check_finite=False + ) + # make B.T @ W @ B using generalized linear array model since it's much faster and + # already verified in other tests + btwb = spline_basis._make_btwb(weights).tocsc() + P_r = kron( + _banded_utils.diff_penalty_matrix(num_bases[0], diff_order=diff_order_r), + 
identity(num_bases[1]) + ) + P_c = kron( + identity(num_bases[0]), + _banded_utils.diff_penalty_matrix(num_bases[1], diff_order=diff_order_c) + ) + penalty = lam_r * P_r + lam_c * P_c + + lhs = btwb + penalty + factorization = factorized(lhs.tocsc()) + expected_ed = 0 + for i in range(np.prod(num_bases)): + expected_ed += factorization(_sparse_col_index(btwb, i))[i] + + pspline = PSpline2D(spline_basis, lam=lam, diff_order=diff_order) + + result_obj = results.PSplineResult2D(pspline, weights) + output = result_obj.effective_dimension(n_samples=0) + + assert_allclose(output, expected_ed, rtol=1e-14, atol=1e-10) + + +@pytest.mark.parametrize('num_knots', (10, (11, 20))) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, (2, 3))) +@pytest.mark.parametrize('diff_order', (1, 2, (2, 3))) +@pytest.mark.parametrize('lam', (1e-2, (1e1, 1e2))) +@pytest.mark.parametrize('n_samples', (100, 201)) +def test_pspline_two_d_effective_dimension_stochastic(data_fixture2d, num_knots, spline_degree, + diff_order, lam, n_samples): + """ + Tests the effective_dimension method of a PSpline object. + + The effective dimension for penalized spline smoothing should be + ``trace((B.T @ W @ B + lam * D.T @ D)^-1 @ B.T @ W @ B)``, where `W` is the weight matrix, + ``D.T @ D`` is the penalty, and `B` is the spline basis. 
+ + """ + x, z, y = data_fixture2d + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + spline_basis = SplineBasis2D( + x, z, num_knots=num_knots, spline_degree=spline_degree, check_finite=False + ) + pspline = PSpline2D(spline_basis, lam=lam, diff_order=diff_order) + # true solution is already verified by other tests, so use that as "known" in + # this test to only examine the relative difference from using stochastic estimation + result_obj = results.PSplineResult2D(pspline, weights) + expected_ed = result_obj.effective_dimension(n_samples=0) + + output = result_obj.effective_dimension(n_samples=n_samples) + assert_allclose(output, expected_ed, rtol=1e-1, atol=1e-5) + + +@pytest.mark.parametrize('n_samples', (-1, 50.5)) +def test_pspline_two_d_stochastic_effective_dimension_invalid_samples(data_fixture2d, n_samples): + """Ensures a non-zero, non-positive `n_samples` input raises an exception.""" + x, z, y = data_fixture2d + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + spline_basis = SplineBasis2D(x, z, num_knots=10) + pspline = PSpline2D(spline_basis) + result_obj = results.PSplineResult2D(pspline, weights) + with pytest.raises(TypeError): + result_obj.effective_dimension(n_samples=n_samples) + + +def test_pspline_result_two_d_weights(data_fixture2d): + """Ensures both 1D and 2D weights are handled correctly by PSplineResult2D.""" + x, z, y = data_fixture2d + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + spline_basis = SplineBasis2D(x, z, num_knots=10) + pspline = PSpline2D(spline_basis) + + result_obj = results.PSplineResult2D(pspline, weights) + result_obj_1d = results.PSplineResult2D(pspline, weights.ravel()) + + assert_allclose(result_obj._weights, result_obj_1d._weights, rtol=1e-16, atol=0) + + +def test_pspline_result_two_d_no_weights(data_fixture2d): + 
"""Ensures weights are initialized as ones if not given to PSplineResult2D.""" + x, z, y = data_fixture2d + + spline_basis = SplineBasis2D(x, z, num_knots=10) + pspline = PSpline2D(spline_basis) + result_obj = results.PSplineResult2D(pspline) + + assert_allclose(result_obj._weights, np.ones(y.shape), rtol=1e-16, atol=0) + + +@pytest.mark.parametrize('num_knots', (10, (11, 20))) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, (2, 3))) +@pytest.mark.parametrize('diff_order', (1, 2, (2, 3))) +@pytest.mark.parametrize('large_lam', (True, False)) +def test_pspline_two_d_effective_dimension_lam_extremes(data_fixture2d, diff_order, spline_degree, + num_knots, large_lam): + """ + Tests the effective dimension of 2D P-spline smoothing for high and low limits of ``lam``. + + Same reasoning as for `test_pspline_effective_dimension_lam_extremes`, except the total + effective dimension is just the product of the row and column effective dimensions. + + """ + x, z, y = data_fixture2d + ( + num_knots_r, num_knots_c, spline_degree_r, spline_degree_c, + lam_r, lam_c, diff_order_r, diff_order_c + ) = get_2dspline_inputs(num_knots, spline_degree, lam=1, diff_order=diff_order) + + if large_lam: + lam = 1e14 + expected_ed = diff_order_r * diff_order_c + # limited by how close to infinity lam can get before it causes numerical instability; + # just picked rtol that passes all conditions + rtol = 4e-2 + else: + lam = 1e-16 + expected_ed = (num_knots_r + spline_degree_r - 1) * (num_knots_c + spline_degree_c - 1) + rtol = 1e-13 + + spline_basis = SplineBasis2D( + x, z, num_knots=num_knots, spline_degree=spline_degree, check_finite=False + ) + pspline = PSpline2D(spline_basis, lam=lam, diff_order=diff_order) + result_obj = results.PSplineResult2D(pspline) + + output = result_obj.effective_dimension(n_samples=0) + + assert_allclose(output, expected_ed, rtol=rtol, atol=1e-11) + + +@pytest.mark.parametrize('shape', ((20, 23), (51, 6))) +@pytest.mark.parametrize('diff_order', (1, 2, (2, 
3))) +@pytest.mark.parametrize('lam', (1e2, (1e1, 1e2))) +@pytest.mark.parametrize('use_svd', (True, False)) +def test_whittaker_two_d_effective_dimension(shape, diff_order, lam, use_svd): + """ + Tests the effective_dimension method of WhittakerResult2D objects. + + The effective dimension for Whittaker smoothing should be + ``trace((W + lam * D.T @ D)^-1 @ W)``, where `W` is the weight matrix, and + ``D.T @ D`` is the penalty. + + Tests both the analytic and SVD-based solutions. + + """ + *_, lam_r, lam_c, diff_order_r, diff_order_c = get_2dspline_inputs( + lam=lam, diff_order=diff_order + ) + + weights = np.random.default_rng(0).normal(0.8, 0.05, shape) + weights = np.clip(weights, 0, 1).astype(float) + + P_r = kron( + _banded_utils.diff_penalty_matrix(shape[0], diff_order=diff_order_r), + identity(shape[1]) + ) + P_c = kron( + identity(shape[0]), + _banded_utils.diff_penalty_matrix(shape[1], diff_order=diff_order_c) + ) + penalty = lam_r * P_r + lam_c * P_c + + weights_matrix = diags(weights.ravel(), format='csc') + factorization = factorized(weights_matrix + penalty) + expected_ed = 0 + for i in range(np.prod(shape)): + expected_ed += factorization(_sparse_col_index(weights_matrix, i))[i] + + # the relative error on the trace when using SVD decreases as the number of + # eigenvalues approaches the data size, so just test with a value very close + if use_svd: + num_eigens = (shape[0] - 1, shape[1] - 1) + atol = 1e-1 + rtol = 5e-2 + else: + num_eigens = None + atol = 1e-10 + rtol = 1e-14 + whittaker_system = WhittakerSystem2D( + shape, lam=lam, diff_order=diff_order, num_eigens=num_eigens + ) + result_obj = results.WhittakerResult2D(whittaker_system, weights=weights) + output = result_obj.effective_dimension(n_samples=0) + + assert_allclose(output, expected_ed, rtol=rtol, atol=atol) + + +@pytest.mark.parametrize('shape', ((20, 23), (51, 6))) +@pytest.mark.parametrize('diff_order', (1, 2, (2, 3))) +@pytest.mark.parametrize('lam', (1e2, (1e1, 1e2))) 
+@pytest.mark.parametrize('use_svd', (True, False)) +@pytest.mark.parametrize('n_samples', (100, 201)) +def test_whittaker_two_d_effective_dimension_stochastic(shape, diff_order, lam, use_svd, + n_samples): + """ + Tests the stochastic effective_dimension method of WhittakerResult2D objects. + + The effective dimension for Whittaker smoothing should be + ``trace((W + lam * D.T @ D)^-1 @ W)``, where `W` is the weight matrix, and + ``D.T @ D`` is the penalty. + + Tests both the analytic and SVD-based solutions. + + """ + weights = np.random.default_rng(0).normal(0.8, 0.05, shape) + weights = np.clip(weights, 0, 1).astype(float) + + if use_svd: + num_eigens = (shape[0] - 1, shape[1] - 1) + rtol = 1e-2 + else: + num_eigens = None + rtol = 1e-1 + + whittaker_system = WhittakerSystem2D( + shape, lam=lam, diff_order=diff_order, num_eigens=num_eigens + ) + + # true solution is already verified by other tests, so use that as "known" in + # this test to only examine the relative difference from using stochastic estimation + result_obj = results.WhittakerResult2D(whittaker_system, weights=weights) + expected_ed = result_obj.effective_dimension(n_samples=0) + + output = result_obj.effective_dimension(n_samples=n_samples) + assert_allclose(output, expected_ed, rtol=rtol, atol=1e-4) + + +@pytest.mark.parametrize('n_samples', (-1, 50.5)) +@pytest.mark.parametrize('num_eigens', (None, 5)) +def test_whittaker_two_d_effective_dimension_stochastic_invalid_samples(small_data2d, n_samples, + num_eigens): + """Ensures a non-zero, non-positive `n_samples` input raises an exception.""" + weights = np.random.default_rng(0).normal(0.8, 0.05, small_data2d.shape) + weights = np.clip(weights, 0, 1).astype(float) + + penalized_system = WhittakerSystem2D(small_data2d.shape, num_eigens=num_eigens) + result_obj = results.WhittakerResult2D(penalized_system, weights) + with pytest.raises(TypeError): + result_obj.effective_dimension(n_samples=n_samples) + + +@pytest.mark.parametrize('num_eigens', 
(None, 5)) +def test_whittaker_result_two_d_weights(data_fixture2d, num_eigens): + """Ensures both 1D and 2D weights are handled correctly by WhittakerResult2D.""" + x, z, y = data_fixture2d + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + penalized_system = WhittakerSystem2D(y.shape, num_eigens=num_eigens) + + result_obj = results.WhittakerResult2D(penalized_system, weights) + result_obj_1d = results.WhittakerResult2D(penalized_system, weights.ravel()) + + if num_eigens is None: + expected_shape = (y.size,) + else: + expected_shape = y.shape + assert result_obj._weights.shape == expected_shape + assert result_obj_1d._weights.shape == expected_shape + assert_allclose(result_obj._weights, result_obj_1d._weights, rtol=1e-16, atol=0) + + +@pytest.mark.parametrize('num_eigens', (None, 5)) +def test_whittaker_result_two_d_no_weights(data_fixture2d, num_eigens): + """Ensures weights are initialized as ones if not given to WhittakerResult2D.""" + x, z, y = data_fixture2d + if num_eigens is None: + expected_shape = (y.size,) + else: + expected_shape = y.shape + + penalized_system = WhittakerSystem2D(y.shape, num_eigens=num_eigens) + result_obj = results.WhittakerResult2D(penalized_system) + + assert_allclose(result_obj._weights, np.ones(expected_shape), rtol=1e-16, atol=0) + + +def test_whittaker_result_two_d_lhs_penalty_raises(data_fixture2d): + """Ensures an exception is raised if both `lhs` and `penalty` are supplied.""" + x, z, y = data_fixture2d + weights = np.random.default_rng(0).normal(0.8, 0.05, y.shape) + weights = np.clip(weights, 0, 1, dtype=float) + + penalized_system = WhittakerSystem2D(y.shape) + + with pytest.raises(ValueError, match='both `lhs` and `penalty` cannot'): + results.WhittakerResult2D( + penalized_system, weights, lhs=penalized_system.penalty, + penalty=penalized_system.penalty + ) + + +@pytest.mark.parametrize('shape', ((30, 21), (15, 40))) 
+@pytest.mark.parametrize('diff_order', (1, 2, 3, (1, 2))) +@pytest.mark.parametrize('large_lam', (True, False)) +def test_whittaker_two_d_effective_dimension_lam_extremes(shape, diff_order, large_lam): + """ + Tests the effective dimension of 2D Whittaker smoothing for high and low limits of ``lam``. + + Same reasoning as for `test_whittaker_effective_dimension_lam_extremes`, except the total + effective dimension is just the product of the row and column effective dimensions. + + """ + if large_lam: + lam = 1e14 + if isinstance(diff_order, int): + expected_ed = diff_order**2 + max_diff_order = diff_order + else: + expected_ed = np.prod(diff_order) + max_diff_order = max(diff_order) + # limited by how close to infinity lam can get before it causes numerical instability, + # and larger diff_orders need larger lam for it to be a polynomial, so have to reduce the + # relative tolerance as diff_order increases + rtol = {1: 8e-3, 2: 2e-2, 3: 7e-2}[max_diff_order] + else: + lam = 1e-16 + expected_ed = np.prod(shape) + rtol = 1e-15 + + whittaker_system = WhittakerSystem2D( + shape, lam=lam, diff_order=diff_order, num_eigens=None + ) + result_obj = results.WhittakerResult2D(whittaker_system) + + output = result_obj.effective_dimension(n_samples=0) + + assert_allclose(output, expected_ed, rtol=rtol, atol=1e-11) diff --git a/tests/test_spline.py b/tests/test_spline.py index 76bac982..887c1c1e 100644 --- a/tests/test_spline.py +++ b/tests/test_spline.py @@ -67,7 +67,7 @@ def test_numba_implementation(self): class IterativeSplineTester(SplineTester, InputWeightsMixin, RecreationMixin): """Base testing class for iterative spline functions.""" - checked_keys = ('weights', 'tol_history') + checked_keys = ('weights', 'tol_history', 'result') def test_tol_history(self): """Ensures the 'tol_history' item in the parameter output is correct.""" @@ -211,6 +211,16 @@ def test_whittaker_comparison(self, lam, p, diff_order): """Ensures the P-spline version is the same as the Whittaker 
version.""" super().test_whittaker_comparison(lam=lam, p=p, diff_order=diff_order) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestPsplineIAsLS(IterativeSplineTester, WhittakerComparisonMixin): """Class for testing pspline_iasls baseline.""" @@ -246,6 +256,16 @@ def test_whittaker_comparison(self, lam, lam_1, p, diff_order): """Ensures the P-spline version is the same as the Whittaker version.""" super().test_whittaker_comparison(lam=lam, lam_1=lam_1, p=p, diff_order=diff_order) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p**2`` or ``(1 - p)**2``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p**2, atol=1e-15, rtol=0) + | np.isclose(weights, (1 - p)**2, atol=1e-15, rtol=0) + ).all() + class TestPsplineAirPLS(IterativeSplineTester, WhittakerComparisonMixin): """Class for testing pspline_airpls baseline.""" @@ -436,7 +456,7 @@ class TestPsplineAsPLS(IterativeSplineTester, WhittakerComparisonMixin): """Class for testing pspline_aspls baseline.""" func_name = 'pspline_aspls' - checked_keys = ('weights', 'tol_history', 'alpha') + checked_keys = ('weights', 'tol_history', 'alpha', 'result') weight_keys = ('weights', 'alpha') def test_wrong_alpha_shape(self): @@ -599,7 +619,7 @@ class TestPsplineMPLS(SplineTester, InputWeightsMixin, WhittakerComparisonMixin) """Class for testing pspline_mpls baseline.""" func_name = 'pspline_mpls' - checked_keys = ('half_window', 'weights') + checked_keys = ('half_window', 'weights', 'result') @pytest.mark.parametrize('diff_order', (1, 3)) def test_diff_orders(self, diff_order): diff --git 
a/tests/test_spline_utils.py b/tests/test_spline_utils.py index 9e14fd73..f5ce204d 100644 --- a/tests/test_spline_utils.py +++ b/tests/test_spline_utils.py @@ -12,11 +12,12 @@ from numpy.testing import assert_allclose, assert_array_equal import pytest from scipy.interpolate import BSpline +from scipy.linalg import cholesky_banded from scipy.sparse import issparse from scipy.sparse.linalg import spsolve from pybaselines import _banded_utils, _spline_utils -from pybaselines._compat import dia_object, diags, _HAS_NUMBA +from pybaselines._compat import diags, _HAS_NUMBA def _nieve_basis_matrix(x, knots, spline_degree): @@ -67,7 +68,7 @@ def test_spline_knots_too_few_knots(num_knots): _spline_utils._spline_knots(np.arange(10), num_knots) -@pytest.mark.parametrize('num_knots', (2, 20, 1001)) +@pytest.mark.parametrize('num_knots', (2, 20, 201)) @pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) @pytest.mark.parametrize('penalized', (True, False)) def test_spline_knots(data_fixture, num_knots, spline_degree, penalized): @@ -98,7 +99,7 @@ def test_spline_knots(data_fixture, num_knots, spline_degree, penalized): assert np.all(np.diff(knots) >= 0) -@pytest.mark.parametrize('num_knots', (2, 20, 1001)) +@pytest.mark.parametrize('num_knots', (2, 20, 201)) @pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) @pytest.mark.parametrize('source', ('simple', 'numba', 'scipy')) def test_spline_basis(data_fixture, num_knots, spline_degree, source): @@ -127,7 +128,7 @@ def test_spline_basis(data_fixture, num_knots, spline_degree, source): assert issparse(basis) assert_allclose(basis.toarray(), expected_basis, 1e-14, 1e-14) # also test the main interface for the spline basis; only test for one - # source to avoid unnecessary repitition + # source to avoid unnecessary repetition if source == 'simple': basis_2 = _spline_utils._spline_basis(x, knots, spline_degree) assert issparse(basis_2) @@ -188,7 +189,7 @@ def test_bspline_has_extrapolate(): assert 
_spline_utils._bspline_has_extrapolate.cache_info().misses > 0 -@pytest.mark.parametrize('num_knots', (2, 20, 1001)) +@pytest.mark.parametrize('num_knots', (2, 20, 201)) @pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) def test_numba_basis_len(data_fixture, num_knots, spline_degree): """ @@ -211,11 +212,12 @@ def test_numba_basis_len(data_fixture, num_knots, spline_degree): assert len(basis.tocsr().data) == len(x) * (spline_degree + 1) -@pytest.mark.parametrize('num_knots', (100, 1000)) +@pytest.mark.parametrize('num_knots', (20, 101)) @pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) @pytest.mark.parametrize('diff_order', (1, 2, 3, 4)) @pytest.mark.parametrize('lower_only', (True, False)) -def test_pspline_solve(data_fixture, num_knots, spline_degree, diff_order, lower_only): +@pytest.mark.parametrize('has_numba', (True, False)) +def test_pspline_solve(data_fixture, num_knots, spline_degree, diff_order, lower_only, has_numba): """ Tests the accuracy of the penalized spline solvers. 
@@ -237,19 +239,14 @@ def test_pspline_solve(data_fixture, num_knots, spline_degree, diff_order, lower basis = _spline_utils._spline_basis(x, knots, spline_degree) num_bases = basis.shape[1] penalty = _banded_utils.diff_penalty_diagonals(num_bases, diff_order, lower_only) - penalty_matrix = dia_object( - (_banded_utils.diff_penalty_diagonals(num_bases, diff_order, False), - np.arange(diff_order, -(diff_order + 1), -1)), shape=(num_bases, num_bases) - ).tocsr() + penalty_matrix = _banded_utils.diff_penalty_matrix(num_bases, diff_order=diff_order) expected_coeffs = spsolve( basis.T @ diags(weights, format='csr') @ basis + penalty_matrix, basis.T @ (weights * y) ) expected_spline = basis @ expected_coeffs - - with mock.patch.object(_spline_utils, '_HAS_NUMBA', False): - # use sparse calculation + with mock.patch.object(_spline_utils, '_HAS_NUMBA', has_numba): spline_basis = _spline_utils.SplineBasis( x, num_knots=num_knots, spline_degree=spline_degree ) @@ -264,8 +261,90 @@ def test_pspline_solve(data_fixture, num_knots, spline_degree, diff_order, lower pspline.coef, expected_coeffs, 1e-10, 1e-12 ) - with mock.patch.object(_spline_utils, '_HAS_NUMBA', True): - # should use the numba calculation + +@pytest.mark.parametrize('num_knots', (20, 101)) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) +@pytest.mark.parametrize('diff_order', (1, 2, 3, 4)) +@pytest.mark.parametrize('lower_only', (True, False)) +def test_pspline_factorize_solve(data_fixture, num_knots, spline_degree, diff_order, lower_only): + """Tests the factorize and factorized_solve methods of a PSpline object.""" + x, y = data_fixture + # ensure x and y are floats + x = x.astype(float) + y = y.astype(float) + weights = np.random.default_rng(0).normal(0.8, 0.05, x.size) + weights = np.clip(weights, 0, 1).astype(float) + + knots = _spline_utils._spline_knots(x, num_knots, spline_degree, True) + basis = _spline_utils._spline_basis(x, knots, spline_degree) + num_bases = basis.shape[1] + 
penalty_matrix = _banded_utils.diff_penalty_matrix(num_bases, diff_order=diff_order) + + lhs_sparse = basis.T @ diags(weights, format='csr') @ basis + penalty_matrix + rhs = basis.T @ (weights * y) + expected_coeffs = spsolve(lhs_sparse, rhs) + + lhs_banded = _banded_utils._sparse_to_banded(lhs_sparse)[0] + if lower_only: + lhs_banded = lhs_banded[len(lhs_banded) // 2:] + + spline_basis = _spline_utils.SplineBasis( + x, num_knots=num_knots, spline_degree=spline_degree + ) + pspline = _spline_utils.PSpline( + spline_basis, lam=1, diff_order=diff_order, allow_lower=lower_only + ) + output_factorization = pspline.factorize(lhs_banded) + if lower_only: + expected_factorization = cholesky_banded(lhs_banded, lower=True) + + assert_allclose( + output_factorization, expected_factorization, rtol=1e-14, atol=1e-14 + ) + else: + assert callable(output_factorization) + + output = pspline.factorized_solve(output_factorization, rhs) + assert_allclose(output, expected_coeffs, rtol=1e-10, atol=1e-12) + + +@pytest.mark.parametrize('num_knots', (20, 101)) +@pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5)) +@pytest.mark.parametrize('diff_order', (1, 2, 3, 4)) +@pytest.mark.parametrize('lower_only', (True, False)) +@pytest.mark.parametrize('has_numba', (True, False)) +def test_pspline_make_btwb(data_fixture, num_knots, spline_degree, diff_order, lower_only, + has_numba): + """ + Tests the accuracy of the PSpline ``B.T @ W @ B`` calculation. + + The PSpline has two routes: + 1) use the custom numba function (preferred if numba is installed) + 2) compute ``B.T @ W @ B`` using the sparse system (last resort) + + Both are tested here.
+ + """ + x, y = data_fixture + # ensure x and y are floats + x = x.astype(float) + y = y.astype(float) + weights = np.random.default_rng(0).normal(0.8, 0.05, x.size) + weights = np.clip(weights, 0, 1).astype(float) + + knots = _spline_utils._spline_knots(x, num_knots, spline_degree, True) + basis = _spline_utils._spline_basis(x, knots, spline_degree) + + sparse_calc = basis.T @ diags(weights, format='csr') @ basis + expected_output = _banded_utils._sparse_to_banded(sparse_calc)[0] + if has_numba and len(expected_output) != 2 * spline_degree + 1: + # the sparse calculation can truncate rows of just zeros, so refill them + zeros = np.zeros(expected_output.shape[1]) + expected_output = np.vstack((zeros, expected_output, zeros)) + if lower_only: + expected_output = expected_output[len(expected_output) // 2:] + + with mock.patch.object(_spline_utils, '_HAS_NUMBA', has_numba): spline_basis = _spline_utils.SplineBasis( x, num_knots=num_knots, spline_degree=spline_degree ) @@ -273,11 +352,7 @@ def test_pspline_solve(data_fixture, num_knots, spline_degree, diff_order, lower spline_basis, lam=1, diff_order=diff_order, allow_lower=lower_only ) assert_allclose( - pspline.solve_pspline(y, weights=weights, penalty=penalty), - expected_spline, 1e-10, 1e-12 - ) - assert_allclose( - pspline.coef, expected_coeffs, 1e-10, 1e-12 + pspline._make_btwb(weights=weights), expected_output, 1e-14, 1e-14 ) @@ -329,7 +404,7 @@ def check_penalized_spline(penalized_system, expected_penalty, lam, diff_order, @pytest.mark.parametrize('spline_degree', (1, 2, 3)) -@pytest.mark.parametrize('num_knots', (10, 100)) +@pytest.mark.parametrize('num_knots', (20, 101)) @pytest.mark.parametrize('diff_order', (1, 2, 3)) @pytest.mark.parametrize('allow_lower', (True, False)) @pytest.mark.parametrize('reverse_diags', (True, False)) @@ -412,7 +487,7 @@ def test_spline_basis_negative_spline_degree_fails(data_fixture, spline_degree): @pytest.mark.parametrize('spline_degree', (1, 2, 3)) 
-@pytest.mark.parametrize('num_knots', (10, 100)) +@pytest.mark.parametrize('num_knots', (20, 101)) @pytest.mark.parametrize('diff_order', (1, 2)) @pytest.mark.parametrize('lam', (1e-2, 1e2)) def test_pspline_tck(data_fixture, num_knots, spline_degree, diff_order, lam): @@ -457,6 +532,56 @@ def test_pspline_tck_readonly(data_fixture): pspline.tck = (1, 2, 3) +@pytest.mark.parametrize('diff_order', (1, 2, 3)) +@pytest.mark.parametrize('allow_lower', (True, False)) +@pytest.mark.parametrize('num_knots', (20, 101)) +@pytest.mark.parametrize('spline_degree', (1, 2, 3)) +def test_pspline_update_lam(data_fixture, diff_order, allow_lower, num_knots, spline_degree): + """Tests updating the lam value for PSpline.""" + x, y = data_fixture + lam_init = 5 + basis = _spline_utils.SplineBasis(x, num_knots=num_knots, spline_degree=spline_degree) + pspline = _spline_utils.PSpline( + basis, diff_order=diff_order, lam=lam_init, allow_lower=allow_lower + ) + data_size = pspline._num_bases + + expected_penalty = lam_init * _banded_utils.diff_penalty_diagonals( + data_size, diff_order=diff_order, lower_only=pspline.lower, + padding=spline_degree - diff_order + ) + diag_index = pspline.main_diagonal_index + + assert_allclose(pspline.penalty, expected_penalty, rtol=1e-14, atol=1e-14) + assert_allclose( + pspline.main_diagonal, expected_penalty[diag_index], rtol=1e-14, atol=1e-14 + ) + assert_allclose(pspline.lam, lam_init, rtol=1e-15, atol=1e-15) + for lam in (1e3, 5.2e1): + expected_penalty = lam * _banded_utils.diff_penalty_diagonals( + data_size, diff_order=diff_order, lower_only=pspline.lower, + padding=spline_degree - diff_order + ) + pspline.update_lam(lam) + + assert_allclose(pspline.penalty, expected_penalty, rtol=1e-14, atol=1e-14) + assert_allclose( + pspline.main_diagonal, expected_penalty[diag_index], rtol=1e-14, atol=1e-14 + ) + assert_allclose(pspline.lam, lam, rtol=1e-15, atol=1e-15) + + +def test_pspline_update_lam_invalid_lam(data_fixture): + """Ensures 
PSpline.update_lam throws an exception when given a non-positive lam.""" + x, y = data_fixture + basis = _spline_utils.SplineBasis(x) + pspline = _spline_utils.PSpline(basis) + with pytest.raises(ValueError): + pspline.update_lam(-1.) + with pytest.raises(ValueError): + pspline.update_lam(0) + + def test_spline_basis_tk_readonly(data_fixture): """Ensures the tk attribute is read-only.""" x, y = data_fixture @@ -466,7 +591,7 @@ def test_spline_basis_tk_readonly(data_fixture): @pytest.mark.parametrize('spline_degree', (1, 2, 3)) -@pytest.mark.parametrize('num_knots', (10, 100)) +@pytest.mark.parametrize('num_knots', (20, 101)) def test_spline_basis_tk(data_fixture, num_knots, spline_degree): """Ensures the tk attribute can correctly recreate the solved spline.""" x, y = data_fixture diff --git a/tests/test_utils.py b/tests/test_utils.py index f5610f95..7be585f5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -958,3 +958,30 @@ def test_estimate_polyorder_failures(data_fixture): utils.estimate_polyorder(y, x, min_value=5, max_value=5) with pytest.raises(ValueError): utils.estimate_polyorder(y, x, min_value=5, max_value=4) + + +def test_get_rng(): + """Ensures _get_rng works with integers or existing generators.""" + seed = 123 + + assert_allclose( + utils._get_rng(seed).normal(0.5, 0.5, 5), + np.random.default_rng(seed).normal(0.5, 0.5, 5), rtol=1e-16, atol=1e-16 + ) + + expected_rng = np.random.default_rng(seed) + output_rng = utils._get_rng(expected_rng) + assert output_rng is expected_rng + # call order matters, so create new generator within accuracy test + assert_allclose( + output_rng.normal(0.5, 0.5, 5), + np.random.default_rng(seed).normal(0.5, 0.5, 5), rtol=1e-16, atol=1e-16 + ) + + expected_rng2 = np.random.RandomState(seed) + output_rng2 = utils._get_rng(expected_rng2) + assert output_rng2 is expected_rng2 + assert_allclose( + output_rng2.normal(0.5, 0.5, 5), + np.random.RandomState(seed).normal(0.5, 0.5, 5), rtol=1e-16, atol=1e-16 + ) diff 
--git a/tests/test_weighting.py b/tests/test_weighting.py index be9a8732..026e3027 100644 --- a/tests/test_weighting.py +++ b/tests/test_weighting.py @@ -255,16 +255,16 @@ def expected_airpls(y, baseline, iteration, normalize_weights): weights : numpy.ndarray, shape (N,) The calculated weights. - References - ---------- - Zhang, Z.M., et al. Baseline correction using adaptive iteratively - reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146. - Notes ----- Equation 9 in the original algorithm was misprinted according to the author (https://github.com/zmzhang/airPLS/issues/8), so the correct weighting is used here. + References + ---------- + Zhang, Z.M., et al. Baseline correction using adaptive iteratively + reweighted penalized least squares. Analyst, 2010, 135(5), 1138-1146. + """ residual = y - baseline neg_mask = residual < 0 diff --git a/tests/test_whittaker.py b/tests/test_whittaker.py index 791d15a6..61017669 100644 --- a/tests/test_whittaker.py +++ b/tests/test_whittaker.py @@ -92,7 +92,7 @@ class WhittakerTester(BaseTester, InputWeightsMixin, RecreationMixin): """Base testing class for whittaker functions.""" module = whittaker - checked_keys = ('weights', 'tol_history') + checked_keys = ('weights', 'tol_history', 'result') @pytest.mark.parametrize('diff_order', (2, 3)) def test_scipy_solvers(self, diff_order): @@ -104,7 +104,7 @@ def test_scipy_solvers(self, diff_order): self.algorithm.banded_solver = 4 # force use solve_banded solve_output = self.class_func(self.y, diff_order=diff_order)[0] - assert_allclose(solveh_output, solve_output, rtol=1e-6, atol=1e-8) + assert_allclose(solveh_output, solve_output, rtol=5e-6, atol=1e-8) finally: self.algorithm.banded_solver = original_solver @@ -153,6 +153,16 @@ def test_diff_orders(self, diff_order): lam = {1: 1e2, 3: 1e10}[diff_order] self.class_func(self.y, lam=lam, diff_order=diff_order) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all 
weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestIAsLS(WhittakerTester): """Class for testing iasls baseline.""" @@ -198,6 +208,16 @@ def test_sparse_comparison(self, diff_order, lam_1, p): assert_allclose(banded_output, sparse_output, rtol=5e-4, atol=1e-8) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p**2`` or ``(1 - p)**2``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p**2, atol=1e-15, rtol=0) + | np.isclose(weights, (1 - p)**2, atol=1e-15, rtol=0) + ).all() + class TestAirPLS(WhittakerTester): """Class for testing airpls baseline.""" @@ -380,7 +400,7 @@ class TestAsPLS(WhittakerTester): """Class for testing aspls baseline.""" func_name = 'aspls' - checked_keys = ('weights', 'alpha', 'tol_history') + checked_keys = ('weights', 'alpha', 'tol_history', 'result') weight_keys = ('weights', 'alpha') @pytest.mark.parametrize('diff_order', (1, 3)) @@ -458,7 +478,7 @@ def test_sparse_comparison(self, diff_order, asymmetric_coef, alternate_weightin asymmetric_coef=asymmetric_coef, alternate_weighting=alternate_weighting )[0] - rtol = {2: 1.5e-4, 3: 3e-4}[diff_order] + rtol = {2: 2e-4, 3: 5e-4}[diff_order] assert_allclose(banded_output, sparse_output, rtol=rtol, atol=1e-8) diff --git a/tests/two_d/test_algorithm_setup.py b/tests/two_d/test_algorithm_setup.py index b25b2059..95de84d1 100644 --- a/tests/two_d/test_algorithm_setup.py +++ b/tests/two_d/test_algorithm_setup.py @@ -395,7 +395,7 @@ def test_setup_spline_spline_basis(data_fixture2d, num_knots, spline_degree): @pytest.mark.parametrize('lam', (1, 20, (3, 10))) @pytest.mark.parametrize('diff_order', (1, 2, 3, 4, (2, 3))) @pytest.mark.parametrize('spline_degree', (1, 2, 3, 4, (2, 
3))) -@pytest.mark.parametrize('num_knots', (20, 51, (20, 30))) +@pytest.mark.parametrize('num_knots', (20, (21, 30))) def test_setup_spline_diff_matrix(data_fixture2d, lam, diff_order, spline_degree, num_knots): """Ensures output difference matrix diagonal data is in desired format.""" x, z, y = data_fixture2d diff --git a/tests/two_d/test_api.py b/tests/two_d/test_api.py index ab54ff4b..3b12dcd4 100644 --- a/tests/two_d/test_api.py +++ b/tests/two_d/test_api.py @@ -14,7 +14,7 @@ from pybaselines.two_d import api, morphological, optimizers, polynomial, smooth, spline, whittaker -from ..base_tests import get_data2d +from ..base_tests import get_data2d, check_param_keys _ALL_CLASSES = ( @@ -166,19 +166,7 @@ def test_all_methods(self, method_and_class): )(fit_data, **kwargs) assert_allclose(api_baseline, class_baseline, rtol=1e-12, atol=1e-12) - assert len(api_params.keys()) == len(class_params.keys()) - for key, value in api_params.items(): - assert key in class_params - class_value = class_params[key] - if isinstance(value, (int, float, np.ndarray, list, tuple)): - assert_allclose(value, class_value, rtol=1e-12, atol=1e-12) - elif isinstance(value, dict): - # do not check values of the internal dictionary since the nested structure - # is no longer guaranteed to be the same shape for every value - for internal_key in value.keys(): - assert internal_key in class_value - else: - assert value == class_value + check_param_keys(api_params.keys(), class_params.keys()) def test_method_availability(self): """Ensures all public algorithms are available through the Baseline class.""" diff --git a/tests/two_d/test_optimizers.py b/tests/two_d/test_optimizers.py index e89983cb..d3fe60e6 100644 --- a/tests/two_d/test_optimizers.py +++ b/tests/two_d/test_optimizers.py @@ -92,7 +92,7 @@ class TestCollabPLS(OptimizersTester, OptimizerInputWeightsMixin): func_name = "collab_pls" checked_keys = ('average_weights',) # will need to change checked_keys if default method is changed - 
checked_method_keys = ('weights', 'tol_history') + checked_method_keys = ('weights', 'tol_history', 'result') three_d = True weight_keys = ('average_weights', 'weights') diff --git a/tests/two_d/test_spline.py b/tests/two_d/test_spline.py index 23fd3202..9abb5a9e 100644 --- a/tests/two_d/test_spline.py +++ b/tests/two_d/test_spline.py @@ -55,7 +55,7 @@ class SplineTester(BaseTester2D): class IterativeSplineTester(SplineTester, InputWeightsMixin, RecreationMixin): """Base testing class for iterative spline functions.""" - checked_keys = ('weights', 'tol_history') + checked_keys = ('weights', 'tol_history', 'result') @classmethod def setup_class(cls): @@ -164,6 +164,16 @@ def test_whittaker_comparison(self, lam, p, diff_order): """Ensures the P-spline version is the same as the Whittaker version.""" super().test_whittaker_comparison(lam=lam, p=p, diff_order=diff_order) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestPsplineIAsLS(IterativeSplineTester, WhittakerComparisonMixin): """Class for testing pspline_iasls baseline.""" @@ -208,6 +218,16 @@ def test_whittaker_comparison(self, lam, lam_1, p, diff_order): lam=lam, lam_1=lam_1, p=p, diff_order=diff_order, uses_eigenvalues=False, test_rtol=1e-5 ) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p**2`` or ``(1 - p)**2``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p**2, atol=1e-15, rtol=0) + | np.isclose(weights, (1 - p)**2, atol=1e-15, rtol=0) + ).all() + class TestPsplineAirPLS(IterativeSplineTester, WhittakerComparisonMixin): """Class for testing pspline_airpls baseline.""" diff 
--git a/tests/two_d/test_spline_utils.py b/tests/two_d/test_spline_utils.py index 7cf66c06..b6cdafd6 100644 --- a/tests/two_d/test_spline_utils.py +++ b/tests/two_d/test_spline_utils.py @@ -14,16 +14,16 @@ from scipy.sparse.linalg import spsolve from pybaselines._compat import identity +from pybaselines._banded_utils import difference_matrix from pybaselines.two_d import _spline_utils -from pybaselines.utils import difference_matrix from ..base_tests import get_2dspline_inputs -@pytest.mark.parametrize('num_knots', (10, 40, (10, 20))) +@pytest.mark.parametrize('num_knots', (10, (11, 20))) @pytest.mark.parametrize('spline_degree', (0, 1, 2, 3, 4, 5, (2, 3))) @pytest.mark.parametrize('diff_order', (1, 2, 3, 4, (2, 3))) -@pytest.mark.parametrize('lam', (1e-2, 1e2, (1e1, 1e2))) +@pytest.mark.parametrize('lam', (1e-2, (1e1, 1e2))) def test_solve_psplines(data_fixture2d, num_knots, spline_degree, diff_order, lam): """ Tests the accuracy of the penalized spline solvers. @@ -87,7 +87,7 @@ def test_solve_psplines(data_fixture2d, num_knots, spline_degree, diff_order, la @pytest.mark.parametrize('spline_degree', (1, 2, 3, [2, 3])) -@pytest.mark.parametrize('num_knots', (10, 50, [20, 30])) +@pytest.mark.parametrize('num_knots', (16, [21, 30])) @pytest.mark.parametrize('diff_order', (1, 2, 3, [1, 3])) @pytest.mark.parametrize('lam', (5, (3, 5))) def test_pspline_setup(data_fixture2d, num_knots, spline_degree, diff_order, lam): @@ -271,7 +271,7 @@ def test_pspline_tck_none(data_fixture2d): def test_pspline_tck_readonly(data_fixture2d): """Ensures the tck attribute is read-only.""" x, z, y = data_fixture2d - spline_basis = _spline_utils.SplineBasis2D(x, z) + spline_basis = _spline_utils.SplineBasis2D(x, z, num_knots=10) pspline = _spline_utils.PSpline2D(spline_basis) with pytest.raises(AttributeError): diff --git a/tests/two_d/test_whittaker.py b/tests/two_d/test_whittaker.py index 3f6488af..ff4b0a01 100644 --- a/tests/two_d/test_whittaker.py +++ b/tests/two_d/test_whittaker.py 
@@ -18,7 +18,7 @@ class WhittakerTester(BaseTester2D, InputWeightsMixin, RecreationMixin): """Base testing class for whittaker functions.""" module = whittaker - checked_keys = ('weights', 'tol_history') + checked_keys = ('weights', 'tol_history', 'result') def test_tol_history(self): """Ensures the 'tol_history' item in the parameter output is correct.""" @@ -75,6 +75,16 @@ def test_diff_orders(self, diff_order): """Ensure that other difference orders work.""" self.class_func(self.y, diff_order=diff_order) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p`` or ``1 - p``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p, atol=1e-15, rtol=0) + | np.isclose(weights, 1 - p, atol=1e-15, rtol=0) + ).all() + class TestIAsLS(WhittakerTester): """Class for testing iasls baseline.""" @@ -104,6 +114,16 @@ def test_diff_order_one_fails(self): with pytest.raises(ValueError): self.class_func(self.y, lam=1e2, diff_order=[2, 1]) + @pytest.mark.parametrize('p', (0.01, 0.2)) + def test_output_binary_weights(self, p): + """Ensures all weights are either ``p**2`` or ``(1 - p)**2``.""" + _, params = self.class_func(self.y, p=p) + weights = params['weights'] + assert ( + np.isclose(weights, p**2, atol=1e-15, rtol=0) + | np.isclose(weights, (1 - p)**2, atol=1e-15, rtol=0) + ).all() + class TestAirPLS(EigenvalueMixin, WhittakerTester): """Class for testing airpls baseline.""" @@ -174,7 +194,7 @@ class TestAsPLS(WhittakerTester): """Class for testing aspls baseline.""" func_name = 'aspls' - checked_keys = ('weights', 'alpha', 'tol_history') + checked_keys = ('weights', 'alpha', 'tol_history', 'result') weight_keys = ('weights', 'alpha') required_repeated_kwargs = {'lam': 1e2, 'tol': 1e-1}