diff --git a/autofit/aggregator/summary/aggregate_fits.py b/autofit/aggregator/summary/aggregate_fits.py
index 14fce2b44..64634ad4c 100644
--- a/autofit/aggregator/summary/aggregate_fits.py
+++ b/autofit/aggregator/summary/aggregate_fits.py
@@ -2,8 +2,6 @@
 from enum import Enum
 from typing import Dict, List, Union
 
-from astropy.table import Table
-from astropy.io import fits
 from pathlib import Path
 
 from autofit.aggregator.search_output import SearchOutput
@@ -53,7 +51,7 @@ def __init__(self, aggregator: Union[Aggregator, List[SearchOutput]]):
     def _hdus(
         result: SearchOutput,
         hdus: List[Enum],
-    ) -> List[fits.ImageHDU]:
+    ) -> "List[fits.ImageHDU]":
         """
         Extract the HDUs from a given fits for a given search.
 
@@ -68,6 +66,8 @@ def _hdus(
         -------
         The extracted HDUs.
         """
+        from astropy.io import fits
+
         row = []
         for hdu in hdus:
             source = result.value(subplot_filename(hdu))
@@ -80,7 +80,7 @@ def _hdus(
             )
         return row
 
-    def extract_fits(self, hdus: List[Enum]) -> fits.HDUList:
+    def extract_fits(self, hdus: List[Enum]) -> "fits.HDUList":
         """
         Extract the HDUs from the fits files for every search in the aggregator.
 
@@ -95,6 +95,8 @@ def extract_fits(self, hdus: List[Enum]) -> fits.HDUList:
         -------
         The extracted HDUs.
         """
+        from astropy.io import fits
+
         output = [fits.PrimaryHDU()]
         for result in self.aggregator:
             output.extend(self._hdus(result, hdus))
@@ -117,6 +119,8 @@ def extract_csv(self, filename: str) -> List[Dict]:
         -------
         The extracted HDUs.
         """
+        from astropy.table import Table
+
         output = []
         for result in self.aggregator:
             output.append(Table.read(result.value(filename), format="fits"))
@@ -144,6 +148,8 @@ def output_to_folder(
             The name of the fits file. This is the attribute of the search output that is used to name the file.
             OR a list of names for each HDU.
         """
+        from astropy.io import fits
+
         folder.mkdir(parents=True, exist_ok=True)
 
         for i, result in enumerate(self.aggregator):
diff --git a/autofit/database/model/prior.py b/autofit/database/model/prior.py
index 495c424c1..5e68565b3 100644
--- a/autofit/database/model/prior.py
+++ b/autofit/database/model/prior.py
@@ -100,7 +100,6 @@ class Prior(Object):
     def _from_object(cls, model: abstract.Prior):
         instance = cls()
         instance.cls = type(model)
-        print(model.__database_args__)
         instance._add_children(
             [(key, getattr(model, key)) for key in model.__database_args__]
         )
diff --git a/autofit/example/util.py b/autofit/example/util.py
index 2eae9bbf3..95470529a 100644
--- a/autofit/example/util.py
+++ b/autofit/example/util.py
@@ -1,6 +1,5 @@
 import os
 from os import path
-import matplotlib.pyplot as plt
 import numpy as np
 from typing import Optional
 
@@ -37,6 +36,8 @@ def plot_profile_1d(
     output_format
         Determines where the plot is displayed on your screen ("show") or output to the hard-disk as a png ("png").
     """
+    import matplotlib.pyplot as plt
+
     plt.errorbar(
         x=xvalues, y=profile_1d, yerr=errors, color=color, ecolor="k", elinewidth=1, capsize=2
     )
diff --git a/autofit/example/visualize.py b/autofit/example/visualize.py
index 7493021d6..01723f221 100644
--- a/autofit/example/visualize.py
+++ b/autofit/example/visualize.py
@@ -1,5 +1,4 @@
 import os
-import matplotlib.pyplot as plt
 import numpy as np
 from typing import List
 
@@ -39,6 +38,8 @@ def visualize_before_fit(
             The model which is fitted to the data, which may be used to customize the visualization.
         """
 
+        import matplotlib.pyplot as plt
+
         xvalues = np.arange(analysis.data.shape[0])
 
         plt.errorbar(
@@ -94,6 +95,8 @@ def visualize(
             which may change which images are output.
         """
 
+        import matplotlib.pyplot as plt
+
         xvalues = np.arange(analysis.data.shape[0])
         model_data_1d_list = []
 
diff --git a/autofit/graphical/expectation_propagation/visualise.py b/autofit/graphical/expectation_propagation/visualise.py
index 16e7e87af..443ee72c3 100644
--- a/autofit/graphical/expectation_propagation/visualise.py
+++ b/autofit/graphical/expectation_propagation/visualise.py
@@ -3,8 +3,6 @@
 from pathlib import Path
 import warnings
 
-import matplotlib.pyplot as plt
-
 from autofit.graphical.expectation_propagation.history import EPHistory
 
 logger = logging.getLogger(__name__)
@@ -34,6 +32,8 @@ def __call__(self):
         """
         Save a plot of Evidence and KL Divergence for the ep_history
         """
+        import matplotlib.pyplot as plt
+
         fig, (evidence_plot, kl_plot) = plt.subplots(2)
         fig.suptitle("Evidence and KL Divergence")
         evidence_plot.plot(self.ep_history.evidences(), label="evidence")
diff --git a/autofit/graphical/factor_graphs/transform.py b/autofit/graphical/factor_graphs/transform.py
index 70acf5160..8d22e678d 100644
--- a/autofit/graphical/factor_graphs/transform.py
+++ b/autofit/graphical/factor_graphs/transform.py
@@ -2,7 +2,6 @@
 from typing import Dict, Tuple, Optional, List
 
 import numpy as np
-from scipy.linalg import cho_factor
 
 from autoconf import cached_property
 from autofit.graphical.factor_graphs.abstract import AbstractNode, Value, FactorValue
@@ -63,12 +62,16 @@ def from_scales(cls, scales):
 
     @classmethod
     def from_covariances(cls, covs):
+        from scipy.linalg import cho_factor
+
         return cls(
             {v: InvCholeskyTransform(cho_factor(cov)) for v, cov in covs.items()}
         )
 
     @classmethod
     def from_inv_covariances(cls, inv_covs):
+        from scipy.linalg import cho_factor
+
         return cls(
             {
                 v: CholeskyOperator(cho_factor(inv_cov))
diff --git a/autofit/graphical/laplace/line_search.py b/autofit/graphical/laplace/line_search.py
index dc56d5952..9094061d1 100644
--- a/autofit/graphical/laplace/line_search.py
+++ b/autofit/graphical/laplace/line_search.py
@@ -11,7 +11,6 @@
 from typing import Optional, Dict, Tuple
 
 import numpy as np
-from scipy.optimize import _linesearch as linesearch
 
 from autoconf import cached_property
 from autofit.graphical.factor_graphs.abstract import (
@@ -280,6 +279,8 @@ def line_search_wolfe1(
     gval : array
         Gradient of `f` at the final point
     """
+    from scipy.optimize import _linesearch as linesearch
+
     derphi0 = state.derphi(0)
     old_fval = state.value
     stepsize, _, _ = linesearch.scalar_search_wolfe1(
@@ -339,6 +340,8 @@ def line_search_wolfe2(
     gval : array
         Gradient of `f` at the final point
     """
+    from scipy.optimize import _linesearch as linesearch
+
     derphi0 = state.derphi(0)
     old_fval = state.value
     stepsize, _, _, _ = linesearch.scalar_search_wolfe2(
@@ -364,6 +367,8 @@ def line_search_wolfe2(
 def line_search(
     state: OptimisationState, old_state: Optional[FactorValue] = None, **kwargs
 ) -> Tuple[Optional[float], OptimisationState]:
+    from scipy.optimize import _linesearch as linesearch
+
     stepsize, next_state = line_search_wolfe1(state, old_state, **kwargs)
 
     if stepsize is None:
diff --git a/autofit/graphical/utils.py b/autofit/graphical/utils.py
index 274db47ea..b45437ca4 100644
--- a/autofit/graphical/utils.py
+++ b/autofit/graphical/utils.py
@@ -1,15 +1,16 @@
+from __future__ import annotations
 import logging
 import warnings
 from collections import abc
 from enum import Enum
 from functools import reduce
 from operator import mul
-from typing import Iterable, Tuple, TypeVar, Dict, NamedTuple, Optional, Union
+from typing import Iterable, Tuple, TypeVar, Dict, NamedTuple, Optional, Union, TYPE_CHECKING
 
 import numpy as np
-import six
-from scipy.linalg import block_diag
-from scipy.optimize import OptimizeResult
+
+if TYPE_CHECKING:
+    from scipy.optimize import OptimizeResult
 
 from autofit.mapper.variable import Variable, VariableData
 from autofit.non_linear.result import Result
@@ -50,6 +51,9 @@ def is_variable(v, *args):
 
 
 def is_iterable(arg):
+
+    import six
+
     return isinstance(arg, abc.Iterable) and not isinstance(
         arg, six.string_types
     )
@@ -369,6 +373,9 @@ def unflatten(self, arr: np.ndarray, ndim=None) -> Dict[str, np.ndarray]:
         })
 
     def flatten2d(self, values: Dict[Variable, np.ndarray]) -> np.ndarray:
+
+        from scipy.linalg import block_diag
+
         assert all(np.shape(values[k]) == shape * 2 for k, shape in self.items())
 
         return block_diag(
@@ -386,7 +393,9 @@ def size(self):
         return self.splits[-1]
 
 
+
 class OptResult(NamedTuple):
+
     mode: Dict[Variable, np.ndarray]
     hess_inv: Dict[Variable, np.ndarray]
     log_norm: float
diff --git a/autofit/interpolator/covariance.py b/autofit/interpolator/covariance.py
index c17c4dfb6..9b43f207e 100644
--- a/autofit/interpolator/covariance.py
+++ b/autofit/interpolator/covariance.py
@@ -1,5 +1,4 @@
 import numpy as np
-import scipy
 from typing import List, Dict
 
 from autofit.non_linear.samples.pdf import SamplesPDF
@@ -102,8 +101,10 @@ def inverse_covariance_matrix(self) -> np.ndarray:
         """
         Calculate the inverse covariance matrix of the samples
         """
+        from scipy.linalg import inv
+
         matrices = [
-            scipy.linalg.inv(samples.covariance_matrix) for samples in self.samples_list
+            inv(samples.covariance_matrix) for samples in self.samples_list
         ]
         return self._subsume(matrices)
 
diff --git a/autofit/interpolator/linear.py b/autofit/interpolator/linear.py
index b6de93cee..cb902a11f 100644
--- a/autofit/interpolator/linear.py
+++ b/autofit/interpolator/linear.py
@@ -1,4 +1,3 @@
-from scipy.stats import linregress
 from .abstract import AbstractInterpolator
 from autofit.interpolator.linear_relationship import LinearRelationship
 
@@ -10,6 +9,9 @@ class LinearInterpolator(AbstractInterpolator):
 
     @staticmethod
     def _relationship(x, y):
+
+        from scipy.stats import linregress
+
         slope, intercept, r, p, std_err = linregress(x, y)
 
         return LinearRelationship(slope, intercept)
diff --git a/autofit/interpolator/spline.py b/autofit/interpolator/spline.py
index 35b331e41..4c878c386 100644
--- a/autofit/interpolator/spline.py
+++ b/autofit/interpolator/spline.py
@@ -1,4 +1,3 @@
-from scipy.interpolate import CubicSpline
 from .abstract import AbstractInterpolator
 
 
@@ -9,4 +8,7 @@ class SplineInterpolator(AbstractInterpolator):
 
     @staticmethod
     def _relationship(x, y):
+
+        from scipy.interpolate import CubicSpline
+
         return CubicSpline(x, y)
diff --git a/autofit/jax_wrapper.py b/autofit/jax_wrapper.py
index 92e54f1ac..3f3ccc829 100644
--- a/autofit/jax_wrapper.py
+++ b/autofit/jax_wrapper.py
@@ -28,7 +28,6 @@ def jit(function, *args, **kwargs):
     def grad(function, *args, **kwargs):
         return jax.grad(function, *args, **kwargs)
 
-    from jax._src.scipy.special import erfinv
 
 else:
 
@@ -41,7 +40,6 @@ def grad(function, *args, **kwargs):
     """)
 
     import numpy  # noqa
-    from scipy.special.cython_special import erfinv  # noqa
 
     def jit(function, *_, **__):
         return function
diff --git a/autofit/mapper/model_object.py b/autofit/mapper/model_object.py
index c5a4c48ec..641c04e77 100644
--- a/autofit/mapper/model_object.py
+++ b/autofit/mapper/model_object.py
@@ -150,7 +150,6 @@ def from_dict(
         from autofit.mapper.prior_model.collection import Collection
         from autofit.mapper.prior_model.prior_model import Model
         from autofit.mapper.prior.abstract import Prior
-        from autofit.mapper.prior.gaussian import GaussianPrior
         from autofit.mapper.prior.tuple_prior import TuplePrior
         from autofit.mapper.prior.arithmetic.compound import Compound
         from autofit.mapper.prior.arithmetic.compound import ModifiedPrior
diff --git a/autofit/mapper/operator.py b/autofit/mapper/operator.py
index f5265f70d..afad929f0 100644
--- a/autofit/mapper/operator.py
+++ b/autofit/mapper/operator.py
@@ -3,15 +3,7 @@
 from typing import Tuple, List
 
 import numpy as np
-from scipy._lib._util import _asarray_validated
-from scipy.linalg import (
-    cho_factor,
-    solve_triangular,
-    get_blas_funcs,
-    block_diag,
-    qr_update,
-    qr,
-)
+
 
 from autoconf import cached_property
 
@@ -421,6 +413,11 @@ def _mul_triangular(
         Disabling may give a performance gain, but may result in problems
         (crashes, non-termination) if the inputs do contain infinities or NaNs.
     """
+    from scipy._lib._util import _asarray_validated
+    from scipy.linalg import (
+        get_blas_funcs,
+    )
+
     a1 = _asarray_validated(c, check_finite=check_finite)
     b1 = _asarray_validated(b, check_finite=check_finite)
 
@@ -480,6 +477,9 @@ def M(self) -> np.ndarray:
 
     @classmethod
     def from_dense(cls, hess, shape=None, ldim=None):
+
+        from scipy.linalg import qr
+
         shape = np.shape(hess)
         ldim = ldim or np.ndim(hess) // 2
         length = np.prod(shape[:ldim])
@@ -496,10 +496,16 @@ def __rmul__(self, x):
 
     @_wrap_rightop
     def __rtruediv__(self, x):
+        from scipy.linalg import (
+            solve_triangular,
+        )
         return solve_triangular(self.R.T, x.T @ self.Q.T, lower=True).T
 
     @_wrap_leftop
     def ldiv(self, x):
+        from scipy.linalg import (
+            solve_triangular,
+        )
         return self.Q.T @ solve_triangular(self.R, x, lower=False)
 
     @cached_property
@@ -515,6 +521,9 @@ def to_dense(self):
         return self.M.copy()
 
     def update(self, *args):
+        from scipy.linalg import (
+            qr_update,
+        )
         Q, R = self.Q, self.R
         for u, v in args:
             Q, R = qr_update(Q, R, np.ravel(u), np.ravel(v))
@@ -543,6 +552,9 @@ def __init__(self, cho_factor, shape=None, ldim=None):
 
     @classmethod
     def from_dense(cls, hess, shape=None, ldim=None):
+        from scipy.linalg import (
+            cho_factor,
+        )
         shape = np.shape(hess)
         ldim = ldim or np.ndim(hess) // 2
         length = np.prod(shape[:ldim])
@@ -558,10 +570,16 @@ def __rmul__(self, x):
 
     @_wrap_rightop
     def __rtruediv__(self, x):
+
+        from scipy.linalg import solve_triangular
+
         return solve_triangular(self.L, x.T, lower=True).T
 
     @_wrap_leftop
     def ldiv(self, x):
+
+        from scipy.linalg import solve_triangular
+
         return solve_triangular(self.U, x, lower=False)
 
     @cached_property
@@ -601,6 +619,9 @@ def log_det(self):
         return -np.sum(np.log(self.U.diagonal()))
 
     def to_dense(self):
+
+        from scipy.linalg import solve_triangular
+
         return solve_triangular(self.U, np.triul(self.L), lower=False)
 
 
@@ -816,6 +837,9 @@ def rshape(self):
         return self.shape[2:]
 
     def to_dense(self):
+
+        from scipy.linalg import block_diag
+
         return block_diag(*(self.vec[:, :, None] * self.vecT[:, None, :])).reshape(
             self.shape
         )
diff --git a/autofit/mapper/prior/truncated_gaussian.py b/autofit/mapper/prior/truncated_gaussian.py
index 5cd3b9813..8d8659229 100644
--- a/autofit/mapper/prior/truncated_gaussian.py
+++ b/autofit/mapper/prior/truncated_gaussian.py
@@ -8,7 +8,7 @@
 
 @register_pytree_node_class
 class TruncatedGaussianPrior(Prior):
-    __identifier_fields__ = ("lower_limit", "upper_limit", "mean", "sigma")
+    __identifier_fields__ = ("mean", "sigma", "lower_limit", "upper_limit")
     __database_args__ = ("mean", "sigma", "lower_limit", "upper_limit", "id_")
 
     def __init__(
@@ -61,8 +61,8 @@ def __init__(
             message=TruncatedNormalMessage(
                 mean=mean,
                 sigma=sigma,
-                lower_limit=lower_limit,
-                upper_limit=upper_limit,
+                lower_limit=float(lower_limit),
+                upper_limit=float(upper_limit),
             ),
             id_=id_,
         )
@@ -112,7 +112,8 @@ def dict(self) -> dict:
         """
         prior_dict = super().dict()
         return {
-            **prior_dict, "mean": self.mean,
+            **prior_dict,
+            "mean": self.mean,
             "sigma": self.sigma,
             "lower_limit": self.lower_limit,
             "upper_limit": self.upper_limit
diff --git a/autofit/mapper/prior_model/abstract.py b/autofit/mapper/prior_model/abstract.py
index 5b11b5b57..b53adffa1 100644
--- a/autofit/mapper/prior_model/abstract.py
+++ b/autofit/mapper/prior_model/abstract.py
@@ -14,10 +14,8 @@
 from autoconf import conf
 from autoconf.exc import ConfigException
 from autofit import exc
-from autofit import jax_wrapper
 from autofit.mapper import model
 from autofit.mapper.model import AbstractModel, frozen_cache
-from autofit.mapper.prior import GaussianPrior
 from autofit.mapper.prior import TruncatedGaussianPrior
 from autofit.mapper.prior import UniformPrior
 from autofit.mapper.prior.abstract import Prior
diff --git a/autofit/mapper/variable_operator.py b/autofit/mapper/variable_operator.py
index a170c4781..cf6f8b175 100644
--- a/autofit/mapper/variable_operator.py
+++ b/autofit/mapper/variable_operator.py
@@ -4,7 +4,6 @@
 from collections import ChainMap
 
 import numpy as np
-from scipy.linalg import block_diag
 
 from autoconf import cached_property
 
@@ -14,13 +13,11 @@
     DiagonalMatrix,
     CholeskyOperator,
     MatrixOperator,
-    QROperator,
 )
 from autofit.mapper.variable import (
     Variable,
     VariableData,
     VariableLinearOperator,
-    InverseVariableOperator,
     rtruediv,
     rmul,
 )
@@ -86,6 +83,9 @@ def to_block(self, cls=None):
         return VariableOperator(blocks)
 
     def to_full(self):
+
+        from scipy.linalg import block_diag
+
         full_ops = [op.to_full() for op in self.operators]
         full = block_diag(*(op.operator.to_dense() for op in full_ops))
         param_shapes = FlattenArrays(
diff --git a/autofit/messages/abstract.py b/autofit/messages/abstract.py
index bdc7fbdf4..d3b921108 100644
--- a/autofit/messages/abstract.py
+++ b/autofit/messages/abstract.py
@@ -47,8 +47,8 @@ def __init__(
         id_=None,
     ):
 
-        self.lower_limit = lower_limit
-        self.upper_limit = upper_limit
+        self.lower_limit = float(lower_limit)
+        self.upper_limit = float(upper_limit)
 
         self.id = next(self.ids) if id_ is None else id_
         self.log_norm = log_norm
diff --git a/autofit/messages/beta.py b/autofit/messages/beta.py
index bd5b28a7e..67fabc9c4 100644
--- a/autofit/messages/beta.py
+++ b/autofit/messages/beta.py
@@ -3,8 +3,6 @@
 from typing import Tuple, Union
 
 import numpy as np
-from scipy.special import betaln
-from scipy.special import psi, polygamma
 
 from autoconf import cached_property
 from ..messages.abstract import AbstractMessage
@@ -23,6 +21,8 @@ def grad_betaln(ab: np.ndarray) -> np.ndarray:
     -------
     Gradient array of shape (N, 2) with derivatives of log Beta function w.r.t a and b.
     """
+    from scipy.special import psi
+
     psiab = psi(ab.sum(axis=1, keepdims=True))
     return psi(ab) - psiab
 
@@ -40,6 +40,8 @@ def jac_grad_betaln(ab: np.ndarray) -> np.ndarray:
     -------
     Array of shape (N, 2, 2), the Jacobian matrices for each parameter pair.
     """
+    from scipy.special import polygamma
+
     psi1ab = polygamma(1, ab.sum(axis=1, keepdims=True))
     fii = polygamma(1, ab) - psi1ab
     fij = -psi1ab[:, 0]
@@ -330,6 +332,9 @@ def kl(self, dist: "BetaMessage") -> float:
         TypeError
             If the support of the two distributions does not match.
         """
+        from scipy.special import betaln
+        from scipy.special import psi
+
         # TODO check this is correct
         # https://arxiv.org/pdf/0911.4863.pdf
         if self._support != dist._support:
diff --git a/autofit/messages/fixed.py b/autofit/messages/fixed.py
index db4db6482..496a62804 100644
--- a/autofit/messages/fixed.py
+++ b/autofit/messages/fixed.py
@@ -1,4 +1,3 @@
-import math
 from typing import Optional, Tuple
 
 import numpy as np
diff --git a/autofit/messages/gamma.py b/autofit/messages/gamma.py
index e21989197..ae86a615e 100644
--- a/autofit/messages/gamma.py
+++ b/autofit/messages/gamma.py
@@ -1,7 +1,4 @@
-import math
-
 import numpy as np
-from scipy import special
 
 from autoconf import cached_property
 from autofit.messages.abstract import AbstractMessage
@@ -11,6 +8,8 @@
 class GammaMessage(AbstractMessage):
     @property
     def log_partition(self):
+        from scipy import special
+
         alpha, beta = GammaMessage.invert_natural_parameters(self.natural_parameters)
         return special.gammaln(alpha) - alpha * np.log(beta)
 
@@ -83,6 +82,8 @@ def from_mode(cls, mode, covariance, **kwargs):
         return cls(alpha, beta, **kwargs)
 
     def kl(self, dist):
+        from scipy import special
+
         P, Q = dist, self
         logP = np.log(P.alpha)
         # TODO check this is correct
diff --git a/autofit/messages/normal.py b/autofit/messages/normal.py
index 2afe449fe..554cc4806 100644
--- a/autofit/messages/normal.py
+++ b/autofit/messages/normal.py
@@ -1,10 +1,8 @@
 from collections.abc import Hashable
-import math
+
 from typing import Optional, Tuple, Union
 
 import numpy as np
-from autofit.jax_wrapper import erfinv
-from scipy.stats import norm
 
 from autoconf import cached_property
 from autofit.mapper.operator import LinearOperator
@@ -100,6 +98,8 @@ def cdf(self, x : Union[float, np.ndarray]) -> Union[float, np.ndarray]:
         -------
         The cumulative probability P(X ≤ x).
         """
+        from scipy.stats import norm
+
         return norm.cdf(x, loc=self.mean, scale=self.sigma)
 
     def ppf(self, x : Union[float, np.ndarray]) -> Union[float, np.ndarray]:
@@ -118,6 +118,8 @@ def ppf(self, x : Union[float, np.ndarray]) -> Union[float, np.ndarray]:
         -------
         The value(s) corresponding to the input quantiles.
         """
+        from scipy.stats import norm
+
         return norm.ppf(x, loc=self.mean, scale=self.sigma)
 
     @cached_property
@@ -389,9 +391,13 @@ def value_for(self, unit: float) -> float:
         >>> prior = af.GaussianPrior(mean=1.0, sigma=2.0)
         >>> physical_value = prior.value_for(unit=0.5)
         """
-        try:
+
+        from autofit import jax_wrapper
+
+        if jax_wrapper.use_jax:
+            from jax._src.scipy.special import erfinv
             inv = erfinv(1 - 2.0 * (1.0 - unit))
-        except TypeError:
+        else:
             from scipy.special import erfinv as scipy_erfinv
             inv = scipy_erfinv(1 - 2.0 * (1.0 - unit))
         return self.mean + (self.sigma * np.sqrt(2) * inv)
diff --git a/autofit/messages/transform.py b/autofit/messages/transform.py
index ce12006da..4bb85c49b 100644
--- a/autofit/messages/transform.py
+++ b/autofit/messages/transform.py
@@ -4,8 +4,6 @@
 from typing import Tuple
 
 import numpy as np
-from scipy.special import ndtr, ndtri as ndtri_
-from scipy.stats._continuous_distns import _norm_pdf
 
 from ..mapper.operator import DiagonalMatrix, LinearOperator, ShermanMorrison
 
@@ -13,10 +11,13 @@
 
 
 def ndtri(x):
+
+    from scipy.special import ndtri
+
     x = np.array(x, dtype=float)
     x[(x <= 0) & (x >= -epsilon)] = epsilon
     x[(x >= 1) & (x <= 1 + epsilon)] = 1 - epsilon
-    return ndtri_(x)
+    return ndtri(x)
 
 
 def numerical_jacobian(x, func, eps=1e-8, args=(), **kwargs):
@@ -314,19 +315,30 @@ def shifted_logistic(shift=0, scale=1):
 
 
 def ndtri_grad(x):
+    from scipy.stats._continuous_distns import _norm_pdf
+
     return np.reciprocal(_norm_pdf(ndtri(x)))
 
 
 def ndtri_grad_hess(x):
+    from scipy.stats._continuous_distns import _norm_pdf
+
     f = ndtri(x)
     phi = _norm_pdf(f)
     grad = np.reciprocal(phi)
     hess = grad**2 * f
     return f, grad, hess
 
+def lazy_ndtr(x):
+    from scipy.special import ndtr
+    return ndtr(x)
+
+def lazy_ndtri(x):
+    from scipy.special import ndtri
+    return ndtri(x)
 
 phi_transform = FunctionTransform(
-    ndtri, ndtr, ndtri_grad, func_grad_hess=ndtri_grad_hess
+    lazy_ndtri, lazy_ndtr, ndtri_grad, func_grad_hess=ndtri_grad_hess
 )
 
 
diff --git a/autofit/messages/truncated_normal.py b/autofit/messages/truncated_normal.py
index 1be286725..e5ae00d53 100644
--- a/autofit/messages/truncated_normal.py
+++ b/autofit/messages/truncated_normal.py
@@ -1,11 +1,8 @@
 from collections.abc import Hashable
 import math
-from scipy.stats import truncnorm
 from typing import Optional, Tuple, Union
 
 import numpy as np
-from autofit.jax_wrapper import erfinv
-from scipy.stats import norm
 
 from autoconf import cached_property
 from autofit.mapper.operator import LinearOperator
@@ -44,6 +41,8 @@ def log_partition(self) -> float:
         float
             The log-partition (log of the normalizing constant).
         """
+        from scipy.stats import norm
+
         a = (self.lower_limit - self.mean) / self.sigma
         b = (self.upper_limit - self.mean) / self.sigma
         Z = norm.cdf(b) - norm.cdf(a)
@@ -116,6 +115,8 @@ def cdf(self, x: Union[float, np.ndarray]) -> Union[float, np.ndarray]:
         -------
         The cumulative probability P(X ≤ x) under the truncated Gaussian.
         """
+        from scipy.stats import truncnorm
+
         a = (self.lower_limit - self.mean) / self.sigma
         b = (self.upper_limit - self.mean) / self.sigma
         return truncnorm.cdf(x, a=a, b=b, loc=self.mean, scale=self.sigma)
@@ -136,6 +137,8 @@ def ppf(self, x: Union[float, np.ndarray]) -> Union[float, np.ndarray]:
         -------
         The value(s) corresponding to the input quantiles.
         """
+        from scipy.stats import truncnorm
+
         a = (self.lower_limit - self.mean) / self.sigma
         b = (self.upper_limit - self.mean) / self.sigma
         return truncnorm.ppf(x, a=a, b=b, loc=self.mean, scale=self.sigma)
@@ -279,6 +282,8 @@ def sample(self, n_samples: Optional[int] = None) -> np.ndarray:
         -------
         Sample(s) from the truncated Gaussian distribution.
         """
+        from scipy.stats import truncnorm
+
         a, b = (self.lower_limit - self.mean) / self.sigma, (self.upper_limit - self.mean) / self.sigma
         shape = (n_samples,) + self.shape if n_samples else self.shape
         samples = truncnorm.rvs(a, b, loc=self.mean, scale=self.sigma, size=shape)
@@ -404,6 +409,8 @@ def value_for(self, unit: float) -> float:
         >>> prior = af.TruncatedNormalMessage(mean=1.0, sigma=2.0, lower_limit=0.0, upper_limit=2.0)
         >>> physical_value = prior.value_for(unit=0.5)
         """
+        from scipy.stats import norm
+
         # Standardized truncation bounds
         a = (self.lower_limit - self.mean) / self.sigma
         b = (self.upper_limit - self.mean) / self.sigma
@@ -434,6 +441,8 @@ def log_prior_from_value(self, value: float) -> float:
         -------
         The log prior probability of the given value, or -inf if outside truncation bounds.
         """
+        from scipy.stats import norm
+
         # Check truncation bounds
         if not (self.lower_limit <= value <= self.upper_limit):
             return -np.inf
@@ -560,6 +569,8 @@ def sigma(self) -> float:
         -------
         The truncated Gaussian standard deviation σ.
         """
+        from scipy.stats import truncnorm
+
         precision = -2 * self.parameters[1]
         if precision <= 0 or np.isinf(precision) or np.isnan(precision):
             # Degenerate or invalid precision: fallback to NaN or zero
@@ -586,6 +597,8 @@ def mean(self) -> float:
         -------
         The truncated Gaussian mean μ.
         """
+        from scipy.stats import truncnorm
+
         precision = -2 * self.parameters[1]
         if precision <= 0 or np.isinf(precision) or np.isnan(precision):
             # Degenerate or invalid precision: fallback to NaN or zero
diff --git a/autofit/messages/utils.py b/autofit/messages/utils.py
index 835eb6cbe..318f7b274 100644
--- a/autofit/messages/utils.py
+++ b/autofit/messages/utils.py
@@ -1,5 +1,5 @@
 import numpy as np
-from scipy import special
+
 
 
 def psilog(x: np.ndarray) -> np.ndarray:
@@ -7,6 +7,8 @@ def psilog(x: np.ndarray) -> np.ndarray:
     psi(x) - log(x)
     needed when calculating E[ln[x]] when x is a Gamma variable
     """
+    from scipy import special
+
     return special.digamma(x) - np.log(x)
 
 
@@ -19,6 +21,8 @@ def grad_psilog(x: np.ndarray) -> np.ndarray:
     see:
     invpsilog(c)
     """
+    from scipy import special
+
     return special.polygamma(1, x) - 1 / x
 
 
diff --git a/autofit/non_linear/fitness.py b/autofit/non_linear/fitness.py
index 7b1774631..eb56b42f7 100644
--- a/autofit/non_linear/fitness.py
+++ b/autofit/non_linear/fitness.py
@@ -125,25 +125,25 @@ def call(self, parameters):
         -------
         The figure of merit returned to the non-linear search, which is either the log likelihood or log posterior.
         """
-        try:
-            instance = self.model.instance_from_vector(vector=parameters)
-            log_likelihood = self.analysis.log_likelihood_function(instance=instance)
-            log_likelihood = np.where(np.isnan(log_likelihood), self.resample_figure_of_merit, log_likelihood)
 
-        except exc.FitException:
-            return self.resample_figure_of_merit
+        # Get instance from model
+        instance = self.model.instance_from_vector(vector=parameters)
+
+        # Evaluate log likelihood (must be side-effect free and exception-free)
+        log_likelihood = self.analysis.log_likelihood_function(instance=instance)
+
+        # Penalize NaNs in the log-likelihood
+        log_likelihood = np.where(np.isnan(log_likelihood), self.resample_figure_of_merit, log_likelihood)
 
+        # Determine final figure of merit
         if self.fom_is_log_likelihood:
             figure_of_merit = log_likelihood
         else:
-            log_prior_list = self.model.log_prior_list_from_vector(vector=parameters)
-            figure_of_merit = log_likelihood + sum(log_prior_list)
-
-        if self.store_history:
-
-            self.parameters_history_list.append(parameters)
-            self.log_likelihood_history_list.append(log_likelihood)
+            # Ensure prior list is compatible with JAX (must return a JAX array, not list)
+            log_prior_array = np.array(self.model.log_prior_list_from_vector(vector=parameters))
+            figure_of_merit = log_likelihood + np.sum(log_prior_array)
 
+        # Convert to chi-squared scale if requested
         if self.convert_to_chi_squared:
             figure_of_merit *= -2.0
 
diff --git a/autofit/non_linear/plot/mcmc_plotters.py b/autofit/non_linear/plot/mcmc_plotters.py
index ac9e9a122..a3400c4d7 100644
--- a/autofit/non_linear/plot/mcmc_plotters.py
+++ b/autofit/non_linear/plot/mcmc_plotters.py
@@ -1,4 +1,3 @@
-import matplotlib.pyplot as plt
 import logging
 
 from autofit.non_linear.plot.samples_plotters import SamplesPlotter
diff --git a/autofit/non_linear/plot/mle_plotters.py b/autofit/non_linear/plot/mle_plotters.py
index 4e31e8a5d..28a2a22a2 100644
--- a/autofit/non_linear/plot/mle_plotters.py
+++ b/autofit/non_linear/plot/mle_plotters.py
@@ -1,6 +1,4 @@
-import matplotlib.pyplot as plt
 import logging
-import numpy as np
 
 from autofit.non_linear.plot.samples_plotters import SamplesPlotter
 
@@ -34,6 +32,8 @@ def subplot_parameters(self, use_log_y : bool = False, use_last_50_percent : boo
 
         """
 
+        import matplotlib.pyplot as plt
+
         parameter_lists = self.samples.parameters_extract
 
         plt.subplots(self.model.total_free_parameters, 1, figsize=(12, 3 * len(parameter_lists)))
@@ -88,6 +88,8 @@ def log_likelihood_vs_iteration(self, use_log_y : bool = False, use_last_50_perc
             If True, the y-axis is plotted on a log-scale.
         """
 
+        import matplotlib.pyplot as plt
+
         log_likelihood_list = self.samples.log_likelihood_list
         iteration_list = range(len(log_likelihood_list))
 
diff --git a/autofit/non_linear/plot/output.py b/autofit/non_linear/plot/output.py
index 8bf1e20cc..1c9a5daf5 100644
--- a/autofit/non_linear/plot/output.py
+++ b/autofit/non_linear/plot/output.py
@@ -1,4 +1,3 @@
-import matplotlib
 from pathlib import Path
 from os import path
 from typing import List, Optional, Union
@@ -7,6 +6,9 @@
 
 
 def set_backend():
+
+    import matplotlib
+
     backend = conf.get_matplotlib_backend()
 
     if backend not in "default":
@@ -21,7 +23,6 @@ def set_backend():
         matplotlib.use("Agg")
 
 
-import matplotlib.pyplot as plt
 import os
 
 
@@ -91,6 +92,8 @@ def to_figure(
             The 2D array of image to be output, required for outputting the image as a fits file.
         """
 
+        import matplotlib.pyplot as plt
+
         filename = auto_filename if self.filename is None else self.filename
 
         for format in self.format_list:
@@ -109,6 +112,8 @@ def subplot_to_figure(self, auto_filename=None):
         Output a subplot figure, either as an image on the screen or to the hard-disk as a png or fits file.
         """
 
+        import matplotlib.pyplot as plt
+
         filename = auto_filename if self.filename is None else self.filename
 
         for format in self.format_list:
@@ -123,6 +128,9 @@ def subplot_to_figure(self, auto_filename=None):
 
 
     def to_figure_output_mode(self, filename: str):
+
+        import matplotlib.pyplot as plt
+
         global COUNT
 
         try:
diff --git a/autofit/non_linear/plot/samples_plotters.py b/autofit/non_linear/plot/samples_plotters.py
index 885f01ef1..c908dd99f 100644
--- a/autofit/non_linear/plot/samples_plotters.py
+++ b/autofit/non_linear/plot/samples_plotters.py
@@ -1,4 +1,3 @@
-import matplotlib.pyplot as plt
 import numpy as np
 from functools import wraps
 import logging
@@ -70,6 +69,9 @@ def log_posterior_list(self):
         return self.samples.log_posterior_list
 
     def close(self):
+
+        import matplotlib.pyplot as plt
+
         if plt.fignum_exists(num=1):
             plt.clf()
             plt.close()
diff --git a/autofit/non_linear/search/mcmc/emcee/search.py b/autofit/non_linear/search/mcmc/emcee/search.py
index 4aff74468..397932c4a 100644
--- a/autofit/non_linear/search/mcmc/emcee/search.py
+++ b/autofit/non_linear/search/mcmc/emcee/search.py
@@ -1,7 +1,6 @@
 import os
 from typing import Dict, Optional
 
-import emcee
 import numpy as np
 
 from autoconf import conf
@@ -107,6 +106,8 @@ def _fit(self, model: AbstractPriorModel, analysis):
         A result object comprising the Samples object that inclues the maximum log likelihood instance and full
         chains used by the fit.
         """
+        import emcee
+
         fitness = Fitness(
             model=model,
             analysis=analysis,
@@ -315,6 +316,8 @@ def samples_via_internal_from(self, model, search_internal=None):
         )
 
     def auto_correlations_from(self, search_internal=None):
+        import emcee
+
         search_internal = search_internal or self.backend
 
         times = search_internal.get_autocorr_time(tol=0)
@@ -363,12 +366,13 @@ def backend_filename(self):
         return self.paths.search_internal_path / "search_internal.hdf"
 
     @property
-    def backend(self) -> emcee.backends.HDFBackend:
+    def backend(self) -> "emcee.backends.HDFBackend":
         """
         The `Emcee` hdf5 backend, which provides access to all samples, likelihoods, etc. of the non-linear search.
 
         The sampler is described in the "Results" section at https://dynesty.readthedocs.io/en/latest/quickstart.html
         """
+        import emcee
 
         if os.path.isfile(self.backend_filename):
             return emcee.backends.HDFBackend(filename=str(self.backend_filename))
diff --git a/autofit/non_linear/search/mle/bfgs/search.py b/autofit/non_linear/search/mle/bfgs/search.py
index ba116cd29..3a823eac5 100644
--- a/autofit/non_linear/search/mle/bfgs/search.py
+++ b/autofit/non_linear/search/mle/bfgs/search.py
@@ -12,7 +12,6 @@
 from autofit.non_linear.samples.samples import Samples
 
 import copy
-from scipy import optimize
 import numpy as np
 
 
@@ -82,6 +81,8 @@ def _fit(
         A result object comprising the Samples object that inclues the maximum log likelihood instance and full
         chains used by the fit.
         """
+        from scipy import optimize
+
         fitness = Fitness(
             model=model,
             analysis=analysis,
diff --git a/autofit/non_linear/search/nest/dynesty/search/abstract.py b/autofit/non_linear/search/nest/dynesty/search/abstract.py
index 19dc7d624..b96f476aa 100644
--- a/autofit/non_linear/search/nest/dynesty/search/abstract.py
+++ b/autofit/non_linear/search/nest/dynesty/search/abstract.py
@@ -3,7 +3,6 @@
 from typing import Dict, Optional, Tuple, Union
 
 import numpy as np
-from dynesty import NestedSampler, DynamicNestedSampler
 import warnings
 
 from autoconf import conf
@@ -262,7 +261,7 @@ def search_internal(self):
         raise NotImplementedError
 
     def iterations_from(
-        self, search_internal: Union[NestedSampler, DynamicNestedSampler]
+        self, search_internal: "Union[NestedSampler, DynamicNestedSampler]"
     ) -> Tuple[int, int]:
         """
         Returns the next number of iterations that a dynesty call will use and the total number of iterations
@@ -302,7 +301,7 @@ def iterations_from(
         return self.iterations_per_update, int(total_iterations)
 
     def run_search_internal(
-        self, search_internal: Union[NestedSampler, DynamicNestedSampler]
+        self, search_internal: "Union[NestedSampler, DynamicNestedSampler]"
     ):
         """
         Run the Dynesty sampler, which could be either the static of dynamic sampler.
diff --git a/autofit/non_linear/search/nest/dynesty/search/dynamic.py b/autofit/non_linear/search/nest/dynesty/search/dynamic.py
index afbc5f568..989941810 100644
--- a/autofit/non_linear/search/nest/dynesty/search/dynamic.py
+++ b/autofit/non_linear/search/nest/dynesty/search/dynamic.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 from typing import Optional
 
-from dynesty.dynesty import DynamicNestedSampler
 from autofit.mapper.prior_model.abstract import AbstractPriorModel
 
 from .abstract import AbstractDynesty, prior_transform
@@ -67,6 +66,7 @@ def __init__(
 
     @property
     def search_internal(self):
+        from dynesty.dynesty import DynamicNestedSampler
         return DynamicNestedSampler.restore(self.checkpoint_file)
 
     def search_internal_from(
@@ -99,6 +99,7 @@ def search_internal_from(
             The number of CPU's over which multiprocessing is performed, determining how many samples are stored
             in the dynesty queue for samples.
         """
+        from dynesty.dynesty import DynamicNestedSampler
 
         try:
 
diff --git a/autofit/non_linear/search/nest/dynesty/search/static.py b/autofit/non_linear/search/nest/dynesty/search/static.py
index 7beade101..7e637c039 100644
--- a/autofit/non_linear/search/nest/dynesty/search/static.py
+++ b/autofit/non_linear/search/nest/dynesty/search/static.py
@@ -3,10 +3,8 @@
 from pathlib import Path
 from typing import Optional, Union
 
-from dynesty import NestedSampler as StaticSampler
 
 from autofit.database.sqlalchemy_ import sa
-from autofit import jax_wrapper
 
 from autofit.mapper.prior_model.abstract import AbstractPriorModel
 
@@ -78,6 +76,7 @@ def __init__(
 
     @property
     def search_internal(self):
+        from dynesty import NestedSampler as StaticSampler
         return StaticSampler.restore(self.checkpoint_file)
 
     def search_internal_from(
@@ -110,6 +109,7 @@ def search_internal_from(
             The number of CPU's over which multiprocessing is performed, determining how many samples are stored
             in the dynesty queue for samples.
         """
+        from dynesty import NestedSampler as StaticSampler
 
         gradient = fitness.grad if self.use_gradient else None
 
diff --git a/autofit/visualise.py b/autofit/visualise.py
index 68dbc1977..f778cdc51 100644
--- a/autofit/visualise.py
+++ b/autofit/visualise.py
@@ -1,9 +1,5 @@
 from typing import Dict, List
 
-import networkx as nx
-from pyvis.network import Network
-import colorsys
-
 from autoconf import cached_property
 
 from autofit.mapper.prior_model.abstract import AbstractPriorModel
@@ -53,6 +49,8 @@ def generate_n_colors(n: int) -> List[str]:
     """
     Generate n distinct colors.
     """
+    import colorsys
+
     colors = []
     for i in range(n):
         hue = i / n
@@ -77,11 +75,13 @@ def __init__(self, model: AbstractPriorModel):
         """
         self.model = model
 
-    def graph(self) -> nx.DiGraph:
+    def graph(self) -> "nx.DiGraph":
         """
         Generate a graph of the model with just edges. Could
         be superseded by the network method.
         """
+        import networkx as nx
+
         graph = nx.DiGraph()
 
         def add_model(model):
@@ -130,7 +130,7 @@ def colours(self) -> Dict[type, str]:
             )
         }
 
-    def network(self, notebook: bool = False) -> Network:
+    def network(self, notebook: bool = False) -> "Network":
         """
         Generate a network of the model including stylised nodes and edges.
 
@@ -146,6 +146,7 @@ def network(self, notebook: bool = False) -> Network:
         -------
         A network of the model.
         """
+        from pyvis.network import Network
 
         net = Network(
             notebook=notebook,
diff --git a/test_autofit/graphical/conftest.py b/test_autofit/graphical/conftest.py
index 6febe7a29..df5531e03 100644
--- a/test_autofit/graphical/conftest.py
+++ b/test_autofit/graphical/conftest.py
@@ -1,6 +1,5 @@
 import numpy as np
 import pytest
-from scipy import stats
 
 import autofit.mapper.variable
 from autofit import graphical as graph
@@ -18,4 +17,7 @@ def make_x():
 
 @pytest.fixture(name="probit_factor")
 def make_probit_factor(x):
+
+    from scipy import stats
+
     return graph.Factor(stats.norm(loc=0.0, scale=1.0).logcdf, x)