diff --git a/docs/source/contributing/developer_guide.md b/docs/source/contributing/developer_guide.md
index 7820b1f438..b051b8fd2f 100644
--- a/docs/source/contributing/developer_guide.md
+++ b/docs/source/contributing/developer_guide.md
@@ -245,7 +245,7 @@ usually created in order to optimise performance. But getting a
 possible (see also in {ref}`doc `):
 
-.. code:: python
+.. code-block:: python
 
     lognorm = Exp().apply(pm.Normal.dist(0., 1.))
 
@@ -262,7 +262,7 @@ Now, back to ``model.RV(...)`` - things returned from ``model.RV(...)``
 are PyTensor tensor variables, and it is clear from looking at
 ``TransformedRV``:
 
-.. code:: python
+.. code-block:: python
 
     class TransformedRV(TensorVariable):
         ...
@@ -270,7 +270,7 @@ are PyTensor tensor variables, and it is clear from looking at
 as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with
 ``Factor`` as mixin:
 
-.. code:: python
+.. code-block:: python
 
     class FreeRV(Factor, TensorVariable):
         ...
@@ -283,7 +283,7 @@ distribution into a ``TransformedDistribution``, and then ``model.Var`` is
 called again to added the RV associated with the ``TransformedDistribution``
 as a ``FreeRV``:
 
-.. code:: python
+.. code-block:: python
 
     ...
     self.transformed = model.Var(
@@ -295,13 +295,13 @@ only add one ``FreeRV``. In another word, you *cannot* do chain transformation
 by nested applying multiple transforms to a Distribution (however, you can use
 ``Chain`` transformation.
 
-.. code:: python
+.. code-block:: python
 
     z = pm.LogNormal.dist(mu=0., sigma=1., transform=tr.Log)
     z.transform  # ==> pymc.distributions.transforms.Log
 
-.. code:: python
+.. code-block:: python
 
     z2 = Exp().apply(z)
     z2.transform is None  # ==> True
diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py
index b1d188d461..7de2b72e23 100644
--- a/pymc/distributions/discrete.py
+++ b/pymc/distributions/discrete.py
@@ -1357,7 +1357,7 @@ class OrderedProbit:
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # Generate data for a simple 1 dimensional example problem
        n1_c = 300; n2_c = 300; n3_c = 300
diff --git a/pymc/distributions/multivariate.py b/pymc/distributions/multivariate.py
index 4415f5ec25..f76a98546e 100644
--- a/pymc/distributions/multivariate.py
+++ b/pymc/distributions/multivariate.py
@@ -1390,7 +1390,7 @@ class LKJCholeskyCov:
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model() as model:
             # Note that we access the distribution for the standard
@@ -1682,28 +1682,25 @@ class LKJCorr:
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model() as model:
-            # Define the vector of fixed standard deviations
-            sds = 3*np.ones(10)
+            sds = 3 * np.ones(10)
 
-            corr = pm.LKJCorr(
-                'corr', eta=4, n=10, return_matrix=True
-            )
+            corr = pm.LKJCorr("corr", eta=4, n=10, return_matrix=True)
 
             # Define a new MvNormal with the given correlation matrix
-            vals = sds*pm.MvNormal('vals', mu=np.zeros(10), cov=corr, shape=10)
+            vals = sds * pm.MvNormal("vals", mu=np.zeros(10), cov=corr, shape=10)
 
             # Or transform an uncorrelated normal distribution:
-            vals_raw = pm.Normal('vals_raw', shape=10)
+            vals_raw = pm.Normal("vals_raw", shape=10)
             chol = pt.linalg.cholesky(corr)
-            vals = sds*pt.dot(chol,vals_raw)
+            vals = sds * pt.dot(chol, vals_raw)
 
             # The matrix is internally still sampled as a upper triangular vector
             # If you want access to it in matrix form in the trace, add
-            pm.Deterministic('corr_mat', corr)
+            pm.Deterministic("corr_mat", corr)
 
 
     References
@@ -1797,7 +1794,7 @@ class MatrixNormal(Continuous):
     Define a matrixvariate normal variable for given row and column covariance
     matrices.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
         import numpy as np
@@ -1820,16 +1817,20 @@ class MatrixNormal(Continuous):
     constant, both the covariance and scaling could be learned as follows
     (see the docstring of `LKJCholeskyCov` for more information about this)
 
-    .. code:: python
+    .. code-block:: python
 
         # Setup data
-        true_colcov = np.array([[1.0, 0.5, 0.1],
-                                [0.5, 1.0, 0.2],
-                                [0.1, 0.2, 1.0]])
+        true_colcov = np.array(
+            [
+                [1.0, 0.5, 0.1],
+                [0.5, 1.0, 0.2],
+                [0.1, 0.2, 1.0],
+            ]
+        )
         m = 3
         n = true_colcov.shape[0]
         true_scale = 3
-        true_rowcov = np.diag([true_scale**(2*i) for i in range(m)])
+        true_rowcov = np.diag([true_scale ** (2 * i) for i in range(m)])
         mu = np.zeros((m, n))
         true_kron = np.kron(true_rowcov, true_colcov)
         data = np.random.multivariate_normal(mu.flatten(), true_kron)
@@ -1838,13 +1839,12 @@ class MatrixNormal(Continuous):
         with pm.Model() as model:
             # Setup right cholesky matrix
             sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3)
-            colchol,_,_ = pm.LKJCholeskyCov('colchol', n=3, eta=2,sd_dist=sd_dist)
+            colchol, _, _ = pm.LKJCholeskyCov("colchol", n=3, eta=2, sd_dist=sd_dist)
             # Setup left covariance matrix
-            scale = pm.LogNormal('scale', mu=np.log(true_scale), sigma=0.5)
-            rowcov = pt.diag([scale**(2*i) for i in range(m)])
+            scale = pm.LogNormal("scale", mu=np.log(true_scale), sigma=0.5)
+            rowcov = pt.diag([scale ** (2 * i) for i in range(m)])
 
-            vals = pm.MatrixNormal('vals', mu=mu, colchol=colchol, rowcov=rowcov,
-                                   observed=data)
+            vals = pm.MatrixNormal("vals", mu=mu, colchol=colchol, rowcov=rowcov, observed=data)
     """
 
     rv_op = matrixnormal
@@ -2010,15 +2010,15 @@ class KroneckerNormal(Continuous):
     Define a multivariate normal variable with a covariance
     :math:`K = K_1 \otimes K_2`
 
-    .. code:: python
+    .. code-block:: python
 
-        K1 = np.array([[1., 0.5], [0.5, 2]])
-        K2 = np.array([[1., 0.4, 0.2], [0.4, 2, 0.3], [0.2, 0.3, 1]])
+        K1 = np.array([[1.0, 0.5], [0.5, 2]])
+        K2 = np.array([[1.0, 0.4, 0.2], [0.4, 2, 0.3], [0.2, 0.3, 1]])
         covs = [K1, K2]
         N = 6
         mu = np.zeros(N)
         with pm.Model() as model:
-            vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, shape=N)
+            vals = pm.KroneckerNormal("vals", mu=mu, covs=covs, shape=N)
 
     Efficiency gains are made by cholesky decomposing :math:`K_1` and
     :math:`K_2` individually rather than the larger :math:`K` matrix. Although
@@ -2026,14 +2026,14 @@ class KroneckerNormal(Continuous):
     number of submatrices can be combined in this way. Choleskys and
     eigendecompositions can be provided instead
 
-    .. code:: python
+    .. code-block:: python
 
         chols = [np.linalg.cholesky(Ki) for Ki in covs]
        evds = [np.linalg.eigh(Ki) for Ki in covs]
        with pm.Model() as model:
-            vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, shape=N)
+            vals2 = pm.KroneckerNormal("vals2", mu=mu, chols=chols, shape=N)
             # or
-            vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, shape=N)
+            vals3 = pm.KroneckerNormal("vals3", mu=mu, evds=evds, shape=N)
 
     neither of which will be converted. Diagonal noise can also be added to
     the covariance matrix, :math:`K = K_1 \otimes K_2 + \sigma^2 I_N`.
@@ -2042,13 +2042,13 @@ class KroneckerNormal(Continuous):
     utilizing eigendecompositons of the submatrices behind the scenes [1].
     Thus,
 
-    .. code:: python
+    .. code-block:: python
 
         sigma = 0.1
         with pm.Model() as noise_model:
-            vals = pm.KroneckerNormal('vals', mu=mu, covs=covs, sigma=sigma, shape=N)
-            vals2 = pm.KroneckerNormal('vals2', mu=mu, chols=chols, sigma=sigma, shape=N)
-            vals3 = pm.KroneckerNormal('vals3', mu=mu, evds=evds, sigma=sigma, shape=N)
+            vals = pm.KroneckerNormal("vals", mu=mu, covs=covs, sigma=sigma, shape=N)
+            vals2 = pm.KroneckerNormal("vals2", mu=mu, chols=chols, sigma=sigma, shape=N)
+            vals3 = pm.KroneckerNormal("vals3", mu=mu, evds=evds, sigma=sigma, shape=N)
 
     are identical, with `covs` and `chols` each converted to
     eigendecompositions.
diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index a029e1e403..0df212aea1 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -956,7 +956,7 @@ class WrappedPeriodic(Covariance):
     In order to construct a kernel equivalent to the `Periodic` kernel you
     can do the following (though using `Periodic` will likely be a bit faster):
 
-    .. code:: python
+    .. code-block:: python
 
         exp_quad = pm.gp.cov.ExpQuad(1, ls=0.5)
         cov = pm.gp.cov.WrappedPeriodic(exp_quad, period=5)
diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index ee198d1e2e..45f45b3e3a 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -117,7 +117,7 @@ class Latent(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # A one dimensional column vector of inputs.
         X = np.linspace(0, 1, 10)[:, None]
@@ -439,7 +439,7 @@ class Marginal(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # A one dimensional column vector of inputs.
         X = np.linspace(0, 1, 10)[:, None]
@@ -719,7 +719,7 @@ class MarginalApprox(Marginal):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # A one dimensional column vector of inputs.
         X = np.linspace(0, 1, 10)[:, None]
@@ -963,7 +963,7 @@ class LatentKron(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # One dimensional column vectors of inputs
        X1 = np.linspace(0, 1, 10)[:, None]
@@ -1066,7 +1066,7 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs):
         1, and 4, respectively, then `Xnew` must have 7 columns and a
         covariance between the prediction points
 
-        .. code:: python
+        .. code-block:: python
 
             cov_func(Xnew) = cov_func1(Xnew[:, :2]) * cov_func1(Xnew[:, 2:3]) * cov_func1(Xnew[:, 3:])
 
@@ -1118,13 +1118,13 @@ class MarginalKron(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # One dimensional column vectors of inputs
        X1 = np.linspace(0, 1, 10)[:, None]
        X2 = np.linspace(0, 2, 5)[:, None]
        Xs = [X1, X2]
-        y = np.random.randn(len(X1)*len(X2))  # toy data
+        y = np.random.randn(len(X1) * len(X2))  # toy data
        with pm.Model() as model:
            # Specify the covariance functions for each Xi
            cov_func1 = pm.gp.cov.ExpQuad(1, ls=0.1)  # Must accept X1 without error
@@ -1266,7 +1266,7 @@ def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs):
         1, and 4, respectively, then `Xnew` must have 7 columns and a
         covariance between the prediction points
 
-        .. code:: python
+        .. code-block:: python
 
             cov_func(Xnew) = cov_func1(Xnew[:, :2]) * cov_func1(Xnew[:, 2:3]) * cov_func1(Xnew[:, 3:])
 
diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py
index 9bad17ce87..cf889aadd1 100644
--- a/pymc/gp/hsgp_approx.py
+++ b/pymc/gp/hsgp_approx.py
@@ -215,7 +215,7 @@ class HSGP(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # A three dimensional column vector of inputs.
         X = np.random.rand(100, 3)
@@ -357,7 +357,7 @@ def prior_linearized(self, X: TensorLike):
 
         Examples
         --------
-        .. code:: python
+        .. code-block:: python
 
             # A one dimensional column vector of inputs.
             X = np.linspace(0, 10, 100)[:, None]
@@ -544,7 +544,7 @@ class HSGPPeriodic(Base):
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         # A three dimensional column vector of inputs.
         X = np.random.rand(100, 3)
@@ -640,7 +640,7 @@ def prior_linearized(self, X: TensorLike):
 
         Examples
        --------
-        .. code:: python
+        .. code-block:: python
 
            # A one dimensional column vector of inputs.
            X = np.linspace(0, 10, 100)[:, None]
@@ -665,8 +665,7 @@ def prior_linearized(self, X: TensorLike):
 
            # The (non-centered) GP approximation is given by
            f = pm.Deterministic(
-                "f",
-                phi_cos @ (psd * beta[:m]) + phi_sin[..., 1:] @ (psd[1:] * beta[m:])
+                "f", phi_cos @ (psd * beta[:m]) + phi_sin[..., 1:] @ (psd[1:] * beta[m:])
            )
            ...
 
diff --git a/pymc/model/core.py b/pymc/model/core.py
index 5ec5c0ec34..26c5f15184 100644
--- a/pymc/model/core.py
+++ b/pymc/model/core.py
@@ -2204,7 +2204,7 @@ def Deterministic(name, var, model=None, dims=None):
     Indeed, PyMC allows for arbitrary combinations of random variables, for
     example in the case of a logistic regression
 
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model():
             alpha = pm.Normal("alpha", 0, 1)
@@ -2218,7 +2218,7 @@ def Deterministic(name, var, model=None, dims=None):
     ``p`` is important and one would like to track its value in the sampling
     trace, then one can use a deterministic node:
 
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model():
             alpha = pm.Normal("alpha", 0, 1)
@@ -2294,7 +2294,7 @@ def Potential(name, var: TensorVariable, model=None, dims=None) -> TensorVariabl
     The statement ``pm.math.log(pm.math.switch(constraint, 0, 1))`` adds either 0 or -inf to the model logp,
     depending on whether the constraint is met. During sampling, any proposals where ``x`` is negative will be rejected.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
@@ -2308,7 +2308,7 @@ def Potential(name, var: TensorVariable, model=None, dims=None) -> TensorVariabl
     Instead, with a soft constraint like ``pm.math.log(pm.math.switch(constraint, 1, 0.5))``,
     the sampler will be less likely, but not forbidden, from accepting negative values for `x`.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
@@ -2316,33 +2316,35 @@ def Potential(name, var: TensorVariable, model=None, dims=None) -> TensorVariabl
             x = pm.Normal("x", mu=0, sigma=1)
 
             constraint = x >= 0
-            potential = pm.Potential("x_constraint", pm.math.log(pm.math.switch(constraint, 1.0, 0.5)))
+            potential = pm.Potential(
+                "x_constraint", pm.math.log(pm.math.switch(constraint, 1.0, 0.5))
+            )
 
     A Potential term can depend on multiple variables.
     In the following example, the ``soft_sum_constraint`` potential encourages ``x`` and ``y`` to have a small sum.
     The more the sum deviates from zero, the more negative the penalty value of ``(-((x + y)**2))``.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
         with pm.Model() as model:
             x = pm.Normal("x", mu=0, sigma=10)
             y = pm.Normal("y", mu=0, sigma=10)
-            soft_sum_constraint = pm.Potential("soft_sum_constraint", -((x + y)**2))
+            soft_sum_constraint = pm.Potential("soft_sum_constraint", -((x + y) ** 2))
 
     A Potential can be used to define a specific prior term.
    The following example imposes a power law prior on `max_items`,
    under the form ``log(1/max_items)``, which penalizes very large values of `max_items`.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
         with pm.Model() as model:
            # p(max_items) = 1 / max_items
            max_items = pm.Uniform("max_items", lower=1, upper=100)
-            pm.Potential("power_prior", pm.math.log(1/max_items))
+            pm.Potential("power_prior", pm.math.log(1 / max_items))
 
            n_items = pm.Uniform("n_items", lower=1, upper=max_items, observed=60)
 
@@ -2350,13 +2352,15 @@ def Potential(name, var: TensorVariable, model=None, dims=None) -> TensorVariabl
     In the following example, a normal likelihood term is added to fixed data.
     The same result would be obtained by using an observed `Normal` variable.
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
+
         def normal_logp(value, mu, sigma):
             return -0.5 * ((value - mu) / sigma) ** 2 - pm.math.log(sigma)
 
+
         with pm.Model() as model:
             mu = pm.Normal("x")
             sigma = pm.HalfNormal("sigma")
diff --git a/pymc/pytensorf.py b/pymc/pytensorf.py
index 3f90a69ff7..804967a2c6 100644
--- a/pymc/pytensorf.py
+++ b/pymc/pytensorf.py
@@ -722,17 +722,19 @@ def collect_default_updates(
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
         from pytensor.scan import scan
         from pymc.pytensorf import collect_default_updates
 
+
         def scan_step(xtm1):
             x = xtm1 + pm.Normal.dist()
             x_update = collect_default_updates([x])
             return x, x_update
 
+
         x0 = pm.Normal.dist()
         xs, updates = scan(
diff --git a/pymc/sampling/deterministic.py b/pymc/sampling/deterministic.py
index 85ce0a1164..ea21273503 100644
--- a/pymc/sampling/deterministic.py
+++ b/pymc/sampling/deterministic.py
@@ -61,7 +61,7 @@ def compute_deterministics(
 
     Examples
     --------
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
diff --git a/pymc/sampling/forward.py b/pymc/sampling/forward.py
index c53dde90f2..d1703e82a3 100644
--- a/pymc/sampling/forward.py
+++ b/pymc/sampling/forward.py
@@ -603,7 +603,7 @@ def sample_posterior_predictive(
     The most common use of `sample_posterior_predictive` is to perform posterior predictive checks (in-sample predictions)
     and new model predictions (out-of-sample predictions).
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
@@ -632,7 +632,7 @@ def sample_posterior_predictive(
     For the last example we could have created a new predictions model.
     Note that we have to specify `var_names` explicitly, because the newly defined `y` was not given any observations:
 
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model(coords_mutable={"trial": [3, 4]}) as predictions_model:
             x = pm.MutableData("x", [-2, 2], dims=["trial"])
@@ -640,14 +640,16 @@ def sample_posterior_predictive(
             noise = pm.HalfNormal("noise")
             y = pm.Normal("y", mu=x * beta, sigma=noise, dims=["trial"])
 
-        predictions = pm.sample_posterior_predictive(idata, var_names=["y"], predictions=True).predictions
+        predictions = pm.sample_posterior_predictive(
+            idata, var_names=["y"], predictions=True
+        ).predictions
 
     The new model may even have a different structure and unobserved variables that don't exist in the trace.
     These variables will also be forward sampled. In the following example we added a new ``extra_noise``
     variable between the inferred posterior ``noise`` and the new StudentT observational distribution ``y``:
 
-    .. code:: python
+    .. code-block:: python
 
         with pm.Model(coords_mutable={"trial": [3, 4]}) as distinct_predictions_model:
             x = pm.MutableData("x", [-2, 2], dims=["trial"])
@@ -656,7 +658,9 @@ def sample_posterior_predictive(
             extra_noise = pm.HalfNormal("extra_noise", sigma=noise)
             y = pm.StudentT("y", nu=4, mu=x * beta, sigma=extra_noise, dims=["trial"])
 
-        predictions = pm.sample_posterior_predictive(idata, var_names=["y"], predictions=True).predictions
+        predictions = pm.sample_posterior_predictive(
+            idata, var_names=["y"], predictions=True
+        ).predictions
 
     For more about out-of-model predictions, see this `blog post `_.
 
@@ -682,7 +686,7 @@ def sample_posterior_predictive(
 
     The following code block explores how the behavior changes with different `var_names`:
 
-    .. code:: python
+    .. code-block:: python
 
         from logging import getLogger
         import pymc as pm
@@ -705,7 +709,7 @@ def sample_posterior_predictive(
     Default behavior. Generate samples of ``obs``, conditioned on the posterior samples of ``z`` found in the trace.
     These are often referred to as posterior predictive samples in the literature:
 
-    .. code:: python
+    .. code-block:: python
 
         with model:
             pm.sample_posterior_predictive(idata, var_names=["obs"], **kwargs)
@@ -782,7 +786,7 @@ def sample_posterior_predictive(
 
     You can manipulate the InferenceData to control the number of samples
 
-    .. code:: python
+    .. code-block:: python
 
         import pymc as pm
 
@@ -792,7 +796,7 @@ def sample_posterior_predictive(
 
     Generate 1 posterior predictive sample for every 5 posterior samples.
 
-    .. code:: python
+    .. code-block:: python
 
         thinned_idata = idata.sel(draw=slice(None, None, 5))
         with model:
@@ -801,7 +805,7 @@ def sample_posterior_predictive(
 
     Generate 5 posterior predictive samples for every posterior sample.
 
-    .. code:: python
+    .. code-block:: python
 
         expanded_idata = idata.copy()
         expanded_idata.posterior = idata.posterior.expand_dims(pred_id=5)
diff --git a/pymc/sampling/mcmc.py b/pymc/sampling/mcmc.py
index 542797caa8..de341c68cd 100644
--- a/pymc/sampling/mcmc.py
+++ b/pymc/sampling/mcmc.py
@@ -669,7 +669,7 @@ def sample(
 
     Examples
     --------
-    .. code:: ipython
+    .. code-block:: ipython
 
         In [1]: import pymc as pm
            ...: n = 100
diff --git a/pymc/variational/opvi.py b/pymc/variational/opvi.py
index deedfc8d9f..3cd5cc3dcf 100644
--- a/pymc/variational/opvi.py
+++ b/pymc/variational/opvi.py
@@ -628,7 +628,7 @@ class Group(WithMemoization):
     Passing the correct `vfam` (Variational FAMily) argument you'll tell what
     parametrization is desired for the group. This helps not to overload code with lots of classes.
 
-    .. code:: python
+    .. code-block:: python
 
         >>> group = Group([latent1, latent2], vfam="mean_field")
 
@@ -639,7 +639,7 @@ class Group(WithMemoization):
     passing both `vfam` and `params` is prohibited. Partial parametrization is prohibited by
     design to avoid corner cases and possible problems.
 
-    .. code:: python
+    .. code-block:: python
 
         >>> group = Group([latent3], params=dict(mu=my_mu, rho=my_rho))
 
@@ -663,7 +663,7 @@ class Group(WithMemoization):
     correctly initialized. For those groups which have group equal to `None` it will collect all
     the rest variables not covered by other groups and perform delayed init.
 
-    .. code:: python
+    .. code-block:: python
 
         >>> group_1 = Group([latent1], vfam="fr")  # latent1 has full rank approximation
         >>> group_other = Group(None, vfam="mf")  # other variables have mean field Q
@@ -674,7 +674,7 @@ class Group(WithMemoization):
     When you have created all the groups they need to pass all the groups to :class:`Approximation`.
     It does not accept any other parameter rather than `groups`
 
-    .. code:: python
+    .. code-block:: python
 
         >>> approx = Approximation(my_groups)