Skip to content

Commit fde112c

Browse files
authored
Merge pull request #224 from Jammy2211/feature/documentation
docs: refactor and complete docstrings for autoarray/dataset
2 parents 4ea1320 + be35d1d commit fde112c

File tree

8 files changed

+433
-75
lines changed

8 files changed

+433
-75
lines changed

autoarray/dataset/abstract/dataset.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,34 +134,62 @@ def __init__(
134134

135135
@property
136136
def grid(self):
137+
"""
138+
The primary coordinate grid of the dataset, equivalent to `grids.lp`.
139+
140+
Returns the light-profile `Grid2D` aligned with the centres of all unmasked image pixels.
141+
This is the grid used for the majority of model calculations (e.g. evaluating galaxy light
142+
profiles).
143+
"""
137144
return self.grids.lp
138145

139146
@property
140147
def shape_native(self):
148+
"""
149+
The 2D shape of the dataset image in its native (unmasked) dimensions, e.g. (rows, columns).
150+
"""
141151
return self.mask.shape_native
142152

143153
@property
144154
def shape_slim(self):
155+
"""
156+
The 1D size of the dataset data array after masking, i.e. the number of unmasked pixels.
157+
"""
145158
return self.data.shape_slim
146159

147160
@property
148161
def pixel_scales(self):
162+
"""
163+
The (y, x) arcsecond-to-pixel conversion factor of the dataset, as a (float, float) tuple.
164+
"""
149165
return self.mask.pixel_scales
150166

151167
@property
152168
def mask(self) -> Union[Mask1D, Mask2D]:
169+
"""
170+
The mask of the dataset, derived from the mask of the `data` array.
171+
"""
153172
return self.data.mask
154173

155174
def apply_over_sampling(self):
175+
"""
176+
Apply new over-sampling sizes to the dataset grids.
177+
178+
Subclasses must implement this method to rebuild the `GridsDataset` with updated
179+
`over_sample_size_lp` and `over_sample_size_pixelization` values.
180+
"""
156181
raise NotImplementedError
157182

158183
@property
159184
def signal_to_noise_map(self) -> Structure:
160185
"""
161-
The estimated signal-to-noise_maps mappers of the image.
186+
The signal-to-noise map of the dataset, computed as `data / noise_map`.
162187
163-
Warnings arise when masked native noise-maps are used, whose masked entries are given values of 0.0. We
164-
use the warnings module to suppress these RunTimeWarnings.
188+
Values below zero are clamped to zero, as negative signal-to-noise is not physically
189+
meaningful (it indicates the data is below zero due to noise, not a real negative signal).
190+
191+
RuntimeWarnings from dividing by zero in masked pixels (where the noise map is 0.0) are
192+
suppressed, as these masked values are never used in downstream calculations.
165193
"""
166194
warnings.filterwarnings("ignore")
167195

@@ -172,7 +200,7 @@ def signal_to_noise_map(self) -> Structure:
172200
@property
173201
def signal_to_noise_max(self) -> float:
174202
"""
175-
The maximum value of signal-to-noise_maps in an image pixel in the image's signal-to-noise_maps mappers.
203+
The maximum signal-to-noise value across all unmasked pixels in the dataset.
176204
"""
177205
return np.max(self.signal_to_noise_map)
178206

@@ -185,6 +213,27 @@ def noise_covariance_matrix_inv(self) -> np.ndarray:
185213
return np.linalg.inv(self.noise_covariance_matrix)
186214

187215
def trimmed_after_convolution_from(self, kernel_shape) -> "AbstractDataset":
216+
"""
217+
Return a copy of the dataset with all arrays trimmed to remove the border pixels affected
218+
by PSF convolution edge effects.
219+
220+
When a model image is convolved with a PSF kernel, the pixels at the border of the image
221+
cannot be correctly convolved because they lack sufficient neighbouring pixels. These border
222+
pixels have unreliable values after convolution. This method trims the `data`, `noise_map`,
223+
`over_sample_size_lp` and `over_sample_size_pixelization` arrays by the kernel half-width
224+
on each side, so that only pixels with a complete convolution kernel neighbourhood remain.
225+
226+
Parameters
227+
----------
228+
kernel_shape
229+
The (rows, cols) shape of the PSF convolution kernel. The dataset arrays are trimmed
230+
by `kernel_shape[i] // 2` pixels on each side of each dimension `i`.
231+
232+
Returns
233+
-------
234+
AbstractDataset
235+
A shallow copy of the dataset with all arrays trimmed to the post-convolution shape.
236+
"""
188237
dataset = copy.copy(self)
189238

190239
dataset.data = dataset.data.trimmed_after_convolution_from(

autoarray/dataset/grids.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ def __init__(
4444
4545
Parameters
4646
----------
47+
mask
48+
The 2D mask defining which pixels are included in the dataset. All grids are constructed
49+
to align with the centres of the unmasked pixels in this mask.
4750
over_sample_size_lp
4851
The over sampling scheme size, which divides the grid into a sub grid of smaller pixels when computing
4952
values (e.g. images) from the grid to approximate the 2D line integral of the amount of light that falls
@@ -53,7 +56,8 @@ def __init__(
5356
passed into the calculations performed in the `inversion` module.
5457
psf
5558
The Point Spread Function kernel of the image which accounts for diffraction due to the telescope optics
56-
via 2D convolution.
59+
via 2D convolution. Required to compute the blurring grid; if `None` the blurring grid
60+
is not constructed.
5761
"""
5862
self.mask = mask
5963
self.over_sample_size_lp = over_sample_size_lp
@@ -67,7 +71,17 @@ def __init__(
6771

6872
@property
6973
def lp(self):
74+
"""
75+
The light-profile grid: a `Grid2D` of (y,x) Cartesian coordinates at the centre of every
76+
unmasked image pixel, used for evaluating light profiles and other spatial calculations
77+
during model fitting.
78+
79+
The grid uses `over_sample_size_lp` to perform over-sampled sub-pixel integration,
80+
approximating the 2D line integral of the light profile within each pixel. This grid is
81+
what most model-fitting calculations use (e.g. computing galaxy images).
7082
83+
This property is lazily evaluated and cached on first access.
84+
"""
7185
if self._lp is not None:
7286
return self._lp
7387

@@ -80,6 +94,16 @@ def lp(self):
8094

8195
@property
8296
def pixelization(self):
97+
"""
98+
The pixelization grid: a `Grid2D` of (y,x) Cartesian coordinates at the centre of every
99+
unmasked image pixel, dedicated to pixelized source reconstructions via the `inversion` module.
100+
101+
This grid uses `over_sample_size_pixelization` which can differ from `over_sample_size_lp`,
102+
allowing the pixelization to benefit from a different (e.g. lower) over-sampling resolution
103+
than the light-profile grid.
104+
105+
This property is lazily evaluated and cached on first access.
106+
"""
83107
if self._pixelization is not None:
84108
return self._pixelization
85109

@@ -92,7 +116,18 @@ def pixelization(self):
92116

93117
@property
94118
def blurring(self):
119+
"""
120+
The blurring grid: a `Grid2D` of (y,x) coordinates for pixels that lie just outside the
121+
mask but whose light can be scattered into the unmasked region by the PSF.
122+
123+
When convolving a model image with the PSF, pixels neighbouring the mask boundary can
124+
contribute flux to unmasked pixels. The blurring grid provides the coordinates of these
125+
neighbouring pixels so their light profile values can be evaluated and included in the convolution.
126+
127+
Returns `None` if no PSF was supplied (i.e. no blurring is performed).
95128
129+
This property is lazily evaluated and cached on first access.
130+
"""
96131
if self._blurring is not None:
97132
return self._blurring
98133

@@ -113,7 +148,16 @@ def blurring(self):
113148

114149
@property
115150
def border_relocator(self) -> BorderRelocator:
151+
"""
152+
The border relocator for the pixelization grid.
153+
154+
During pixelized source reconstruction, source-plane coordinates that map outside the
155+
border of the pixelization mesh can cause numerical problems. The `BorderRelocator`
156+
detects these coordinates and relocates them to the border of the mesh, preventing
157+
ill-conditioned inversions.
116158
159+
This property is lazily evaluated and cached on first access.
160+
"""
117161
if self._border_relocator is not None:
118162
return self._border_relocator
119163

@@ -133,6 +177,26 @@ def __init__(
133177
blurring=None,
134178
border_relocator=None,
135179
):
180+
"""
181+
A lightweight plain-data container for pre-constructed dataset grids.
182+
183+
Unlike `GridsDataset`, this class performs no computation — it simply holds grids that have
184+
already been created elsewhere. It is used in test fixtures and mock datasets where a full
185+
`GridsDataset` is not needed, but code that accesses `dataset.grids.lp` or
186+
`dataset.grids.pixelization` still needs to work.
187+
188+
Parameters
189+
----------
190+
lp
191+
The light-profile `Grid2D` used for evaluating light profiles during model fitting.
192+
pixelization
193+
The pixelization `Grid2D` used for source reconstruction via the inversion module.
194+
blurring
195+
The blurring `Grid2D` for pixels outside the mask that contribute flux via PSF convolution.
196+
border_relocator
197+
The `BorderRelocator` used to remap out-of-bounds source-plane coordinates to the
198+
pixelization mesh border.
199+
"""
136200
self.lp = lp
137201
self.pixelization = pixelization
138202
self.blurring = blurring

autoarray/dataset/imaging/dataset.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ def __init__(
6868
psf
6969
The Point Spread Function kernel of the image which accounts for diffraction due to the telescope optics
7070
via 2D convolution.
71+
psf_setup_state
72+
If `True`, a `ConvolverState` is precomputed from the PSF kernel and mask, storing the
73+
convolution pair indices required for efficient 2D convolution. This is set automatically
74+
to `True` when a mask is applied via `apply_mask()` and should not normally be set by hand.
7175
noise_covariance_matrix
7276
A noise-map covariance matrix representing the covariance between noise in every `data` value, which
7377
can be used via a bespoke fit to account for correlated noise in the data.
@@ -236,13 +240,25 @@ def apply_mask(self, mask: Mask2D) -> "Imaging":
236240
Apply a mask to the imaging dataset, whereby the mask is applied to the image data, noise-map and other
237241
quantities one-by-one.
238242
239-
The `apply_mask` function cannot be called multiple times, if it is a mask may remove data, therefore
240-
an exception is raised. If you wish to apply a new mask, reload the dataset from .fits files.
243+
The mask is applied to the `data`, `noise_map`, `over_sample_size_lp` and
244+
`over_sample_size_pixelization` arrays. If a `noise_covariance_matrix` is present, the rows
245+
and columns corresponding to masked pixels are removed so it stays consistent with the
246+
remaining unmasked pixels. The PSF `ConvolverState` is recomputed for the new mask.
247+
248+
The `apply_mask` function cannot be used to unmask pixels that are already masked — a new mask cannot expand the
249+
unmasked region beyond what was already unmasked, as the underlying data has already been
250+
trimmed. An exception is raised if this is attempted. If you wish to apply such a mask,
251+
reload the dataset from .fits files.
241252
242253
Parameters
243254
----------
244255
mask
245256
The 2D mask that is applied to the image.
257+
258+
Returns
259+
-------
260+
Imaging
261+
A new `Imaging` dataset with the mask applied to all arrays.
246262
"""
247263
invalid = np.logical_and(self.data.mask, np.logical_not(mask))
248264

@@ -413,22 +429,27 @@ def apply_sparse_operator(
413429
batch_size: int = 128,
414430
):
415431
"""
432+
Precompute the PSF precision operator for efficient pixelized source reconstruction.
433+
416434
The sparse linear algebra formalism precomputes the convolution of every pair of masked
417-
noise-map values given the PSF (see `inversion.inversion_util`).
435+
noise-map values given the PSF (see `inversion.inversion_util`). This is the imaging
436+
equivalent of the interferometer NUFFT precision matrix.
418437
419-
The `WTilde` object stores these precomputed values in the imaging dataset ensuring they are only computed once
420-
per analysis.
438+
The `ImagingSparseOperator` stores these precomputed values in the imaging dataset ensuring
439+
they are only computed once per analysis, enabling fast repeated likelihood evaluations during
440+
model fitting.
421441
422-
This uses lazy allocation such that the calculation is only performed when the wtilde matrices are used,
423-
ensuring efficient set up of the `Imaging` class.
442+
Parameters
443+
----------
444+
batch_size
445+
The number of image pixels processed per batch when computing the sparse operator via
446+
FFT-based convolution. Reducing this lowers peak memory usage at the cost of speed.
424447
425448
Returns
426449
-------
427-
batch_size
428-
The size of batches used to compute the w-tilde curvature matrix via FFT-based convolution,
429-
which can be reduced to produce lower memory usage at the cost of speed
430-
use_jax
431-
Whether to use JAX to compute W-Tilde. This requires JAX to be installed.
450+
Imaging
451+
A new `Imaging` dataset with the precomputed `ImagingSparseOperator` attached, enabling
452+
efficient pixelized source reconstruction via the sparse linear algebra formalism.
432453
"""
433454

434455
logger.info(
@@ -459,22 +480,20 @@ def apply_sparse_operator_cpu(
459480
self,
460481
):
461482
"""
462-
The sparse linear algebra formalism precomputes the convolution of every pair of masked
463-
noise-map values given the PSF (see `inversion.inversion_util`).
483+
Precompute the PSF precision operator using a CPU-only Numba implementation.
464484
465-
The `WTilde` object stores these precomputed values in the imaging dataset ensuring they are only computed once
466-
per analysis.
485+
This is the CPU alternative to `apply_sparse_operator()`, using Numba JIT compilation
486+
for the convolution loop rather than JAX. It requires `numba` to be installed; an
487+
`InversionException` is raised if it is not available.
467488
468-
This uses lazy allocation such that the calculation is only performed when the wtilde matrices are used,
469-
ensuring efficient set up of the `Imaging` class.
489+
The resulting `SparseLinAlgImagingNumba` operator is stored on the returned `Imaging`
490+
dataset and used by `FitImaging` when performing pixelized source reconstructions.
470491
471492
Returns
472493
-------
473-
batch_size
474-
The size of batches used to compute the w-tilde curvature matrix via FFT-based convolution,
475-
which can be reduced to produce lower memory usage at the cost of speed.
476-
use_jax
477-
Whether to use JAX to compute W-Tilde. This requires JAX to be installed.
494+
Imaging
495+
A new `Imaging` dataset with a precomputed Numba-based sparse operator attached,
496+
enabling efficient pixelized source reconstruction on CPU hardware.
478497
"""
479498
try:
480499
import numba

autoarray/dataset/imaging/simulator.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def __init__(
3333
3434
The simulation of an `Imaging` dataset uses the following steps:
3535
36-
1) Receive as input a raw image of what the data looks like before any simulaiton process is applied.
37-
2) Include dirrection due to the telescope optics by convolve the image with an input Point Spread
36+
1) Receive as input a raw image of what the data looks like before any simulation process is applied.
37+
2) Include diffraction due to the telescope optics by convolving the image with an input Point Spread
3838
Function (PSF).
3939
3) Use input values of the background sky level in every pixel of the image to add the background sky to
4040
the PSF convolved image.
@@ -121,7 +121,18 @@ def via_image_from(
121121
Parameters
122122
----------
123123
image
124-
The 2D image from which the Imaging dataset is simulated.
124+
The 2D image from which the Imaging dataset is simulated (e.g. a model galaxy image
125+
before any telescope effects are applied). Must be an `Array2D`.
126+
over_sample_size
127+
If provided, the returned dataset has its over-sampling updated via `apply_over_sampling`.
128+
Should be an `Array2D` of integer sub-grid sizes with the same shape as the image.
129+
xp
130+
The array module to use for PSF convolution (default `np` for NumPy, or `jnp` for JAX).
131+
132+
Returns
133+
-------
134+
Imaging
135+
The simulated imaging dataset with PSF convolution, noise and background sky applied.
125136
"""
126137

127138
exposure_time_map = Array2D.full(

0 commit comments

Comments
 (0)