added full use_mixed_precision path

Jammy2211 · Jammy2211 · commit 92809b2fcee6 · 2026-02-07T17:22:21.000Z
diff --git a/autoarray/inversion/inversion/imaging/mapping.py b/autoarray/inversion/inversion/imaging/mapping.py
@@ -98,6 +98,7 @@ def curvature_matrix(self):
             settings=self.settings,
             add_to_curvature_diag=True,
             no_regularization_index_list=self.no_regularization_index_list,
+            use_mixed_precision=self.settings.use_mixed_precision,
             xp=self._xp,
         )
 
diff --git a/autoarray/inversion/inversion/inversion_util.py b/autoarray/inversion/inversion/inversion_util.py
@@ -78,16 +78,13 @@ def curvature_matrix_mirrored_from(curvature_matrix: np.ndarray, xp=np) -> np.nd
 
 
 def curvature_matrix_via_mapping_matrix_from(
-    mapping_matrix: np.ndarray,
-    noise_map: np.ndarray,
+    mapping_matrix: "np.ndarray",
+    noise_map: "np.ndarray",
     add_to_curvature_diag: bool = False,
     no_regularization_index_list: Optional[List] = None,
-    settings: SettingsInversion = SettingsInversion(),
+    settings: "SettingsInversion" = SettingsInversion(),
+    use_mixed_precision: bool = False,
     xp=np,
-    *,
-    mp_gemm: bool = True,  # mixed precision matmul
-    gemm_dtype=None,  # e.g. xp.float32
-    out_dtype=None,  # e.g. xp.float64
 ) -> np.ndarray:
     """
     Returns the curvature matrix `F` from a blurred mapping matrix `f` and the 1D noise-map $\sigma$
@@ -101,15 +98,26 @@ def curvature_matrix_via_mapping_matrix_from(
     noise_map
         Flattened 1D array of the noise-map used by the inversion during the fit.
     """
-    if gemm_dtype is None:
-        gemm_dtype = xp.float32 if (mp_gemm and xp is not np) else mapping_matrix.dtype
-
-    # form A in chosen dtype (usually float32 on device)
-    A = (mapping_matrix / noise_map[:, None]).astype(gemm_dtype)
-
-    curvature_matrix = xp.dot(A.T, A)  # float32 GEMM if A is float32
-
-    if add_to_curvature_diag and len(no_regularization_index_list) > 0:
+    # NumPy path: keep it simple + stable
+    if xp is np:
+        A = mapping_matrix / noise_map[:, None]
+        curvature_matrix = xp.dot(A.T, A)
+    else:
+        # Choose compute dtype
+
+        compute_dtype = xp.float32 if use_mixed_precision else xp.float64
+        out_dtype = xp.float64  # always return float64 for downstream stability
+
+        A = mapping_matrix
+        w = (1.0 / noise_map).astype(compute_dtype)
+        A = A * w[:, None]
+        curvature_matrix = xp.dot(A.T, A).astype(out_dtype)
+
+    if (
+        add_to_curvature_diag
+        and no_regularization_index_list
+        and len(no_regularization_index_list) > 0
+    ):
         curvature_matrix = curvature_matrix_with_added_to_diag_from(
             curvature_matrix=curvature_matrix,
             value=settings.no_regularization_add_to_curvature_diag_value,
diff --git a/autoarray/inversion/inversion/settings.py b/autoarray/inversion/inversion/settings.py
@@ -10,7 +10,7 @@
 class SettingsInversion:
     def __init__(
         self,
-        use_mixed_precision : bool = False,
+        use_mixed_precision: bool = False,
         use_positive_only_solver: Optional[bool] = None,
         positive_only_uses_p_initial: Optional[bool] = None,
         use_border_relocator: Optional[bool] = None,
diff --git a/autoarray/inversion/linear_obj/func_list.py b/autoarray/inversion/linear_obj/func_list.py
@@ -7,6 +7,7 @@
 from autoarray.inversion.linear_obj.neighbors import Neighbors
 from autoarray.inversion.linear_obj.unique_mappings import UniqueMappings
 from autoarray.inversion.regularization.abstract import AbstractRegularization
+from autoarray.inversion.inversion.settings import SettingsInversion
 from autoarray.type import Grid1D2DLike
 
 
@@ -15,6 +16,7 @@ def __init__(
         self,
         grid: Grid1D2DLike,
         regularization: Optional[AbstractRegularization],
+        settings=SettingsInversion(),
         xp=np,
     ):
         """
@@ -45,6 +47,7 @@ def __init__(
         super().__init__(regularization=regularization, xp=xp)
 
         self.grid = grid
+        self.settings = settings
 
     @cached_property
     def neighbors(self) -> Neighbors:
diff --git a/autoarray/inversion/pixelization/mappers/abstract.py b/autoarray/inversion/pixelization/mappers/abstract.py
@@ -11,6 +11,7 @@
 from autoarray.inversion.pixelization.border_relocator import BorderRelocator
 from autoarray.inversion.pixelization.mappers.mapper_grids import MapperGrids
 from autoarray.inversion.regularization.abstract import AbstractRegularization
+from autoarray.inversion.inversion.settings import SettingsInversion
 from autoarray.structures.arrays.uniform_2d import Array2D
 from autoarray.structures.grids.uniform_2d import Grid2D
 from autoarray.structures.mesh.abstract_2d import Abstract2DMesh
@@ -25,6 +26,7 @@ def __init__(
         mapper_grids: MapperGrids,
         regularization: Optional[AbstractRegularization],
         border_relocator: BorderRelocator,
+        settings: SettingsInversion = SettingsInversion(),
         preloads=None,
         xp=np,
     ):
@@ -90,6 +92,7 @@ def __init__(
         self.border_relocator = border_relocator
         self.mapper_grids = mapper_grids
         self.preloads = preloads
+        self.settings = settings
 
     @property
     def params(self) -> int:
@@ -265,6 +268,7 @@ def mapping_matrix(self) -> np.ndarray:
             total_mask_pixels=self.over_sampler.mask.pixels_in_mask,
             slim_index_for_sub_slim_index=self.slim_index_for_sub_slim_index,
             sub_fraction=self.over_sampler.sub_fraction.array,
+            use_mixed_precision=self.settings.use_mixed_precision,
             xp=self._xp,
         )
 
diff --git a/autoarray/inversion/pixelization/mappers/factory.py b/autoarray/inversion/pixelization/mappers/factory.py
@@ -4,6 +4,7 @@
 from autoarray.inversion.pixelization.mappers.mapper_grids import MapperGrids
 from autoarray.inversion.pixelization.border_relocator import BorderRelocator
 from autoarray.inversion.regularization.abstract import AbstractRegularization
+from autoarray.inversion.inversion.settings import SettingsInversion
 from autoarray.structures.mesh.rectangular_2d import Mesh2DRectangular
 from autoarray.structures.mesh.rectangular_2d_uniform import Mesh2DRectangularUniform
 from autoarray.structures.mesh.delaunay_2d import Mesh2DDelaunay
@@ -13,6 +14,7 @@ def mapper_from(
     mapper_grids: MapperGrids,
     regularization: Optional[AbstractRegularization],
     border_relocator: Optional[BorderRelocator] = None,
+    settings=SettingsInversion(),
     preloads=None,
     xp=np,
 ):
@@ -53,13 +55,17 @@ def mapper_from(
             mapper_grids=mapper_grids,
             border_relocator=border_relocator,
             regularization=regularization,
+            settings=settings,
+            preloads=preloads,
             xp=xp,
         )
     elif isinstance(mapper_grids.source_plane_mesh_grid, Mesh2DRectangular):
         return MapperRectangular(
             mapper_grids=mapper_grids,
             border_relocator=border_relocator,
             regularization=regularization,
+            settings=settings,
+            preloads=preloads,
             xp=xp,
         )
     elif isinstance(mapper_grids.source_plane_mesh_grid, Mesh2DDelaunay):
diff --git a/autoarray/inversion/pixelization/mappers/mapper_util.py b/autoarray/inversion/pixelization/mappers/mapper_util.py
@@ -548,6 +548,7 @@ def mapping_matrix_from(
     total_mask_pixels: int,
     slim_index_for_sub_slim_index: np.ndarray,
     sub_fraction: np.ndarray,
+    use_mixed_precision: bool = False,
     xp=np,
 ) -> np.ndarray:
     """
@@ -621,39 +622,56 @@ def mapping_matrix_from(
     sub_fraction
         The fractional area each sub-pixel takes up in a pixel.
     """
+
     M_sub, B = pix_indexes_for_sub_slim_index.shape
-    M = total_mask_pixels
-    S = pixels
+    M = int(total_mask_pixels)
+    S = int(pixels)
+
+    # Indices always int32
+    pix_idx = xp.asarray(pix_indexes_for_sub_slim_index, dtype=xp.int32)
+    pix_size = xp.asarray(pix_size_for_sub_slim_index, dtype=xp.int32)
+    slim_parent = xp.asarray(slim_index_for_sub_slim_index, dtype=xp.int32)
+
+    # Everything else computed in float64
+    w64 = xp.asarray(pix_weights_for_sub_slim_index, dtype=xp.float64)
+    frac64 = xp.asarray(sub_fraction, dtype=xp.float64)
+
+    # Output dtype only (big allocation)
+    out_dtype = xp.float32 if use_mixed_precision else xp.float64
 
     # 1) Flatten
-    flat_pixidx = pix_indexes_for_sub_slim_index.reshape(-1)  # (M_sub*B,)
-    flat_w = pix_weights_for_sub_slim_index.reshape(-1)  # (M_sub*B,)
-    flat_parent = xp.repeat(slim_index_for_sub_slim_index, B)  # (M_sub*B,)
-    flat_count = xp.repeat(pix_size_for_sub_slim_index, B)  # (M_sub*B,)
+    flat_pixidx = pix_idx.reshape(-1)  # (M_sub*B,)
+    flat_w = w64.reshape(-1)  # float64
+    flat_parent = xp.repeat(slim_parent, B)  # int32
+    flat_count = xp.repeat(pix_size, B)  # int32
 
-    # 2) Build valid mask: k < pix_size[i]
-    k = xp.tile(xp.arange(B), M_sub)  # (M_sub*B,)
-    valid = k < flat_count  # (M_sub*B,)
+    # 2) valid mask: k < pix_size[i]
+    k = xp.tile(xp.arange(B, dtype=xp.int32), M_sub)
+    valid = k < flat_count
 
-    # 3) Zero out invalid weights
-    flat_w = flat_w * valid.astype(flat_w.dtype)
+    # 3) Zero out invalid weights (float64)
+    flat_w = flat_w * valid.astype(xp.float64)
 
     # 4) Redirect -1 indices to extra bin S
     OUT = S
     flat_pixidx = xp.where(flat_pixidx < 0, OUT, flat_pixidx)
 
-    # 5) Multiply by sub_fraction of the slim row
-    flat_frac = xp.take(sub_fraction, flat_parent, axis=0)  # (M_sub*B,)
-    flat_contrib = flat_w * flat_frac  # (M_sub*B,)
+    # 5) Multiply by sub_fraction of the slim row (float64)
+    flat_frac = xp.take(frac64, flat_parent, axis=0)
+    flat_contrib64 = flat_w * flat_frac
+
+    # 6) Scatter into (M × (S+1)) (destination float32 or float64)
+    mat = xp.zeros((M, S + 1), dtype=out_dtype)
+
+    # Cast only at the write (keeps upstream math float64)
+    flat_contrib_out = flat_contrib64.astype(out_dtype)
 
-    # 6) Scatter into (M × (S+1)), summing duplicates
-    mat = xp.zeros((M, S + 1), dtype=flat_contrib.dtype)
     if xp.__name__.startswith("jax"):
-        mat = mat.at[flat_parent, flat_pixidx].add(flat_contrib)
+        mat = mat.at[flat_parent, flat_pixidx].add(flat_contrib_out)
     else:
-        xp.add.at(mat, (flat_parent, flat_pixidx), flat_contrib)
+        xp.add.at(mat, (flat_parent, flat_pixidx), flat_contrib_out)
 
-    # 7) Drop the extra column and return
+    # 7) Drop extra column
     return mat[:, :S]
 
 
diff --git a/autoarray/operators/mock/mock_psf.py b/autoarray/operators/mock/mock_psf.py
@@ -5,5 +5,7 @@ class MockPSF:
     def __init__(self, operated_mapping_matrix=None):
         self.operated_mapping_matrix = operated_mapping_matrix
 
-    def convolved_mapping_matrix_from(self, mapping_matrix, mask, xp=np):
+    def convolved_mapping_matrix_from(
+        self, mapping_matrix, mask, use_mixed_precision=False, xp=np
+    ):
         return self.operated_mapping_matrix
diff --git a/autoarray/structures/arrays/kernel_2d.py b/autoarray/structures/arrays/kernel_2d.py
diff --git a/autoarray/structures/decorators/abstract.py b/autoarray/structures/decorators/abstract.py

Original file line number	Diff line number	Diff line change
`@@ -98,6 +98,7 @@ def curvature_matrix(self):`
`98`	`98`	`settings=self.settings,`
`99`	`99`	`add_to_curvature_diag=True,`
`100`	`100`	`no_regularization_index_list=self.no_regularization_index_list,`
	`101`	`+ use_mixed_precision=self.settings.use_mixed_precision,`
`101`	`102`	`xp=self._xp,`
`102`	`103`	`)`
`103`	`104`