@@ -14,7 +14,7 @@ def _compute_vertices(dc_data_per_stack: DualContouringData,
1414 valid_edges_per_surface ) -> tuple [DualContouringData , Any ]:
1515 """Compute vertices for a specific surface."""
1616 valid_edges : np .ndarray = valid_edges_per_surface [surface_i ]
17-
17+
1818 slice_object = _surface_slicer (surface_i , valid_edges_per_surface )
1919
2020 dc_data_per_surface = DualContouringData (
@@ -27,19 +27,10 @@ def _compute_vertices(dc_data_per_stack: DualContouringData,
2727 tree_depth = dc_data_per_stack .tree_depth
2828 )
2929
30- vertices_numpy = _generate_vertices (dc_data_per_surface , debug , slice_object )
30+ vertices_numpy = generate_dual_contouring_vertices (dc_data_per_surface , slice_object , debug )
3131 return dc_data_per_surface , vertices_numpy
3232
3333
34- def _generate_vertices (dc_data_per_surface : DualContouringData , debug : bool , slice_object : slice ) -> Any :
35- vertices : np .ndarray = generate_dual_contouring_vertices (
36- dc_data_per_stack = dc_data_per_surface ,
37- slice_surface = slice_object ,
38- debug = debug
39- )
40- return vertices
41-
42-
4334def generate_dual_contouring_vertices (dc_data_per_stack : DualContouringData , slice_surface : Optional [slice ] = None , debug : bool = False ):
4435 # @off
4536 n_edges = dc_data_per_stack .n_valid_edges
@@ -48,75 +39,77 @@ def generate_dual_contouring_vertices(dc_data_per_stack: DualContouringData, sli
4839 if slice_surface is not None :
4940 xyz_on_edge = dc_data_per_stack .xyz_on_edge [slice_surface ]
5041 gradients = dc_data_per_stack .gradients [slice_surface ]
51- else :
42+ else :
5243 xyz_on_edge = dc_data_per_stack .xyz_on_edge
53- gradients = dc_data_per_stack .gradients
44+ gradients = dc_data_per_stack .gradients
5445 # @on
5546
56- # * Coordinates for all possible edges (12) and 3 dummy edges_normals in the center
57- edges_xyz = BackendTensor .tfnp .zeros ((n_edges , 15 , 3 ), dtype = BackendTensor .dtype_obj )
58- valid_edges = valid_edges > 0
59- edges_xyz [:, :12 ][valid_edges ] = xyz_on_edge
60-
61- # Normals
62- edges_normals = BackendTensor .tfnp .zeros ((n_edges , 15 , 3 ), dtype = BackendTensor .dtype_obj )
63- edges_normals [:, :12 ][valid_edges ] = gradients
64-
65- if OLD_METHOD := False :
66- # ! Moureze model does not seem to work with the new method
67- # ! This branch is all nans at least with ch1_1 model
68- bias_xyz = BackendTensor .tfnp .copy (edges_xyz [:, :12 ])
69- isclose = BackendTensor .tfnp .isclose (bias_xyz , 0 )
70- bias_xyz [isclose ] = BackendTensor .tfnp .nan # zero values to nans
71- mass_points = BackendTensor .tfnp .nanmean (bias_xyz , axis = 1 ) # Mean ignoring nans
72- else : # ? This is actually doing something
73- bias_xyz = BackendTensor .tfnp .copy (edges_xyz [:, :12 ])
74- if BackendTensor .engine_backend == AvailableBackends .PYTORCH :
75- # PyTorch doesn't have masked arrays, so we'll use a different approach
76- mask = bias_xyz == 0
77- # Replace zeros with NaN for mean calculation
78- bias_xyz_masked = BackendTensor .tfnp .where (mask , float ('nan' ), bias_xyz )
79- mass_points = BackendTensor .tfnp .nanmean (bias_xyz_masked , axis = 1 )
80- else :
81- # NumPy approach with masked arrays
82- bias_xyz = BackendTensor .tfnp .to_numpy (bias_xyz )
83- import numpy as np
84- mask = bias_xyz == 0
85- masked_arr = np .ma .masked_array (bias_xyz , mask )
86- mass_points = masked_arr .mean (axis = 1 )
87- mass_points = BackendTensor .tfnp .array (mass_points )
88-
89- edges_xyz [:, 12 ] = mass_points
90- edges_xyz [:, 13 ] = mass_points
91- edges_xyz [:, 14 ] = mass_points
92-
93- BIAS_STRENGTH = 1
94-
95- bias_x = BackendTensor .tfnp .array ([BIAS_STRENGTH , 0 , 0 ], dtype = BackendTensor .dtype_obj )
96- bias_y = BackendTensor .tfnp .array ([0 , BIAS_STRENGTH , 0 ], dtype = BackendTensor .dtype_obj )
97- bias_z = BackendTensor .tfnp .array ([0 , 0 , BIAS_STRENGTH ], dtype = BackendTensor .dtype_obj )
47+ n_valid_voxels = BackendTensor .tfnp .sum (valid_voxels )
48+ edges_xyz = BackendTensor .tfnp .zeros ((n_valid_voxels , 15 , 3 ), dtype = BackendTensor .dtype_obj )
49+ edges_normals = BackendTensor .tfnp .zeros ((n_valid_voxels , 15 , 3 ), dtype = BackendTensor .dtype_obj )
50+
51+ # Filter valid_edges to only valid voxels
52+ valid_edges_bool = valid_edges [valid_voxels ] > 0
53+
54+ # Assign edge data (now only to valid voxels)
55+ edges_xyz [:, :12 ][valid_edges_bool ] = xyz_on_edge
56+ edges_normals [:, :12 ][valid_edges_bool ] = gradients
57+
58+ # Use nanmean directly without intermediate copy
59+ bias_xyz_slice = edges_xyz [:, :12 ]
60+
61+ if BackendTensor .engine_backend == AvailableBackends .PYTORCH :
62+ mask = bias_xyz_slice == 0
63+ bias_xyz_masked = BackendTensor .tfnp .where (mask , float ('nan' ), bias_xyz_slice )
64+ mass_points = BackendTensor .tfnp .nanmean (bias_xyz_masked , axis = 1 )
65+ else :
66+ # NumPy: more efficient approach using sum and count
67+ mask = bias_xyz_slice != 0
68+ sum_valid = (bias_xyz_slice * mask ).sum (axis = 1 )
69+ count_valid = mask .sum (axis = 1 )
70+ # Avoid division by zero
71+ count_valid = BackendTensor .tfnp .maximum (count_valid , 1 )
72+ mass_points = sum_valid / count_valid
9873
99- edges_normals [:, 12 ] = bias_x
100- edges_normals [:, 13 ] = bias_y
101- edges_normals [:, 14 ] = bias_z
74+ # Assign mass points to bias positions
75+ edges_xyz [:, 12 :15 ] = mass_points [:, None , :]
10276
103- # Remove unused voxels
104- edges_xyz = edges_xyz [valid_voxels ]
105- edges_normals = edges_normals [valid_voxels ]
77+ BIAS_STRENGTH = 1
78+ bias_normals = BackendTensor .tfnp .array ([
79+ [BIAS_STRENGTH , 0 , 0 ],
80+ [0 , BIAS_STRENGTH , 0 ],
81+ [0 , 0 , BIAS_STRENGTH ]
82+ ], dtype = BackendTensor .dtype_obj )
83+
84+ edges_normals [:, 12 :15 ] = bias_normals [None , :, :]
10685
107- # Compute LSTSQS in all voxels at the same time
10886 A = edges_normals
109- b = ( A * edges_xyz ). sum ( axis = 2 )
110-
87+
88+ # Compute A^T @ A more efficiently
11189 if BackendTensor .engine_backend == AvailableBackends .PYTORCH :
112- transpose_shape = (2 , 1 , 0 ) # For PyTorch: (batch, dim2, dim1)
90+ # For PyTorch: use bmm (batch matrix multiply) which is optimized
91+ A_T = A .transpose (1 , 2 )
92+ ATA = BackendTensor .tfnp .matmul (A_T , A ) # (n_voxels, 3, 3)
93+
94+ # Compute A^T @ (A * edges_xyz).sum(axis=2)
95+ b = (A * edges_xyz ).sum (axis = 2 ) # (n_voxels, 15)
96+ ATb = BackendTensor .tfnp .matmul (A_T , b .unsqueeze (- 1 )).squeeze (- 1 ) # (n_voxels, 3)
97+
98+ # Solve ATA @ x = ATb
99+ ATA_inv = BackendTensor .tfnp .linalg .inv (ATA )
100+ vertices = BackendTensor .tfnp .matmul (ATA_inv , ATb .unsqueeze (- 1 )).squeeze (- 1 )
113101 else :
114- transpose_shape = (0 , 2 , 1 ) # For NumPy: (batch, dim2, dim1)
115-
116- term1 = BackendTensor .tfnp .einsum ("ijk, ilj->ikl" , A , BackendTensor .tfnp .transpose (A , transpose_shape ))
117- term2 = BackendTensor .tfnp .linalg .inv (term1 )
118- term3 = BackendTensor .tfnp .einsum ("ijk,ik->ij" , BackendTensor .tfnp .transpose (A , transpose_shape ), b )
119- vertices = BackendTensor .tfnp .einsum ("ijk, ij->ik" , term2 , term3 )
102+ # NumPy: use efficient einsum
103+ b = (A * edges_xyz ).sum (axis = 2 )
104+
105+ # A^T @ A
106+ ATA = BackendTensor .tfnp .einsum ("ijk,ijl->ikl" , A , A )
107+ # A^T @ b
108+ ATb = BackendTensor .tfnp .einsum ("ijk,ij->ik" , A , b )
109+
110+ # Solve
111+ ATA_inv = BackendTensor .tfnp .linalg .inv (ATA )
112+ vertices = BackendTensor .tfnp .einsum ("ijk,ij->ik" , ATA_inv , ATb )
120113
121114 if debug :
122115 dc_data_per_stack .bias_center_mass = edges_xyz [:, 12 :].reshape (- 1 , 3 )
0 commit comments