From 65bf9c33d1c0b9d34e69072c6f34179ac92492a8 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 15 Jan 2026 18:03:06 +0000 Subject: [PATCH 01/41] Enable the use of 0th order assembled newton polynomial form of gmres polynomial --- src/Approx_Inverse_Setup.F90 | 6 +- src/Gmres_Poly_Newton.F90 | 168 ++++++++++++++++++++++++++++------- tests/Makefile | 14 +++ 3 files changed, 150 insertions(+), 38 deletions(-) diff --git a/src/Approx_Inverse_Setup.F90 b/src/Approx_Inverse_Setup.F90 index 35559ca..f83f6e4 100644 --- a/src/Approx_Inverse_Setup.F90 +++ b/src/Approx_Inverse_Setup.F90 @@ -303,13 +303,9 @@ subroutine finish_approximate_inverse(matrix, inverse_type, & ! Gmres polynomial with newton basis else if (inverse_type == PFLAREINV_NEWTON .OR. inverse_type == PFLAREINV_NEWTON_NO_EXTRA) then - if (.NOT. matrix_free) then - print *, "GMRES polynomial with Newton basis must be applied matrix-free" - call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) - end if - call build_gmres_polynomial_newton_inverse(matrix, poly_order, & coefficients, & + inverse_sparsity_order, matrix_free, reuse_mat, reuse_submatrices, & inv_matrix) ! Neumann polynomial diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 87e5f8d..4b81ae2 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -674,6 +674,7 @@ end subroutine petsc_matvec_gmres_newton_mf_residual subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & coefficients, & + poly_sparsity_order, matrix_free, reuse_mat, reuse_submatrices, & inv_matrix) ! Builds a matrix which is an approximation to the inverse of a matrix using the @@ -684,7 +685,10 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & type(tMat), intent(in) :: matrix integer, intent(in) :: poly_order PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients - type(tMat), intent(inout) :: inv_matrix + integer, intent(in) :: poly_sparsity_order + logical, intent(in) :: matrix_free + type(tMat), intent(inout) :: reuse_mat, inv_matrix + type(tMat), dimension(:), pointer, intent(inout) :: reuse_submatrices ! Local variables PetscInt :: global_rows, global_cols, local_rows, local_cols @@ -692,6 +696,7 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & PetscErrorCode :: ierr MPIU_Comm :: MPI_COMM_MATRIX type(mat_ctxtype), pointer :: mat_ctx=>null() + logical :: reuse_triggered ! ~~~~~~ @@ -708,47 +713,144 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! ~~~~~~~ ! Just build a matshell that applies our polynomial matrix-free ! ~~~~~~~ + if (matrix_free) then - ! If not re-using - if (PetscObjectIsNull(inv_matrix)) then + ! If not re-using + if (PetscObjectIsNull(inv_matrix)) then - ! Have to dynamically allocate this - allocate(mat_ctx) + ! Have to dynamically allocate this + allocate(mat_ctx) - ! We pass in the polynomial coefficients as the context - call MatCreateShell(MPI_COMM_MATRIX, local_rows, local_cols, global_rows, global_cols, & - mat_ctx, inv_matrix, ierr) - ! The subroutine petsc_matvec_gmres_newton_mf applies the polynomial inverse - call MatShellSetOperation(inv_matrix, & - MATOP_MULT, petsc_matvec_gmres_newton_mf, ierr) + ! We pass in the polynomial coefficients as the context + call MatCreateShell(MPI_COMM_MATRIX, local_rows, local_cols, global_rows, global_cols, & + mat_ctx, inv_matrix, ierr) + ! 
The subroutine petsc_matvec_gmres_newton_mf applies the polynomial inverse + call MatShellSetOperation(inv_matrix, & + MATOP_MULT, petsc_matvec_gmres_newton_mf, ierr) - call MatAssemblyBegin(inv_matrix, MAT_FINAL_ASSEMBLY, ierr) - call MatAssemblyEnd(inv_matrix, MAT_FINAL_ASSEMBLY, ierr) - ! Have to make sure to set the type of vectors the shell creates - call ShellSetVecType(matrix, inv_matrix) + call MatAssemblyBegin(inv_matrix, MAT_FINAL_ASSEMBLY, ierr) + call MatAssemblyEnd(inv_matrix, MAT_FINAL_ASSEMBLY, ierr) + ! Have to make sure to set the type of vectors the shell creates + call ShellSetVecType(matrix, inv_matrix) + + ! Create temporary vectors we use during application + ! Make sure to use matrix here to get the right type (as the shell doesn't know about gpus) + call MatCreateVecs(matrix, mat_ctx%mf_temp_vec(MF_VEC_TEMP), PETSC_NULL_VEC, ierr) + call MatCreateVecs(matrix, mat_ctx%mf_temp_vec(MF_VEC_RHS), mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) + + ! Reusing + else + call MatShellGetContext(inv_matrix, mat_ctx, ierr) + + end if + + mat_ctx%real_roots => coefficients(:, 1) + mat_ctx%imag_roots => coefficients(:, 2) + ! Now because the context reset deallocates the coefficient pointer + ! we want to make sure we don't leak memory, so we use pointer remapping here + ! to turn the 2D coefficient pointer into a 1D that we can store in mat_ctx%coefficients + ! and then the deallocate on mat_ctx%coefficients should still delete all the memory + mat_ctx%coefficients(1:2*size(coefficients,1)) => coefficients(:, :) + ! This is the matrix whose inverse we are applying (just copying the pointer here) + mat_ctx%mat = matrix - ! Create temporary vectors we use during application - ! Make sure to use matrix here to get the right type (as the shell doesn't know about gpus) - call MatCreateVecs(matrix, mat_ctx%mf_temp_vec(MF_VEC_TEMP), PETSC_NULL_VEC, ierr) - call MatCreateVecs(matrix, mat_ctx%mf_temp_vec(MF_VEC_RHS), mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) + ! We're done + return + endif - ! Reusing - else - call MatShellGetContext(inv_matrix, mat_ctx, ierr) + ! ~~~~~~~~~~~~ + ! If we're here then we want an assembled approximate inverse + ! ~~~~~~~~~~~~ + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + + ! If we're zeroth order poly this is trivial as it's just 1/theta_1 I + if (poly_order == 0) then + + call build_gmres_polynomial_newton_inverse_0th_order(matrix, poly_order, coefficients, & + inv_matrix) + + ! Then just return + return + + ! For poly_order 1 and poly_sparsity_order 1 this is easy + else if (poly_order == 1 .AND. poly_sparsity_order == 1) then + + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, inv_matrix) + + ! Flags to prevent reductions when assembling (there are assembles in the shift) + call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) + call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + + ! ! We want 1/theta_1 (I - A/theta_1) + ! ! result = -A_ff/theta_1^2 + ! ! We know if we have only a first order polynomial the first root + ! ! is purely real (as complex roots come in conjugate pairs) + ! call MatScale(inv_matrix, -1d0/(coefficients(1, 1))**2, ierr) + + ! ! result = -A_ff/theta_1^2 + 1/theta_1 I + ! ! Don't need an assemble as there is one called in this + ! call MatShift(inv_matrix, 1d0/coefficients(1, 1), ierr) + + ! 
Then just return + return end if + + + + + end subroutine build_gmres_polynomial_newton_inverse + +! ------------------------------------------------------------------------------------------------------------------------------- + + subroutine build_gmres_polynomial_newton_inverse_0th_order(matrix, poly_order, coefficients, & + inv_matrix) + + ! Specific 0th order inverse + + ! ~~~~~~ + type(tMat), intent(in) :: matrix + integer, intent(in) :: poly_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: inv_matrix + + ! Local variables + integer :: errorcode + PetscErrorCode :: ierr + logical :: reuse_triggered + type(tVec) :: diag_vec + + ! ~~~~~~ + + if (poly_order /= 0) then + print *, "This is a 0th order inverse, but poly_order is not 0" + call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) + end if + + ! Let's create a matrix to represent the inverse diagonal + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + + if (.NOT. reuse_triggered) then + call MatCreateVecs(matrix, PETSC_NULL_VEC, diag_vec, ierr) + else + call MatDiagonalGetDiagonal(inv_matrix, diag_vec, ierr) + end if + + ! Must be real as we only have one coefficient + call VecSet(diag_vec, 1d0/coefficients(1, 1), ierr) + + ! We may be reusing with the same sparsity + if (.NOT. reuse_triggered) then + ! The matrix takes ownership of diag_vec and increases ref counter + call MatCreateDiagonal(diag_vec, inv_matrix, ierr) + call VecDestroy(diag_vec, ierr) + else + call MatDiagonalRestoreDiagonal(inv_matrix, diag_vec, ierr) + end if - mat_ctx%real_roots => coefficients(:, 1) - mat_ctx%imag_roots => coefficients(:, 2) - ! Now because the context reset deallocates the coefficient pointer - ! we want to make sure we don't leak memory, so we use pointer remapping here - ! to turn the 2D coefficient pointer into a 1D that we can store in mat_ctx%coefficients - ! and then the deallocate on mat_ctx%coefficients should still delete all the memory - mat_ctx%coefficients(1:2*size(coefficients,1)) => coefficients(:, :) - ! This is the matrix whose inverse we are applying (just copying the pointer here) - mat_ctx%mat = matrix - - end subroutine build_gmres_polynomial_newton_inverse + end subroutine build_gmres_polynomial_newton_inverse_0th_order ! 
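! Editor's note: a minimal usage sketch, not part of the original patch, showing how the
! 0th order assembled inverse above would be applied. With a single (necessarily real)
! root theta_1 the polynomial is just p(A) = (1/theta_1) I, so the assembled MATDIAGONAL
! is applied with one MatMult. The vectors r and z are hypothetical placeholders for a
! residual and its preconditioned result.
!
!   type(tVec) :: r, z
!   call build_gmres_polynomial_newton_inverse_0th_order(matrix, 0, coefficients, inv_matrix)
!   call MatCreateVecs(matrix, r, z, ierr)
!   ! z approximates A^-1 r, here just r / theta_1
!   call MatMult(inv_matrix, r, z, ierr)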
------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/Makefile b/tests/Makefile index 5dd2e45..e5a119f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -260,6 +260,12 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type power -ksp_max_it 10 -pc_air_inverse_sparsity_order 0 @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change with 0th order fixed sparsity" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type power -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 0 +# + @echo "" + @echo "Test AIRG Newton with 0th order GMRES polynomials with PC reused with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 0 + @echo "Test AIRG Newton with 0th order GMRES polynomials with PC regenerated with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 0 # @echo "" @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change and polynomial coeffs stored" @@ -484,6 +490,14 @@ run_tests_no_load_parallel: @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change in parallel with 0th order fixed sparsity" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 0 \ -pc_air_a_drop 1e-3 -pc_air_inverse_type power +# + @echo "" + @echo "Test AIRG Newton with 0th order GMRES polynomials with PC reused with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -ksp_max_it 10 -pc_air_poly_order 0 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with 0th order GMRES polynomials with PC regenerated with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 0 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" @echo "Test solving isotropic diffusion with fast coarsening and near-nullspace in parallel" From 90ca1d1bba4923af0af0bf0e38effe88acaa8bc2 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 15 Jan 2026 20:07:40 +0000 Subject: [PATCH 02/41] Enable the use of 1st order assembled newton polynomial form of gmres polynomial --- src/AIR_MG_Setup.F90 | 4 ++- src/Gmres_Poly_Newton.F90 | 59 +++++++++++++++++++++++++++++++-------- src/PCPFLAREINV.c | 9 ++---- tests/Makefile | 16 ++++++++++- 4 files changed, 68 insertions(+), 20 deletions(-) diff --git a/src/AIR_MG_Setup.F90 b/src/AIR_MG_Setup.F90 index 180d0f0..f35bdfa 100644 --- a/src/AIR_MG_Setup.F90 +++ b/src/AIR_MG_Setup.F90 @@ -424,8 +424,10 @@ subroutine setup_air_pcmg(amat, pmat, air_data, pcmg_input) end if ! Convert Aff to a matdiagonal type - ! Haven't rewritten sai to take advantage of matdiagonal + ! Haven't rewritten some inverse types to take advantage of matdiagonal if (aff_diag .AND. & + inverse_type_aff /= PFLAREINV_NEWTON .AND. & + inverse_type_aff /= PFLAREINV_NEWTON_NO_EXTRA .AND. & inverse_type_aff /= PFLAREINV_SAI .AND. 
& inverse_type_aff /= PFLAREINV_ISAI) then diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 4b81ae2..d4f4e27 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -697,6 +697,7 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & MPIU_Comm :: MPI_COMM_MATRIX type(mat_ctxtype), pointer :: mat_ctx=>null() logical :: reuse_triggered + PetscReal :: square_sum ! ~~~~~~ @@ -761,7 +762,11 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! ~~~~~~~~~~~~ ! If we're here then we want an assembled approximate inverse ! ~~~~~~~~~~~~ - reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + + ! For the 0th and 1st order assembled polynomial we just combine the coefficients + ! to get the mononomial form and assemble it, which should be stable for such low order + ! For higher order we use the actual Newton form ! If we're zeroth order poly this is trivial as it's just 1/theta_1 I if (poly_order == 0) then @@ -783,22 +788,54 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) - ! ! We want 1/theta_1 (I - A/theta_1) - ! ! result = -A_ff/theta_1^2 - ! ! We know if we have only a first order polynomial the first root - ! ! is purely real (as complex roots come in conjugate pairs) - ! call MatScale(inv_matrix, -1d0/(coefficients(1, 1))**2, ierr) + ! We only have two coefficients, so they are either both real or complex conjugates + ! If real + if (coefficients(1,2) == 0d0) then + + ! Have to be careful here, as we may be first order, but the second eigenvaule + ! might have been set to zero thanks to the rank reducing solve + ! So we just check if the second imaginary part is zero and if it is + ! we just compute a 0th order inverse - annoyingly we can't call + ! build_gmres_polynomial_newton_inverse_0th_order as that builds a MATDIAGONAL + ! and in the tests there is a problem where we reuse the sparsity, in the first + ! solve we don't have a zero coefficient but in the second solve we do + ! So the mat type needs to remain consistent + ! This can't happen in the complex case + if (coefficients(2,1) == 0d0) then + + ! Set to zero + call MatScale(inv_matrix, 0d0, ierr) + ! Then add in the 0th order inverse + call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) + + ! Then just return + return + end if - ! ! result = -A_ff/theta_1^2 + 1/theta_1 I - ! ! Don't need an assemble as there is one called in this - ! call MatShift(inv_matrix, 1d0/coefficients(1, 1), ierr) + ! result = -A_ff/(theta_1 * theta_2) + call MatScale(inv_matrix, -1d0/(coefficients(1, 1) * coefficients(2, 1)), ierr) + + ! result = I * (1/theta_1 + 1/theta_2) - A_ff/(theta_1 * theta_2) + ! Don't need an assemble as there is one called in this + call MatShift(inv_matrix, 1d0/(coefficients(1, 1)) + 1d0/(coefficients(2, 1)), ierr) + + ! Complex conjugate roots, a +- ib + else + ! a^2 + b^2 + square_sum = coefficients(1,1)**2 + coefficients(1,2)**2 + + ! Complex conjugate roots + ! result = -A_ff / (a^2 + b^2) + call MatScale(inv_matrix, -1d0/square_sum, ierr) + ! result = 2a/(a^2 + b^2) I - A_ff / (a^2 + b^2) + ! Don't need an assemble as there is one called in this + call MatShift(inv_matrix, 2d0 * coefficients(1,1)/square_sum, ierr) + end if ! 
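! Editor's aside, not part of the original patch: a short check of the two branches above.
! The degree 1 polynomial with roots theta_1, theta_2 has the monomial form
!   p(A) = (1/theta_1 + 1/theta_2) I - A_ff/(theta_1*theta_2)
! For a conjugate pair theta = a + ib, conj(theta) = a - ib this stays real, since
!   1/theta + 1/conj(theta) = 2a/(a^2 + b^2)  and  theta*conj(theta) = a^2 + b^2
! giving
!   p(A) = (2a/(a^2 + b^2)) I - A_ff/(a^2 + b^2)
! which is exactly the MatScale by -1/(a^2 + b^2) followed by the MatShift of
! 2a/(a^2 + b^2) in the complex branch above.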
Then just return return end if - - end subroutine build_gmres_polynomial_newton_inverse diff --git a/src/PCPFLAREINV.c b/src/PCPFLAREINV.c index b7f3a8e..8b6826b 100644 --- a/src/PCPFLAREINV.c +++ b/src/PCPFLAREINV.c @@ -18,8 +18,8 @@ PETSC_EXTERN void calculate_and_build_approximate_inverse_c(Mat *input_mat, Pets // // PFLAREINV_POWER - GMRES polynomial with the power basis // PFLAREINV_ARNOLDI - GMRES polynomial with the arnoldi basis -// PFLAREINV_NEWTON - GMRES polynomial with the newton basis with extra roots for stability - can only be used matrix-free atm -// PFLAREINV_NEWTON_NO_EXTRA - GMRES polynomial with the newton basis without extra roots - can only be used matrix-free atm +// PFLAREINV_NEWTON - GMRES polynomial with the newton basis with extra roots for stability +// PFLAREINV_NEWTON_NO_EXTRA - GMRES polynomial with the newton basis without extra roots // PFLAREINV_NEUMANN - Neumann polynomial // PFLAREINV_SAI - SAI - cannot be used matrix-free atm // PFLAREINV_ISAI - Incomplete SAI - cannot be used matrix-free atm @@ -335,11 +335,6 @@ static PetscErrorCode PCSetUp_PFLAREINV_c(PC pc) // ~~~~~~~ PetscCall(PCPFLAREINVGetType(pc, &type)); - // Newton has to be matrix free - if (type == PFLAREINV_NEWTON || type == PFLAREINV_NEWTON_NO_EXTRA) - { - PetscCheck(inv_data->matrix_free, comm, PETSC_ERR_ARG_WRONGSTATE, "GMRES polynomial with Newton basis must be applied matrix-free"); - } // SAI/ISAI can't be matrix free if (type == PFLAREINV_SAI || type == PFLAREINV_ISAI) { diff --git a/tests/Makefile b/tests/Makefile index e5a119f..32f49de 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -266,6 +266,12 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 0 @echo "Test AIRG Newton with 0th order GMRES polynomials with PC regenerated with no sparsity change" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 0 +# + @echo "" + @echo "Test AIRG Newton with 1st order GMRES polynomials with PC reused with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 1 + @echo "Test AIRG Newton with 1st order GMRES polynomials with PC regenerated with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 1 # @echo "" @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change and polynomial coeffs stored" @@ -497,7 +503,15 @@ run_tests_no_load_parallel: -pc_air_a_drop 1e-3 -pc_air_inverse_type newton @echo "Test AIRG Newton with 0th order GMRES polynomials with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 0 \ - -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton +# + @echo "" + @echo "Test AIRG Newton with 1st order GMRES polynomials with PC reused with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -ksp_max_it 10 -pc_air_poly_order 1 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with 1st order GMRES polynomials with PC regenerated with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 1 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" @echo "Test solving 
isotropic diffusion with fast coarsening and near-nullspace in parallel" From aa688c2100538cfda7964792da324a15c05134e5 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 15 Jan 2026 23:11:31 +0000 Subject: [PATCH 03/41] Enable the use of not fixed sparsity assembled newton polynomial form of gmres polynomial --- src/Gmres_Poly_Newton.F90 | 162 +++++++++++++++++++++++++++++++++++++- tests/Makefile | 19 +++++ 2 files changed, 180 insertions(+), 1 deletion(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index d4f4e27..5f32bab 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -692,12 +692,13 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! Local variables PetscInt :: global_rows, global_cols, local_rows, local_cols - integer :: comm_size, errorcode + integer :: comm_size, errorcode, order PetscErrorCode :: ierr MPIU_Comm :: MPI_COMM_MATRIX type(mat_ctxtype), pointer :: mat_ctx=>null() logical :: reuse_triggered PetscReal :: square_sum + type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 ! ~~~~~~ @@ -837,6 +838,165 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & end if + ! If we're constraining sparsity we've built a custom matrix-powers that assumes fixed sparsity + if (poly_sparsity_order < poly_order) then + + ! ! This routine is a custom one that builds our matrix powers and assumes fixed sparsity + ! ! so that it doen't have to do much comms + ! ! This also finishes off the asyn comms and computes the coefficients + ! call mat_mult_powers_share_sparsity(matrix, poly_order, poly_sparsity_order, buffers, coefficients, & + ! reuse_mat, reuse_submatrices, inv_matrix) + + ! ! Then just return + return + + end if + + ! ~~~~~~~~~~ + ! We are only here if we don't constrain_sparsity + ! ~~~~~~~~~~ + + ! If not re-using + ! Copy in the initial matrix + if (.NOT. reuse_triggered) then + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, .FALSE., inv_matrix) + else + ! For the powers > 1 the pattern of the original matrix will be different + ! to the resulting inverse + call MatCopy(matrix, inv_matrix, DIFFERENT_NONZERO_PATTERN, ierr) + end if + + ! Set to zero as we add in each product of terms + call MatScale(inv_matrix, 0d0, ierr) + + ! Don't set any off processor entries so no need for a reduction when assembling + call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + + ! We start with an identity in mat_product + call generate_identity(matrix, mat_product) + + ! ~~~~~~~~~~~~ + ! Iterate over the order + ! This is basically the same as the MF application but we have to build the powers + ! ~~~~~~~~~~~~ + order = 1 + do while (order .le. poly_order - 1) + + ! Duplicate & copy the matrix, but ensure there is a diagonal present + ! temp_mat_A is going to store things with the sparsity of A + if (PetscObjectIsNull(temp_mat_A)) then + call mat_duplicate_copy_plus_diag(matrix, .FALSE., temp_mat_A) + else + ! Can reuse the sparsity + call mat_duplicate_copy_plus_diag(matrix, .TRUE., temp_mat_A) + end if + + ! If real this is easy + if (coefficients(order,2) == 0d0) then + + ! Skips eigenvalues that are numerically zero - see + ! the comment in calculate_gmres_polynomial_roots_newton + if (abs(coefficients(order,1)) < 1e-12) then + order = order + 1 + cycle + end if + + ! Then add the scaled version of each product + if (reuse_triggered) then + ! 
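! Editor's aside, not part of the original patch: written out, the sum this loop
! accumulates is the Newton form
!   p(A) = (1/theta_1) I
!        + (1/theta_2) (I - A_ff/theta_1)
!        + (1/theta_3) (I - A_ff/theta_2)(I - A_ff/theta_1) + ...
! mat_product carries the running product of (I - A_ff/theta_j) factors, inv_matrix the
! running sum, and conjugate pairs of roots are folded together in the complex branch so
! that every assembled matrix stays real.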
If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0/coefficients(order,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0/coefficients(order,1), mat_product) + end if + + ! temp_mat_A = A_ff/theta_k + call MatScale(temp_mat_A, -1d0/coefficients(order,1), ierr) + ! temp_mat_A = I - A_ff/theta_k + call MatShift(temp_mat_A, 1d0, ierr) + + ! mat_product_k_plus_1 = mat_product * temp_mat_A + call MatMatMult(temp_mat_A, mat_product, & + MAT_INITIAL_MATRIX, 1.5d0, mat_product_k_plus_1, ierr) + call MatDestroy(mat_product, ierr) + mat_product = mat_product_k_plus_1 + + order = order + 1 + + ! Complex + else + + ! Skips eigenvalues that are numerically zero + if (coefficients(order,1)**2 + coefficients(order,2)**2 < 1e-12) then + order = order + 2 + cycle + end if + + ! Compute 2a I - A + ! Have to use the DIFFERENT_NONZERO_PATTERN here + ! temp_mat_A = -A + call MatScale(temp_mat_A, -1d0, ierr) + ! temp_mat_A = 2a I - A_ff + call MatShift(temp_mat_A, 2d0 * coefficients(order,1), ierr) + ! temp_mat_A = (2a I - A_ff)/(a^2 + b^2) + call MatScale(temp_mat_A, 1d0/(coefficients(order,1)**2 + coefficients(order,2)**2), ierr) + + call MatMatMult(temp_mat_A, mat_product, & + MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) + + ! Then add the scaled version of each product + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0, temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0, temp_mat_two) + end if + + if (order .le. size(coefficients, 1) - 2) then + ! temp_mat_three = matrix * temp_mat_two + call MatMatMult(matrix, temp_mat_two, & + MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) + call MatDestroy(temp_mat_two, ierr) + + ! Then add the scaled version of each product + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(mat_product, -1d0, temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(mat_product, -1d0, temp_mat_three) + end if + call MatDestroy(temp_mat_three, ierr) + else + call MatDestroy(temp_mat_two, ierr) + end if + + ! Skip two evals + order = order + 2 + + end if + end do + + ! Final step if last root is real + if (coefficients(order,2) == 0d0) then + ! Add in the final term multiplied by 1/theta_poly_order + + ! Skips eigenvalues that are numerically zero + if (abs(coefficients(order,1)) > 1e-12) then + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0/coefficients(order,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + else + ! 
Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0/coefficients(order,1), mat_product) + end if + end if + end if + + call MatDestroy(temp_mat_A, ierr) + call MatDestroy(mat_product, ierr) end subroutine build_gmres_polynomial_newton_inverse diff --git a/tests/Makefile b/tests/Makefile index 32f49de..d7c378c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -272,6 +272,14 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 1 @echo "Test AIRG Newton with 1st order GMRES polynomials with PC regenerated with no sparsity change" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 1 +# + @echo "" + @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC reused with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 + @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 + @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 # @echo "" @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change and polynomial coeffs stored" @@ -511,6 +519,17 @@ run_tests_no_load_parallel: -pc_air_a_drop 1e-3 -pc_air_inverse_type newton @echo "Test AIRG Newton with 1st order GMRES polynomials with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 1 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton +# + @echo "" + @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC reused with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -ksp_max_it 10 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" From f336df67056ce4b274ad70867549bc0d4a911f11 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 15 Jan 2026 23:25:31 +0000 Subject: [PATCH 04/41] Rewrite the newton first order assembled so it doesn't compute theta_1*theta_2 --- src/Gmres_Poly_Newton.F90 | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 5f32bab..3721c54 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -764,10 +764,6 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! 
If we're here then we want an assembled approximate inverse
 ! ~~~~~~~~~~~~
 reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix)
-
- ! For the 0th and 1st order assembled polynomial we just combine the coefficients
- ! to get the mononomial form and assemble it, which should be stable for such low order
- ! For higher order we use the actual Newton form
 
 ! If we're zeroth order poly this is trivial as it's just 1/theta_1 I
 if (poly_order == 0) then
@@ -795,7 +791,7 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, &
 
 ! Have to be careful here, as we may be first order, but the second eigenvaule
 ! might have been set to zero thanks to the rank reducing solve
- ! So we just check if the second imaginary part is zero and if it is
+ ! So we just check if the second real part is zero and if it is
 ! we just compute a 0th order inverse - annoyingly we can't call
 ! build_gmres_polynomial_newton_inverse_0th_order as that builds a MATDIAGONAL
 ! and in the tests there is a problem where we reuse the sparsity, in the first
@@ -813,12 +809,20 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, &
 return
 end if
 
- ! result = -A_ff/(theta_1 * theta_2)
- call MatScale(inv_matrix, -1d0/(coefficients(1, 1) * coefficients(2, 1)), ierr)
+ ! Could just compute the equivalent monomial here to save some flops
+ ! but the whole point of doing the Newton form is to avoid the
+ ! theta_1 * theta_2 that would result
+
+ ! result = -A_ff/theta_1
+ call MatScale(inv_matrix, -1d0/(coefficients(1, 1)), ierr)
+ ! result = I - A_ff/theta_1
+ call MatShift(inv_matrix, 1d0, ierr)
+ ! result = 1/theta_2 * (I - A_ff/theta_1)
+ call MatScale(inv_matrix, 1d0/(coefficients(2, 1)), ierr)
 
- ! result = I * (1/theta_1 + 1/theta_2) - A_ff/(theta_1 * theta_2)
+ ! result = 1/theta_1 + 1/theta_2 * (I - A_ff/theta_1)
 ! Don't need an assemble as there is one called in this
- call MatShift(inv_matrix, 1d0/(coefficients(1, 1)) + 1d0/(coefficients(2, 1)), ierr)
+ call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr)
 
 ! Complex conjugate roots, a +- ib
 else

From bf9a0da159c2d53262384fd4d987fe854a083cc2 Mon Sep 17 00:00:00 2001
From: sdargavi
Date: Thu, 15 Jan 2026 23:35:57 +0000
Subject: [PATCH 05/41] Was accidentally finishing an order early for the newton assembled not fixed sparsity

---
 src/Gmres_Poly_Newton.F90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90
index 3721c54..5ec8433 100644
--- a/src/Gmres_Poly_Newton.F90
+++ b/src/Gmres_Poly_Newton.F90
@@ -885,7 +885,7 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, &
 ! This is basically the same as the MF application but we have to build the powers
 ! ~~~~~~~~~~~~
 order = 1
- do while (order .le. poly_order - 1)
+ do while (order .le. size(coefficients, 1) - 1)
 
 ! Duplicate & copy the matrix, but ensure there is a diagonal present
 ! temp_mat_A is going to store things with the sparsity of A
@@ -984,7 +984,7 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, &
 end do
 
 ! Final step if last root is real
- if (coefficients(order,2) == 0d0) then
+ if (coefficients(size(coefficients,1),2) == 0d0) then
 ! Add in the final term multiplied by 1/theta_poly_order
 
 ! 
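! Editor's aside, not part of the original patch: the two changes in this patch replace
! poly_order with size(coefficients, 1) presumably because coefficients holds one row per
! Newton root, poly_order + 1 roots for a degree poly_order polynomial plus any extra
! roots PFLAREINV_NEWTON adds for stability, so indexing by poly_order alone dropped the
! final root(s) from the assembled sum.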
Skips eigenvalues that are numerically zero From ad448b89ec16ee1f5a7fd8cc1ab92fe17daadc45 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Fri, 16 Jan 2026 16:10:09 +0000 Subject: [PATCH 06/41] Tidy variable names and fix comment --- src/Gmres_Poly.F90 | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Gmres_Poly.F90 b/src/Gmres_Poly.F90 index 552f77a..309b42e 100644 --- a/src/Gmres_Poly.F90 +++ b/src/Gmres_Poly.F90 @@ -851,7 +851,6 @@ subroutine mat_mult_powers_share_sparsity_cpu(matrix, poly_order, poly_sparsity_ ! Compute matrix powers c = coeff(1) * I + coeff(2) * A + coeff(3) * A^2 + coeff(4) * A^3 + ... ! where a c and the powers all share the same sparsity as the power input in poly_sparsity_order - ! Assuming cmat has not been built/allocated ! This also finishes the async comms required to compute the gmres poly coefficients if buffers%request is allocated ! ~~~~~~~~~~ @@ -1655,7 +1654,7 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity(matrix, poly_order, integer :: order PetscErrorCode :: ierr logical :: reuse_triggered - type(tVec) :: rhs_copy, diag_vec, power_vec + type(tVec) :: inv_vec, diag_vec, power_vec ! ~~~~~~ reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) @@ -1666,9 +1665,9 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity(matrix, poly_order, ! This stores D^order if (.NOT. reuse_triggered) then - call VecDuplicate(diag_vec, rhs_copy, ierr) + call VecDuplicate(diag_vec, inv_vec, ierr) else - call MatDiagonalGetDiagonal(inv_matrix, rhs_copy, ierr) + call MatDiagonalGetDiagonal(inv_matrix, inv_vec, ierr) end if call VecCopy(diag_vec, power_vec, ierr) @@ -1678,22 +1677,22 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity(matrix, poly_order, call finish_gmres_polynomial_coefficients_power(poly_order, buffers, coefficients) ! Set: alpha_0 * I - call VecSet(rhs_copy, coefficients(1), ierr) + call VecSet(inv_vec, coefficients(1), ierr) ! Calculate: alpha_0 * I + alpha_1 * D + alpha_2 * D^2 do order = 1, poly_order - call VecAXPY(rhs_copy, coefficients(order+1), power_vec, ierr) + call VecAXPY(inv_vec, coefficients(order+1), power_vec, ierr) ! Compute power_vec = power_vec * D if (order /= poly_order) call VecPointwiseMult(power_vec, power_vec, diag_vec, ierr) end do ! We may be reusing with the same sparsity if (.NOT. reuse_triggered) then - ! The matrix takes ownership of rhs_copy and increases ref counter - call MatCreateDiagonal(rhs_copy, inv_matrix, ierr) - call VecDestroy(rhs_copy, ierr) + ! 
The matrix takes ownership of inv_vec and increases ref counter + call MatCreateDiagonal(inv_vec, inv_matrix, ierr) + call VecDestroy(inv_vec, ierr) else - call MatDiagonalRestoreDiagonal(inv_matrix, rhs_copy, ierr) + call MatDiagonalRestoreDiagonal(inv_matrix, inv_vec, ierr) end if call VecDestroy(diag_vec, ierr) From 4e94affb194f4df50b52152fed7e78ae985e151b Mon Sep 17 00:00:00 2001 From: sdargavi Date: Fri, 16 Jan 2026 16:10:56 +0000 Subject: [PATCH 07/41] Enable the use of 0th order fixed sparsity (diagonal) assembled newton polynomial form of gmres polynomial --- src/Gmres_Poly_Newton.F90 | 694 +++++++++++++++++++++++++++++++++++++- tests/Makefile | 16 +- 2 files changed, 704 insertions(+), 6 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 5ec8433..a7896ce 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -669,6 +669,560 @@ subroutine petsc_matvec_gmres_newton_mf_residual(mat, x, y) end do end subroutine petsc_matvec_gmres_newton_mf_residual +!------------------------------------------------------------------------------------------------------------------------ + + subroutine mat_mult_powers_share_sparsity_newton(matrix, poly_order, poly_sparsity_order, coefficients, & + reuse_mat, reuse_submatrices, cmat) + + ! Wrapper around mat_mult_powers_share_sparsity_cpu and mat_mult_powers_share_sparsity_kokkos + + ! ~~~~~~~~~~ + ! Input + type(tMat), target, intent(in) :: matrix + integer, intent(in) :: poly_order, poly_sparsity_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: reuse_mat, cmat + type(tMat), dimension(:), pointer, intent(inout) :: reuse_submatrices + +#if defined(PETSC_HAVE_KOKKOS) + integer(c_long_long) :: A_array, B_array, reuse_array + integer :: errorcode, reuse_int_cmat, reuse_int_reuse_mat + PetscErrorCode :: ierr + MatType :: mat_type + Mat :: temp_mat, temp_mat_reuse, temp_mat_compare + PetscScalar normy; + logical :: reuse_triggered_cmat, reuse_triggered_reuse_mat + type(c_ptr) :: coefficients_ptr + type(tMat) :: reuse_mat_cpu + type(tMat), dimension(:), pointer :: reuse_submatrices_cpu +#endif + ! ~~~~~~~~~~ + + ! ~~~~~~~~~~ + ! Special case if we just want to return a gmres polynomial with the sparsity of the diagonal + ! This is like a damped Jacobi + ! ~~~~~~~~~~ +if (poly_sparsity_order == 0) then + + call build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly_order, & + coefficients, cmat) + + return +end if + +#if defined(PETSC_HAVE_KOKKOS) + + call MatGetType(matrix, mat_type, ierr) + if (mat_type == MATMPIAIJKOKKOS .OR. mat_type == MATSEQAIJKOKKOS .OR. & + mat_type == MATAIJKOKKOS) then + + A_array = matrix%v + reuse_triggered_cmat = .NOT. PetscObjectIsNull(cmat) + reuse_triggered_reuse_mat = .NOT. PetscObjectIsNull(reuse_mat) + reuse_int_cmat = 0 + if (reuse_triggered_cmat) then + reuse_int_cmat = 1 + B_array = cmat%v + end if + reuse_int_reuse_mat = 0 + if (reuse_triggered_reuse_mat) then + reuse_int_reuse_mat = 1 + end if + reuse_array = reuse_mat%v + coefficients_ptr = c_loc(coefficients) + + ! call mat_mult_powers_share_sparsity_newton_kokkos(A_array, poly_order, poly_sparsity_order, & + ! coefficients_ptr, reuse_int_reuse_mat, reuse_array, reuse_int_cmat, B_array) + + reuse_mat%v = reuse_array + cmat%v = B_array + + ! If debugging do a comparison between CPU and Kokkos results + if (kokkos_debug()) then + + ! If we're doing reuse and debug, then we have to always output the result + ! 
from the cpu version, as it will have coo preallocation structures set + ! They aren't copied over if you do a matcopy (or matconvert) + ! If we didn't do that the next time we come through this routine + ! and try to call the cpu version with reuse, it will segfault + if (reuse_triggered_cmat) then + temp_mat = cmat + call MatConvert(cmat, MATSAME, MAT_INITIAL_MATRIX, temp_mat_compare, ierr) + else + temp_mat_compare = cmat + end if + + ! Debug check if the CPU and Kokkos versions are the same + ! We send in an empty reuse_mat_cpu here always, as we can't pass through + ! the same one Kokkos uses as it now only gets out the non-local rows we need + ! (ie reuse_mat and reuse_mat_cpu are no longer the same size) + reuse_submatrices_cpu => null() + call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & + coefficients, reuse_mat_cpu, reuse_submatrices_cpu, temp_mat) + call destroy_matrix_reuse(reuse_mat_cpu, reuse_submatrices_cpu) + + call MatConvert(temp_mat, MATSAME, MAT_INITIAL_MATRIX, & + temp_mat_reuse, ierr) + + call MatAXPYWrapper(temp_mat_reuse, -1d0, temp_mat_compare) + call MatNorm(temp_mat_reuse, NORM_FROBENIUS, normy, ierr) + ! There is floating point compute in these inverses, so we have to be a + ! bit more tolerant to rounding differences + if (normy .gt. 1d-11 .OR. normy/=normy) then + !call MatFilter(temp_mat_reuse, 1d-14, PETSC_TRUE, PETSC_FALSE, ierr) + !call MatView(temp_mat_reuse, PETSC_VIEWER_STDOUT_WORLD, ierr) + print *, "Kokkos and CPU versions of mat_mult_powers_share_sparsity do not match" + + call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) + end if + call MatDestroy(temp_mat_reuse, ierr) + if (.NOT. reuse_triggered_cmat) then + call MatDestroy(cmat, ierr) + else + call MatDestroy(temp_mat_compare, ierr) + end if + cmat = temp_mat + end if + + else + + call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & + coefficients, reuse_mat, reuse_submatrices, cmat) + + end if +#else + call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & + coefficients, reuse_mat, reuse_submatrices, cmat) +#endif + + ! ~~~~~~~~~~ + + end subroutine mat_mult_powers_share_sparsity_newton + +!------------------------------------------------------------------------------------------------------------------------ + + subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, coefficients, & + reuse_mat, reuse_submatrices, cmat) + + ! Compute newton powers with the same sparsity + + ! ~~~~~~~~~~ + ! 
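! Editor's note, not part of the original patch: the body of this routine is still
! commented out at this point in the series. Judging from the power basis version
! mat_mult_powers_share_sparsity_cpu in Gmres_Poly.F90, the intent is to evaluate the
! Newton form while constraining every product to the sparsity of
! A^poly_sparsity_order, pulling the required off-process rows of the matrix over once
! with MatCreateSubMatrices and then forming the remaining row-wise products locally.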
Input + type(tMat), target, intent(in) :: matrix + integer, intent(in) :: poly_order, poly_sparsity_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: reuse_mat, cmat + type(tMat), dimension(:), pointer, intent(inout) :: reuse_submatrices + + PetscInt :: local_rows, local_cols, global_rows, global_cols + PetscInt :: global_row_start, global_row_end_plus_one, row_index_into_submatrix + PetscInt :: global_col_start, global_col_end_plus_one, n, ncols, ncols_two, ifree, max_nnzs + PetscInt :: i_loc, j_loc, row_size, rows_ao, cols_ao, rows_ad, cols_ad, shift = 0 + integer :: errorcode, match_counter, term, order + integer :: comm_size + PetscErrorCode :: ierr + integer, dimension(:), allocatable :: cols_index_one, cols_index_two + PetscInt, dimension(:), allocatable :: col_indices_off_proc_array, ad_indices, cols + PetscReal, dimension(:), allocatable :: vals + type(tIS), dimension(1) :: col_indices, row_indices + type(tMat) :: Ad, Ao + PetscInt, dimension(:), pointer :: colmap + logical :: deallocate_submatrices = .FALSE. + type(c_ptr) :: vals_c_ptr + type(tMat), dimension(size(coefficients)-1), target :: matrix_powers + type(tMat), pointer :: mat_sparsity_match + type(int_vec), dimension(:), allocatable :: symbolic_ones + type(real_vec), dimension(:), allocatable :: symbolic_vals + integer(c_long_long) A_array + MPIU_Comm :: MPI_COMM_MATRIX + PetscReal, dimension(:), allocatable :: vals_power_temp, vals_previous_power_temp + PetscInt, dimension(:), pointer :: submatrices_ia, submatrices_ja, cols_two_ptr, cols_ptr + PetscReal, dimension(:), pointer :: vals_two_ptr, vals_ptr + real(c_double), pointer :: submatrices_vals(:) + logical :: reuse_triggered + PetscBool :: symmetric = PETSC_FALSE, inodecompressed = PETSC_FALSE, done + PetscInt, parameter :: one = 1, zero = 0 + + ! ~~~~~~~~~~ + + if (poly_sparsity_order .ge. size(coefficients)-1) then + print *, "Requested sparsity is greater than or equal to the order" + call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) + end if + + call PetscObjectGetComm(matrix, MPI_COMM_MATRIX, ierr) + ! Get the comm size + call MPI_Comm_size(MPI_COMM_MATRIX, comm_size, errorcode) + + ! Get the local sizes + call MatGetLocalSize(matrix, local_rows, local_cols, ierr) + call MatGetSize(matrix, global_rows, global_cols, ierr) + ! This returns the global index of the local portion of the matrix + call MatGetOwnershipRange(matrix, global_row_start, global_row_end_plus_one, ierr) + call MatGetOwnershipRangeColumn(matrix, global_col_start, global_col_end_plus_one, ierr) + + reuse_triggered = .NOT. PetscObjectIsNull(cmat) + + ! ! ~~~~~~~~~~ + ! ! Compute any matrix powers we might need to constrain sparsity and start assembling the + ! ! components of the output matrix up to the order of poly_sparsity_order + ! ! The powers higher than poly_sparsity_order can be done with only + ! ! a single bit of comms and is done below this + ! ! ~~~~~~~~~~ + + ! ! matrix_powers stores all the powers of the input matrix + ! matrix_powers(1) = matrix + + ! ! What power of A do we want to match the sparsity of + ! ! Compute the power we need if we're two or above + ! do order = 2, poly_sparsity_order + + ! ! Let's just store each power, that way we can set the sparsity + ! ! as the highest (unconstrained) power and do the mataxpy with a subset of entries + ! ! Takes more memory to do this but is faster + ! call MatMatMult(matrix, matrix_powers(order-1), & + ! MAT_INITIAL_MATRIX, 1.5d0, matrix_powers(order), ierr) + ! 
end do + + ! ! mat_sparsity_match now contains the sparsity of the power of A we want to match + ! mat_sparsity_match => matrix_powers(poly_sparsity_order) + + ! ! Copy in the highest unconstrained power + ! ! Duplicate & copy the matrix, but ensure there is a diagonal present + ! call mat_duplicate_copy_plus_diag(matrix_powers(poly_sparsity_order), reuse_triggered, cmat) + + ! ! We know we will never have non-zero locations outside of the highest constrained sparsity power + ! call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) + ! call MatSetOption(cmat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + ! ! We know we are only going to insert local vals + ! ! These options should turn off any reductions in the assembly + ! call MatSetOption(cmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + + ! ! ~~~~~~~~~~~~ + ! ! If we're in parallel we need to get the off-process rows of matrix that correspond + ! ! to the columns of mat_sparsity_match + ! ! We can therefore do the matmult for every constrained power locally with just that data + ! ! ~~~~~~~~~~~~ + ! ! Have to double check comm_size /= 1 as we might be on a subcommunicator and we can't call + ! ! MatMPIAIJGetSeqAIJ specifically if that's the case + ! if (comm_size /= 1) then + + ! ! ~~~~ + ! ! Get the cols + ! ! ~~~~ + ! call MatMPIAIJGetSeqAIJ(mat_sparsity_match, Ad, Ao, colmap, ierr) + + ! call MatGetSize(Ad, rows_ad, cols_ad, ierr) + ! ! We know the col size of Ao is the size of colmap, the number of non-zero offprocessor columns + ! call MatGetSize(Ao, rows_ao, cols_ao, ierr) + + ! ! For the column indices we need to take all the columns of mat_sparsity_match + ! A_array = mat_sparsity_match%v + + ! ! These are the global indices of the columns we want + ! allocate(col_indices_off_proc_array(cols_ad + cols_ao)) + ! allocate(ad_indices(cols_ad)) + ! ! Local rows (as global indices) + ! do ifree = 1, cols_ad + ! ad_indices(ifree) = global_row_start + ifree - 1 + ! end do + + ! ! col_indices_off_proc_array is now sorted, which are the global indices of the columns we want + ! call merge_pre_sorted(ad_indices, colmap, col_indices_off_proc_array) + ! deallocate(ad_indices) + + ! ! Create the sequential IS we want with the cols we want (written as global indices) + ! call ISCreateGeneral(PETSC_COMM_SELF, cols_ad + cols_ao, & + ! col_indices_off_proc_array, PETSC_USE_POINTER, col_indices(1), ierr) + ! call ISCreateGeneral(PETSC_COMM_SELF, cols_ao, & + ! colmap, PETSC_USE_POINTER, row_indices(1), ierr) + + ! ! ~~~~~~~ + ! ! Now we can pull out the chunk of matrix that we need + ! ! ~~~~~~~ + + ! ! We need off-processor rows to compute matrix powers + ! ! Setting this is necessary to avoid an allreduce when calling createsubmatrices + ! ! This will be reset to false after the call to createsubmatrices + ! call MatSetOption(matrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE, ierr) + + ! ! Now this will be doing comms to get the non-local rows we want + ! ! But only including the columns of the local fixed sparsity, as we don't need all the + ! ! columns of the non-local entries unless we are doing a full matmatmult + ! ! This returns a sequential matrix + ! if (.NOT. PetscObjectIsNull(reuse_mat)) then + ! reuse_submatrices(1) = reuse_mat + ! call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_REUSE_MATRIX, reuse_submatrices, ierr) + ! else + ! call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_INITIAL_MATRIX, reuse_submatrices, ierr) + ! reuse_mat = reuse_submatrices(1) + ! end if + ! 
row_size = size(col_indices_off_proc_array) + ! call ISDestroy(col_indices(1), ierr) + ! call ISDestroy(row_indices(1), ierr) + + ! ! Easy in serial as we have everything we neeed + ! else + + ! Ad = mat_sparsity_match + ! cols_ad = local_cols + ! allocate(reuse_submatrices(1)) + ! deallocate_submatrices = .TRUE. + ! reuse_submatrices(1) = matrix + ! row_size = local_rows + ! allocate(col_indices_off_proc_array(local_rows)) + ! do ifree = 1, local_rows + ! col_indices_off_proc_array(ifree) = ifree-1 + ! end do + ! end if + + ! ! ~~~~~~~~~ + ! ! Now that we are here, reuse_submatrices(1) contains A^poly_sparsity_order with all of the rows + ! ! that correspond to the non-zero columns of matrix + ! ! ~~~~~~~~~ + + ! ! Have to get the max nnzs of the local and off-local rows we've just retrieved + ! max_nnzs = 0 + ! do ifree = global_row_start, global_row_end_plus_one-1 + ! call MatGetRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! if (ncols > max_nnzs) max_nnzs = ncols + ! call MatRestoreRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! end do + ! if (comm_size /= 1) then + ! do ifree = 1, cols_ao + ! call MatGetRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! if (ncols > max_nnzs) max_nnzs = ncols + ! call MatRestoreRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! end do + ! end if + ! ! and also the sparsity power + ! do ifree = global_row_start, global_row_end_plus_one-1 + ! call MatGetRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! if (ncols > max_nnzs) max_nnzs = ncols + ! call MatRestoreRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + ! end do + + ! ! ~~~~~~~~ + ! ! Get pointers to the sequential aij structure so we don't have to put critical regions + ! ! around the matgetrow + ! ! ~~~~~~~~ + ! call MatGetRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) + ! if (.NOT. done) then + ! print *, "Pointers not set in call to MatGetRowIJF" + ! call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) + ! end if + ! ! Returns the wrong size pointer and can break if that size goes negative?? + ! !call MatSeqAIJGetArrayF90(reuse_submatrices(1),submatrices_vals,ierr); + ! A_array = reuse_submatrices(1)%v + ! ! Now we must never overwrite the values in this pointer, and we must + ! ! never call restore on it, see comment on top of the commented out + ! ! MatSeqAIJRestoreArray below + ! call MatSeqAIJGetArrayF90_mine(A_array, vals_c_ptr) + ! call c_f_pointer(vals_c_ptr, submatrices_vals, shape=[size(submatrices_ja)]) + + ! ! ~~~~~~~~~~ + + ! allocate(cols(max_nnzs)) + ! allocate(vals(max_nnzs)) + ! allocate(vals_power_temp(max_nnzs)) + ! allocate(vals_previous_power_temp(max_nnzs)) + ! allocate(cols_index_one(max_nnzs)) + ! allocate(cols_index_two(max_nnzs)) + + ! ! Scale the highest constrained power + ! call MatScale(cmat, coefficients(poly_sparsity_order+1), ierr) + + ! ! Then go backwards and add in each of the coefficients * A^order from the second highest order down + ! do order = poly_sparsity_order - 1, 1, -1 + + ! ! Do result = alpha_1 * A_ff + alpha_2 * A_ff^2 + .... + ! ! Can use SUBSET_NONZERO_PATTERN as we have put the highest order power in first + ! 
call MatAXPY(cmat, coefficients(order+1), matrix_powers(order), SUBSET_NONZERO_PATTERN , ierr) + ! end do + + ! ! Add in the 0th order term + ! do i_loc = 1, local_rows + + ! ! Add in the I term - 0th order term + ! call MatSetValue(cmat, global_row_start + i_loc-1, global_row_start + i_loc-1, & + ! coefficients(1), ADD_VALUES, ierr) + ! end do + + ! ! ~~~~~~~~~~~~ + ! ! From here we now have cmat with the correct values up to the power poly_sparsity_order + ! ! and hence we want to add in the sparsity constrained powers + ! ! ~~~~~~~~~~~~ + + ! ! Now go through and compute the sum of the matrix powers + ! ! We're doing row-wise matmatmults here assuming the fixed sparsity + ! ! We exploit the fact that the subsequent matrix powers can be done + ! ! one row at a time, so we only have to retrieve the needed vals from mat_sparsity_match once + ! do i_loc = 1, local_rows + + ! ! Get the row of mat_sparsity_match + ! call MatGetRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & + ! cols_ptr, vals_ptr, ierr) + ! ! Copying here because mat_sparsity_match and matrix are often the same matrix + ! ! and hence we can only have one active matgetrow + ! ncols = ncols_two + ! cols(1:ncols) = cols_ptr(1:ncols) + ! vals(1:ncols) = vals_ptr(1:ncols) + ! call MatRestoreRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & + ! cols_ptr, vals_ptr, ierr) + + ! ! This is just a symbolic for the set of rows given in cols + ! ! Let's just do all the column matching and extraction of the values once + + ! ! Allocate some space to store the matching indices + ! allocate(symbolic_ones(ncols)) + ! allocate(symbolic_vals(ncols)) + ! row_index_into_submatrix = 1 + + ! ! This is a row-wise product + ! do j_loc = 1, ncols + + ! ! If we're trying to access a local row in matrix + ! if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then + + ! call MatGetRow(matrix, cols(j_loc), ncols_two, & + ! cols_two_ptr, vals_two_ptr, ierr) + + ! ! If we're trying to access a non-local row in matrix + ! else + + ! ! this is local row index we want into reuse_submatrices(1) (as row_indices used to extract are just colmap) + ! ! We know cols is sorted, so every non-local index will be greater than the last one + ! ! (it's just that cols could have some local ones between different non-local) + ! ! colmap is also sorted and we know every single non-local entry in cols(j_loc) is in colmap + ! do while (row_index_into_submatrix .le. cols_ao .AND. colmap(row_index_into_submatrix) .lt. cols(j_loc)) + ! row_index_into_submatrix = row_index_into_submatrix + 1 + ! end do + + ! ! This is the number of columns + ! ncols_two = submatrices_ia(row_index_into_submatrix+1) - submatrices_ia(row_index_into_submatrix) + ! allocate(cols_two_ptr(ncols_two)) + ! ! This is the local column indices in reuse_submatrices(1) + ! cols_two_ptr = submatrices_ja(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) + ! ! Because col_indices_off_proc_array (and hence the column indices in reuse_submatrices(1) is sorted, + ! ! then cols_two_ptr contains the sorted global column indices + ! cols_two_ptr = col_indices_off_proc_array(cols_two_ptr+1) + + ! ! This is the values + ! vals_two_ptr => & + ! submatrices_vals(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) + ! end if + + ! ! Search for the matching column + ! ! We're intersecting the global column indices of mat_sparsity_match (cols) and matrix (cols_two_ptr) + ! 
call intersect_pre_sorted_indices_only(cols(1:ncols), cols_two_ptr, cols_index_one, cols_index_two, match_counter) + + ! ! Don't need to do anything if we have no matches + ! if (match_counter == 0) then + ! ! Store that we can skip this entry + ! symbolic_ones(j_loc)%ptr => null() + ! symbolic_vals(j_loc)%ptr => null() + ! else + + ! ! These are the matching local column indices for this row of mat_sparsity_match + ! allocate(symbolic_ones(j_loc)%ptr(match_counter)) + ! symbolic_ones(j_loc)%ptr = cols_index_one(1:match_counter) + + ! ! These are the matching values of matrix + ! allocate(symbolic_vals(j_loc)%ptr(match_counter)) + ! symbolic_vals(j_loc)%ptr = vals_two_ptr(cols_index_two(1:match_counter)) + ! end if + + ! ! Restore local row of matrix + ! if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then + ! call MatRestoreRow(matrix, cols(j_loc), ncols_two, & + ! cols_two_ptr, vals_two_ptr, ierr) + ! else + ! deallocate(cols_two_ptr) + ! end if + ! end do + + ! ! Start with the values of mat_sparsity_match in it + ! vals_previous_power_temp(1:ncols) = vals(1:ncols) + + ! ! Loop over any matrix powers + ! ! vals_power_temp stores the value of A^(term-1) for this row, and we update this as we go through + ! ! the term loop + ! do term = poly_sparsity_order+2, size(coefficients) + + ! ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns + ! vals_power_temp(1:ncols) = 0 + + ! ! Have to finish all the columns before we move onto the next coefficient + ! do j_loc = 1, ncols + + ! ! If we have no matching columns cycle this row + ! if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle + + ! ! symbolic_vals(j_loc)%ptr has the matching values of A in it + ! vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) + & + ! vals_previous_power_temp(j_loc) * symbolic_vals(j_loc)%ptr + + ! end do + + ! ! ~~~~~~~~~~~ + ! ! Now can add the value of coeff * A^(term-1) to our matrix + ! ! Can skip this if coeff is zero, but still need to compute A^(term-1) + ! ! for the next time through + ! ! ~~~~~~~~~~~ + ! if (ncols /= 0 .AND. coefficients(term) /= 0d0) then + ! call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + ! coefficients(term) * vals_power_temp, ADD_VALUES, ierr) + ! end if + + ! ! This should now have the value of A^(term-1) in it + ! vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) + ! end do + + ! ! Delete our symbolic + ! do j_loc = 1, ncols + ! if (associated(symbolic_ones(j_loc)%ptr)) then + ! deallocate(symbolic_ones(j_loc)%ptr) + ! deallocate(symbolic_vals(j_loc)%ptr) + ! end if + ! end do + ! deallocate(symbolic_vals, symbolic_ones) + ! end do + + ! call MatRestoreRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) + ! ! We very deliberately don't call restorearray here! + ! ! There is no matseqaijgetarrayread or matseqaijrestorearrayread in Fortran + ! ! Those routines don't increment the PetscObjectStateGet which tells petsc + ! ! the mat has changed. Hence above we directly access the data pointer with + ! ! a call to MatSeqAIJGetArrayF90_mine and then never write into it + ! ! If we call the restorearrayf90, that does increment the object state + ! ! even though we only read from the array + ! ! That would mean if we pass in a pc->pmat for example, just setting up a pc + ! ! would trigger petsc setting up the pc on every iteration of the pc + ! ! 
call MatSeqAIJRestoreArray(reuse_submatrices(1),submatrices_vals,ierr); + + ! ! ~~~~~~~~~~~ + + ! ! Do the assembly, should need zero reductions in this given we've set the + ! ! flags above + ! call MatAssemblyBegin(cmat, MAT_FINAL_ASSEMBLY, ierr) + + ! ! Delete temporaries + ! do order = 2, poly_sparsity_order + ! call MatDestroy(matrix_powers(order), ierr) + ! end do + ! if (deallocate_submatrices) then + ! deallocate(reuse_submatrices) + ! reuse_submatrices => null() + ! end if + + ! deallocate(col_indices_off_proc_array) + ! deallocate(cols, vals, vals_power_temp, vals_previous_power_temp, cols_index_one, cols_index_two) + + ! ! Finish assembly + ! call MatAssemblyEnd(cmat, MAT_FINAL_ASSEMBLY, ierr) + + + end subroutine mat_mult_powers_share_sparsity_newton_cpu ! ------------------------------------------------------------------------------------------------------------------------------- @@ -845,11 +1399,10 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! If we're constraining sparsity we've built a custom matrix-powers that assumes fixed sparsity if (poly_sparsity_order < poly_order) then - ! ! This routine is a custom one that builds our matrix powers and assumes fixed sparsity - ! ! so that it doen't have to do much comms - ! ! This also finishes off the asyn comms and computes the coefficients - ! call mat_mult_powers_share_sparsity(matrix, poly_order, poly_sparsity_order, buffers, coefficients, & - ! reuse_mat, reuse_submatrices, inv_matrix) + ! This routine is a custom one that builds our matrix powers and assumes fixed sparsity + ! so that it doen't have to do much comms + call mat_mult_powers_share_sparsity_newton(matrix, poly_order, poly_sparsity_order, coefficients, & + reuse_mat, reuse_submatrices, inv_matrix) ! ! Then just return return @@ -1053,6 +1606,137 @@ subroutine build_gmres_polynomial_newton_inverse_0th_order(matrix, poly_order, c end subroutine build_gmres_polynomial_newton_inverse_0th_order + +! ------------------------------------------------------------------------------------------------------------------------------- + + subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly_order, coefficients, & + inv_matrix) + + ! Specific inverse with 0th order sparsity + + ! ~~~~~~ + type(tMat), intent(in) :: matrix + integer, intent(in) :: poly_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: inv_matrix + + ! Local variables + integer :: order + PetscErrorCode :: ierr + logical :: reuse_triggered + type(tVec) :: inv_vec, diag_vec, product_vec, temp_vec_A, one_vec, temp_vec_two + ! ~~~~~~ + + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + + ! Our matrix has to be square + call MatCreateVecs(matrix, product_vec, diag_vec, ierr) + call MatGetDiagonal(matrix, diag_vec, ierr) + + ! This stores D^order + if (.NOT. reuse_triggered) then + call VecDuplicate(diag_vec, inv_vec, ierr) + else + call MatDiagonalGetDiagonal(inv_matrix, inv_vec, ierr) + end if + call VecDuplicate(diag_vec, temp_vec_A, ierr) + call VecDuplicate(diag_vec, one_vec, ierr) + call VecDuplicate(diag_vec, temp_vec_two, ierr) + + ! Set to zero as we add to it + call VecSet(inv_vec, 0d0, ierr) + ! We start with an identity in product_vec + call VecSet(product_vec, 1d0, ierr) + call VecSet(one_vec, 1d0, ierr) + + order = 1 + do while (order .le. size(coefficients, 1) - 1) + + ! 
temp_vec_A is going to store things with the sparsity of A + call VecCopy(diag_vec, temp_vec_A, ierr) + + ! If real this is easy + if (coefficients(order,2) == 0d0) then + + ! Skips eigenvalues that are numerically zero - see + ! the comment in calculate_gmres_polynomial_roots_newton + if (abs(coefficients(order,1)) < 1e-12) then + order = order + 1 + cycle + end if + + call VecAXPY(inv_vec, 1d0/coefficients(order,1), product_vec, ierr) + + ! temp_vec_A = A_ff/theta_k + call VecScale(temp_vec_A, -1d0/coefficients(order,1), ierr) + ! temp_vec_A = I - A_ff/theta_k + call VecAXPY(temp_vec_A, 1d0, one_vec, ierr) + + ! product_vec = product_vec * temp_vec_A + call VecPointwiseMult(product_vec, product_vec, temp_vec_A, ierr) + + order = order + 1 + + ! Complex + else + + ! Skips eigenvalues that are numerically zero + if (coefficients(order,1)**2 + coefficients(order,2)**2 < 1e-12) then + order = order + 2 + cycle + end if + + ! Compute 2a I - A + ! temp_vec_A = -A + call VecScale(temp_vec_A, -1d0, ierr) + ! temp_vec_A = 2a I - A_ff + call VecAXPY(temp_vec_A, 2d0 * coefficients(order,1), one_vec, ierr) + ! temp_vec_A = (2a I - A_ff)/(a^2 + b^2) + call VecScale(temp_vec_A, 1d0/(coefficients(order,1)**2 + coefficients(order,2)**2), ierr) + + ! temp_vec_two = temp_vec_A * product_vec + call VecPointwiseMult(temp_vec_two, temp_vec_A, product_vec, ierr) + call VecAXPY(inv_vec, 1d0, temp_vec_two, ierr) + + if (order .le. size(coefficients, 1) - 2) then + ! temp_vec_two = A * temp_vec_two + call VecPointwiseMult(temp_vec_two, diag_vec, temp_vec_two, ierr) + call VecAXPY(product_vec, -1d0, temp_vec_two, ierr) + end if + + ! Skip two evals + order = order + 2 + + end if + end do + + ! Final step if last root is real + if (coefficients(size(coefficients,1),2) == 0d0) then + ! Add in the final term multiplied by 1/theta_poly_order + + ! Skips eigenvalues that are numerically zero + if (abs(coefficients(order,1)) > 1e-12) then + call VecAXPY(inv_vec, 1d0/coefficients(order,1), product_vec, ierr) + end if + end if + + ! We may be reusing with the same sparsity + if (.NOT. reuse_triggered) then + ! The matrix takes ownership of inv_vec and increases ref counter + call MatCreateDiagonal(inv_vec, inv_matrix, ierr) + call VecDestroy(inv_vec, ierr) + else + call MatDiagonalRestoreDiagonal(inv_matrix, inv_vec, ierr) + end if + + call VecDestroy(diag_vec, ierr) + call VecDestroy(product_vec, ierr) + call VecDestroy(temp_vec_A, ierr) + call VecDestroy(one_vec, ierr) + call VecDestroy(temp_vec_two, ierr) + + end subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton + ! 
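The diagonal-only (0th order sparsity) routine above reduces the Newton-basis product to an elementwise recurrence on diag(A). A minimal NumPy sketch of that recurrence, illustrative only and not part of this patch, assuming roots is an (m, 2) array of (real, imag) harmonic Ritz values:

    import numpy as np

    def newton_poly_diag_inverse(diag, roots):
        """Diagonal of the approximate inverse: the Newton-basis GMRES
        polynomial applied to diag(A) only (0th order sparsity)."""
        d = np.asarray(diag, dtype=float)
        inv = np.zeros_like(d)
        prod = np.ones_like(d)            # running product, starts as the identity
        m = roots.shape[0]
        i = 0
        while i < m - 1:
            a, b = roots[i]
            if b == 0.0:                  # single real root theta_i
                if abs(a) < 1e-12:        # skip numerically zero eigenvalues
                    i += 1
                    continue
                inv += prod / a
                prod *= 1.0 - d / a       # prod <- (I - A/theta_i) prod, diagonal case
                i += 1
            else:                         # complex conjugate pair a +- ib
                mod2 = a * a + b * b
                if mod2 < 1e-12:
                    i += 2
                    continue
                tmp = (2.0 * a - d) / mod2 * prod
                inv += tmp
                if i <= m - 3:            # only update prod if more roots follow
                    prod -= d * tmp
                i += 2
        if roots[m - 1, 1] == 0.0 and abs(roots[m - 1, 0]) > 1e-12:
            inv += prod / roots[m - 1, 0]
        return inv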
------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/Makefile b/tests/Makefile index d7c378c..514ae5e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -280,6 +280,12 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 +# + @echo "" + @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change with 0th order fixed sparsity" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_inverse_sparsity_order 0 + @echo "Test AIRG Newton with GMRES polynomials with PC regenerated with no sparsity change with 0th order fixed sparsity" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 0 # @echo "" @echo "Test AIRG with GMRES polynomials with PC regenerated with no sparsity change and polynomial coeffs stored" @@ -530,7 +536,15 @@ run_tests_no_load_parallel: -pc_air_a_drop 1e-3 -pc_air_inverse_type newton @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ - -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton +# + @echo "" + @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change in parallel with 0th order fixed sparsity" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -ksp_max_it 10 -pc_air_inverse_sparsity_order 0 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with GMRES polynomials with PC regenerated with no sparsity change in parallel with 0th order fixed sparsity" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 0 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" @echo "Test solving isotropic diffusion with fast coarsening and near-nullspace in parallel" From 784dcc7dd10744a6fd9c30a16ff1f5a67c5cfbce Mon Sep 17 00:00:00 2001 From: sdargavi Date: Sat, 17 Jan 2026 00:36:15 +0000 Subject: [PATCH 08/41] Most of the work for the fixed sparsity Newton polynomials on CPUs is done, just need to finish the higher order fixed sparsity terms in mat_mult_powers_share_sparsity_newton_cpu. --- src/Gmres_Poly_Newton.F90 | 1333 +++++++++++++++++++++---------------- 1 file changed, 754 insertions(+), 579 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index a7896ce..f47f437 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -469,7 +469,7 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) type(tVec) :: y ! Local - integer :: order, errorcode + integer :: i, errorcode PetscErrorCode :: ierr type(mat_ctxtype), pointer :: mat_ctx => null() @@ -487,24 +487,24 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) call VecSet(y, 0d0, ierr) ! ~~~~~~~~~~~~ - ! Iterate over the order + ! Iterate over the i ! ~~~~~~~~~~~~ - order = 1 - do while (order .le. 
size(mat_ctx%real_roots) - 1) + i = 1 + do while (i .le. size(mat_ctx%real_roots) - 1) ! If real this is easy - if (mat_ctx%imag_roots(order) == 0d0) then + if (mat_ctx%imag_roots(i) == 0d0) then ! Skips eigenvalues that are numerically zero - see ! the comment in calculate_gmres_polynomial_roots_newton - if (abs(mat_ctx%real_roots(order)) < 1e-12) then - order = order + 1 + if (abs(mat_ctx%real_roots(i)) < 1e-12) then + i = i + 1 cycle end if ! y = y + theta_i * MF_VEC_TEMP call VecAXPY(y, & - 1d0/mat_ctx%real_roots(order), & + 1d0/mat_ctx%real_roots(i), & mat_ctx%mf_temp_vec(MF_VEC_TEMP), ierr) ! MF_VEC_DIAG = A * MF_VEC_TEMP @@ -512,10 +512,10 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) call MatMult(mat_ctx%mat, mat_ctx%mf_temp_vec(MF_VEC_TEMP), mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) ! MF_VEC_TEMP = MF_VEC_TEMP - theta_i * MF_VEC_DIAG call VecAXPY(mat_ctx%mf_temp_vec(MF_VEC_TEMP), & - -1d0/mat_ctx%real_roots(order), & + -1d0/mat_ctx%real_roots(i), & mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) - order = order + 1 + i = i + 1 ! If imaginary, then have to combine the e'val and its ! complex conjugate to keep the arithmetic real @@ -523,8 +523,8 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) else ! Skips eigenvalues that are numerically zero - if (mat_ctx%real_roots(order)**2 + mat_ctx%imag_roots(order)**2 < 1e-12) then - order = order + 2 + if (mat_ctx%real_roots(i)**2 + mat_ctx%imag_roots(i)**2 < 1e-12) then + i = i + 2 cycle end if @@ -532,27 +532,27 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) call MatMult(mat_ctx%mat, mat_ctx%mf_temp_vec(MF_VEC_TEMP), mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) ! MF_VEC_DIAG = 2 * Re(theta_i) * MF_VEC_TEMP - MF_VEC_DIAG call VecAXPBY(mat_ctx%mf_temp_vec(MF_VEC_DIAG), & - 2 * mat_ctx%real_roots(order), & + 2 * mat_ctx%real_roots(i), & -1d0, & mat_ctx%mf_temp_vec(MF_VEC_TEMP), ierr) ! y = y + 1/(Re(theta_i)^2 + Imag(theta_i)^2) * MF_VEC_DIAG call VecAXPY(y, & - 1d0/(mat_ctx%real_roots(order)**2 + mat_ctx%imag_roots(order)**2), & + 1d0/(mat_ctx%real_roots(i)**2 + mat_ctx%imag_roots(i)**2), & mat_ctx%mf_temp_vec(MF_VEC_DIAG), ierr) - if (order .le. size(mat_ctx%real_roots) - 2) then + if (i .le. size(mat_ctx%real_roots) - 2) then ! MF_VEC_RHS = A * MF_VEC_DIAG call MatMult(mat_ctx%mat, mat_ctx%mf_temp_vec(MF_VEC_DIAG), mat_ctx%mf_temp_vec(MF_VEC_RHS), ierr) ! MF_VEC_TEMP = MF_VEC_TEMP - 1/(Re(theta_i)^2 + Imag(theta_i)^2) * MF_VEC_RHS call VecAXPY(mat_ctx%mf_temp_vec(MF_VEC_TEMP), & - -1d0/(mat_ctx%real_roots(order)**2 + mat_ctx%imag_roots(order)**2), & + -1d0/(mat_ctx%real_roots(i)**2 + mat_ctx%imag_roots(i)**2), & mat_ctx%mf_temp_vec(MF_VEC_RHS), ierr) end if ! Skip two evals - order = order + 2 + i = i + 2 end if end do @@ -561,11 +561,11 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) if (mat_ctx%imag_roots(size(mat_ctx%real_roots)) == 0d0) then ! Skips eigenvalues that are numerically zero - if (abs(mat_ctx%real_roots(order)) > 1e-12) then + if (abs(mat_ctx%real_roots(i)) > 1e-12) then ! 
y = y + theta_i * MF_VEC_TEMP call VecAXPBY(y, & - 1d0/mat_ctx%real_roots(order), & + 1d0/mat_ctx%real_roots(i), & 1d0, & mat_ctx%mf_temp_vec(MF_VEC_TEMP), ierr) end if @@ -818,7 +818,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt :: global_row_start, global_row_end_plus_one, row_index_into_submatrix PetscInt :: global_col_start, global_col_end_plus_one, n, ncols, ncols_two, ifree, max_nnzs PetscInt :: i_loc, j_loc, row_size, rows_ao, cols_ao, rows_ad, cols_ad, shift = 0 - integer :: errorcode, match_counter, term, order + integer :: errorcode, match_counter, term integer :: comm_size PetscErrorCode :: ierr integer, dimension(:), allocatable :: cols_index_one, cols_index_two @@ -829,7 +829,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt, dimension(:), pointer :: colmap logical :: deallocate_submatrices = .FALSE. type(c_ptr) :: vals_c_ptr - type(tMat), dimension(size(coefficients)-1), target :: matrix_powers type(tMat), pointer :: mat_sparsity_match type(int_vec), dimension(:), allocatable :: symbolic_ones type(real_vec), dimension(:), allocatable :: symbolic_vals @@ -842,6 +841,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp logical :: reuse_triggered PetscBool :: symmetric = PETSC_FALSE, inodecompressed = PETSC_FALSE, done PetscInt, parameter :: one = 1, zero = 0 + logical :: output_first_complex ! ~~~~~~~~~~ @@ -863,363 +863,362 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp reuse_triggered = .NOT. PetscObjectIsNull(cmat) - ! ! ~~~~~~~~~~ - ! ! Compute any matrix powers we might need to constrain sparsity and start assembling the - ! ! components of the output matrix up to the order of poly_sparsity_order - ! ! The powers higher than poly_sparsity_order can be done with only - ! ! a single bit of comms and is done below this - ! ! ~~~~~~~~~~ - - ! ! matrix_powers stores all the powers of the input matrix - ! matrix_powers(1) = matrix - - ! ! What power of A do we want to match the sparsity of - ! ! Compute the power we need if we're two or above - ! do order = 2, poly_sparsity_order - - ! ! Let's just store each power, that way we can set the sparsity - ! ! as the highest (unconstrained) power and do the mataxpy with a subset of entries - ! ! Takes more memory to do this but is faster - ! call MatMatMult(matrix, matrix_powers(order-1), & - ! MAT_INITIAL_MATRIX, 1.5d0, matrix_powers(order), ierr) - ! end do - - ! ! mat_sparsity_match now contains the sparsity of the power of A we want to match - ! mat_sparsity_match => matrix_powers(poly_sparsity_order) - - ! ! Copy in the highest unconstrained power - ! ! Duplicate & copy the matrix, but ensure there is a diagonal present - ! call mat_duplicate_copy_plus_diag(matrix_powers(poly_sparsity_order), reuse_triggered, cmat) - - ! ! We know we will never have non-zero locations outside of the highest constrained sparsity power - ! call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) - ! call MatSetOption(cmat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) - ! ! We know we are only going to insert local vals - ! ! These options should turn off any reductions in the assembly - ! call MatSetOption(cmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + ! ~~~~~~~~~~ + ! Compute cmat for all powers up to poly_sparsity_order + ! We have to be more careful here than in the monomial case + ! 
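The matrix-free application above follows the product form of the Newton-basis polynomial (Loe & Thornquist 2021), combining each complex-conjugate root pair so all arithmetic stays real. A minimal dense NumPy sketch of the same recurrence, illustrative only, with A, x and roots as plain arrays:

    import numpy as np

    def apply_newton_poly(A, x, roots):
        """y ~= p(A) x with p in Newton (product) form; complex pairs are
        combined into a single real quadratic factor."""
        m = roots.shape[0]
        y = np.zeros_like(x)
        temp = x.copy()                       # running product applied to x
        i = 0
        while i < m - 1:
            a, b = roots[i]
            if b == 0.0:                      # single real root theta_i
                if abs(a) < 1e-12:
                    i += 1
                    continue
                y += temp / a
                temp -= (A @ temp) / a        # temp <- (I - A/theta_i) temp
                i += 1
            else:                             # pair theta = a +- ib
                mod2 = a * a + b * b
                if mod2 < 1e-12:
                    i += 2
                    continue
                w = 2.0 * a * temp - A @ temp     # (2a I - A) temp
                y += w / mod2
                if i <= m - 3:
                    temp -= (A @ w) / mod2        # temp <- temp - A (2a I - A) temp / |theta|^2
                i += 2
        if roots[m - 1, 1] == 0.0 and abs(roots[m - 1, 0]) > 1e-12:
            y += temp / roots[m - 1, 0]
        return y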
In the mononomial case we just compute the matrix powers up to poly_sparsity_order + ! and add them times the coefficients to cmat + ! Here though we have to build the Newton basis polynomials + ! The complex conjugate roots are tricky as they build up two powers at a time + ! The powers higher than poly_sparsity_order can be done with only + ! a single bit of comms and is done below this + ! ~~~~~~~~~~ + output_first_complex = .FALSE. + if (poly_sparsity_order == 1) then + + ! If we've got first order sparsity, we want to build cmat up to first order + ! and then we add in higher order powers later + ! We can just pass in the first two roots to build the first order gmres polynomial + ! mat_sparsity_match gets out the parts of the product up to 1st order + ! for the real case this will be the equivalent of prod on line 5 of Alg 3 in Loe 2021 + ! I - 1/theta_1 A + ! whereas cmat will be 1/theta_1 + 1/theta_2 * (I - 1/theta_1 A) + ! For the complex case we instead pass out tmp from line 9 scaled by 1/(a^2 + b^2) + ! as this is the part of the product with sparsity up to A + ! This is because the prod for complex builds up the A^2 term for the next iteration + ! given it does two roots at a time + + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, cmat) + + call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & + coefficients(1:poly_sparsity_order + 1, 1:2), & + cmat, mat_sparsity_match) + else + + ! If we're any higher, then we build cmat up to that order + ! But we have to be careful because the last root we want to explicitly + ! build up to here (ie the power of the matrix given by poly_sparsity_order) + ! might be the first root of a complex conjugate pair + ! In that case cmat only contains part of the result up to poly_sparsity_order + ! Similarly mat_sparsity_match contains the product up to poly_sparsity_order + ! The rest gets added in below + ! output_first_complex records if poly_sparsity_order hits the first root + ! of a complex conjugate pair, as we need to know that below to add in the rest + ! of the poly_sparsity_order+1 term from that pair + ! before moving on to the rest of the higher order roots + call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & + cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) + end if + + ! We know we will never have non-zero locations outside of the highest constrained sparsity power + call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) + call MatSetOption(cmat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + ! We know we are only going to insert local vals + ! These options should turn off any reductions in the assembly + call MatSetOption(cmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) - ! ! ~~~~~~~~~~~~ - ! ! If we're in parallel we need to get the off-process rows of matrix that correspond - ! ! to the columns of mat_sparsity_match - ! ! We can therefore do the matmult for every constrained power locally with just that data - ! ! ~~~~~~~~~~~~ - ! ! Have to double check comm_size /= 1 as we might be on a subcommunicator and we can't call - ! ! MatMPIAIJGetSeqAIJ specifically if that's the case - ! if (comm_size /= 1) then - - ! ! ~~~~ - ! ! Get the cols - ! ! ~~~~ - ! call MatMPIAIJGetSeqAIJ(mat_sparsity_match, Ad, Ao, colmap, ierr) - - ! call MatGetSize(Ad, rows_ad, cols_ad, ierr) - ! ! 
We know the col size of Ao is the size of colmap, the number of non-zero offprocessor columns - ! call MatGetSize(Ao, rows_ao, cols_ao, ierr) - - ! ! For the column indices we need to take all the columns of mat_sparsity_match - ! A_array = mat_sparsity_match%v - - ! ! These are the global indices of the columns we want - ! allocate(col_indices_off_proc_array(cols_ad + cols_ao)) - ! allocate(ad_indices(cols_ad)) - ! ! Local rows (as global indices) - ! do ifree = 1, cols_ad - ! ad_indices(ifree) = global_row_start + ifree - 1 - ! end do - - ! ! col_indices_off_proc_array is now sorted, which are the global indices of the columns we want - ! call merge_pre_sorted(ad_indices, colmap, col_indices_off_proc_array) - ! deallocate(ad_indices) - - ! ! Create the sequential IS we want with the cols we want (written as global indices) - ! call ISCreateGeneral(PETSC_COMM_SELF, cols_ad + cols_ao, & - ! col_indices_off_proc_array, PETSC_USE_POINTER, col_indices(1), ierr) - ! call ISCreateGeneral(PETSC_COMM_SELF, cols_ao, & - ! colmap, PETSC_USE_POINTER, row_indices(1), ierr) - - ! ! ~~~~~~~ - ! ! Now we can pull out the chunk of matrix that we need - ! ! ~~~~~~~ - - ! ! We need off-processor rows to compute matrix powers - ! ! Setting this is necessary to avoid an allreduce when calling createsubmatrices - ! ! This will be reset to false after the call to createsubmatrices - ! call MatSetOption(matrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE, ierr) + ! ~~~~~~~~~~~~ + ! If we're in parallel we need to get the off-process rows of matrix that correspond + ! to the columns of mat_sparsity_match + ! We can therefore do the matmult for every constrained power locally with just that data + ! ~~~~~~~~~~~~ + ! Have to double check comm_size /= 1 as we might be on a subcommunicator and we can't call + ! MatMPIAIJGetSeqAIJ specifically if that's the case + if (comm_size /= 1) then + + ! ~~~~ + ! Get the cols + ! ~~~~ + call MatMPIAIJGetSeqAIJ(mat_sparsity_match, Ad, Ao, colmap, ierr) + + call MatGetSize(Ad, rows_ad, cols_ad, ierr) + ! We know the col size of Ao is the size of colmap, the number of non-zero offprocessor columns + call MatGetSize(Ao, rows_ao, cols_ao, ierr) + + ! For the column indices we need to take all the columns of mat_sparsity_match + A_array = mat_sparsity_match%v + + ! These are the global indices of the columns we want + allocate(col_indices_off_proc_array(cols_ad + cols_ao)) + allocate(ad_indices(cols_ad)) + ! Local rows (as global indices) + do ifree = 1, cols_ad + ad_indices(ifree) = global_row_start + ifree - 1 + end do + + ! col_indices_off_proc_array is now sorted, which are the global indices of the columns we want + call merge_pre_sorted(ad_indices, colmap, col_indices_off_proc_array) + deallocate(ad_indices) + + ! Create the sequential IS we want with the cols we want (written as global indices) + call ISCreateGeneral(PETSC_COMM_SELF, cols_ad + cols_ao, & + col_indices_off_proc_array, PETSC_USE_POINTER, col_indices(1), ierr) + call ISCreateGeneral(PETSC_COMM_SELF, cols_ao, & + colmap, PETSC_USE_POINTER, row_indices(1), ierr) + + ! ~~~~~~~ + ! Now we can pull out the chunk of matrix that we need + ! ~~~~~~~ + + ! We need off-processor rows to compute matrix powers + ! Setting this is necessary to avoid an allreduce when calling createsubmatrices + ! This will be reset to false after the call to createsubmatrices + call MatSetOption(matrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE, ierr) - ! ! Now this will be doing comms to get the non-local rows we want - ! ! 
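merge_pre_sorted is assumed here only to produce the sorted union of the two already-sorted index lists (local column indices and the off-process colmap, which are disjoint); in Python terms it presumably behaves like:

    def merge_pre_sorted(a, b):
        """Merge two already-sorted integer index lists into one sorted list."""
        out, i, j = [], 0, 0
        while i < len(a) and j < len(b):
            if a[i] <= b[j]:
                out.append(a[i]); i += 1
            else:
                out.append(b[j]); j += 1
        out.extend(a[i:])
        out.extend(b[j:])
        return out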
But only including the columns of the local fixed sparsity, as we don't need all the - ! ! columns of the non-local entries unless we are doing a full matmatmult - ! ! This returns a sequential matrix - ! if (.NOT. PetscObjectIsNull(reuse_mat)) then - ! reuse_submatrices(1) = reuse_mat - ! call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_REUSE_MATRIX, reuse_submatrices, ierr) - ! else - ! call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_INITIAL_MATRIX, reuse_submatrices, ierr) - ! reuse_mat = reuse_submatrices(1) - ! end if - ! row_size = size(col_indices_off_proc_array) - ! call ISDestroy(col_indices(1), ierr) - ! call ISDestroy(row_indices(1), ierr) - - ! ! Easy in serial as we have everything we neeed - ! else - - ! Ad = mat_sparsity_match - ! cols_ad = local_cols - ! allocate(reuse_submatrices(1)) - ! deallocate_submatrices = .TRUE. - ! reuse_submatrices(1) = matrix - ! row_size = local_rows - ! allocate(col_indices_off_proc_array(local_rows)) - ! do ifree = 1, local_rows - ! col_indices_off_proc_array(ifree) = ifree-1 - ! end do - ! end if + ! Now this will be doing comms to get the non-local rows we want + ! But only including the columns of the local fixed sparsity, as we don't need all the + ! columns of the non-local entries unless we are doing a full matmatmult + ! This returns a sequential matrix + if (.NOT. PetscObjectIsNull(reuse_mat)) then + reuse_submatrices(1) = reuse_mat + call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_REUSE_MATRIX, reuse_submatrices, ierr) + else + call MatCreateSubMatrices(matrix, one, row_indices, col_indices, MAT_INITIAL_MATRIX, reuse_submatrices, ierr) + reuse_mat = reuse_submatrices(1) + end if + row_size = size(col_indices_off_proc_array) + call ISDestroy(col_indices(1), ierr) + call ISDestroy(row_indices(1), ierr) + + ! Easy in serial as we have everything we neeed + else + + Ad = mat_sparsity_match + cols_ad = local_cols + allocate(reuse_submatrices(1)) + deallocate_submatrices = .TRUE. + reuse_submatrices(1) = matrix + row_size = local_rows + allocate(col_indices_off_proc_array(local_rows)) + do ifree = 1, local_rows + col_indices_off_proc_array(ifree) = ifree-1 + end do + end if - ! ! ~~~~~~~~~ - ! ! Now that we are here, reuse_submatrices(1) contains A^poly_sparsity_order with all of the rows - ! ! that correspond to the non-zero columns of matrix - ! ! ~~~~~~~~~ - - ! ! Have to get the max nnzs of the local and off-local rows we've just retrieved - ! max_nnzs = 0 - ! do ifree = global_row_start, global_row_end_plus_one-1 - ! call MatGetRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! if (ncols > max_nnzs) max_nnzs = ncols - ! call MatRestoreRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! end do - ! if (comm_size /= 1) then - ! do ifree = 1, cols_ao - ! call MatGetRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! if (ncols > max_nnzs) max_nnzs = ncols - ! call MatRestoreRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! end do - ! end if - ! ! and also the sparsity power - ! do ifree = global_row_start, global_row_end_plus_one-1 - ! call MatGetRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! if (ncols > max_nnzs) max_nnzs = ncols - ! 
call MatRestoreRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) - ! end do + ! ~~~~~~~~~ + ! Now that we are here, reuse_submatrices(1) contains A^poly_sparsity_order with all of the rows + ! that correspond to the non-zero columns of matrix + ! ~~~~~~~~~ + + ! Have to get the max nnzs of the local and off-local rows we've just retrieved + max_nnzs = 0 + do ifree = global_row_start, global_row_end_plus_one-1 + call MatGetRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + if (ncols > max_nnzs) max_nnzs = ncols + call MatRestoreRow(matrix, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + end do + if (comm_size /= 1) then + do ifree = 1, cols_ao + call MatGetRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + if (ncols > max_nnzs) max_nnzs = ncols + call MatRestoreRow(reuse_submatrices(1), ifree-1, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + end do + end if + ! and also the sparsity power + do ifree = global_row_start, global_row_end_plus_one-1 + call MatGetRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + if (ncols > max_nnzs) max_nnzs = ncols + call MatRestoreRow(mat_sparsity_match, ifree, ncols, PETSC_NULL_INTEGER_POINTER, PETSC_NULL_SCALAR_POINTER, ierr) + end do - ! ! ~~~~~~~~ - ! ! Get pointers to the sequential aij structure so we don't have to put critical regions - ! ! around the matgetrow - ! ! ~~~~~~~~ - ! call MatGetRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) - ! if (.NOT. done) then - ! print *, "Pointers not set in call to MatGetRowIJF" - ! call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) - ! end if - ! ! Returns the wrong size pointer and can break if that size goes negative?? - ! !call MatSeqAIJGetArrayF90(reuse_submatrices(1),submatrices_vals,ierr); - ! A_array = reuse_submatrices(1)%v - ! ! Now we must never overwrite the values in this pointer, and we must - ! ! never call restore on it, see comment on top of the commented out - ! ! MatSeqAIJRestoreArray below - ! call MatSeqAIJGetArrayF90_mine(A_array, vals_c_ptr) - ! call c_f_pointer(vals_c_ptr, submatrices_vals, shape=[size(submatrices_ja)]) + ! ~~~~~~~~ + ! Get pointers to the sequential aij structure so we don't have to put critical regions + ! around the matgetrow + ! ~~~~~~~~ + call MatGetRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) + if (.NOT. done) then + print *, "Pointers not set in call to MatGetRowIJF" + call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) + end if + ! Returns the wrong size pointer and can break if that size goes negative?? + !call MatSeqAIJGetArrayF90(reuse_submatrices(1),submatrices_vals,ierr); + A_array = reuse_submatrices(1)%v + ! Now we must never overwrite the values in this pointer, and we must + ! never call restore on it, see comment on top of the commented out + ! MatSeqAIJRestoreArray below + call MatSeqAIJGetArrayF90_mine(A_array, vals_c_ptr) + call c_f_pointer(vals_c_ptr, submatrices_vals, shape=[size(submatrices_ja)]) - ! ! ~~~~~~~~~~ + ! ~~~~~~~~~~ - ! allocate(cols(max_nnzs)) - ! allocate(vals(max_nnzs)) - ! allocate(vals_power_temp(max_nnzs)) - ! allocate(vals_previous_power_temp(max_nnzs)) - ! allocate(cols_index_one(max_nnzs)) - ! allocate(cols_index_two(max_nnzs)) - - ! ! 
Scale the highest constrained power - ! call MatScale(cmat, coefficients(poly_sparsity_order+1), ierr) - - ! ! Then go backwards and add in each of the coefficients * A^order from the second highest order down - ! do order = poly_sparsity_order - 1, 1, -1 - - ! ! Do result = alpha_1 * A_ff + alpha_2 * A_ff^2 + .... - ! ! Can use SUBSET_NONZERO_PATTERN as we have put the highest order power in first - ! call MatAXPY(cmat, coefficients(order+1), matrix_powers(order), SUBSET_NONZERO_PATTERN , ierr) - ! end do - - ! ! Add in the 0th order term - ! do i_loc = 1, local_rows - - ! ! Add in the I term - 0th order term - ! call MatSetValue(cmat, global_row_start + i_loc-1, global_row_start + i_loc-1, & - ! coefficients(1), ADD_VALUES, ierr) - ! end do - - ! ! ~~~~~~~~~~~~ - ! ! From here we now have cmat with the correct values up to the power poly_sparsity_order - ! ! and hence we want to add in the sparsity constrained powers - ! ! ~~~~~~~~~~~~ + allocate(cols(max_nnzs)) + allocate(vals(max_nnzs)) + allocate(vals_power_temp(max_nnzs)) + allocate(vals_previous_power_temp(max_nnzs)) + allocate(cols_index_one(max_nnzs)) + allocate(cols_index_two(max_nnzs)) + + ! ~~~~~~~~~~~~ + ! From here we now have cmat with the correct values up to the power poly_sparsity_order + ! and hence we want to add in the sparsity constrained powers + ! ~~~~~~~~~~~~ - ! ! Now go through and compute the sum of the matrix powers - ! ! We're doing row-wise matmatmults here assuming the fixed sparsity - ! ! We exploit the fact that the subsequent matrix powers can be done - ! ! one row at a time, so we only have to retrieve the needed vals from mat_sparsity_match once - ! do i_loc = 1, local_rows + ! Now go through and compute the sum of the matrix powers + ! We're doing row-wise matmatmults here assuming the fixed sparsity + ! We exploit the fact that the subsequent matrix powers can be done + ! one row at a time, so we only have to retrieve the needed vals from mat_sparsity_match once + do i_loc = 1, local_rows - ! ! Get the row of mat_sparsity_match - ! call MatGetRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & - ! cols_ptr, vals_ptr, ierr) - ! ! Copying here because mat_sparsity_match and matrix are often the same matrix - ! ! and hence we can only have one active matgetrow - ! ncols = ncols_two - ! cols(1:ncols) = cols_ptr(1:ncols) - ! vals(1:ncols) = vals_ptr(1:ncols) - ! call MatRestoreRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & - ! cols_ptr, vals_ptr, ierr) - - ! ! This is just a symbolic for the set of rows given in cols - ! ! Let's just do all the column matching and extraction of the values once + ! Get the row of mat_sparsity_match + call MatGetRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & + cols_ptr, vals_ptr, ierr) + ! Copying here because mat_sparsity_match and matrix are often the same matrix + ! and hence we can only have one active matgetrow + ncols = ncols_two + cols(1:ncols) = cols_ptr(1:ncols) + vals(1:ncols) = vals_ptr(1:ncols) + call MatRestoreRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & + cols_ptr, vals_ptr, ierr) + + ! This is just a symbolic for the set of rows given in cols + ! Let's just do all the column matching and extraction of the values once - ! ! Allocate some space to store the matching indices - ! allocate(symbolic_ones(ncols)) - ! allocate(symbolic_vals(ncols)) - ! row_index_into_submatrix = 1 - - ! ! This is a row-wise product - ! do j_loc = 1, ncols - - ! ! 
If we're trying to access a local row in matrix - ! if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then - - ! call MatGetRow(matrix, cols(j_loc), ncols_two, & - ! cols_two_ptr, vals_two_ptr, ierr) - - ! ! If we're trying to access a non-local row in matrix - ! else - - ! ! this is local row index we want into reuse_submatrices(1) (as row_indices used to extract are just colmap) - ! ! We know cols is sorted, so every non-local index will be greater than the last one - ! ! (it's just that cols could have some local ones between different non-local) - ! ! colmap is also sorted and we know every single non-local entry in cols(j_loc) is in colmap - ! do while (row_index_into_submatrix .le. cols_ao .AND. colmap(row_index_into_submatrix) .lt. cols(j_loc)) - ! row_index_into_submatrix = row_index_into_submatrix + 1 - ! end do - - ! ! This is the number of columns - ! ncols_two = submatrices_ia(row_index_into_submatrix+1) - submatrices_ia(row_index_into_submatrix) - ! allocate(cols_two_ptr(ncols_two)) - ! ! This is the local column indices in reuse_submatrices(1) - ! cols_two_ptr = submatrices_ja(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) - ! ! Because col_indices_off_proc_array (and hence the column indices in reuse_submatrices(1) is sorted, - ! ! then cols_two_ptr contains the sorted global column indices - ! cols_two_ptr = col_indices_off_proc_array(cols_two_ptr+1) - - ! ! This is the values - ! vals_two_ptr => & - ! submatrices_vals(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) - ! end if + ! Allocate some space to store the matching indices + allocate(symbolic_ones(ncols)) + allocate(symbolic_vals(ncols)) + row_index_into_submatrix = 1 + + ! This is a row-wise product + do j_loc = 1, ncols + + ! If we're trying to access a local row in matrix + if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then + + call MatGetRow(matrix, cols(j_loc), ncols_two, & + cols_two_ptr, vals_two_ptr, ierr) + + ! If we're trying to access a non-local row in matrix + else + + ! this is local row index we want into reuse_submatrices(1) (as row_indices used to extract are just colmap) + ! We know cols is sorted, so every non-local index will be greater than the last one + ! (it's just that cols could have some local ones between different non-local) + ! colmap is also sorted and we know every single non-local entry in cols(j_loc) is in colmap + do while (row_index_into_submatrix .le. cols_ao .AND. colmap(row_index_into_submatrix) .lt. cols(j_loc)) + row_index_into_submatrix = row_index_into_submatrix + 1 + end do + + ! This is the number of columns + ncols_two = submatrices_ia(row_index_into_submatrix+1) - submatrices_ia(row_index_into_submatrix) + allocate(cols_two_ptr(ncols_two)) + ! This is the local column indices in reuse_submatrices(1) + cols_two_ptr = submatrices_ja(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) + ! Because col_indices_off_proc_array (and hence the column indices in reuse_submatrices(1) is sorted, + ! then cols_two_ptr contains the sorted global column indices + cols_two_ptr = col_indices_off_proc_array(cols_two_ptr+1) + + ! This is the values + vals_two_ptr => & + submatrices_vals(submatrices_ia(row_index_into_submatrix)+1:submatrices_ia(row_index_into_submatrix+1)) + end if - ! ! Search for the matching column - ! ! 
We're intersecting the global column indices of mat_sparsity_match (cols) and matrix (cols_two_ptr) - ! call intersect_pre_sorted_indices_only(cols(1:ncols), cols_two_ptr, cols_index_one, cols_index_two, match_counter) + ! Search for the matching column + ! We're intersecting the global column indices of mat_sparsity_match (cols) and matrix (cols_two_ptr) + call intersect_pre_sorted_indices_only(cols(1:ncols), cols_two_ptr, cols_index_one, cols_index_two, match_counter) - ! ! Don't need to do anything if we have no matches - ! if (match_counter == 0) then - ! ! Store that we can skip this entry - ! symbolic_ones(j_loc)%ptr => null() - ! symbolic_vals(j_loc)%ptr => null() - ! else - - ! ! These are the matching local column indices for this row of mat_sparsity_match - ! allocate(symbolic_ones(j_loc)%ptr(match_counter)) - ! symbolic_ones(j_loc)%ptr = cols_index_one(1:match_counter) - - ! ! These are the matching values of matrix - ! allocate(symbolic_vals(j_loc)%ptr(match_counter)) - ! symbolic_vals(j_loc)%ptr = vals_two_ptr(cols_index_two(1:match_counter)) - ! end if + ! Don't need to do anything if we have no matches + if (match_counter == 0) then + ! Store that we can skip this entry + symbolic_ones(j_loc)%ptr => null() + symbolic_vals(j_loc)%ptr => null() + else + + ! These are the matching local column indices for this row of mat_sparsity_match + allocate(symbolic_ones(j_loc)%ptr(match_counter)) + symbolic_ones(j_loc)%ptr = cols_index_one(1:match_counter) + + ! These are the matching values of matrix + allocate(symbolic_vals(j_loc)%ptr(match_counter)) + symbolic_vals(j_loc)%ptr = vals_two_ptr(cols_index_two(1:match_counter)) + end if - ! ! Restore local row of matrix - ! if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then - ! call MatRestoreRow(matrix, cols(j_loc), ncols_two, & - ! cols_two_ptr, vals_two_ptr, ierr) - ! else - ! deallocate(cols_two_ptr) - ! end if - ! end do + ! Restore local row of matrix + if (cols(j_loc) .ge. global_row_start .AND. cols(j_loc) < global_row_end_plus_one) then + call MatRestoreRow(matrix, cols(j_loc), ncols_two, & + cols_two_ptr, vals_two_ptr, ierr) + else + deallocate(cols_two_ptr) + end if + end do - ! ! Start with the values of mat_sparsity_match in it - ! vals_previous_power_temp(1:ncols) = vals(1:ncols) + ! Start with the values of mat_sparsity_match in it + vals_previous_power_temp(1:ncols) = vals(1:ncols) - ! ! Loop over any matrix powers - ! ! vals_power_temp stores the value of A^(term-1) for this row, and we update this as we go through - ! ! the term loop - ! do term = poly_sparsity_order+2, size(coefficients) + ! ! Loop over any matrix powers + ! ! vals_power_temp stores the value of A^(term-1) for this row, and we update this as we go through + ! ! the term loop + ! do term = poly_sparsity_order+2, size(coefficients) - ! ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns - ! vals_power_temp(1:ncols) = 0 + ! ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns + ! vals_power_temp(1:ncols) = 0 - ! ! Have to finish all the columns before we move onto the next coefficient - ! do j_loc = 1, ncols + ! ! Have to finish all the columns before we move onto the next coefficient + ! do j_loc = 1, ncols - ! ! If we have no matching columns cycle this row - ! if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle + ! ! If we have no matching columns cycle this row + ! if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle - ! ! 
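intersect_pre_sorted_indices_only is presumably a two-pointer walk over the two sorted column lists that returns the matching positions in each; a small Python sketch of that matching step, illustrative only:

    def intersect_pre_sorted(cols_a, cols_b):
        """Positions (in each input) of the common entries of two sorted index
        arrays - the column matching used by the row-wise product."""
        ia, ib = [], []
        i = j = 0
        while i < len(cols_a) and j < len(cols_b):
            if cols_a[i] == cols_b[j]:
                ia.append(i); ib.append(j); i += 1; j += 1
            elif cols_a[i] < cols_b[j]:
                i += 1
            else:
                j += 1
        return ia, ib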
symbolic_vals(j_loc)%ptr has the matching values of A in it - ! vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) + & - ! vals_previous_power_temp(j_loc) * symbolic_vals(j_loc)%ptr + ! ! symbolic_vals(j_loc)%ptr has the matching values of A in it + ! vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) + & + ! vals_previous_power_temp(j_loc) * symbolic_vals(j_loc)%ptr - ! end do + ! end do - ! ! ~~~~~~~~~~~ - ! ! Now can add the value of coeff * A^(term-1) to our matrix - ! ! Can skip this if coeff is zero, but still need to compute A^(term-1) - ! ! for the next time through - ! ! ~~~~~~~~~~~ - ! if (ncols /= 0 .AND. coefficients(term) /= 0d0) then - ! call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & - ! coefficients(term) * vals_power_temp, ADD_VALUES, ierr) - ! end if - - ! ! This should now have the value of A^(term-1) in it - ! vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) - ! end do - - ! ! Delete our symbolic - ! do j_loc = 1, ncols - ! if (associated(symbolic_ones(j_loc)%ptr)) then - ! deallocate(symbolic_ones(j_loc)%ptr) - ! deallocate(symbolic_vals(j_loc)%ptr) - ! end if - ! end do - ! deallocate(symbolic_vals, symbolic_ones) - ! end do - - ! call MatRestoreRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) - ! ! We very deliberately don't call restorearray here! - ! ! There is no matseqaijgetarrayread or matseqaijrestorearrayread in Fortran - ! ! Those routines don't increment the PetscObjectStateGet which tells petsc - ! ! the mat has changed. Hence above we directly access the data pointer with - ! ! a call to MatSeqAIJGetArrayF90_mine and then never write into it - ! ! If we call the restorearrayf90, that does increment the object state - ! ! even though we only read from the array - ! ! That would mean if we pass in a pc->pmat for example, just setting up a pc - ! ! would trigger petsc setting up the pc on every iteration of the pc - ! ! call MatSeqAIJRestoreArray(reuse_submatrices(1),submatrices_vals,ierr); - - ! ! ~~~~~~~~~~~ - - ! ! Do the assembly, should need zero reductions in this given we've set the - ! ! flags above - ! call MatAssemblyBegin(cmat, MAT_FINAL_ASSEMBLY, ierr) - - ! ! Delete temporaries - ! do order = 2, poly_sparsity_order - ! call MatDestroy(matrix_powers(order), ierr) - ! end do - ! if (deallocate_submatrices) then - ! deallocate(reuse_submatrices) - ! reuse_submatrices => null() - ! end if - - ! deallocate(col_indices_off_proc_array) - ! deallocate(cols, vals, vals_power_temp, vals_previous_power_temp, cols_index_one, cols_index_two) - - ! ! Finish assembly - ! call MatAssemblyEnd(cmat, MAT_FINAL_ASSEMBLY, ierr) + ! ! ~~~~~~~~~~~ + ! ! Now can add the value of coeff * A^(term-1) to our matrix + ! ! Can skip this if coeff is zero, but still need to compute A^(term-1) + ! ! for the next time through + ! ! ~~~~~~~~~~~ + ! if (ncols /= 0 .AND. coefficients(term) /= 0d0) then + ! call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + ! coefficients(term) * vals_power_temp, ADD_VALUES, ierr) + ! end if + + ! ! This should now have the value of A^(term-1) in it + ! vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) + ! end do + + ! 
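The still-commented loop above mirrors the monomial fixed-sparsity logic: each extra power is formed row by row and any fill outside the fixed pattern is simply dropped (the Newton-basis equivalent is still to be filled in here, as the commit message says). A dense NumPy sketch of that monomial version, illustrative only:

    import numpy as np

    def constrained_powers_sum(A, coeffs, sparsity_order):
        """sum_k coeffs[k] * A^k, where powers above sparsity_order keep the
        sparsity pattern of A^sparsity_order after every multiply."""
        n = A.shape[0]
        pattern = (np.linalg.matrix_power(A, sparsity_order) != 0) | np.eye(n, dtype=bool)
        result = coeffs[0] * np.eye(n)
        Ak = np.eye(n)
        for k in range(1, len(coeffs)):
            Ak = Ak @ A
            if k > sparsity_order:
                Ak = np.where(pattern, Ak, 0.0)   # drop fill outside the fixed pattern
            result += coeffs[k] * Ak
        return result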
Delete our symbolic + do j_loc = 1, ncols + if (associated(symbolic_ones(j_loc)%ptr)) then + deallocate(symbolic_ones(j_loc)%ptr) + deallocate(symbolic_vals(j_loc)%ptr) + end if + end do + deallocate(symbolic_vals, symbolic_ones) + end do + + call MatRestoreRowIJ(reuse_submatrices(1),shift,symmetric,inodecompressed,n,submatrices_ia,submatrices_ja,done,ierr) + ! We very deliberately don't call restorearray here! + ! There is no matseqaijgetarrayread or matseqaijrestorearrayread in Fortran + ! Those routines don't increment the PetscObjectStateGet which tells petsc + ! the mat has changed. Hence above we directly access the data pointer with + ! a call to MatSeqAIJGetArrayF90_mine and then never write into it + ! If we call the restorearrayf90, that does increment the object state + ! even though we only read from the array + ! That would mean if we pass in a pc->pmat for example, just setting up a pc + ! would trigger petsc setting up the pc on every iteration of the pc + ! call MatSeqAIJRestoreArray(reuse_submatrices(1),submatrices_vals,ierr); + + ! ~~~~~~~~~~~ + + ! Do the assembly, should need zero reductions in this given we've set the + ! flags above + call MatAssemblyBegin(cmat, MAT_FINAL_ASSEMBLY, ierr) + + ! Delete temporaries + call MatDestroy(mat_sparsity_match, ierr) + if (deallocate_submatrices) then + deallocate(reuse_submatrices) + reuse_submatrices => null() + end if + + deallocate(col_indices_off_proc_array) + deallocate(cols, vals, vals_power_temp, vals_previous_power_temp, cols_index_one, cols_index_two) + + ! Finish assembly + call MatAssemblyEnd(cmat, MAT_FINAL_ASSEMBLY, ierr) end subroutine mat_mult_powers_share_sparsity_newton_cpu @@ -1246,13 +1245,14 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! Local variables PetscInt :: global_rows, global_cols, local_rows, local_cols - integer :: comm_size, errorcode, order + integer :: comm_size, errorcode PetscErrorCode :: ierr MPIU_Comm :: MPI_COMM_MATRIX type(mat_ctxtype), pointer :: mat_ctx=>null() logical :: reuse_triggered PetscReal :: square_sum type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 + logical :: reuse_triggered ! ~~~~~~ @@ -1314,10 +1314,11 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & return endif + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + ! ~~~~~~~~~~~~ ! If we're here then we want an assembled approximate inverse ! ~~~~~~~~~~~~ - reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) ! If we're zeroth order poly this is trivial as it's just 1/theta_1 I if (poly_order == 0) then @@ -1330,66 +1331,12 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! For poly_order 1 and poly_sparsity_order 1 this is easy else if (poly_order == 1 .AND. poly_sparsity_order == 1) then - - ! Duplicate & copy the matrix, but ensure there is a diagonal present - call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, inv_matrix) - - ! Flags to prevent reductions when assembling (there are assembles in the shift) - call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) - call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) - call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) - - ! We only have two coefficients, so they are either both real or complex conjugates - ! If real - if (coefficients(1,2) == 0d0) then - - ! Have to be careful here, as we may be first order, but the second eigenvaule - ! 
might have been set to zero thanks to the rank reducing solve - ! So we just check if the second real part is zero and if it is - ! we just compute a 0th order inverse - annoyingly we can't call - ! build_gmres_polynomial_newton_inverse_0th_order as that builds a MATDIAGONAL - ! and in the tests there is a problem where we reuse the sparsity, in the first - ! solve we don't have a zero coefficient but in the second solve we do - ! So the mat type needs to remain consistent - ! This can't happen in the complex case - if (coefficients(2,1) == 0d0) then - - ! Set to zero - call MatScale(inv_matrix, 0d0, ierr) - ! Then add in the 0th order inverse - call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) - - ! Then just return - return - end if - - ! Could just compute the equivalent mononomial here to save some flops - ! but the whole point to doing the Newton form is to avoid the - ! theta_1 * theta_2 that would result - - ! result = -A_ff/theta_1 - call MatScale(inv_matrix, -1d0/(coefficients(1, 1)), ierr) - ! result = I -A_ff/theta_1 - call MatShift(inv_matrix, 1d0, ierr) - ! result = 1/theta_2 * (I -A_ff/theta_1) - call MatScale(inv_matrix, 1d0/(coefficients(2, 1)), ierr) - - ! result = 1/theta_1 + 1/theta_2 * (I -A_ff/theta_1) - ! Don't need an assemble as there is one called in this - call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr) - ! Complex conjugate roots, a +- ib - else - ! a^2 + b^2 - square_sum = coefficients(1,1)**2 + coefficients(1,2)**2 - - ! Complex conjugate roots - ! result = -A_ff / (a^2 + b^2) - call MatScale(inv_matrix, -1d0/square_sum, ierr) - ! result = 2a/(a^2 + b^2) I - A_ff / (a^2 + b^2) - ! Don't need an assemble as there is one called in this - call MatShift(inv_matrix, 2d0 * coefficients(1,1)/square_sum, ierr) - end if + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, inv_matrix) + + call build_gmres_polynomial_newton_inverse_1st_1st(matrix, & + poly_order, coefficients, inv_matrix) ! Then just return return @@ -1412,148 +1359,9 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & ! ~~~~~~~~~~ ! We are only here if we don't constrain_sparsity ! ~~~~~~~~~~ - - ! If not re-using - ! Copy in the initial matrix - if (.NOT. reuse_triggered) then - ! Duplicate & copy the matrix, but ensure there is a diagonal present - call mat_duplicate_copy_plus_diag(matrix, .FALSE., inv_matrix) - else - ! For the powers > 1 the pattern of the original matrix will be different - ! to the resulting inverse - call MatCopy(matrix, inv_matrix, DIFFERENT_NONZERO_PATTERN, ierr) - end if - - ! Set to zero as we add in each product of terms - call MatScale(inv_matrix, 0d0, ierr) - - ! Don't set any off processor entries so no need for a reduction when assembling - call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) - - ! We start with an identity in mat_product - call generate_identity(matrix, mat_product) - - ! ~~~~~~~~~~~~ - ! Iterate over the order - ! This is basically the same as the MF application but we have to build the powers - ! ~~~~~~~~~~~~ - order = 1 - do while (order .le. size(coefficients, 1) - 1) - - ! Duplicate & copy the matrix, but ensure there is a diagonal present - ! temp_mat_A is going to store things with the sparsity of A - if (PetscObjectIsNull(temp_mat_A)) then - call mat_duplicate_copy_plus_diag(matrix, .FALSE., temp_mat_A) - else - ! 
Can reuse the sparsity - call mat_duplicate_copy_plus_diag(matrix, .TRUE., temp_mat_A) - end if - - ! If real this is easy - if (coefficients(order,2) == 0d0) then - - ! Skips eigenvalues that are numerically zero - see - ! the comment in calculate_gmres_polynomial_roots_newton - if (abs(coefficients(order,1)) < 1e-12) then - order = order + 1 - cycle - end if - - ! Then add the scaled version of each product - if (reuse_triggered) then - ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0/coefficients(order,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) - else - ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0/coefficients(order,1), mat_product) - end if - - ! temp_mat_A = A_ff/theta_k - call MatScale(temp_mat_A, -1d0/coefficients(order,1), ierr) - ! temp_mat_A = I - A_ff/theta_k - call MatShift(temp_mat_A, 1d0, ierr) - - ! mat_product_k_plus_1 = mat_product * temp_mat_A - call MatMatMult(temp_mat_A, mat_product, & - MAT_INITIAL_MATRIX, 1.5d0, mat_product_k_plus_1, ierr) - call MatDestroy(mat_product, ierr) - mat_product = mat_product_k_plus_1 - - order = order + 1 - - ! Complex - else - - ! Skips eigenvalues that are numerically zero - if (coefficients(order,1)**2 + coefficients(order,2)**2 < 1e-12) then - order = order + 2 - cycle - end if - - ! Compute 2a I - A - ! Have to use the DIFFERENT_NONZERO_PATTERN here - ! temp_mat_A = -A - call MatScale(temp_mat_A, -1d0, ierr) - ! temp_mat_A = 2a I - A_ff - call MatShift(temp_mat_A, 2d0 * coefficients(order,1), ierr) - ! temp_mat_A = (2a I - A_ff)/(a^2 + b^2) - call MatScale(temp_mat_A, 1d0/(coefficients(order,1)**2 + coefficients(order,2)**2), ierr) - - call MatMatMult(temp_mat_A, mat_product, & - MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) - - ! Then add the scaled version of each product - if (reuse_triggered) then - ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0, temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) - else - ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0, temp_mat_two) - end if - - if (order .le. size(coefficients, 1) - 2) then - ! temp_mat_three = matrix * temp_mat_two - call MatMatMult(matrix, temp_mat_two, & - MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) - call MatDestroy(temp_mat_two, ierr) - - ! Then add the scaled version of each product - if (reuse_triggered) then - ! If doing reuse we know our nonzeros are a subset - call MatAXPY(mat_product, -1d0, temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) - else - ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(mat_product, -1d0, temp_mat_three) - end if - call MatDestroy(temp_mat_three, ierr) - else - call MatDestroy(temp_mat_two, ierr) - end if - - ! Skip two evals - order = order + 2 - - end if - end do - - ! Final step if last root is real - if (coefficients(size(coefficients,1),2) == 0d0) then - ! Add in the final term multiplied by 1/theta_poly_order - - ! Skips eigenvalues that are numerically zero - if (abs(coefficients(order,1)) > 1e-12) then - if (reuse_triggered) then - ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0/coefficients(order,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) - else - ! 
Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0/coefficients(order,1), mat_product) - end if - end if - end if - - call MatDestroy(temp_mat_A, ierr) - call MatDestroy(mat_product, ierr) + call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & + inv_matrix) + end subroutine build_gmres_polynomial_newton_inverse @@ -1621,7 +1429,7 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly type(tMat), intent(inout) :: inv_matrix ! Local variables - integer :: order + integer :: i PetscErrorCode :: ierr logical :: reuse_triggered type(tVec) :: inv_vec, diag_vec, product_vec, temp_vec_A, one_vec, temp_vec_two @@ -1633,7 +1441,6 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly call MatCreateVecs(matrix, product_vec, diag_vec, ierr) call MatGetDiagonal(matrix, diag_vec, ierr) - ! This stores D^order if (.NOT. reuse_triggered) then call VecDuplicate(diag_vec, inv_vec, ierr) else @@ -1649,40 +1456,40 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly call VecSet(product_vec, 1d0, ierr) call VecSet(one_vec, 1d0, ierr) - order = 1 - do while (order .le. size(coefficients, 1) - 1) + i = 1 + do while (i .le. size(coefficients, 1) - 1) ! temp_vec_A is going to store things with the sparsity of A call VecCopy(diag_vec, temp_vec_A, ierr) ! If real this is easy - if (coefficients(order,2) == 0d0) then + if (coefficients(i,2) == 0d0) then ! Skips eigenvalues that are numerically zero - see ! the comment in calculate_gmres_polynomial_roots_newton - if (abs(coefficients(order,1)) < 1e-12) then - order = order + 1 + if (abs(coefficients(i,1)) < 1e-12) then + i = i + 1 cycle end if - call VecAXPY(inv_vec, 1d0/coefficients(order,1), product_vec, ierr) + call VecAXPY(inv_vec, 1d0/coefficients(i,1), product_vec, ierr) ! temp_vec_A = A_ff/theta_k - call VecScale(temp_vec_A, -1d0/coefficients(order,1), ierr) + call VecScale(temp_vec_A, -1d0/coefficients(i,1), ierr) ! temp_vec_A = I - A_ff/theta_k call VecAXPY(temp_vec_A, 1d0, one_vec, ierr) ! product_vec = product_vec * temp_vec_A call VecPointwiseMult(product_vec, product_vec, temp_vec_A, ierr) - order = order + 1 + i = i + 1 ! Complex else ! Skips eigenvalues that are numerically zero - if (coefficients(order,1)**2 + coefficients(order,2)**2 < 1e-12) then - order = order + 2 + if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then + i = i + 2 cycle end if @@ -1690,22 +1497,22 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly ! temp_vec_A = -A call VecScale(temp_vec_A, -1d0, ierr) ! temp_vec_A = 2a I - A_ff - call VecAXPY(temp_vec_A, 2d0 * coefficients(order,1), one_vec, ierr) + call VecAXPY(temp_vec_A, 2d0 * coefficients(i,1), one_vec, ierr) ! temp_vec_A = (2a I - A_ff)/(a^2 + b^2) - call VecScale(temp_vec_A, 1d0/(coefficients(order,1)**2 + coefficients(order,2)**2), ierr) + call VecScale(temp_vec_A, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), ierr) ! temp_vec_two = temp_vec_A * product_vec call VecPointwiseMult(temp_vec_two, temp_vec_A, product_vec, ierr) call VecAXPY(inv_vec, 1d0, temp_vec_two, ierr) - if (order .le. size(coefficients, 1) - 2) then + if (i .le. size(coefficients, 1) - 2) then ! temp_vec_two = A * temp_vec_two call VecPointwiseMult(temp_vec_two, diag_vec, temp_vec_two, ierr) call VecAXPY(product_vec, -1d0, temp_vec_two, ierr) end if ! 
Skip two evals - order = order + 2 + i = i + 2 end if end do @@ -1715,8 +1522,8 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly ! Add in the final term multiplied by 1/theta_poly_order ! Skips eigenvalues that are numerically zero - if (abs(coefficients(order,1)) > 1e-12) then - call VecAXPY(inv_vec, 1d0/coefficients(order,1), product_vec, ierr) + if (abs(coefficients(i,1)) > 1e-12) then + call VecAXPY(inv_vec, 1d0/coefficients(i,1), product_vec, ierr) end if end if @@ -1737,6 +1544,374 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly end subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton +! ------------------------------------------------------------------------------------------------------------------------------- + + subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coefficients, & + inv_matrix, mat_product_output) + + ! Specific 1st order with 1st order sparsity + + ! ~~~~~~ + type(tMat), intent(in) :: matrix + integer, intent(in) :: poly_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: inv_matrix + type(tMat), intent(inout), optional :: mat_product_output + + ! Local variables + PetscErrorCode :: ierr + logical :: reuse_triggered, output_product + PetscReal :: square_sum + + ! ~~~~~~ + + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + output_product = present(mat_product_output) + + ! Flags to prevent reductions when assembling (there are assembles in the shift) + call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) + call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + + ! We only have two coefficients, so they are either both real or complex conjugates + ! If real + if (coefficients(1,2) == 0d0) then + + ! Have to be careful here, as we may be first order, but the second eigenvaule + ! might have been set to zero thanks to the rank reducing solve + ! So we just check if the second real part is zero and if it is + ! we just compute a 0th order inverse - annoyingly we can't call + ! build_gmres_polynomial_newton_inverse_0th_order as that builds a MATDIAGONAL + ! and in the tests there is a problem where we reuse the sparsity, in the first + ! solve we don't have a zero coefficient but in the second solve we do + ! So the mat type needs to remain consistent + ! This can't happen in the complex case + if (coefficients(2,1) == 0d0) then + + ! Set to zero + call MatScale(inv_matrix, 0d0, ierr) + ! Then add in the 0th order inverse + call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) + + ! Then just return + return + end if + + ! Could just compute the equivalent mononomial here to save some flops + ! but the whole point to doing the Newton form is to avoid the + ! theta_1 * theta_2 that would result + + ! result = -A_ff/theta_1 + call MatScale(inv_matrix, -1d0/(coefficients(1, 1)), ierr) + ! result = I -A_ff/theta_1 + call MatShift(inv_matrix, 1d0, ierr) + ! If we're doing this as part of fixed sparsity multiply, + ! we need to return mat_product_output + if (output_product) then + call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + end if + + ! result = 1/theta_2 * (I -A_ff/theta_1) + call MatScale(inv_matrix, 1d0/(coefficients(2, 1)), ierr) + + ! result = 1/theta_1 + 1/theta_2 * (I -A_ff/theta_1) + ! 
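For reference, the two branches of this first-order routine expand to the following closed forms (a sketch in the notation of the surrounding comments, not an extra code path). With two real roots theta_1 and theta_2:

\[ p(A) \;=\; \frac{1}{\theta_1} I \;+\; \frac{1}{\theta_2}\left(I - \frac{A}{\theta_1}\right) , \]

and with a conjugate pair theta = a + ib, \bar{\theta} = a - ib, the same degree-1 Newton form collapses to a purely real expression,

\[ \frac{1}{\theta} I + \frac{1}{\bar\theta}\left(I - \frac{A}{\theta}\right) \;=\; \frac{2a\,I - A}{a^2 + b^2} , \]

which is why the complex branch never needs complex arithmetic.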
Don't need an assemble as there is one called in this + call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr) + + ! Complex conjugate roots, a +- ib + else + ! a^2 + b^2 + square_sum = coefficients(1,1)**2 + coefficients(1,2)**2 + + ! Complex conjugate roots + ! result = -A_ff / (a^2 + b^2) + call MatScale(inv_matrix, -1d0/square_sum, ierr) + ! result = 2a/(a^2 + b^2) I - A_ff / (a^2 + b^2) + ! Don't need an assemble as there is one called in this + call MatShift(inv_matrix, 2d0 * coefficients(1,1)/square_sum, ierr) + ! If we're doing this as part of fixed sparsity multiply, + ! we need to return mat_product_output + if (output_product) then + call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + end if + end if + + end subroutine build_gmres_polynomial_newton_inverse_1st_1st + + +! ------------------------------------------------------------------------------------------------------------------------------- + + subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & + inv_matrix, mat_product_output, poly_sparsity_order, output_first_complex) + + ! No constrained sparsity by default + ! If you pass in mat_product_output, poly_sparsity_order, output_first_complex + ! then it will build part of the terms, up to poly_sparsity_order, and return the product + ! in mat_product_output that you need to compute the rest of the fixed sparsity terms + + ! ~~~~~~ + type(tMat), intent(in) :: matrix + integer, intent(in) :: poly_order + PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients + type(tMat), intent(inout) :: inv_matrix + type(tMat), intent(inout), optional :: mat_product_output + integer, intent(in), optional :: poly_sparsity_order + logical, intent(inout), optional :: output_first_complex + + ! Local variables + PetscErrorCode :: ierr + logical :: reuse_triggered, output_product, first_complex + integer :: i, i_sparse + type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 + + ! ~~~~~~ + + reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) + output_product = present(mat_product_output) + + if (.NOT. reuse_triggered) then + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, .FALSE., inv_matrix) + end if + + ! Set to zero as we add in each product of terms + call MatScale(inv_matrix, 0d0, ierr) + + ! Don't set any off processor entries so no need for a reduction when assembling + call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) + + ! We start with an identity in mat_product + call generate_identity(matrix, mat_product) + + ! If we're going to output the product as part of a fixed sparsity multiply, + ! we may be asking to constrain the sparsity to a power in between order and order + 2 + ! if there is a complex root at poly_sparsity_order + ! ie if we have roots (theta_1^r, theta_2^c, theta_3^c, theta_4^r) + ! where ^r means a purely real root and ^c means a complex root + ! want poly_sparsity_order = 1, we can't process all the way up to theta_3^c as that would + ! compute up to an A^2 term which is beyond our sparsity constraint + ! So we just check if the last root also has it's complex conjugate present + ! This will never happen in any context except when we are outputting the product + ! as part of a fixed sparsity multiply + + ! i_sparse tells us how many roots we are going to process + ! Normally this would just be size(coefficients, 1) and the loop below goes up + ! 
to size(coefficients, 1) - 1. The last real root gets its final term added outside the loop + ! and if the last root is complex then we only have to hit the first of the pair in the loop + ! + ! If we have fixed sparsity: + ! + ! if the fixed sparsity root is real then we want to set i_sparse to poly_sparsity_order+1 + ! so we hit the roots up to poly_sparsity_order in the loop and then we take care of the + ! poly_sparsity_order + 1 root outside the loop + ! + ! if the fixed sparsity root is complex but poly_sparsity_order + 1 hits the second of the pair + ! then we only need to set i_sparse to poly_sparsity_order + 1 so we only hit the first + ! pair in the loop below + ! + ! if the fixed sparsity root is complex but poly_sparsity_order + 1 hits the first of the pair + ! then we need to set i_sparse to poly_sparsity_order + 2 + ! otherwise we would never hit the first pair + + i_sparse = size(coefficients, 1) + first_complex = .FALSE. + + if (output_product) then + output_first_complex = .FALSE. + if (output_product) then + i_sparse = poly_sparsity_order + 1 + + ! If the one before is real, then we know we're on the first + if (coefficients(i_sparse-1,2) == 0d0) then + output_first_complex = .TRUE. + ! See discussion above + i_sparse = i_sparse + 1 + + ! If the one before is complex + else + + ! Check if the distance between the fixed sparsity root and the one before is > zero + ! If so they must be complex conjugates and hence we are on the second of the pair + if (abs(coefficients(i_sparse,1) - coefficients(i_sparse-1,1))/coefficients(i_sparse,1) > 1e-14 .AND. & + abs(coefficients(i_sparse,2) + coefficients(i_sparse-1,2))/coefficients(i_sparse,2) > 1e-14) then + output_first_complex = .TRUE. + i_sparse = i_sparse + 1 + end if + end if + end if + first_complex = output_first_complex + end if + + ! ~~~~~~~~~~~~ + ! Iterate over the i + ! This is basically the same as the MF application but we have to build the powers + ! ~~~~~~~~~~~~ + i = 1 + ! Loop through to one fewer than the number of roots + ! We're always building up the next product + do while (i .le. i_sparse - 1) + + ! Duplicate & copy the matrix, but ensure there is a diagonal present + ! temp_mat_A is going to store things with the sparsity of A + if (PetscObjectIsNull(temp_mat_A)) then + call mat_duplicate_copy_plus_diag(matrix, .FALSE., temp_mat_A) + else + ! Can reuse the sparsity + call mat_duplicate_copy_plus_diag(matrix, .TRUE., temp_mat_A) + end if + + ! If real this is easy + if (coefficients(i,2) == 0d0) then + + ! Skips eigenvalues that are numerically zero - see + ! the comment in calculate_gmres_polynomial_roots_newton + if (abs(coefficients(i,1)) < 1e-12) then + i = i + 1 + cycle + end if + + ! Then add the scaled version of each product + if (i == 1) then + ! If i == 1 then we know mat_product is identity so we can do it directly + call MatShift(inv_matrix, 1d0/coefficients(i,1), ierr) + else + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) + end if + end if + + ! temp_mat_A = A_ff/theta_k + call MatScale(temp_mat_A, -1d0/coefficients(i,1), ierr) + ! temp_mat_A = I - A_ff/theta_k + call MatShift(temp_mat_A, 1d0, ierr) + + ! mat_product_k_plus_1 = mat_product * temp_mat_A + if (i == 1) then + ! 
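In matrix form, the real-root pass above is the recurrence (a restatement of what the MatScale/MatShift/MatMatMult calls around it compute, with \(\Pi_1 = I\)):

\[ p \;\leftarrow\; p + \frac{1}{\theta_k}\,\Pi_k, \qquad \Pi_{k+1} \;=\; \left(I - \frac{A}{\theta_k}\right)\Pi_k , \]

so each real root multiplies the running product by one more factor with the sparsity of A, i.e. the product's stencil grows by one power of A per real root.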
If i == 1 then we know mat_product is identity so we can just copy + call MatConvert(temp_mat_A, MATSAME, MAT_INITIAL_MATRIX, mat_product, ierr) + else + call MatMatMult(temp_mat_A, mat_product, & + MAT_INITIAL_MATRIX, 1.5d0, mat_product_k_plus_1, ierr) + call MatDestroy(mat_product, ierr) + mat_product = mat_product_k_plus_1 + end if + + ! We copy out the last product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. i == i_sparse - 1) then + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + end if + + i = i + 1 + + ! Complex + else + + ! Skips eigenvalues that are numerically zero + if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then + i = i + 2 + cycle + end if + + ! If doing the normal iteration + if (.NOT. first_complex) then + + ! temp_mat_A = -A + call MatScale(temp_mat_A, -1d0, ierr) + ! temp_mat_A = 2a I - A_ff + call MatShift(temp_mat_A, 2d0 * coefficients(i,1), ierr) + + ! temp_mat_A = (2a I - A_ff)/(a^2 + b^2) + call MatScale(temp_mat_A, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), ierr) + + if (i == 1) then + ! If i == 1 then we know mat_product is identity so we can do it directly + call MatConvert(temp_mat_A, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) + else + ! temp_mat_two = temp_mat_A * mat_product + call MatMatMult(temp_mat_A, mat_product, & + MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) + end if + + ! If instead we only have the first of a complex conjugate pair + ! We want to pass out 2 * a * mat_product/(a^2 + b^2) and only add that to inv_matrix + ! This is equivalent to only part of tmp on Line 9 of Loe + ! The fixed sparsity loop will then finish the tmp with the term -A * prod/(a^2+b^2) + ! as this is the part that would increase the sparsity beyond poly_sparsity_order + else + + ! Copy mat_product into temp_mat_two + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) + ! temp_mat_two = 2a * mat_product/(a^2 + b^2) + call MatScale(temp_mat_two, 2d0 * coefficients(i,1)/(coefficients(i,1)**2 + coefficients(i,2)**2), ierr) + + end if + + ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. i > i_sparse - 2) then + call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + end if + + ! Then add the scaled version of each product + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0, temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0, temp_mat_two) + end if + + if (i .le. i_sparse - 2) then + ! temp_mat_three = matrix * temp_mat_two + call MatMatMult(matrix, temp_mat_two, & + MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) + call MatDestroy(temp_mat_two, ierr) + + ! Then add the scaled version of each product + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(mat_product, -1d0, temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(mat_product, -1d0, temp_mat_three) + end if + call MatDestroy(temp_mat_three, ierr) + else + call MatDestroy(temp_mat_two, ierr) + end if + + ! Skip two evals + i = i + 2 + + end if + end do + + ! Final step if last root is real + if (.NOT. first_complex) then + if (coefficients(i_sparse,2) == 0d0) then + ! 
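The complex branch consumes a conjugate pair a +- ib in a single pass; combining the two complex Newton steps 1/theta and (I - A/theta)/\bar{\theta} keeps everything real. Up to where the 1/(a^2 + b^2) factor is applied, the update is (as a sketch of what the MatMatMult/MatAXPY sequence computes):

\[ T = \frac{(2a\,I - A)\,\Pi_k}{a^2 + b^2}, \qquad p \;\leftarrow\; p + T, \qquad \Pi_{k+2} \;=\; \Pi_k - A\,T \;=\; \left(I - \frac{A}{\theta}\right)\left(I - \frac{A}{\bar\theta}\right)\Pi_k , \]

so a pair advances the product by two powers of A, which is why the fixed-sparsity bookkeeping has to know whether it stops on the first or the second root of a pair.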
Add in the final term multiplied by 1/theta_poly_order + + ! Skips eigenvalues that are numerically zero + if (abs(coefficients(i,1)) > 1e-12) then + if (reuse_triggered) then + ! If doing reuse we know our nonzeros are a subset + call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + else + ! Have to use the DIFFERENT_NONZERO_PATTERN here + call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) + end if + end if + end if + end if + + call MatDestroy(temp_mat_A, ierr) + call MatDestroy(mat_product, ierr) + + end subroutine build_gmres_polynomial_newton_inverse_full + ! ------------------------------------------------------------------------------------------------------------------------------- From c163ec45ab7d822804d3419af42d498d493327ec Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 29 Jan 2026 16:29:19 +0000 Subject: [PATCH 09/41] Partially finished higher order sparsity terms. The (r,r,r) and (c,c,r) cases are working, but the (r,c,c) is not yet. --- src/Gmres_Poly_Newton.F90 | 293 +++++++++++++++++++++++++++++--------- 1 file changed, 224 insertions(+), 69 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index f47f437..e52a67d 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -819,29 +819,29 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt :: global_col_start, global_col_end_plus_one, n, ncols, ncols_two, ifree, max_nnzs PetscInt :: i_loc, j_loc, row_size, rows_ao, cols_ao, rows_ad, cols_ad, shift = 0 integer :: errorcode, match_counter, term - integer :: comm_size + integer :: comm_size, diag_index PetscErrorCode :: ierr integer, dimension(:), allocatable :: cols_index_one, cols_index_two PetscInt, dimension(:), allocatable :: col_indices_off_proc_array, ad_indices, cols PetscReal, dimension(:), allocatable :: vals type(tIS), dimension(1) :: col_indices, row_indices - type(tMat) :: Ad, Ao + type(tMat) :: Ad, Ao, mat_sparsity_match PetscInt, dimension(:), pointer :: colmap logical :: deallocate_submatrices = .FALSE. type(c_ptr) :: vals_c_ptr - type(tMat), pointer :: mat_sparsity_match type(int_vec), dimension(:), allocatable :: symbolic_ones type(real_vec), dimension(:), allocatable :: symbolic_vals integer(c_long_long) A_array MPIU_Comm :: MPI_COMM_MATRIX - PetscReal, dimension(:), allocatable :: vals_power_temp, vals_previous_power_temp + PetscReal, dimension(:), allocatable :: vals_power_temp, vals_previous_power_temp, temp PetscInt, dimension(:), pointer :: submatrices_ia, submatrices_ja, cols_two_ptr, cols_ptr PetscReal, dimension(:), pointer :: vals_two_ptr, vals_ptr real(c_double), pointer :: submatrices_vals(:) logical :: reuse_triggered PetscBool :: symmetric = PETSC_FALSE, inodecompressed = PETSC_FALSE, done PetscInt, parameter :: one = 1, zero = 0 - logical :: output_first_complex + logical :: output_first_complex, skip_add + PetscReal :: square_sum ! ~~~~~~~~~~ @@ -863,6 +863,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp reuse_triggered = .NOT. PetscObjectIsNull(cmat) + print *, "coefficients", coefficients + ! ~~~~~~~~~~ ! Compute cmat for all powers up to poly_sparsity_order ! We have to be more careful here than in the monomial case @@ -887,13 +889,26 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! as this is the part of the product with sparsity up to A ! This is because the prod for complex builds up the A^2 term for the next iteration ! 
given it does two roots at a time + + ! If we have a real first coefficient and a second complex + ! we can't call build_gmres_polynomial_newton_inverse_1st_1st as it is only correct + ! for valid coefficients up to 1st order (ie both real or both complex) + if (coefficients(1,2) == 0d0 .AND. coefficients(2,2) /= 0d0) then + + print *, "DOING FULL FIRST ORDER BUILD" + + call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & + cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) + + else - ! Duplicate & copy the matrix, but ensure there is a diagonal present - call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, cmat) + ! Duplicate & copy the matrix, but ensure there is a diagonal present + call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, cmat) - call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & - coefficients(1:poly_sparsity_order + 1, 1:2), & - cmat, mat_sparsity_match) + call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & + coefficients(1:poly_sparsity_order + 1, 1:2), & + cmat, mat_sparsity_match) + end if else ! If we're any higher, then we build cmat up to that order @@ -1046,6 +1061,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp allocate(vals(max_nnzs)) allocate(vals_power_temp(max_nnzs)) allocate(vals_previous_power_temp(max_nnzs)) + allocate(temp(max_nnzs)) allocate(cols_index_one(max_nnzs)) allocate(cols_index_two(max_nnzs)) @@ -1069,7 +1085,15 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp cols(1:ncols) = cols_ptr(1:ncols) vals(1:ncols) = vals_ptr(1:ncols) call MatRestoreRow(mat_sparsity_match, i_loc - 1 + global_row_start, ncols_two, & - cols_ptr, vals_ptr, ierr) + cols_ptr, vals_ptr, ierr) + diag_index = -1 + ! Find the diagonal index in this row + do j_loc = 1, ncols + if (cols(j_loc) == i_loc - 1 + global_row_start) then + diag_index = j_loc + exit + end if + end do ! This is just a symbolic for the set of rows given in cols ! Let's just do all the column matching and extraction of the values once @@ -1145,39 +1169,169 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Start with the values of mat_sparsity_match in it vals_previous_power_temp(1:ncols) = vals(1:ncols) - ! ! Loop over any matrix powers - ! ! vals_power_temp stores the value of A^(term-1) for this row, and we update this as we go through - ! ! the term loop - ! do term = poly_sparsity_order+2, size(coefficients) + ! Loop over any matrix powers + ! vals_power_temp stores the prod for this row, and we update this as we go through + ! the term loop + term = poly_sparsity_order + 1 + skip_add = .FALSE. + ! If the fixed sparsity root is the second of a complex pair, we start one term earlier + ! so that we can compute the correct part of the product, we just make sure not to add + if (coefficients(term,2) /= 0d0 .AND. .NOT. output_first_complex) then + term = term - 1 + skip_add = .TRUE. + print *, "minus one starting term for complex root" + end if + + print *, "starting with term", term + ! This loop skips the last coefficient + do while (term .le. size(coefficients, 1) - 1) + + ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns + vals_power_temp(1:ncols) = 0 + + print *, "coeff in term", term, coefficients(term, 1), coefficients(term, 2) + + ! If real + if (coefficients(term,2) == 0d0) then + + print *, "inside real term", term + + ! ~~~~~~~~~~~ + ! 
Now can add the value to our matrix + ! Can skip this if coeff is zero, but still need to compute A^(term-1) + ! for the next time through + ! Also we skip the first one if we're real as that value has already been added to the + ! matrix by the build_gmres_polynomial_newton_inverse_full (as we had to build the product up + ! to that order) + ! ~~~~~~~~~~~ + if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. term > poly_sparsity_order + 1) then + print *, "adding to matrix real term", term + call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) + end if + + ! Initialize with previous product before the A*prod subtraction + vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) + + ! Have to finish all the columns before we move onto the next coefficient + do j_loc = 1, ncols - ! ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns - ! vals_power_temp(1:ncols) = 0 + ! If we have no matching columns cycle this row + if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle - ! ! Have to finish all the columns before we move onto the next coefficient - ! do j_loc = 1, ncols + print *, "processing column ", j_loc, " for real term ", term, "with coeff", coefficients(term, 1) - ! ! If we have no matching columns cycle this row - ! if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle + ! symbolic_vals(j_loc)%ptr has the matching values of A in it + ! This is the (I - A_ff/theta_k) * prod + vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) - & + 1d0/coefficients(term, 1) * & + symbolic_vals(j_loc)%ptr * vals_previous_power_temp(j_loc) + end do + + term = term + 1 + + ! If complex + else - ! ! symbolic_vals(j_loc)%ptr has the matching values of A in it - ! vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) + & - ! vals_previous_power_temp(j_loc) * symbolic_vals(j_loc)%ptr + square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) + if (.NOT. skip_add) then + + print *, "NOT SKIP ADD", term, "with output_first_complex", output_first_complex + + ! We skip the 2 * a * prod from the first root of a complex pair if that has already + ! been included in the inv_matrix from build_gmres_polynomial_newton_inverse_full + if (term < poly_sparsity_order + 2) then + if (.NOT. output_first_complex) then + temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) + print *, "not skipping first complex part of product" + else + temp(1:ncols) = 0d0 + print *, "skipping first complex part of product" + end if + else + temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) + print *, "adding 2a term as normal" + end if + + ! This is the -A * prod + do j_loc = 1, ncols + + ! If we have no matching columns cycle this row + if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle + + ! symbolic_vals(j_loc)%ptr has the matching values of A in it + temp(symbolic_ones(j_loc)%ptr) = temp(symbolic_ones(j_loc)%ptr) - & + symbolic_vals(j_loc)%ptr * vals_previous_power_temp(j_loc) + end do + + ! This is the p = p + 1/(a^2 + b^2) * temp + if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then + call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + square_sum * temp(1:ncols), ADD_VALUES, ierr) + end if + + ! for (r, c, c) + ! problem here is 2 *a * prod has been added to inv_matrix but we need to have added + ! 2aprod/a^2+b^2 + ! 
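Written per row, the kernel above is a fixed-pattern product: row i only ever keeps the column set S_i it started with (the sparsity of the already-assembled cmat), and any fill a true product would generate is simply dropped. Schematically, for a real root theta_k (the conjugate-pair branch is analogous, using the 2a and -A pieces of its temp term):

\[ p_{i,c} \;\leftarrow\; p_{i,c} + \frac{1}{\theta_k}\,\pi_{i,c}, \qquad \pi_{i,c} \;\leftarrow\; \pi_{i,c} - \frac{1}{\theta_k}\sum_{j \in S_i} \pi_{i,j}\,A_{j,c} \quad \text{for } c \in S_i , \]

with contributions to columns outside S_i discarded; the symbolic_ones/symbolic_vals arrays are just the precomputed index matching for that restricted sum.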
for (c, c, r) mat product is output without the 1/a^2+b^2 but that is fine as we + ! compensate for that in the product - ! end do - ! ! ~~~~~~~~~~~ - ! ! Now can add the value of coeff * A^(term-1) to our matrix - ! ! Can skip this if coeff is zero, but still need to compute A^(term-1) - ! ! for the next time through - ! ! ~~~~~~~~~~~ - ! if (ncols /= 0 .AND. coefficients(term) /= 0d0) then - ! call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & - ! coefficients(term) * vals_power_temp, ADD_VALUES, ierr) - ! end if - - ! ! This should now have the value of A^(term-1) in it - ! vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) - ! end do + ! First time through complex pair + else + + ! If we're skipping the add, then vals_previous_power_temp has all the correct + ! values in it for temp + ! All we have to do is compute prod for the next time through + skip_add = .FALSE. + print *, "SKIP ADD" + temp(1:ncols) = vals_previous_power_temp(1:ncols) + ! @@@ have to be careful here! + ! If we've gone back a term, we don't have anything in prod + ! prod is I when term = 1 + if (term == 1) then + vals_previous_power_temp(1:ncols) = 0d0 + if (diag_index /= -1) then + vals_previous_power_temp(diag_index) = 1d0 + end if + end if + end if + + if (term .le. size(coefficients, 1)- 2) then + + print *, "COMPUTING PRODUCT COMPLEX" + + vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) + + ! This is prod = prod - 1/(a^2 + b^2) * A * temp + do j_loc = 1, ncols + + ! If we have no matching columns cycle this row + if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle + + ! symbolic_vals(j_loc)%ptr has the matching values of A in it + vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) - & + square_sum * & + symbolic_vals(j_loc)%ptr * temp(j_loc) + end do + end if + + term = term + 2 + + end if + + ! This should now have the value of A^(term-1) in it + vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) + end do + + ! Final step if last root is real + if (coefficients(term,2) == 0d0) then + if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then + print *, "adding to matrix FINAL real term", term, coefficients(term, 1) + call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) + end if + end if ! Delete our symbolic do j_loc = 1, ncols @@ -1215,7 +1369,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end if deallocate(col_indices_off_proc_array) - deallocate(cols, vals, vals_power_temp, vals_previous_power_temp, cols_index_one, cols_index_two) + deallocate(cols, vals, vals_power_temp, vals_previous_power_temp, temp, cols_index_one, cols_index_two) ! Finish assembly call MatAssemblyEnd(cmat, MAT_FINAL_ASSEMBLY, ierr) @@ -1547,7 +1701,7 @@ end subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton ! ------------------------------------------------------------------------------------------------------------------------------- subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coefficients, & - inv_matrix, mat_product_output) + inv_matrix, mat_prod_or_temp) ! 
Specific 1st order with 1st order sparsity @@ -1556,7 +1710,7 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe integer, intent(in) :: poly_order PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix - type(tMat), intent(inout), optional :: mat_product_output + type(tMat), intent(inout), optional :: mat_prod_or_temp ! Local variables PetscErrorCode :: ierr @@ -1566,13 +1720,15 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! ~~~~~~ reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) - output_product = present(mat_product_output) + output_product = present(mat_prod_or_temp) ! Flags to prevent reductions when assembling (there are assembles in the shift) call MatSetOption(inv_matrix, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + print *, "inside 1st 1st", coefficients + ! We only have two coefficients, so they are either both real or complex conjugates ! If real if (coefficients(1,2) == 0d0) then @@ -1606,9 +1762,9 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! result = I -A_ff/theta_1 call MatShift(inv_matrix, 1d0, ierr) ! If we're doing this as part of fixed sparsity multiply, - ! we need to return mat_product_output + ! we need to return mat_prod_or_temp if (output_product) then - call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if ! result = 1/theta_2 * (I -A_ff/theta_1) @@ -1624,16 +1780,18 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe square_sum = coefficients(1,1)**2 + coefficients(1,2)**2 ! Complex conjugate roots - ! result = -A_ff / (a^2 + b^2) - call MatScale(inv_matrix, -1d0/square_sum, ierr) - ! result = 2a/(a^2 + b^2) I - A_ff / (a^2 + b^2) + ! result = -A_ff + call MatScale(inv_matrix, -1d0, ierr) + ! result = 2a I - A_ff ! Don't need an assemble as there is one called in this - call MatShift(inv_matrix, 2d0 * coefficients(1,1)/square_sum, ierr) + call MatShift(inv_matrix, 2d0 * coefficients(1,1), ierr) ! If we're doing this as part of fixed sparsity multiply, - ! we need to return mat_product_output + ! we need to return mat_prod_or_temp if (output_product) then - call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) - end if + call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + end if + ! result = 2a I - A_ff/(a^2 + b^2) + call MatScale(inv_matrix, 1d0/square_sum, ierr) end if end subroutine build_gmres_polynomial_newton_inverse_1st_1st @@ -1642,19 +1800,19 @@ end subroutine build_gmres_polynomial_newton_inverse_1st_1st ! ------------------------------------------------------------------------------------------------------------------------------- subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & - inv_matrix, mat_product_output, poly_sparsity_order, output_first_complex) + inv_matrix, mat_prod_or_temp, poly_sparsity_order, output_first_complex) ! No constrained sparsity by default - ! If you pass in mat_product_output, poly_sparsity_order, output_first_complex + ! If you pass in mat_prod_or_temp, poly_sparsity_order, output_first_complex ! 
then it will build part of the terms, up to poly_sparsity_order, and return the product - ! in mat_product_output that you need to compute the rest of the fixed sparsity terms + ! in mat_prod_or_temp that you need to compute the rest of the fixed sparsity terms ! ~~~~~~ type(tMat), intent(in) :: matrix integer, intent(in) :: poly_order PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix - type(tMat), intent(inout), optional :: mat_product_output + type(tMat), intent(inout), optional :: mat_prod_or_temp integer, intent(in), optional :: poly_sparsity_order logical, intent(inout), optional :: output_first_complex @@ -1667,7 +1825,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! ~~~~~~ reuse_triggered = .NOT. PetscObjectIsNull(inv_matrix) - output_product = present(mat_product_output) + output_product = present(mat_prod_or_temp) if (.NOT. reuse_triggered) then ! Duplicate & copy the matrix, but ensure there is a diagonal present @@ -1802,7 +1960,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then - call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if i = i + 1 @@ -1824,9 +1982,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! temp_mat_A = 2a I - A_ff call MatShift(temp_mat_A, 2d0 * coefficients(i,1), ierr) - ! temp_mat_A = (2a I - A_ff)/(a^2 + b^2) - call MatScale(temp_mat_A, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), ierr) - if (i == 1) then ! If i == 1 then we know mat_product is identity so we can do it directly call MatConvert(temp_mat_A, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) @@ -1837,31 +1992,30 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi end if ! If instead we only have the first of a complex conjugate pair - ! We want to pass out 2 * a * mat_product/(a^2 + b^2) and only add that to inv_matrix + ! We want to pass out mat_product and only add that to inv_matrix ! This is equivalent to only part of tmp on Line 9 of Loe ! The fixed sparsity loop will then finish the tmp with the term -A * prod/(a^2+b^2) ! as this is the part that would increase the sparsity beyond poly_sparsity_order else ! Copy mat_product into temp_mat_two - call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) - ! temp_mat_two = 2a * mat_product/(a^2 + b^2) - call MatScale(temp_mat_two, 2d0 * coefficients(i,1)/(coefficients(i,1)**2 + coefficients(i,2)**2), ierr) + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) end if ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then - call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_product_output, ierr) + call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if ! Then add the scaled version of each product if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0, temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) + call MatAXPY(inv_matrix, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), & + temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) else ! 
Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0, temp_mat_two) + call MatAXPYWrapper(inv_matrix, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), temp_mat_two) end if if (i .le. i_sparse - 2) then @@ -1873,10 +2027,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Then add the scaled version of each product if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(mat_product, -1d0, temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) + call MatAXPY(mat_product, -1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), & + temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) else ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(mat_product, -1d0, temp_mat_three) + call MatAXPYWrapper(mat_product, -1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), temp_mat_three) end if call MatDestroy(temp_mat_three, ierr) else From fcbed5451c821e2a3e90867713bfbc36187c320f Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 29 Jan 2026 22:03:31 +0000 Subject: [PATCH 10/41] High order terms with first order sparsity are now correct --- src/Gmres_Poly_Newton.F90 | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index e52a67d..7f604a6 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -1275,8 +1275,13 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! 2aprod/a^2+b^2 ! for (c, c, r) mat product is output without the 1/a^2+b^2 but that is fine as we ! compensate for that in the product + if (term < poly_sparsity_order + 2) then + if (output_first_complex) then + temp(1:ncols) = temp(1:ncols) + 2d0 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) + print *, "ADDING 2*a*prod back into temp" + end if + end if - ! First time through complex pair else @@ -1968,6 +1973,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else + + print *, "INTO FULL", "first_complex", first_complex + ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then i = i + 2 @@ -1977,6 +1985,8 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! If doing the normal iteration if (.NOT. first_complex) then + print *, "adding in 2a prod - A prod" + ! temp_mat_A = -A call MatScale(temp_mat_A, -1d0, ierr) ! temp_mat_A = 2a I - A_ff @@ -1989,6 +1999,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! temp_mat_two = temp_mat_A * mat_product call MatMatMult(temp_mat_A, mat_product, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) + end if + + ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. i > i_sparse - 2) then + call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if ! If instead we only have the first of a complex conjugate pair @@ -1998,14 +2013,19 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! as this is the part that would increase the sparsity beyond poly_sparsity_order else + print *, "only first complex - passing out product" + ! Copy mat_product into temp_mat_two call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) - end if + ! 
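The rearrangement here is purely algebraic, since

\[ \left[\frac{2a\,I - A}{a^2 + b^2}\right]\Pi \;=\; \frac{1}{a^2 + b^2}\Big[(2a\,I - A)\,\Pi\Big] ; \]

deferring the 1/(a^2 + b^2) factor to the MatAXPY calls means the product handed out to the fixed-sparsity path carries no hidden scaling, which the row-wise loop then applies itself (this is a reading of the surrounding comments rather than an independent change).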
temp_mat_two = 2a * mat_product + call MatScale(temp_mat_two, 2d0 * coefficients(i,1), ierr) + + ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. i > i_sparse - 2) then + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + end if - ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply - if (output_product .AND. i > i_sparse - 2) then - call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if ! Then add the scaled version of each product @@ -2019,6 +2039,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi end if if (i .le. i_sparse - 2) then + + print *, "COMPUTING PRODUCT IN FULL" + ! temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) From f370c9d4f749489688403067bd9dbd981b552a39 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 29 Jan 2026 23:49:28 +0000 Subject: [PATCH 11/41] Add in fixed sparsity order 1 assembled newton tests. The tests hit the three cases with real and complex eigenvalues, namely (r,r,r), (c,c,r) and (r,c,c) cases. --- tests/Makefile | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 514ae5e..888cecf 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -121,7 +121,12 @@ run_tests_load_serial: @echo "" @echo "Test AIRG with GMRES polynomials in indefinite problem with zero diagonals" ./ex6 -f data/e05r0100_petsc -b_in_f 0 -pc_air_a_drop 1e-3 -pc_air_inverse_type power -ksp_max_it 26 - +# + @echo "" + @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 2nd order" + ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 2 + @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 3rd order" + ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 3 # ~~~~~~~~~~~ # ~~~~~~~~~~~ run_tests_load_parallel: @@ -156,6 +161,12 @@ run_tests_load_parallel: @echo "" @echo "Test AIRG with GMRES polynomials in indefinite problem with zero diagonals in parallel" $(MPIEXEC) -n 2 ./ex6 -f data/e05r0100_petsc -b_in_f 0 -pc_air_a_drop 1e-3 -pc_air_inverse_type power -ksp_max_it 26 +# + @echo "" + @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 2nd order in parallel" + $(MPIEXEC) -n 2 ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 2 + @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 3rd order in parallel" + $(MPIEXEC) -n 2 ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 3 # ~~~~~~~~~~~ # ~~~~~~~~~~~ @@ -278,8 +289,8 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 - @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" - ./ex6f -m 10 -n 10 
-pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 +# @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" +# ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 # @echo "" @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change with 0th order fixed sparsity" @@ -534,9 +545,9 @@ run_tests_no_load_parallel: @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 \ -pc_air_a_drop 1e-3 -pc_air_inverse_type newton - @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" - $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ - -pc_air_a_drop 1e-3 -pc_air_inverse_type newton +# @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" +# $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ +# -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change in parallel with 0th order fixed sparsity" From ec58033128f59fed32125d304f5dfc1e2337d90d Mon Sep 17 00:00:00 2001 From: sdargavi Date: Fri, 30 Jan 2026 01:21:36 +0000 Subject: [PATCH 12/41] Add test that checks the difference between residuals for different types of GMRES polynomial --- Makefile | 2 +- src/Gmres_Poly_Newton.F90 | 37 -------- tests/Makefile | 25 ++++++ tests/ex12f_gmres_poly.F90 | 173 +++++++++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 38 deletions(-) create mode 100644 tests/ex12f_gmres_poly.F90 diff --git a/Makefile b/Makefile index 3fcd41a..87fe1a6 100644 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ OBJS := $(OBJS) $(SRCDIR)/PETSc_Helper.o \ $(SRCDIR)/PCPFLAREINV.o # Define a variable containing all the tests -export TEST_TARGETS = ex12f ex6f ex6f_getcoeffs ex6 adv_1d adv_diff_2d ex6_cf_splitting adv_diff_cg_supg matrandom matrandom_check_reset +export TEST_TARGETS = ex12f ex6f ex6f_getcoeffs ex6 adv_1d adv_diff_2d ex6_cf_splitting adv_diff_cg_supg matrandom matrandom_check_reset ex12f_gmres_poly # Include kokkos examples ifeq ($(PETSC_HAVE_KOKKOS),1) export TEST_TARGETS := $(TEST_TARGETS) adv_1dk diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 7f604a6..5af813c 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -863,8 +863,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp reuse_triggered = .NOT. PetscObjectIsNull(cmat) - print *, "coefficients", coefficients - ! ~~~~~~~~~~ ! Compute cmat for all powers up to poly_sparsity_order ! We have to be more careful here than in the monomial case @@ -895,8 +893,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! for valid coefficients up to 1st order (ie both real or both complex) if (coefficients(1,2) == 0d0 .AND. 
coefficients(2,2) /= 0d0) then - print *, "DOING FULL FIRST ORDER BUILD" - call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) @@ -1179,23 +1175,17 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (coefficients(term,2) /= 0d0 .AND. .NOT. output_first_complex) then term = term - 1 skip_add = .TRUE. - print *, "minus one starting term for complex root" end if - print *, "starting with term", term ! This loop skips the last coefficient do while (term .le. size(coefficients, 1) - 1) ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns vals_power_temp(1:ncols) = 0 - print *, "coeff in term", term, coefficients(term, 1), coefficients(term, 2) - ! If real if (coefficients(term,2) == 0d0) then - print *, "inside real term", term - ! ~~~~~~~~~~~ ! Now can add the value to our matrix ! Can skip this if coeff is zero, but still need to compute A^(term-1) @@ -1205,7 +1195,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! to that order) ! ~~~~~~~~~~~ if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. term > poly_sparsity_order + 1) then - print *, "adding to matrix real term", term call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1219,8 +1208,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If we have no matching columns cycle this row if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle - print *, "processing column ", j_loc, " for real term ", term, "with coeff", coefficients(term, 1) - ! symbolic_vals(j_loc)%ptr has the matching values of A in it ! This is the (I - A_ff/theta_k) * prod vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) - & @@ -1236,21 +1223,16 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) if (.NOT. skip_add) then - print *, "NOT SKIP ADD", term, "with output_first_complex", output_first_complex - ! We skip the 2 * a * prod from the first root of a complex pair if that has already ! been included in the inv_matrix from build_gmres_polynomial_newton_inverse_full if (term < poly_sparsity_order + 2) then if (.NOT. output_first_complex) then temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) - print *, "not skipping first complex part of product" else temp(1:ncols) = 0d0 - print *, "skipping first complex part of product" end if else temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) - print *, "adding 2a term as normal" end if ! This is the -A * prod @@ -1278,7 +1260,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (term < poly_sparsity_order + 2) then if (output_first_complex) then temp(1:ncols) = temp(1:ncols) + 2d0 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) - print *, "ADDING 2*a*prod back into temp" end if end if @@ -1289,7 +1270,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! values in it for temp ! All we have to do is compute prod for the next time through skip_add = .FALSE. - print *, "SKIP ADD" temp(1:ncols) = vals_previous_power_temp(1:ncols) ! @@@ have to be careful here! ! 
If we've gone back a term, we don't have anything in prod @@ -1304,8 +1284,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (term .le. size(coefficients, 1)- 2) then - print *, "COMPUTING PRODUCT COMPLEX" - vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) ! This is prod = prod - 1/(a^2 + b^2) * A * temp @@ -1332,7 +1310,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Final step if last root is real if (coefficients(term,2) == 0d0) then if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then - print *, "adding to matrix FINAL real term", term, coefficients(term, 1) call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1408,9 +1385,6 @@ subroutine build_gmres_polynomial_newton_inverse(matrix, poly_order, & PetscErrorCode :: ierr MPIU_Comm :: MPI_COMM_MATRIX type(mat_ctxtype), pointer :: mat_ctx=>null() - logical :: reuse_triggered - PetscReal :: square_sum - type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 logical :: reuse_triggered ! ~~~~~~ @@ -1732,8 +1706,6 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) - print *, "inside 1st 1st", coefficients - ! We only have two coefficients, so they are either both real or complex conjugates ! If real if (coefficients(1,2) == 0d0) then @@ -1973,9 +1945,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else - - print *, "INTO FULL", "first_complex", first_complex - ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then i = i + 2 @@ -1985,8 +1954,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! If doing the normal iteration if (.NOT. first_complex) then - print *, "adding in 2a prod - A prod" - ! temp_mat_A = -A call MatScale(temp_mat_A, -1d0, ierr) ! temp_mat_A = 2a I - A_ff @@ -2013,8 +1980,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! as this is the part that would increase the sparsity beyond poly_sparsity_order else - print *, "only first complex - passing out product" - ! Copy mat_product into temp_mat_two call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) @@ -2040,8 +2005,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi if (i .le. i_sparse - 2) then - print *, "COMPUTING PRODUCT IN FULL" - ! 
temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) diff --git a/tests/Makefile b/tests/Makefile index 888cecf..5a5081f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -127,6 +127,19 @@ run_tests_load_serial: ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 2 @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 3rd order" ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 3 +# + @echo "" + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders" + @for order in 0 1 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ + done + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders first order fixed sparsity" + @for order in 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order; \ + done + # ~~~~~~~~~~~ # ~~~~~~~~~~~ run_tests_load_parallel: @@ -167,6 +180,18 @@ run_tests_load_parallel: $(MPIEXEC) -n 2 ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 2 @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 3rd order in parallel" $(MPIEXEC) -n 2 ./ex12f -f data/mat_stream_2364 -ksp_max_it 5 -pc_air_inverse_type newton -pc_air_poly_order 3 +# + @echo "" + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders in parallel" + @for order in 0 1 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ + done + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders first order fixed sparsity in parallel" + @for order in 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order; \ + done # ~~~~~~~~~~~ # ~~~~~~~~~~~ diff --git a/tests/ex12f_gmres_poly.F90 b/tests/ex12f_gmres_poly.F90 new file mode 100644 index 0000000..bdbf8ce --- /dev/null +++ b/tests/ex12f_gmres_poly.F90 @@ -0,0 +1,173 @@ +! + program main +#include + use petscksp +#include "finclude/pflare.h" + implicit none + +! Comparison between different forms of GMRES polynomials + + PetscErrorCode ierr + PetscInt m,n,mlocal,nlocal + PetscBool flg + PetscReal norm_power, norm_rhs, norm_arnoldi, norm_newton + PetscReal :: norm_diff_one, norm_diff_two + Vec x,b,u, b_diff_type + Mat A, A_diff_type + character*(128) f + PetscViewer fd + KSP ksp + PC pc + KSPConvergedReason reason + PetscInt, parameter :: one=1 + MatType :: mtype, mtype_input + + call PetscInitialize(PETSC_NULL_CHARACTER,ierr) + if (ierr .ne. 0) then + print*,'Unable to initialize PETSc' + stop + endif + +! Read in matrix and RHS + call PetscOptionsGetString(PETSC_NULL_OPTIONS, & + & PETSC_NULL_CHARACTER,'-f',f,flg,ierr) + call PetscViewerBinaryOpen(PETSC_COMM_WORLD,f,FILE_MODE_READ, & + & fd,ierr) + + call MatCreate(PETSC_COMM_WORLD,A,ierr) + call MatLoad(A,fd,ierr) + + ! 
Get information about matrix + call MatGetSize(A,m,n,ierr) + call MatGetLocalSize(A,mlocal,nlocal,ierr) + + call VecCreate(PETSC_COMM_WORLD,b,ierr) + call VecLoad(b,fd,ierr) + call PetscViewerDestroy(fd,ierr) + + ! Test and see if the user wants us to use a different matrix type + ! with -mat_type on the command line + ! This lets us easily test our cpu and kokkos versions through our CI + call MatCreateFromOptions(PETSC_COMM_WORLD,PETSC_NULL_CHARACTER,& + one,mlocal,nlocal,m,n,A_diff_type,ierr) + call MatAssemblyBegin(A_diff_type,MAT_FINAL_ASSEMBLY,ierr) + call MatAssemblyEnd(A_diff_type,MAT_FINAL_ASSEMBLY,ierr) + + call MatGetType(A, mtype, ierr) + call MatGetType(A_diff_type, mtype_input, ierr) + + if (mtype /= mtype_input) then + ! Doesn't seem like there is a converter to kokkos + ! So instead we just copy into the empty A_diff_type + ! This will be slow as its not preallocated, but this is just for testing + call MatCopy(A, A_diff_type, DIFFERENT_NONZERO_PATTERN, ierr) + call MatDestroy(A,ierr) + A = A_diff_type + + ! Mat and vec types have to match + call VecCreateFromOptions(PETSC_COMM_WORLD,PETSC_NULL_CHARACTER, & + one,nlocal,n,b_diff_type,ierr) + call VecCopy(b,b_diff_type,ierr) + call VecDestroy(b,ierr) + b = b_diff_type + + else + call MatDestroy(A_diff_type,ierr) + end if + + ! Set up solution + call VecDuplicate(b,x,ierr) + call VecDuplicate(b,u,ierr) + + ! Register the pflare types + call PCRegister_PFLARE() + + call VecNorm(b,NORM_2,norm_rhs,ierr) + + ! ~~~~~~~~~~~~~ + ! Do a solve with the power basis + ! ~~~~~~~~~~~~~ + call KSPCreate(PETSC_COMM_WORLD,ksp,ierr) + call KSPSetOperators(ksp,A,A,ierr) + call KSPGetPC(ksp, pc, ierr) + call PCSetType(pc, PCAIR, ierr) + call PCAIRSetInverseType(pc, PFLAREINV_POWER, ierr) + call KSPSetPC(ksp, pc, ierr) + call KSPSetFromOptions(ksp,ierr) + + call VecSet(x, 0d0, ierr) + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp,reason,ierr) + if (reason%v < 0) then + error stop 1 + end if + ! Compute the residual + call MatMult(A,x,u,ierr) + call VecAXPY(u,-1d0,b,ierr) + call VecNorm(u,NORM_2,norm_power,ierr) + norm_power = norm_power/norm_rhs + + ! ~~~~~~~~~~~~~ + ! Now do a solve with the Arnoldi basis + ! ~~~~~~~~~~~~~ + call PCAIRSetInverseType(pc, PFLAREINV_ARNOLDI, ierr) + + call VecSet(x, 0d0, ierr) + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp,reason,ierr) + if (reason%v < 0) then + error stop 1 + end if + ! Compute the residual + call MatMult(A,x,u,ierr) + call VecAXPY(u,-1d0,b,ierr) + call VecNorm(u,NORM_2,norm_arnoldi,ierr) + norm_arnoldi = norm_arnoldi/norm_rhs + + ! ~~~~~~~~~~~~~ + ! Now do a solve with the Newton basis + ! ~~~~~~~~~~~~~ + call PCAIRSetInverseType(pc, PFLAREINV_NEWTON, ierr) + + call VecSet(x, 0d0, ierr) + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp,reason,ierr) + if (reason%v < 0) then + error stop 1 + end if + ! Compute the residual + call MatMult(A,x,u,ierr) + call VecAXPY(u,-1d0,b,ierr) + call VecNorm(u,NORM_2,norm_newton,ierr) + norm_newton = norm_newton/norm_rhs + call KSPDestroy(ksp,ierr) + + ! ~~~~~~~~~~~~~ + ! Now check all the residuals are the same + ! For low order polynomials on the diagonally dominant + ! A_ff on each level they should be basically identical and hence + ! we should have almost no difference in the resulting residual + ! 
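The check below leans on the fact that, for a given order, the power, Arnoldi and Newton forms are different bases for the same GMRES polynomial, namely the q that minimizes

\[ \big\| \, b - A\,q(A)\,b \, \big\|_2 \]

over polynomials of that order. In exact arithmetic the preconditioners they assemble therefore coincide, and only rounding error, through the conditioning of each basis, can separate the resulting residuals, hence the tight relative tolerance used in the comparisons that follow.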
~~~~~~~~~~~~~ + norm_diff_one = abs(norm_power - norm_newton)/norm_newton + if (norm_diff_one > 1e-9) then + print *, "Residuals differ between polynomial bases!", norm_diff_one + print *, "Power basis residual: ", norm_power + print *, "Newton basis residual: ", norm_newton + error stop 1 + end if + norm_diff_two = abs(norm_arnoldi - norm_power)/norm_power + if (norm_diff_two > 1e-9) then + print *, "Residuals differ between polynomial bases!", norm_diff_two + print *, "Arnoldi basis residual: ", norm_arnoldi + print *, "Newton basis residual: ", norm_newton + error stop 1 + end if + + call VecDestroy(b,ierr) + call VecDestroy(x,ierr) + call VecDestroy(u,ierr) + call MatDestroy(A,ierr) + + call PetscFinalize(ierr) + + end \ No newline at end of file From 6c0f56bd58d2a2eeca56759bccdf234b143d9f3c Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 5 Feb 2026 14:47:02 +0000 Subject: [PATCH 13/41] Higher order terms are working for newton assembly. Still more testing required --- src/Gmres_Poly_Newton.F90 | 251 +++++++++++++++++++++----------------- 1 file changed, 142 insertions(+), 109 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 5af813c..2876053 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -684,18 +684,18 @@ subroutine mat_mult_powers_share_sparsity_newton(matrix, poly_order, poly_sparsi type(tMat), intent(inout) :: reuse_mat, cmat type(tMat), dimension(:), pointer, intent(inout) :: reuse_submatrices -#if defined(PETSC_HAVE_KOKKOS) - integer(c_long_long) :: A_array, B_array, reuse_array - integer :: errorcode, reuse_int_cmat, reuse_int_reuse_mat - PetscErrorCode :: ierr - MatType :: mat_type - Mat :: temp_mat, temp_mat_reuse, temp_mat_compare - PetscScalar normy; - logical :: reuse_triggered_cmat, reuse_triggered_reuse_mat - type(c_ptr) :: coefficients_ptr - type(tMat) :: reuse_mat_cpu - type(tMat), dimension(:), pointer :: reuse_submatrices_cpu -#endif +! #if defined(PETSC_HAVE_KOKKOS) +! integer(c_long_long) :: A_array, B_array, reuse_array +! integer :: errorcode, reuse_int_cmat, reuse_int_reuse_mat +! PetscErrorCode :: ierr +! MatType :: mat_type +! Mat :: temp_mat, temp_mat_reuse, temp_mat_compare +! PetscScalar normy; +! logical :: reuse_triggered_cmat, reuse_triggered_reuse_mat +! type(c_ptr) :: coefficients_ptr +! type(tMat) :: reuse_mat_cpu +! type(tMat), dimension(:), pointer :: reuse_submatrices_cpu +! #endif ! ~~~~~~~~~~ ! ~~~~~~~~~~ @@ -710,90 +710,90 @@ subroutine mat_mult_powers_share_sparsity_newton(matrix, poly_order, poly_sparsi return end if -#if defined(PETSC_HAVE_KOKKOS) - - call MatGetType(matrix, mat_type, ierr) - if (mat_type == MATMPIAIJKOKKOS .OR. mat_type == MATSEQAIJKOKKOS .OR. & - mat_type == MATAIJKOKKOS) then - - A_array = matrix%v - reuse_triggered_cmat = .NOT. PetscObjectIsNull(cmat) - reuse_triggered_reuse_mat = .NOT. PetscObjectIsNull(reuse_mat) - reuse_int_cmat = 0 - if (reuse_triggered_cmat) then - reuse_int_cmat = 1 - B_array = cmat%v - end if - reuse_int_reuse_mat = 0 - if (reuse_triggered_reuse_mat) then - reuse_int_reuse_mat = 1 - end if - reuse_array = reuse_mat%v - coefficients_ptr = c_loc(coefficients) - - ! call mat_mult_powers_share_sparsity_newton_kokkos(A_array, poly_order, poly_sparsity_order, & - ! coefficients_ptr, reuse_int_reuse_mat, reuse_array, reuse_int_cmat, B_array) +! #if defined(PETSC_HAVE_KOKKOS) + +! call MatGetType(matrix, mat_type, ierr) +! if (mat_type == MATMPIAIJKOKKOS .OR. mat_type == MATSEQAIJKOKKOS .OR. & +! mat_type == MATAIJKOKKOS) then + +! 
A_array = matrix%v +! reuse_triggered_cmat = .NOT. PetscObjectIsNull(cmat) +! reuse_triggered_reuse_mat = .NOT. PetscObjectIsNull(reuse_mat) +! reuse_int_cmat = 0 +! if (reuse_triggered_cmat) then +! reuse_int_cmat = 1 +! B_array = cmat%v +! end if +! reuse_int_reuse_mat = 0 +! if (reuse_triggered_reuse_mat) then +! reuse_int_reuse_mat = 1 +! end if +! reuse_array = reuse_mat%v +! coefficients_ptr = c_loc(coefficients) + +! ! call mat_mult_powers_share_sparsity_newton_kokkos(A_array, poly_order, poly_sparsity_order, & +! ! coefficients_ptr, reuse_int_reuse_mat, reuse_array, reuse_int_cmat, B_array) - reuse_mat%v = reuse_array - cmat%v = B_array - - ! If debugging do a comparison between CPU and Kokkos results - if (kokkos_debug()) then - - ! If we're doing reuse and debug, then we have to always output the result - ! from the cpu version, as it will have coo preallocation structures set - ! They aren't copied over if you do a matcopy (or matconvert) - ! If we didn't do that the next time we come through this routine - ! and try to call the cpu version with reuse, it will segfault - if (reuse_triggered_cmat) then - temp_mat = cmat - call MatConvert(cmat, MATSAME, MAT_INITIAL_MATRIX, temp_mat_compare, ierr) - else - temp_mat_compare = cmat - end if - - ! Debug check if the CPU and Kokkos versions are the same - ! We send in an empty reuse_mat_cpu here always, as we can't pass through - ! the same one Kokkos uses as it now only gets out the non-local rows we need - ! (ie reuse_mat and reuse_mat_cpu are no longer the same size) - reuse_submatrices_cpu => null() - call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & - coefficients, reuse_mat_cpu, reuse_submatrices_cpu, temp_mat) - call destroy_matrix_reuse(reuse_mat_cpu, reuse_submatrices_cpu) +! reuse_mat%v = reuse_array +! cmat%v = B_array + +! ! If debugging do a comparison between CPU and Kokkos results +! if (kokkos_debug()) then + +! ! If we're doing reuse and debug, then we have to always output the result +! ! from the cpu version, as it will have coo preallocation structures set +! ! They aren't copied over if you do a matcopy (or matconvert) +! ! If we didn't do that the next time we come through this routine +! ! and try to call the cpu version with reuse, it will segfault +! if (reuse_triggered_cmat) then +! temp_mat = cmat +! call MatConvert(cmat, MATSAME, MAT_INITIAL_MATRIX, temp_mat_compare, ierr) +! else +! temp_mat_compare = cmat +! end if + +! ! Debug check if the CPU and Kokkos versions are the same +! ! We send in an empty reuse_mat_cpu here always, as we can't pass through +! ! the same one Kokkos uses as it now only gets out the non-local rows we need +! ! (ie reuse_mat and reuse_mat_cpu are no longer the same size) +! reuse_submatrices_cpu => null() +! call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & +! coefficients, reuse_mat_cpu, reuse_submatrices_cpu, temp_mat) +! call destroy_matrix_reuse(reuse_mat_cpu, reuse_submatrices_cpu) - call MatConvert(temp_mat, MATSAME, MAT_INITIAL_MATRIX, & - temp_mat_reuse, ierr) - - call MatAXPYWrapper(temp_mat_reuse, -1d0, temp_mat_compare) - call MatNorm(temp_mat_reuse, NORM_FROBENIUS, normy, ierr) - ! There is floating point compute in these inverses, so we have to be a - ! bit more tolerant to rounding differences - if (normy .gt. 1d-11 .OR. 
normy/=normy) then - !call MatFilter(temp_mat_reuse, 1d-14, PETSC_TRUE, PETSC_FALSE, ierr) - !call MatView(temp_mat_reuse, PETSC_VIEWER_STDOUT_WORLD, ierr) - print *, "Kokkos and CPU versions of mat_mult_powers_share_sparsity do not match" - - call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) - end if - call MatDestroy(temp_mat_reuse, ierr) - if (.NOT. reuse_triggered_cmat) then - call MatDestroy(cmat, ierr) - else - call MatDestroy(temp_mat_compare, ierr) - end if - cmat = temp_mat - end if - - else - - call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & - coefficients, reuse_mat, reuse_submatrices, cmat) - - end if -#else +! call MatConvert(temp_mat, MATSAME, MAT_INITIAL_MATRIX, & +! temp_mat_reuse, ierr) + +! call MatAXPYWrapper(temp_mat_reuse, -1d0, temp_mat_compare) +! call MatNorm(temp_mat_reuse, NORM_FROBENIUS, normy, ierr) +! ! There is floating point compute in these inverses, so we have to be a +! ! bit more tolerant to rounding differences +! if (normy .gt. 1d-11 .OR. normy/=normy) then +! !call MatFilter(temp_mat_reuse, 1d-14, PETSC_TRUE, PETSC_FALSE, ierr) +! !call MatView(temp_mat_reuse, PETSC_VIEWER_STDOUT_WORLD, ierr) +! print *, "Kokkos and CPU versions of mat_mult_powers_share_sparsity do not match" + +! call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) +! end if +! call MatDestroy(temp_mat_reuse, ierr) +! if (.NOT. reuse_triggered_cmat) then +! call MatDestroy(cmat, ierr) +! else +! call MatDestroy(temp_mat_compare, ierr) +! end if +! cmat = temp_mat +! end if + +! else + +! call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & +! coefficients, reuse_mat, reuse_submatrices, cmat) + +! end if +! #else call mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sparsity_order, & coefficients, reuse_mat, reuse_submatrices, cmat) -#endif +!#endif ! ~~~~~~~~~~ @@ -907,6 +907,9 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end if else + print *,"reals", coefficients(:,1) + print *,"imags", coefficients(:,2) + ! If we're any higher, then we build cmat up to that order ! But we have to be careful because the last root we want to explicitly ! build up to here (ie the power of the matrix given by poly_sparsity_order) @@ -918,7 +921,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! of a complex conjugate pair, as we need to know that below to add in the rest ! of the poly_sparsity_order+1 term from that pair ! before moving on to the rest of the higher order roots - call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & + call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, & + coefficients(1:poly_sparsity_order + 1, 1:2), & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) end if @@ -1851,32 +1855,41 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi i_sparse = size(coefficients, 1) first_complex = .FALSE. + print *, "size coeffs", size(coefficients, 1) + if (output_product) then + output_first_complex = .FALSE. if (output_product) then i_sparse = poly_sparsity_order + 1 - ! If the one before is real, then we know we're on the first - if (coefficients(i_sparse-1,2) == 0d0) then - output_first_complex = .TRUE. - ! See discussion above - i_sparse = i_sparse + 1 + ! If the last root is real we don't have to do anything + if (coefficients(i_sparse,2) /= 0d0) then - ! If the one before is complex - else - - ! 
Check if the distance between the fixed sparsity root and the one before is > zero - ! If so they must be complex conjugates and hence we are on the second of the pair - if (abs(coefficients(i_sparse,1) - coefficients(i_sparse-1,1))/coefficients(i_sparse,1) > 1e-14 .AND. & - abs(coefficients(i_sparse,2) + coefficients(i_sparse-1,2))/coefficients(i_sparse,2) > 1e-14) then + ! If the one before is real, then we know we're on the first + if (coefficients(i_sparse-1,2) == 0d0) then output_first_complex = .TRUE. + ! See discussion above i_sparse = i_sparse + 1 - end if + + ! If the one before is complex + else + + ! Check if the distance between the fixed sparsity root and the one before + ! If > zero then they are not complex conjugates and hence we are on the first of the pair + if (abs(coefficients(i_sparse,1) - coefficients(i_sparse-1,1))/coefficients(i_sparse,1) > 1e-14 .AND. & + abs(coefficients(i_sparse,2) + coefficients(i_sparse-1,2))/coefficients(i_sparse,2) > 1e-14) then + output_first_complex = .TRUE. + i_sparse = i_sparse + 1 + end if + end if end if end if first_complex = output_first_complex end if + print *, "i_sparse", i_sparse, "output_first_complex", output_first_complex + ! ~~~~~~~~~~~~ ! Iterate over the i ! This is basically the same as the MF application but we have to build the powers @@ -1886,6 +1899,8 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We're always building up the next product do while (i .le. i_sparse - 1) + print *, "i = ", i + ! Duplicate & copy the matrix, but ensure there is a diagonal present ! temp_mat_A is going to store things with the sparsity of A if (PetscObjectIsNull(temp_mat_A)) then @@ -1898,6 +1913,8 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! If real this is easy if (coefficients(i,2) == 0d0) then + print *, "real", "i_sparse", i_sparse + ! Skips eigenvalues that are numerically zero - see ! the comment in calculate_gmres_polynomial_roots_newton if (abs(coefficients(i,1)) < 1e-12) then @@ -1945,6 +1962,8 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else + print *, "complex" + ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then i = i + 2 @@ -1970,6 +1989,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then + print *, "outputting first part of product in complex case", "i_sparse", i_sparse, "i", i call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2005,10 +2025,12 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi if (i .le. i_sparse - 2) then + print *, "doing complex matmult step" + ! temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) - call MatDestroy(temp_mat_two, ierr) + call MatDestroy(temp_mat_two, ierr) ! Then add the scaled version of each product if (reuse_triggered) then @@ -2019,6 +2041,13 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Have to use the DIFFERENT_NONZERO_PATTERN here call MatAXPYWrapper(mat_product, -1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), temp_mat_three) end if + + ! 
We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. .NOT. first_complex) then + print *, "outputting product in complex case", "i_sparse", i_sparse, "i", i + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + end if + call MatDestroy(temp_mat_three, ierr) else call MatDestroy(temp_mat_two, ierr) @@ -2036,7 +2065,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Add in the final term multiplied by 1/theta_poly_order ! Skips eigenvalues that are numerically zero - if (abs(coefficients(i,1)) > 1e-12) then + if (abs(coefficients(i,1)) > 1e-12) then + + print *, "doing last real step" if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) @@ -2049,7 +2080,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi end if call MatDestroy(temp_mat_A, ierr) - call MatDestroy(mat_product, ierr) + call MatDestroy(mat_product, ierr) + + !call exit(0) end subroutine build_gmres_polynomial_newton_inverse_full From ea31e5b52f850274c9d805502a70a544d1a3a67b Mon Sep 17 00:00:00 2001 From: sdargavi Date: Thu, 5 Feb 2026 17:13:58 +0000 Subject: [PATCH 14/41] More careful about zero eigenvalues. Also saw a case where rounding in the rank deficient compute of harmonic ritz values lead to z negative eigenvalue (-1e-16). We now explicitly check for small eigenvalues in magnitude and set them to zero --- src/Gmres_Poly_Newton.F90 | 76 +++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 23 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 2876053..d0944cc 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -309,6 +309,15 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) end if + ! In some cases with rank deficiency, we can still end up with non-zero (or negative) eigenvalues that + ! are trivially small - we set them explicitly to zero + do i_loc = 1, poly_order + 1 + if (abs(coefficients(i_loc, 1)**2 + coefficients(i_loc, 2)**2) < 1e-12) then + coefficients(i_loc, 1) = 0d0 + coefficients(i_loc, 2) = 0d0 + end if + end do + ! ~~~~~~~~~~~~~~ ! Add roots for stability ! ~~~~~~~~~~~~~~ @@ -1180,12 +1189,18 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp term = term - 1 skip_add = .TRUE. end if + ! ! If the fixed sparsity root is real and the previous root was real, + ! ! we just need to compute the correct part of the product, we just make sure not to add + ! if (coefficients(term,2) == 0d0 .AND. coefficients(term-1,2) == 0d0) then + ! skip_add = .TRUE. + ! end if + + print *, "starting loop at term ", term, "skip_add ", skip_add ! This loop skips the last coefficient do while (term .le. size(coefficients, 1) - 1) - ! We need to sum up the product of vals_previous_power_temp(j_loc) * matching columns - vals_power_temp(1:ncols) = 0 + print *, "term ", term, "coeff ", coefficients(term,1), coefficients(term,2), skip_add ! If real if (coefficients(term,2) == 0d0) then @@ -1198,9 +1213,16 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! matrix by the build_gmres_polynomial_newton_inverse_full (as we had to build the product up ! to that order) ! ~~~~~~~~~~~ - if (ncols /= 0 .AND. 
abs(coefficients(term,1)) > 1e-12 .AND. term > poly_sparsity_order + 1) then - call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & - 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) + if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. & + term > poly_sparsity_order + 1) then + + !if (.NOT. skip_add) then + print *, "CALLING SET VALUES ", term, " to row " + call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) + ! else + ! skip_add = .FALSE. + ! end if end if ! Initialize with previous product before the A*prod subtraction @@ -1314,6 +1336,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Final step if last root is real if (coefficients(term,2) == 0d0) then if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then + print *, "adding REAL final term ", term, " coeff ", coefficients(term,1) call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1723,7 +1746,7 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! solve we don't have a zero coefficient but in the second solve we do ! So the mat type needs to remain consistent ! This can't happen in the complex case - if (coefficients(2,1) == 0d0) then + if (abs(coefficients(2,1)) < 1e-12) then ! Set to zero call MatScale(inv_matrix, 0d0, ierr) @@ -1802,6 +1825,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi logical :: reuse_triggered, output_product, first_complex integer :: i, i_sparse type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 + PetscReal :: square_sum, a_coeff ! ~~~~~~ @@ -1855,7 +1879,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi i_sparse = size(coefficients, 1) first_complex = .FALSE. - print *, "size coeffs", size(coefficients, 1) + print *, "size coeffs", size(coefficients, 1), "coeffs", coefficients(:, 1), coefficients(:, 2) if (output_product) then @@ -1915,11 +1939,13 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi print *, "real", "i_sparse", i_sparse - ! Skips eigenvalues that are numerically zero - see - ! the comment in calculate_gmres_polynomial_roots_newton + ! Skips eigenvalues that are numerically zero + ! We still compute the entries as as zero because we need the sparsity + ! to be correct for the next iteration if (abs(coefficients(i,1)) < 1e-12) then - i = i + 1 - cycle + square_sum = 0 + else + square_sum = 1d0/coefficients(i,1) end if ! Then add the scaled version of each product @@ -1929,15 +1955,15 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi else if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + call MatAXPY(inv_matrix, square_sum, mat_product, SUBSET_NONZERO_PATTERN, ierr) else ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) + call MatAXPYWrapper(inv_matrix, square_sum, mat_product) end if end if ! temp_mat_A = A_ff/theta_k - call MatScale(temp_mat_A, -1d0/coefficients(i,1), ierr) + call MatScale(temp_mat_A, -square_sum, ierr) ! 
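            ! For a real root theta_k the Newton-basis recurrence being assembled here is,
            ! in terms of the local names (a sketch of the update, not extra code):
            !     inv_matrix  = inv_matrix + (1/theta_k) * mat_product
            !     mat_product = (I - A/theta_k) * mat_product
            ! When theta_k has been clamped to zero, square_sum is 0, so the AXPY above adds
            ! nothing numerically and temp_mat_A reduces to the identity held in the sparsity
            ! of A, which still grows the sparsity of mat_product correctly for the next root.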
temp_mat_A = I - A_ff/theta_k call MatShift(temp_mat_A, 1d0, ierr) @@ -1954,6 +1980,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then + print *, "outputting product in real case", "i_sparse", i_sparse, "i", i call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -1966,8 +1993,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then - i = i + 2 - cycle + square_sum = 0 + a_coeff = 0 + else + square_sum = 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2) + a_coeff = 2d0 * coefficients(i,1) end if ! If doing the normal iteration @@ -1976,7 +2006,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! temp_mat_A = -A call MatScale(temp_mat_A, -1d0, ierr) ! temp_mat_A = 2a I - A_ff - call MatShift(temp_mat_A, 2d0 * coefficients(i,1), ierr) + call MatShift(temp_mat_A, a_coeff, ierr) if (i == 1) then ! If i == 1 then we know mat_product is identity so we can do it directly @@ -2004,7 +2034,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) ! temp_mat_two = 2a * mat_product - call MatScale(temp_mat_two, 2d0 * coefficients(i,1), ierr) + call MatScale(temp_mat_two, a_coeff, ierr) ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then @@ -2016,11 +2046,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Then add the scaled version of each product if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), & + call MatAXPY(inv_matrix, square_sum, & temp_mat_two, SUBSET_NONZERO_PATTERN, ierr) else ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), temp_mat_two) + call MatAXPYWrapper(inv_matrix, square_sum, temp_mat_two) end if if (i .le. i_sparse - 2) then @@ -2035,11 +2065,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Then add the scaled version of each product if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(mat_product, -1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), & + call MatAXPY(mat_product, -square_sum, & temp_mat_three, SUBSET_NONZERO_PATTERN, ierr) else ! Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(mat_product, -1d0/(coefficients(i,1)**2 + coefficients(i,2)**2), temp_mat_three) + call MatAXPYWrapper(mat_product, -square_sum, temp_mat_three) end if ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply @@ -2067,7 +2097,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Skips eigenvalues that are numerically zero if (abs(coefficients(i,1)) > 1e-12) then - print *, "doing last real step" + print *, "doing last real step, adding in term", i, "coeff", coefficients(i,1) if (reuse_triggered) then ! 
If doing reuse we know our nonzeros are a subset call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) From ff5b9148eaed428fc53f0b0f3eb92a428042f301 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 15:49:01 +0000 Subject: [PATCH 15/41] Higher order fixed sparsity is now correct. Still have to add more testing --- src/Gmres_Poly_Newton.F90 | 193 ++++++++++++++++++++++++++------------ 1 file changed, 133 insertions(+), 60 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index d0944cc..40f761a 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -834,7 +834,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt, dimension(:), allocatable :: col_indices_off_proc_array, ad_indices, cols PetscReal, dimension(:), allocatable :: vals type(tIS), dimension(1) :: col_indices, row_indices - type(tMat) :: Ad, Ao, mat_sparsity_match + type(tMat) :: Ad, Ao, mat_sparsity_match, mat_product_save PetscInt, dimension(:), pointer :: colmap logical :: deallocate_submatrices = .FALSE. type(c_ptr) :: vals_c_ptr @@ -851,6 +851,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt, parameter :: one = 1, zero = 0 logical :: output_first_complex, skip_add PetscReal :: square_sum + integer, dimension(poly_order + 1, 2) :: status_output, status_product ! ~~~~~~~~~~ @@ -903,7 +904,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (coefficients(1,2) == 0d0 .AND. coefficients(2,2) /= 0d0) then call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & - cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) + cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & + status_output, status_product, mat_product_save) else @@ -912,7 +914,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & coefficients(1:poly_sparsity_order + 1, 1:2), & - cmat, mat_sparsity_match) + cmat, mat_sparsity_match, & + status_output, status_product) end if else @@ -932,9 +935,18 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! before moving on to the rest of the higher order roots call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, & coefficients(1:poly_sparsity_order + 1, 1:2), & - cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex) + cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & + status_output, status_product, mat_product_save) end if + print *, "status output real", status_output(:, 1) + print *, "status output complex", status_output(:, 2) + + print *, "sum", sum(status_output, 2) + + print *, "status product real", status_product(:, 1) + print *, "status product complex", status_product(:, 2) + ! We know we will never have non-zero locations outside of the highest constrained sparsity power call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(cmat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) @@ -1188,12 +1200,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (coefficients(term,2) /= 0d0 .AND. .NOT. output_first_complex) then term = term - 1 skip_add = .TRUE. - end if - ! ! If the fixed sparsity root is real and the previous root was real, - ! ! 
we just need to compute the correct part of the product, we just make sure not to add - ! if (coefficients(term,2) == 0d0 .AND. coefficients(term-1,2) == 0d0) then - ! skip_add = .TRUE. - ! end if + end if print *, "starting loop at term ", term, "skip_add ", skip_add @@ -1205,6 +1212,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If real if (coefficients(term,2) == 0d0) then + print *, "REAL CASE assembly", term + ! ~~~~~~~~~~~ ! Now can add the value to our matrix ! Can skip this if coeff is zero, but still need to compute A^(term-1) @@ -1214,19 +1223,17 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! to that order) ! ~~~~~~~~~~~ if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. & - term > poly_sparsity_order + 1) then + status_output(term, 1) /= 1) then - !if (.NOT. skip_add) then - print *, "CALLING SET VALUES ", term, " to row " - call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & - 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) - ! else - ! skip_add = .FALSE. - ! end if + print *, "ADDING IN REAL TERM ", term + call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & + 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) end if ! Initialize with previous product before the A*prod subtraction - vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) + vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) + + print *, "DOING REAL PRODCUT for term ", term ! Have to finish all the columns before we move onto the next coefficient do j_loc = 1, ncols @@ -1246,19 +1253,19 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If complex else + print *, "COMPLEX CASE assembly", term + square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) if (.NOT. skip_add) then ! We skip the 2 * a * prod from the first root of a complex pair if that has already ! been included in the inv_matrix from build_gmres_polynomial_newton_inverse_full - if (term < poly_sparsity_order + 2) then - if (.NOT. output_first_complex) then - temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) - else - temp(1:ncols) = 0d0 - end if - else + if (status_output(term, 2) /= 1) then + print *, term, "adding in 2a prod" temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) + else + print *, term, "skipping adding in 2a prod" + temp(1:ncols) = 0d0 end if ! This is the -A * prod @@ -1283,28 +1290,61 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! 2aprod/a^2+b^2 ! for (c, c, r) mat product is output without the 1/a^2+b^2 but that is fine as we ! compensate for that in the product - if (term < poly_sparsity_order + 2) then + if (status_output(term, 2) == 1) then if (output_first_complex) then + print *, "ADDING IN 2a prod second time for term ", term temp(1:ncols) = temp(1:ncols) + 2d0 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) end if end if ! First time through complex pair else + + print *, "SKIP ADDING IN COMPLEX TERM ", term + !@@@ for the case where we have (r, c, c, ....) and second order sparsity + ! i think the problem is that we have to skip adding anything to p as inverse_matrix + ! already has the correct values in it, as we computed tmp which will have 2nd order terms + ! in it, but we skipped the product in the full, which is correct as that would compute 3rd order + ! terms. 
so the thing that gets output in mat_prod_or_tmp is tmp + ! ! If we're skipping the add, then vals_previous_power_temp has all the correct ! values in it for temp ! All we have to do is compute prod for the next time through skip_add = .FALSE. + !@@@@ so then this line sets temp to be tmp temp(1:ncols) = vals_previous_power_temp(1:ncols) + ! @@@ have to be careful here! ! If we've gone back a term, we don't have anything in prod ! prod is I when term = 1 + ! @@@@ if we're doing this for the first time, we know product is I + ! so we just set prod to be I + ! @@@@ the problem is if we're not doing this for the first time + ! we need to know what prod had in it from the previous time, as our full + ! is only outputting prod or temp, not both, because at lower order when we output + ! temp in this case we knew prod was I so we didn't have to store both + ! in the (r, c, c) case prod will have been I - 1/theta_1 A_ff from the r + ! but for it to work with the loop below vals_previous_power_temp has to contain that but + ! over the sparsity of the 2nd order term. if (term == 1) then vals_previous_power_temp(1:ncols) = 0d0 if (diag_index /= -1) then vals_previous_power_temp(diag_index) = 1d0 end if + ! In the case the mat_product_save is not the identity, we need to pull it's value out + ! We only do this once for the first term in this case + else + + call MatGetRow(mat_product_save, i_loc - 1 + global_row_start, ncols_two, & + cols_two_ptr, vals_two_ptr, ierr) + + ! We have guaranteed in the full version that mat_product_save has fixed sparsity + vals_previous_power_temp(1:ncols_two) = vals_two_ptr(1:ncols_two) + + call MatRestoreRow(mat_product_save, i_loc - 1 + global_row_start, ncols_two, & + cols_two_ptr, vals_two_ptr, ierr) + end if end if @@ -1707,7 +1747,7 @@ end subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton ! ------------------------------------------------------------------------------------------------------------------------------- subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coefficients, & - inv_matrix, mat_prod_or_temp) + inv_matrix, mat_prod_or_temp, status_output, status_product) ! Specific 1st order with 1st order sparsity @@ -1717,6 +1757,7 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix type(tMat), intent(inout), optional :: mat_prod_or_temp + integer, dimension(poly_order + 1, 2), intent(inout), optional :: status_output, status_product ! Local variables PetscErrorCode :: ierr @@ -1733,6 +1774,9 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) + status_output = 0 + status_product = 0 + ! We only have two coefficients, so they are either both real or complex conjugates ! If real if (coefficients(1,2) == 0d0) then @@ -1752,6 +1796,10 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe call MatScale(inv_matrix, 0d0, ierr) ! Then add in the 0th order inverse call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) + + !!@@@ need product here + print *, "CHECK/FIX THIS" + call exit(0) ! Then just return return @@ -1776,7 +1824,10 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! 
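         ! Expanded, the formula in the comment below is
         !     1/theta_1 * I + 1/theta_2 * (I - A/theta_1)
         !       = (1/theta_1 + 1/theta_2) * I - A/(theta_1 * theta_2)
         ! i.e. for two real roots the assembled first-order inverse is just a scaled copy of
         ! A_ff plus a diagonal shift.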
result = 1/theta_1 + 1/theta_2 * (I -A_ff/theta_1) ! Don't need an assemble as there is one called in this - call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr) + call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr) + + status_output(1:2, 1) = 1 + status_product(1,1) = 1 ! Complex conjugate roots, a +- ib else @@ -1796,6 +1847,9 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe end if ! result = 2a I - A_ff/(a^2 + b^2) call MatScale(inv_matrix, 1d0/square_sum, ierr) + + status_output(1:2, 2) = 1 + status_product(1,2) = 1 end if end subroutine build_gmres_polynomial_newton_inverse_1st_1st @@ -1804,7 +1858,8 @@ end subroutine build_gmres_polynomial_newton_inverse_1st_1st ! ------------------------------------------------------------------------------------------------------------------------------- subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & - inv_matrix, mat_prod_or_temp, poly_sparsity_order, output_first_complex) + inv_matrix, mat_prod_or_temp, poly_sparsity_order, output_first_complex, & + status_output, status_product, mat_product_save) ! No constrained sparsity by default ! If you pass in mat_prod_or_temp, poly_sparsity_order, output_first_complex @@ -1816,9 +1871,10 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi integer, intent(in) :: poly_order PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix - type(tMat), intent(inout), optional :: mat_prod_or_temp + type(tMat), intent(inout), optional :: mat_prod_or_temp, mat_product_save integer, intent(in), optional :: poly_sparsity_order logical, intent(inout), optional :: output_first_complex + integer, dimension(poly_order + 1, 2), intent(inout), optional :: status_output, status_product ! Local variables PetscErrorCode :: ierr @@ -1845,6 +1901,8 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We start with an identity in mat_product call generate_identity(matrix, mat_product) + status_output = 0 + status_product = 0 ! If we're going to output the product as part of a fixed sparsity multiply, ! we may be asking to constrain the sparsity to a power in between order and order + 2 @@ -1961,6 +2019,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatAXPYWrapper(inv_matrix, square_sum, mat_product) end if end if + status_output(i, 1) = 1 ! temp_mat_A = A_ff/theta_k call MatScale(temp_mat_A, -square_sum, ierr) @@ -1977,6 +2036,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatDestroy(mat_product, ierr) mat_product = mat_product_k_plus_1 end if + status_product(i, 1) = maxval(status_product) + 1 ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then @@ -1989,7 +2049,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else - print *, "complex" + print *, "complex", first_complex ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then @@ -2000,13 +2060,34 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi a_coeff = 2d0 * coefficients(i,1) end if - ! If doing the normal iteration - if (.NOT. first_complex) then + ! If our fixed sparsity root is the first of a complex conjugate pair + ! 
We want to pass out mat_product and only add that to inv_matrix + ! This is equivalent to only part of tmp on Line 9 of Loe + ! The fixed sparsity loop will then finish the tmp with the term -A * prod/(a^2+b^2) + ! as this is the part that would increase the sparsity beyond poly_sparsity_order + if (i == poly_sparsity_order + 1 .AND. first_complex) then + + ! Copy mat_product into temp_mat_two + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) + + ! temp_mat_two = 2a * mat_product + call MatScale(temp_mat_two, a_coeff, ierr) + status_output(i, 2) = 1 + + ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + if (output_product .AND. i > i_sparse - 2) then + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + end if + + ! Just do the normal loop + else ! temp_mat_A = -A call MatScale(temp_mat_A, -1d0, ierr) ! temp_mat_A = 2a I - A_ff call MatShift(temp_mat_A, a_coeff, ierr) + status_output(i, 2) = 1 + status_output(i+1, 2) = 1 if (i == 1) then ! If i == 1 then we know mat_product is identity so we can do it directly @@ -2016,31 +2097,22 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatMatMult(temp_mat_A, mat_product, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) end if + status_product(i, 2) = maxval(status_product) + 1 ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then - print *, "outputting first part of product in complex case", "i_sparse", i_sparse, "i", i - call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) - end if - - ! If instead we only have the first of a complex conjugate pair - ! We want to pass out mat_product and only add that to inv_matrix - ! This is equivalent to only part of tmp on Line 9 of Loe - ! The fixed sparsity loop will then finish the tmp with the term -A * prod/(a^2+b^2) - ! as this is the part that would increase the sparsity beyond poly_sparsity_order - else - - ! Copy mat_product into temp_mat_two - call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) - - ! temp_mat_two = 2a * mat_product - call MatScale(temp_mat_two, a_coeff, ierr) - - ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply - if (output_product .AND. i > i_sparse - 2) then - call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) - end if - + print *, "outputting TEMP in complex case", "i_sparse", i_sparse, "i", i + call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + ! If i == 1 then we know mat_product is the identity and we don't bother + ! to write it out, we just have some custom code in the product given its trivial + if (i /= 1) then + ! This ensures it has the matching sparsity + call MatConvert(mat_prod_or_temp, MATSAME, MAT_INITIAL_MATRIX, mat_product_save, ierr) + ! This zeros mat_product_save and then puts mat_product into the sparsity pattern + ! of mat_prod_or_temp + call MatCopy(mat_product, mat_product_save, DIFFERENT_NONZERO_PATTERN, ierr) + end if + end if end if ! Then add the scaled version of each product @@ -2060,7 +2132,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! 
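               ! Remaining half of the conjugate-pair update for theta = a +/- ib
               ! (sketch, with d = a^2 + b^2): temp_mat_two = (2a*I - A) * mat_product was
               ! formed above and temp_mat_two/d has already been added into inv_matrix;
               ! here the product is advanced as
               !     mat_product = mat_product - A * temp_mat_two / d
               ! via the MatMatMult (A * temp_mat_two) and the -square_sum scaled AXPY below.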
temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) - call MatDestroy(temp_mat_two, ierr) + call MatDestroy(temp_mat_two, ierr) + status_output(i, 2) = 1 + status_product(i+1, 2) = maxval(status_product) + 1 ! Then add the scaled version of each product if (reuse_triggered) then @@ -2075,7 +2149,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. .NOT. first_complex) then print *, "outputting product in complex case", "i_sparse", i_sparse, "i", i - call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if call MatDestroy(temp_mat_three, ierr) @@ -2105,14 +2179,13 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Have to use the DIFFERENT_NONZERO_PATTERN here call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) end if + status_output(i, 1) = 1 end if end if end if call MatDestroy(temp_mat_A, ierr) call MatDestroy(mat_product, ierr) - - !call exit(0) end subroutine build_gmres_polynomial_newton_inverse_full From e9e654a94078b2a80c4a1aba760467fa06f57449 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 16:36:43 +0000 Subject: [PATCH 16/41] Output was wrong in gmres polynomial comparison test --- tests/ex12f_gmres_poly.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ex12f_gmres_poly.F90 b/tests/ex12f_gmres_poly.F90 index bdbf8ce..dc597bb 100644 --- a/tests/ex12f_gmres_poly.F90 +++ b/tests/ex12f_gmres_poly.F90 @@ -148,18 +148,18 @@ program main ! A_ff on each level they should be basically identical and hence ! we should have almost no difference in the resulting residual ! ~~~~~~~~~~~~~ - norm_diff_one = abs(norm_power - norm_newton)/norm_newton + norm_diff_one = abs(norm_arnoldi - norm_newton)/norm_arnoldi if (norm_diff_one > 1e-9) then print *, "Residuals differ between polynomial bases!", norm_diff_one - print *, "Power basis residual: ", norm_power print *, "Newton basis residual: ", norm_newton + print *, "Arnoldi basis residual: ", norm_arnoldi error stop 1 end if - norm_diff_two = abs(norm_arnoldi - norm_power)/norm_power + norm_diff_two = abs(norm_arnoldi - norm_power)/norm_arnoldi if (norm_diff_two > 1e-9) then print *, "Residuals differ between polynomial bases!", norm_diff_two + print *, "Power basis residual: ", norm_power print *, "Arnoldi basis residual: ", norm_arnoldi - print *, "Newton basis residual: ", norm_newton error stop 1 end if From 0aeed7e03a84536fb6faa3d962d3b3d60529c624 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 16:37:03 +0000 Subject: [PATCH 17/41] Add tests for fixed sparsity --- src/Gmres_Poly_Newton.F90 | 112 ++++++++++++++++++++++---------------- tests/Makefile | 7 ++- 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 40f761a..8fd36d7 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -919,8 +919,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end if else - print *,"reals", coefficients(:,1) - print *,"imags", coefficients(:,2) + ! print *,"reals", coefficients(:,1) + ! print *,"imags", coefficients(:,2) ! If we're any higher, then we build cmat up to that order ! 
But we have to be careful because the last root we want to explicitly @@ -939,13 +939,13 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp status_output, status_product, mat_product_save) end if - print *, "status output real", status_output(:, 1) - print *, "status output complex", status_output(:, 2) + ! print *, "status output real", status_output(:, 1) + ! print *, "status output complex", status_output(:, 2) - print *, "sum", sum(status_output, 2) + ! print *, "sum", sum(status_output, 2) - print *, "status product real", status_product(:, 1) - print *, "status product complex", status_product(:, 2) + ! print *, "status product real", status_product(:, 1) + ! print *, "status product complex", status_product(:, 2) ! We know we will never have non-zero locations outside of the highest constrained sparsity power call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) @@ -1202,17 +1202,17 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp skip_add = .TRUE. end if - print *, "starting loop at term ", term, "skip_add ", skip_add + !print *, "starting loop at term ", term, "skip_add ", skip_add ! This loop skips the last coefficient do while (term .le. size(coefficients, 1) - 1) - print *, "term ", term, "coeff ", coefficients(term,1), coefficients(term,2), skip_add + !print *, "term ", term, "coeff ", coefficients(term,1), coefficients(term,2), skip_add ! If real if (coefficients(term,2) == 0d0) then - print *, "REAL CASE assembly", term + !print *, "REAL CASE assembly", term ! ~~~~~~~~~~~ ! Now can add the value to our matrix @@ -1225,7 +1225,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. & status_output(term, 1) /= 1) then - print *, "ADDING IN REAL TERM ", term + !print *, "ADDING IN REAL TERM ", term call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1233,7 +1233,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Initialize with previous product before the A*prod subtraction vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) - print *, "DOING REAL PRODCUT for term ", term + !print *, "DOING REAL PRODCUT for term ", term ! Have to finish all the columns before we move onto the next coefficient do j_loc = 1, ncols @@ -1253,7 +1253,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If complex else - print *, "COMPLEX CASE assembly", term + !print *, "COMPLEX CASE assembly", term square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) if (.NOT. skip_add) then @@ -1261,10 +1261,10 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! We skip the 2 * a * prod from the first root of a complex pair if that has already ! been included in the inv_matrix from build_gmres_polynomial_newton_inverse_full if (status_output(term, 2) /= 1) then - print *, term, "adding in 2a prod" + !print *, term, "adding in 2a prod" temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) else - print *, term, "skipping adding in 2a prod" + !print *, term, "skipping adding in 2a prod" temp(1:ncols) = 0d0 end if @@ -1292,7 +1292,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! 
compensate for that in the product if (status_output(term, 2) == 1) then if (output_first_complex) then - print *, "ADDING IN 2a prod second time for term ", term + !print *, "ADDING IN 2a prod second time for term ", term temp(1:ncols) = temp(1:ncols) + 2d0 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) end if end if @@ -1300,7 +1300,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! First time through complex pair else - print *, "SKIP ADDING IN COMPLEX TERM ", term + !print *, "SKIP ADDING IN COMPLEX TERM ", term !@@@ for the case where we have (r, c, c, ....) and second order sparsity ! i think the problem is that we have to skip adding anything to p as inverse_matrix ! already has the correct values in it, as we computed tmp which will have 2nd order terms @@ -1376,7 +1376,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Final step if last root is real if (coefficients(term,2) == 0d0) then if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then - print *, "adding REAL final term ", term, " coeff ", coefficients(term,1) + !print *, "adding REAL final term ", term, " coeff ", coefficients(term,1) call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1774,8 +1774,10 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe call MatSetOption(inv_matrix, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) call MatSetOption(inv_matrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE, ierr) - status_output = 0 - status_product = 0 + if (output_product) then + status_output = 0 + status_product = 0 + end if ! We only have two coefficients, so they are either both real or complex conjugates ! If real @@ -1826,8 +1828,10 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! Don't need an assemble as there is one called in this call MatShift(inv_matrix, 1d0/(coefficients(1, 1)), ierr) - status_output(1:2, 1) = 1 - status_product(1,1) = 1 + if (output_product) then + status_output(1:2, 1) = 1 + status_product(1,1) = 1 + end if ! Complex conjugate roots, a +- ib else @@ -1848,8 +1852,10 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! result = 2a I - A_ff/(a^2 + b^2) call MatScale(inv_matrix, 1d0/square_sum, ierr) - status_output(1:2, 2) = 1 - status_product(1,2) = 1 + if (output_product) then + status_output(1:2, 2) = 1 + status_product(1,2) = 1 + end if end if end subroutine build_gmres_polynomial_newton_inverse_1st_1st @@ -1879,7 +1885,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Local variables PetscErrorCode :: ierr logical :: reuse_triggered, output_product, first_complex - integer :: i, i_sparse + integer :: i, i_sparse, sparsity_order type(tMat) :: mat_product, temp_mat_A, temp_mat_two, temp_mat_three, mat_product_k_plus_1 PetscReal :: square_sum, a_coeff @@ -1901,8 +1907,14 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We start with an identity in mat_product call generate_identity(matrix, mat_product) - status_output = 0 - status_product = 0 + if (output_product) then + status_output = 0 + status_product = 0 + end if + sparsity_order = poly_order + if (present(poly_sparsity_order)) then + sparsity_order = poly_sparsity_order + end if ! If we're going to output the product as part of a fixed sparsity multiply, ! 
we may be asking to constrain the sparsity to a power in between order and order + 2 @@ -1937,13 +1949,13 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi i_sparse = size(coefficients, 1) first_complex = .FALSE. - print *, "size coeffs", size(coefficients, 1), "coeffs", coefficients(:, 1), coefficients(:, 2) + !print *, "size coeffs", size(coefficients, 1), "coeffs", coefficients(:, 1), coefficients(:, 2) if (output_product) then output_first_complex = .FALSE. if (output_product) then - i_sparse = poly_sparsity_order + 1 + i_sparse = sparsity_order + 1 ! If the last root is real we don't have to do anything if (coefficients(i_sparse,2) /= 0d0) then @@ -1970,7 +1982,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi first_complex = output_first_complex end if - print *, "i_sparse", i_sparse, "output_first_complex", output_first_complex + !print *, "i_sparse", i_sparse, "output_first_complex", output_first_complex ! ~~~~~~~~~~~~ ! Iterate over the i @@ -1981,7 +1993,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We're always building up the next product do while (i .le. i_sparse - 1) - print *, "i = ", i + !print *, "i = ", i ! Duplicate & copy the matrix, but ensure there is a diagonal present ! temp_mat_A is going to store things with the sparsity of A @@ -1995,7 +2007,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! If real this is easy if (coefficients(i,2) == 0d0) then - print *, "real", "i_sparse", i_sparse + !print *, "real", "i_sparse", i_sparse ! Skips eigenvalues that are numerically zero ! We still compute the entries as as zero because we need the sparsity @@ -2019,7 +2031,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatAXPYWrapper(inv_matrix, square_sum, mat_product) end if end if - status_output(i, 1) = 1 + if (output_product) status_output(i, 1) = 1 ! temp_mat_A = A_ff/theta_k call MatScale(temp_mat_A, -square_sum, ierr) @@ -2036,11 +2048,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatDestroy(mat_product, ierr) mat_product = mat_product_k_plus_1 end if - status_product(i, 1) = maxval(status_product) + 1 + if (output_product) status_product(i, 1) = maxval(status_product) + 1 ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then - print *, "outputting product in real case", "i_sparse", i_sparse, "i", i + !print *, "outputting product in real case", "i_sparse", i_sparse, "i", i call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2049,7 +2061,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else - print *, "complex", first_complex + !print *, "complex", first_complex ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then @@ -2065,14 +2077,14 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! This is equivalent to only part of tmp on Line 9 of Loe ! The fixed sparsity loop will then finish the tmp with the term -A * prod/(a^2+b^2) ! as this is the part that would increase the sparsity beyond poly_sparsity_order - if (i == poly_sparsity_order + 1 .AND. first_complex) then + if (i == sparsity_order + 1 .AND. first_complex) then ! 
Copy mat_product into temp_mat_two call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, temp_mat_two, ierr) ! temp_mat_two = 2a * mat_product call MatScale(temp_mat_two, a_coeff, ierr) - status_output(i, 2) = 1 + if (output_product) status_output(i, 2) = 1 ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then @@ -2086,8 +2098,10 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatScale(temp_mat_A, -1d0, ierr) ! temp_mat_A = 2a I - A_ff call MatShift(temp_mat_A, a_coeff, ierr) - status_output(i, 2) = 1 - status_output(i+1, 2) = 1 + if (output_product) then + status_output(i, 2) = 1 + status_output(i+1, 2) = 1 + end if if (i == 1) then ! If i == 1 then we know mat_product is identity so we can do it directly @@ -2097,11 +2111,11 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatMatMult(temp_mat_A, mat_product, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) end if - status_product(i, 2) = maxval(status_product) + 1 + if (output_product) status_product(i, 2) = maxval(status_product) + 1 ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then - print *, "outputting TEMP in complex case", "i_sparse", i_sparse, "i", i + !print *, "outputting TEMP in complex case", "i_sparse", i_sparse, "i", i call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) ! If i == 1 then we know mat_product is the identity and we don't bother ! to write it out, we just have some custom code in the product given its trivial @@ -2127,14 +2141,16 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi if (i .le. i_sparse - 2) then - print *, "doing complex matmult step" + !print *, "doing complex matmult step" ! temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) call MatDestroy(temp_mat_two, ierr) - status_output(i, 2) = 1 - status_product(i+1, 2) = maxval(status_product) + 1 + if (output_product) then + status_output(i, 2) = 1 + status_product(i+1, 2) = maxval(status_product) + 1 + end if ! Then add the scaled version of each product if (reuse_triggered) then @@ -2148,7 +2164,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. .NOT. first_complex) then - print *, "outputting product in complex case", "i_sparse", i_sparse, "i", i + !print *, "outputting product in complex case", "i_sparse", i_sparse, "i", i call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2171,7 +2187,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Skips eigenvalues that are numerically zero if (abs(coefficients(i,1)) > 1e-12) then - print *, "doing last real step, adding in term", i, "coeff", coefficients(i,1) + !print *, "doing last real step, adding in term", i, "coeff", coefficients(i,1) if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) @@ -2179,7 +2195,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! 
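                     ! MatAXPY's MatStructure argument matters here: SUBSET_NONZERO_PATTERN tells
                     ! PETSc the added matrix's nonzeros are contained in the target's, so no new
                     ! allocation or pattern merge is needed, whereas the non-reuse branch goes
                     ! through MatAXPYWrapper with DIFFERENT_NONZERO_PATTERN, which must merge
                     ! the two patterns and is correspondingly more expensive.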
Have to use the DIFFERENT_NONZERO_PATTERN here call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) end if - status_output(i, 1) = 1 + if (output_product) status_output(i, 1) = 1 end if end if end if diff --git a/tests/Makefile b/tests/Makefile index 5a5081f..47a11c3 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -134,10 +134,13 @@ run_tests_load_serial: echo "--- Testing order = $$order ---"; \ ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ done - @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders first order fixed sparsity" + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity in parallel" @for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ - ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order; \ + for sparsity in $$(seq 1 $$(($$order - 1))); do \ + echo " --- Testing sparsity order = $$sparsity ---"; \ + ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ + done; \ done # ~~~~~~~~~~~ From 798b1350e35e2b3e92ce0b12dd3e0fca900a0a3d Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 18:34:44 +0000 Subject: [PATCH 18/41] Fix 1st order sparsity --- src/Gmres_Poly_Newton.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 8fd36d7..efa811f 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -1757,7 +1757,7 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix type(tMat), intent(inout), optional :: mat_prod_or_temp - integer, dimension(poly_order + 1, 2), intent(inout), optional :: status_output, status_product + integer, dimension(:, :), intent(inout), optional :: status_output, status_product ! Local variables PetscErrorCode :: ierr @@ -1880,7 +1880,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi type(tMat), intent(inout), optional :: mat_prod_or_temp, mat_product_save integer, intent(in), optional :: poly_sparsity_order logical, intent(inout), optional :: output_first_complex - integer, dimension(poly_order + 1, 2), intent(inout), optional :: status_output, status_product + integer, dimension(:, :), intent(inout), optional :: status_output, status_product ! Local variables PetscErrorCode :: ierr From 0bf4cd9b88e7e06cc57bf8aea0df40dc345b2106 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 18:56:07 +0000 Subject: [PATCH 19/41] Fixed 1st order case where (r,r) but second is zero --- src/Gmres_Poly_Newton.F90 | 43 +++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index efa811f..6c437ce 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -851,7 +851,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt, parameter :: one = 1, zero = 0 logical :: output_first_complex, skip_add PetscReal :: square_sum - integer, dimension(poly_order + 1, 2) :: status_output, status_product + integer, dimension(poly_order + 1, 2) :: status_output ! 
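+    ! status_output is a small flag array over the polynomial terms: it records which
+    ! contributions were already added to cmat by the constrained-sparsity build routines
+    ! (see e.g. the status_output(term, 1) /= 1 test in the assembly loop below), so that
+    ! the fixed sparsity assembly here does not add those terms a second time.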
~~~~~~~~~~ @@ -905,7 +905,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & - status_output, status_product, mat_product_save) + status_output, mat_product_save) else @@ -915,7 +915,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & coefficients(1:poly_sparsity_order + 1, 1:2), & cmat, mat_sparsity_match, & - status_output, status_product) + status_output) end if else @@ -936,16 +936,13 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, & coefficients(1:poly_sparsity_order + 1, 1:2), & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & - status_output, status_product, mat_product_save) + status_output, mat_product_save) end if ! print *, "status output real", status_output(:, 1) ! print *, "status output complex", status_output(:, 2) - ! print *, "sum", sum(status_output, 2) - - ! print *, "status product real", status_product(:, 1) - ! print *, "status product complex", status_product(:, 2) + ! print *, "sum", sum(status_output, 2) ! We know we will never have non-zero locations outside of the highest constrained sparsity power call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) @@ -1747,7 +1744,7 @@ end subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton ! ------------------------------------------------------------------------------------------------------------------------------- subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coefficients, & - inv_matrix, mat_prod_or_temp, status_output, status_product) + inv_matrix, mat_prod_or_temp, status_output) ! Specific 1st order with 1st order sparsity @@ -1757,7 +1754,7 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe PetscReal, dimension(:, :), target, contiguous, intent(inout) :: coefficients type(tMat), intent(inout) :: inv_matrix type(tMat), intent(inout), optional :: mat_prod_or_temp - integer, dimension(:, :), intent(inout), optional :: status_output, status_product + integer, dimension(:, :), intent(inout), optional :: status_output ! Local variables PetscErrorCode :: ierr @@ -1776,7 +1773,6 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe if (output_product) then status_output = 0 - status_product = 0 end if ! We only have two coefficients, so they are either both real or complex conjugates @@ -1796,12 +1792,17 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe ! Set to zero call MatScale(inv_matrix, 0d0, ierr) - ! Then add in the 0th order inverse - call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) - !!@@@ need product here - print *, "CHECK/FIX THIS" - call exit(0) + ! Tricky case here as we want to pass out the identity with the + ! sparsity of A + if (output_product) then + call MatConvert(inv_matrix, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) + call MatShift(mat_prod_or_temp, 1d0, ierr) + status_output(1:2, 1) = 1 + end if + + ! Then add in the 0th order inverse + call MatShift(inv_matrix, 1d0/coefficients(1,1), ierr) ! 
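+         ! In other words, with the second root numerically zero the assembled inverse
+         ! collapses to the 0th order one, (1/theta_1) I, just stored in the sparsity
+         ! pattern of A (the copy was zeroed above and only the diagonal shifted), while
+         ! the caller still gets the identity back as the running product via mat_prod_or_temp.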
Then just return return @@ -1830,7 +1831,6 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe if (output_product) then status_output(1:2, 1) = 1 - status_product(1,1) = 1 end if ! Complex conjugate roots, a +- ib @@ -1854,7 +1854,6 @@ subroutine build_gmres_polynomial_newton_inverse_1st_1st(matrix, poly_order, coe if (output_product) then status_output(1:2, 2) = 1 - status_product(1,2) = 1 end if end if @@ -1865,7 +1864,7 @@ end subroutine build_gmres_polynomial_newton_inverse_1st_1st subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coefficients, & inv_matrix, mat_prod_or_temp, poly_sparsity_order, output_first_complex, & - status_output, status_product, mat_product_save) + status_output, mat_product_save) ! No constrained sparsity by default ! If you pass in mat_prod_or_temp, poly_sparsity_order, output_first_complex @@ -1880,7 +1879,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi type(tMat), intent(inout), optional :: mat_prod_or_temp, mat_product_save integer, intent(in), optional :: poly_sparsity_order logical, intent(inout), optional :: output_first_complex - integer, dimension(:, :), intent(inout), optional :: status_output, status_product + integer, dimension(:, :), intent(inout), optional :: status_output ! Local variables PetscErrorCode :: ierr @@ -1909,7 +1908,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call generate_identity(matrix, mat_product) if (output_product) then status_output = 0 - status_product = 0 end if sparsity_order = poly_order if (present(poly_sparsity_order)) then @@ -2048,7 +2046,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatDestroy(mat_product, ierr) mat_product = mat_product_k_plus_1 end if - if (output_product) status_product(i, 1) = maxval(status_product) + 1 ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then @@ -2111,7 +2108,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatMatMult(temp_mat_A, mat_product, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) end if - if (output_product) status_product(i, 2) = maxval(status_product) + 1 ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then @@ -2149,7 +2145,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi call MatDestroy(temp_mat_two, ierr) if (output_product) then status_output(i, 2) = 1 - status_product(i+1, 2) = maxval(status_product) + 1 end if ! Then add the scaled version of each product From bbbbac31a4df0409c7a003082c85b142a29b1b69 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 21:44:04 +0000 Subject: [PATCH 20/41] Added eigenvalue clustering to improve stbility of Newton form of GMRES polynomials. Also ensure all the exact zero eigenvalues are moved to the end of the coefficients array. --- src/Gmres_Poly_Newton.F90 | 495 ++++++++++++++++++++++++++++---------- tests/Makefile | 49 +++- 2 files changed, 409 insertions(+), 135 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 6c437ce..66bc5c8 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -135,6 +135,240 @@ subroutine modified_leja(real_roots, imag_roots, indices) end subroutine modified_leja + ! 
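+  ! A small worked illustration of what the routine below does (the numbers are made up
+  ! purely to show the arithmetic, they are not from a real run):
+  !   real_roots = (/ 2d0, 2d0 + 1d-12, 5d-1 /), imag_roots = 0d0, rel_tol ~ 1e-8, abs_tol ~ 1e-15
+  ! The first two entries lie within abs_tol + rel_tol*max(|theta_i|, |theta_j|, 1) of each
+  ! other, so they are averaged into a single root ~2d0 and the result is
+  !   real_roots = (/ 2d0, 5d-1, 0d0 /)
+  ! with the freed slot zeroed and left at the end of the array.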
------------------------------------------------------------------------------------------------- + + subroutine cluster_eigenvalues_stable(real_roots, imag_roots, rel_tol, abs_tol) + + ! Robust clustering of (possibly complex) harmonic Ritz values. + ! Numerically distinct clusters are moved to the front. + ! Remaining entries are set to zero. + ! Skips eigenvalues that are exactly zero (both real and imag parts). + ! + ! Inputs: + ! real_roots, imag_roots : eigenvalues (length k) + ! rel_tol : relative tolerance (suggest sqrt(eps) ~ 1e-8) + ! abs_tol : absolute tolerance (suggest eps * ||H|| ~ 1e-15) + ! + ! Outputs: + ! real_roots, imag_roots : clustered eigenvalues at front, zeros after + + PetscReal, dimension(:), intent(inout) :: real_roots, imag_roots + PetscReal, intent(in) :: rel_tol, abs_tol + integer :: i, j, n, n_unique, cluster_size + logical, allocatable :: used(:) + PetscReal :: dist, mag_i, mag_j, scale + PetscReal :: sum_real, sum_imag + PetscReal, allocatable :: rtmp(:), itmp(:) + + n = size(real_roots) + + allocate(used(n)) + allocate(rtmp(n), itmp(n)) + + used = .false. + n_unique = 0 + + ! --------------------------------------------------------- + ! All-pairs clustering (no sorting to preserve proximity) + ! --------------------------------------------------------- + do i = 1, n + + if (used(i)) cycle + + ! Skip eigenvalues that are exactly zero + if (real_roots(i) == 0.0d0 .AND. imag_roots(i) == 0.0d0) then + used(i) = .true. + cycle + end if + + ! Start new cluster with eigenvalue i + sum_real = real_roots(i) + sum_imag = imag_roots(i) + cluster_size = 1 + used(i) = .true. + + mag_i = sqrt(real_roots(i)**2 + imag_roots(i)**2) + + ! Look for all other eigenvalues close to this one + do j = i + 1, n + + if (used(j)) cycle + + ! Skip exactly zero eigenvalues + if (real_roots(j) == 0.0d0 .AND. imag_roots(j) == 0.0d0) then + used(j) = .true. + cycle + end if + + mag_j = sqrt(real_roots(j)**2 + imag_roots(j)**2) + + ! Distance between eigenvalues + dist = sqrt((real_roots(j) - real_roots(i))**2 + & + (imag_roots(j) - imag_roots(i))**2) + + ! Use the larger magnitude for relative scaling + scale = max(mag_i, mag_j, 1.0d0) + + ! Check if within tolerance + if (dist <= abs_tol + rel_tol * scale) then + sum_real = sum_real + real_roots(j) + sum_imag = sum_imag + imag_roots(j) + cluster_size = cluster_size + 1 + used(j) = .true. + end if + + end do + + ! Compute cluster centroid (mean) + n_unique = n_unique + 1 + rtmp(n_unique) = sum_real / dble(cluster_size) + itmp(n_unique) = sum_imag / dble(cluster_size) + + end do + + ! --------------------------------------------------------- + ! Output compact form + ! --------------------------------------------------------- + real_roots = 0.0d0 + imag_roots = 0.0d0 + + real_roots(1:n_unique) = rtmp(1:n_unique) + imag_roots(1:n_unique) = itmp(1:n_unique) + + deallocate(used, rtmp, itmp) + + end subroutine cluster_eigenvalues_stable + + + ! ------------------------------------------------------------------------------------------------- + + subroutine compute_extra_roots(real_roots, imag_roots, real_roots_output, imag_roots_output) + + ! Add extra roots for stability + ! Computes the product of factors for each eigenvalue and adds extra copies + ! of roots that have large products (to improve polynomial stability) + ! Only non-zero eigenvalues should be passed in + ! real_roots_output, imag_roots_output are allocated and filled with the original + ! roots plus any extra copies, with perturbed values for the leja sort + + ! 
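+    ! As a rough worked example of the criterion in the loop below (numbers chosen only
+    ! to illustrate the arithmetic): if a root theta_k has
+    !   pof(k) = product over i /= k of |1 - theta_k/theta_i| ~ 1e20
+    ! then log10(pof) = 20 > 4 and ceiling((20 - 4)/14) = 2 extra copies of theta_k are
+    ! appended (with another 2 for its conjugate when theta_k is complex).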
~~~~~~ + PetscReal, dimension(:), intent(inout) :: real_roots, imag_roots + PetscReal, dimension(:), allocatable, intent(inout) :: real_roots_output, imag_roots_output + + ! Local variables + integer :: i_loc, j_loc, k_loc, n_roots, total_extra, counter + PetscReal :: a, b, c, d, div_real, div_imag, div_mag + PetscReal, dimension(size(real_roots)) :: pof + integer, dimension(size(real_roots)) :: extra_pair_roots, overflow + + ! ~~~~~~ + + n_roots = size(real_roots) + + ! Compute the product of factors + pof = 1 + extra_pair_roots = 0 + overflow = 0 + total_extra = 0 + do k_loc = 1, n_roots + + a = real_roots(k_loc) + b = imag_roots(k_loc) + + ! We have already computed pof for the positive imaginary complex conjugate + if (b < 0) cycle + + ! Skips eigenvalues that are numerically zero + if (abs(a) < 1e-12) cycle + if (a**2 + b**2 < 1e-12) cycle + + ! Compute product(k)_{i, j/=i} * | 1 - theta_j/theta_i| + do i_loc = 1, n_roots + + ! Skip + if (k_loc == i_loc) cycle + + c = real_roots(i_loc) + d = imag_roots(i_loc) + + ! Skips eigenvalues that are numerically zero + if (abs(c) < 1e-12) cycle + if (c**2 + d**2 < 1e-12) cycle + + ! theta_k/theta_i + div_real = (a * c + b * d)/(c**2 + d**2) + div_imag = (b * c - a * d)/(c**2 + d**2) + + ! |1 - theta_k/theta_i| + div_mag = sqrt((1 - div_real)**2 + div_imag**2) + + ! Pof is about to overflow, store the exponent and + ! reset pof back to one + ! We can hit this for very high order polynomials, where we have to + ! add more roots than 22 (ie pof > 1e308) + if (log10(pof(k_loc)) + log10(div_mag) > 307) then + overflow(k_loc) = overflow(k_loc) + int(log10(pof(k_loc))) + pof(k_loc) = 1 + end if + + ! Product + pof(k_loc) = pof(k_loc) * div_mag + + end do + + ! If pof > 1e4, we add an extra root, plus one extra for every 1e14 + if (log10(pof(k_loc)) > 4 .OR. overflow(k_loc) /= 0) then + + ! if real extra_pair_roots counts each distinct real root we're adding + ! if imaginary it only counts a pair as one + extra_pair_roots(k_loc) = ceiling((log10(pof(k_loc)) + overflow(k_loc) - 4.0)/14.0) + total_extra = total_extra + extra_pair_roots(k_loc) + + ! If imaginary, the pof is the same for the conjugate, let's just set it to -1 + if (b > 0) then + ! We know the positive imaginary value is first, so the conjugate follows it + pof(k_loc+1) = -1 + ! We need the conjugates as well + total_extra = total_extra + extra_pair_roots(k_loc) + + end if + end if + end do + + ! Allocate output arrays (original roots + extra roots) + allocate(real_roots_output(n_roots + total_extra)) + allocate(imag_roots_output(n_roots + total_extra)) + real_roots_output = 0d0 + imag_roots_output = 0d0 + + ! Copy in original roots + real_roots_output(1:n_roots) = real_roots(1:n_roots) + imag_roots_output(1:n_roots) = imag_roots(1:n_roots) + + ! Add the extra copies of roots, ensuring conjugate pairs we add + ! are next to each other + counter = n_roots + 1 + do i_loc = 1, n_roots + + ! For each extra root pair to add + do j_loc = 1, extra_pair_roots(i_loc) + + real_roots_output(counter) = real_roots(i_loc) + imag_roots_output(counter) = imag_roots(i_loc) + ! Add in the conjugate + if (imag_roots(i_loc) > 0) then + real_roots_output(counter+1) = real_roots(i_loc) + imag_roots_output(counter+1) = -imag_roots(i_loc) + end if + + counter = counter + 1 + if (imag_roots(i_loc) > 0) counter = counter + 1 + end do + end do + + end subroutine compute_extra_roots + + ! 
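+  ! For orientation (a sketch of the standard Newton-basis recurrence, not a change in
+  ! behaviour): the roots produced above are consumed roughly as
+  !   p    <- p + prod/theta_k
+  !   prod <- (I - A/theta_k) * prod
+  ! for a real root, with conjugate pairs applied together, so each extra copy of a root
+  ! simply appears one more time in that product. The extra copies are only perturbed for
+  ! the Leja ordering; the unperturbed values are what is actually applied.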
------------------------------------------------------------------------------------------------------------------------------- subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots, coefficients) @@ -160,22 +394,22 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! Local variables PetscInt :: global_rows, global_cols, local_rows, local_cols integer :: lwork, subspace_size, rank, i_loc, comm_size, comm_rank, errorcode, iwork_size, j_loc - integer :: total_extra, counter, k_loc, m + integer :: total_extra, counter, k_loc, m, numerical_order PetscErrorCode :: ierr MPIU_Comm :: MPI_COMM_MATRIX PetscReal, dimension(poly_order+2,poly_order+1) :: H_n PetscReal, dimension(poly_order+1,poly_order+2) :: H_n_T - PetscReal, dimension(poly_order+1) :: e_d, solution, s, pof - integer, dimension(poly_order+1) :: extra_pair_roots, overflow + PetscReal, dimension(poly_order+1) :: e_d, solution, s integer, dimension(:), allocatable :: iwork_allocated, indices - PetscReal, dimension(:), allocatable :: work + PetscReal, dimension(:), allocatable :: work, real_roots_added, imag_roots_added + PetscReal, dimension(:), allocatable :: perturbed_real, perturbed_imag PetscReal, dimension(:,:), allocatable :: VL, VR - PetscReal :: beta, div_real, div_imag, a, b, c, d, div_mag + PetscReal :: beta PetscReal, dimension(:, :), allocatable :: coefficients_temp type(tVec) :: w_j type(tVec), dimension(poly_order+2) :: V_n logical :: use_harmonic_ritz = .TRUE. - PetscReal :: rcond = 1e-12 + PetscReal :: rcond = 1e-12, rel_tol, abs_tol, H_norm ! ~~~~~~ @@ -264,8 +498,8 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots call dgelsd(poly_order + 1, poly_order + 1, 1, H_n_T, size(H_n_T, 1), & e_d, size(e_d), s, rcond, rank, & work, lwork, iwork_allocated, errorcode) - deallocate(work, iwork_allocated) - + deallocate(work, iwork_allocated) + ! Copy in the solution solution = e_d @@ -309,154 +543,150 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) end if - ! In some cases with rank deficiency, we can still end up with non-zero (or negative) eigenvalues that + ! print *, "coefficients r", coefficients(:, 1) + ! print *, "coefficients c", coefficients(:, 2) + + ! These are the tolerances that control the clustering + H_norm = norm2(H_n(1:m,1:m)) + rel_tol = 1.0d0 * sqrt(epsilon(1.0d0)) + abs_tol = epsilon(1.0d0) * max(H_norm, beta) + + !print *, "H_norm", H_norm, "rel_tol", rel_tol, "abs_tol", abs_tol + + ! In some cases with numerical rank deficiency, we can still + ! end up with non-zero (or negative) eigenvalues that ! are trivially small - we set them explicitly to zero do i_loc = 1, poly_order + 1 - if (abs(coefficients(i_loc, 1)**2 + coefficients(i_loc, 2)**2) < 1e-12) then + if (coefficients(i_loc,1)**2 + coefficients(i_loc,2)**2 < & + (abs_tol + rel_tol*H_norm)**2) then coefficients(i_loc, 1) = 0d0 coefficients(i_loc, 2) = 0d0 end if end do - ! ~~~~~~~~~~~~~~ - ! Add roots for stability - ! ~~~~~~~~~~~~~~ - if (add_roots) then - - ! Compute the product of factors - pof = 1 - extra_pair_roots = 0 - overflow = 0 - total_extra = 0 - do k_loc = 1, poly_order + 1 + ! print *, "after zero coefficients r", coefficients(:, 1) + ! print *, "after zero coefficients c", coefficients(:, 2) + + ! 
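+    ! For scale (in double precision): epsilon ~ 2.2e-16, so rel_tol ~ 1.5e-8 and
+    ! abs_tol ~ 2.2e-16 * max(||H||, beta). Roots that agree to roughly eight significant
+    ! figures (relative to the size of H) are therefore treated as a single root below.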
Cluster close eigenvalues together to improve stability of the polynomial evaluation + call cluster_eigenvalues_stable(coefficients(:, 1), coefficients(:, 2), rel_tol, abs_tol) + + ! print *, "after cluster coefficients r", coefficients(:, 1) + ! print *, "after cluster coefficients c", coefficients(:, 2) - a = coefficients(k_loc, 1) - b = coefficients(k_loc, 2) - - ! We have already computed pof for the positive imaginary complex conjugate - if (b < 0) cycle + ! ~~~~~~~~~~~~~~ + ! Extract the non-zero eigenvalues for root adding and leja ordering + ! Zero eigenvalues will be appended at the end + ! ~~~~~~~~~~~~~~ + ! Count the number of non-zero eigenvalues after clustering + numerical_order = 0 + do i_loc = 1, poly_order + 1 + if (coefficients(i_loc, 1) /= 0d0 .OR. coefficients(i_loc, 2) /= 0d0) then + numerical_order = numerical_order + 1 + end if + end do - ! Skips eigenvalues that are numerically zero - if (abs(a) < 1e-12) cycle - if (a**2 + b**2 < 1e-12) cycle + ! ~~~~~~~~~~~~~~ + ! Add roots for stability (only on non-zero eigenvalues) + ! ~~~~~~~~~~~~~~ + if (add_roots .AND. numerical_order > 0) then - ! Compute product(k)_{i, j/=i} * | 1 - theta_j/theta_i| - do i_loc = 1, poly_order + 1 + ! Extract non-zero eigenvalues into a temporary array + allocate(coefficients_temp(numerical_order, 2)) + counter = 0 + do i_loc = 1, poly_order + 1 + if (coefficients(i_loc, 1) /= 0d0 .OR. coefficients(i_loc, 2) /= 0d0) then + counter = counter + 1 + coefficients_temp(counter, 1) = coefficients(i_loc, 1) + coefficients_temp(counter, 2) = coefficients(i_loc, 2) + end if + end do - ! Skip - if (k_loc == i_loc) cycle + ! Call compute_extra_roots only on the non-zero eigenvalues + ! This allocates real_roots_added/imag_roots_added with the original + extra roots + call compute_extra_roots(coefficients_temp(:, 1), coefficients_temp(:, 2), & + real_roots_added, imag_roots_added) + + ! total number of non-zero roots after adding extras + total_extra = size(real_roots_added) - numerical_order + + ! Resize coefficients to hold non-zero roots (with extras) + zero roots at end + deallocate(coefficients) + allocate(coefficients(size(real_roots_added) + (poly_order + 1 - numerical_order), 2)) + coefficients = 0d0 + + ! Create perturbed copy for leja ordering + allocate(perturbed_real(size(real_roots_added))) + allocate(perturbed_imag(size(real_roots_added))) + perturbed_real = real_roots_added + perturbed_imag = imag_roots_added + + ! Perturb the extra roots so they have unique values for the leja sort + counter = numerical_order + 1 + do i_loc = 1, numerical_order + k_loc = 0 + do j_loc = counter, size(real_roots_added) + ! Check if this extra root matches the original + if (real_roots_added(j_loc) == coefficients_temp(i_loc, 1) .AND. & + abs(imag_roots_added(j_loc)) == abs(coefficients_temp(i_loc, 2))) then + k_loc = k_loc + 1 + perturbed_real(j_loc) = real_roots_added(j_loc) + k_loc * 5e-8 + end if + end do + end do - c = coefficients(i_loc, 1) - d = coefficients(i_loc, 2) + ! Leja order only the non-zero eigenvalues (with extras) + call modified_leja(perturbed_real, perturbed_imag, indices) - ! Skips eigenvalues that are numerically zero - if (abs(c) < 1e-12) cycle - if (c**2 + d**2 < 1e-12) cycle + ! Reorder the (non-perturbed) roots using the leja ordering + coefficients(1:size(real_roots_added), 1) = real_roots_added(indices) + coefficients(1:size(real_roots_added), 2) = imag_roots_added(indices) - ! 
theta_k/theta_i - div_real = (a * c + b * d)/(c**2 + d**2) - div_imag = (b * c - a * d)/(c**2 + d**2) + ! Zero eigenvalues are already zero at the end from the coefficients = 0d0 above - ! |1 - theta_k/theta_i| - div_mag = sqrt((1 - div_real)**2 + div_imag**2) + ! Cleanup + deallocate(coefficients_temp, real_roots_added, imag_roots_added) + deallocate(perturbed_real, perturbed_imag, indices) - ! Pof is about to overflow, store the exponent and - ! reset pof back to one - ! We can hit this for very high order polynomials, where we have to - ! add more roots than 22 (ie pof > 1e308) - if (log10(pof(k_loc)) + log10(div_mag) > 307) then - overflow(k_loc) = overflow(k_loc) + int(log10(pof(k_loc))) - pof(k_loc) = 1 - end if + else - ! Product - pof(k_loc) = pof(k_loc) * div_mag + ! No root adding - just leja order the non-zero eigenvalues + ! and put zeros at the end + if (numerical_order > 0) then + ! Extract non-zero eigenvalues + allocate(coefficients_temp(numerical_order, 2)) + counter = 0 + do i_loc = 1, poly_order + 1 + if (coefficients(i_loc, 1) /= 0d0 .OR. coefficients(i_loc, 2) /= 0d0) then + counter = counter + 1 + coefficients_temp(counter, 1) = coefficients(i_loc, 1) + coefficients_temp(counter, 2) = coefficients(i_loc, 2) + end if end do - ! If pof > 1e4, we add an extra root, plus one extra for every 1e14 - if (log10(pof(k_loc)) > 4 .OR. overflow(k_loc) /= 0) then - - ! if real extra_pair_roots counts each distinct real root we're adding - ! if imaginary it only counts a pair as one - extra_pair_roots(k_loc) = ceiling((log10(pof(k_loc)) + overflow(k_loc) - 4.0)/14.0) - total_extra = total_extra + extra_pair_roots(k_loc) + ! Leja order the non-zero eigenvalues + call modified_leja(coefficients_temp(:, 1), coefficients_temp(:, 2), indices) - ! If imaginary, the pof is the same for the conjugate, let's just set it to -1 - if (b > 0) then - ! We know the positive imaginary value is first, so the conjugate follows it - pof(k_loc+1) = -1 - ! We need the conjugates as well - total_extra = total_extra + extra_pair_roots(k_loc) + ! Reorder and put zeros at the end + coefficients = 0d0 + coefficients(1:numerical_order, 1) = coefficients_temp(indices, 1) + coefficients(1:numerical_order, 2) = coefficients_temp(indices, 2) - end if - end if - end do + deallocate(coefficients_temp, indices) - ! If we have extra roots we need to resize the coefficients storage - if (total_extra > 0) then - allocate(coefficients_temp(size(coefficients, 1), size(coefficients, 2))) - coefficients_temp(1:size(coefficients, 1), 1:size(coefficients, 2)) = coefficients - deallocate(coefficients) - allocate(coefficients(size(coefficients_temp, 1) + total_extra, 2)) - coefficients = 0 - coefficients(1:size(coefficients_temp, 1), :) = coefficients_temp - deallocate(coefficients_temp) end if - end if - ! Take a copy of the existing roots - coefficients_temp = coefficients - - if (add_roots) then - - ! Add the extra copies of roots, ensuring conjugate pairs we add - ! are next to each other - counter = size(extra_pair_roots)+1 - do i_loc = 1, size(extra_pair_roots) - - ! For each extra root pair to add - do j_loc = 1, extra_pair_roots(i_loc) - - coefficients(counter, :) = coefficients(i_loc, :) - ! Add in the conjugate - if (coefficients(i_loc, 2) > 0) then - coefficients(counter+1, 1) = coefficients(i_loc, 1) - coefficients(counter+1, 2) = -coefficients(i_loc, 2) - end if - - ! Store a perturbed root so we have unique values for the leja sort below - ! 
Just peturbing the real value - coefficients_temp(counter, 1) = coefficients(i_loc, 1) + j_loc * 5e-8 - coefficients_temp(counter, 2) = coefficients(i_loc, 2) - ! Add in the conjugate - if (coefficients(i_loc, 2) > 0) then - coefficients_temp(counter+1, 1) = coefficients(i_loc, 1) + j_loc * 5e-8 - coefficients_temp(counter+1, 2) = -coefficients(i_loc, 2) - end if - - counter = counter + 1 - if (coefficients(i_loc, 2) > 0) counter = counter + 1 - end do - end do end if - ! ~~~~~~~~~~~~~~ - ! Now compute a modified leja ordering for stability - ! ~~~~~~~~~~~~~~ - ! Called with the peturbed extra roots - call modified_leja(coefficients_temp(:,1), coefficients_temp(:,2), indices) - - ! Reorder the (non-peturbed) roots - coefficients(:,1) = coefficients(indices,1) - coefficients(:,2) = coefficients(indices,2) + ! print *, "after root adding and leja coefficients r", coefficients(:, 1) + ! print *, "after root adding and leja coefficients c", coefficients(:, 2) ! Cleanup - deallocate(coefficients_temp) do i_loc = 1, subspace_size+1 call VecDestroy(V_n(i_loc), ierr) end do call VecDestroy(w_j, ierr) + end subroutine calculate_gmres_polynomial_roots_newton @@ -1209,6 +1439,11 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If real if (coefficients(term,2) == 0d0) then + if (abs(coefficients(term,1)) < 1e-12) then + term = term + 1 + cycle + end if + !print *, "REAL CASE assembly", term ! ~~~~~~~~~~~ @@ -1218,9 +1453,8 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Also we skip the first one if we're real as that value has already been added to the ! matrix by the build_gmres_polynomial_newton_inverse_full (as we had to build the product up ! to that order) - ! ~~~~~~~~~~~ - if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12 .AND. & - status_output(term, 1) /= 1) then + ! ~~~~~~~~~~~ + if (ncols /= 0 .AND. status_output(term, 1) /= 1) then !print *, "ADDING IN REAL TERM ", term call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & @@ -1250,6 +1484,11 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If complex else + if (coefficients(term,1)**2 + coefficients(term,2)**2 < 1e-12) then + term = term + 2 + cycle + end if + !print *, "COMPLEX CASE assembly", term square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) @@ -1277,7 +1516,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end do ! This is the p = p + 1/(a^2 + b^2) * temp - if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then + if (ncols /= 0) then call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & square_sum * temp(1:ncols), ADD_VALUES, ierr) end if @@ -1378,8 +1617,6 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if end if - - ! 
Delete our symbolic do j_loc = 1, ncols if (associated(symbolic_ones(j_loc)%ptr)) then deallocate(symbolic_ones(j_loc)%ptr) diff --git a/tests/Makefile b/tests/Makefile index 47a11c3..797edf9 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -134,7 +134,7 @@ run_tests_load_serial: echo "--- Testing order = $$order ---"; \ ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ done - @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity in parallel" + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity" @for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ for sparsity in $$(seq 1 $$(($$order - 1))); do \ @@ -189,12 +189,15 @@ run_tests_load_parallel: @for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ - done - @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders first order fixed sparsity in parallel" + done + @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity in parallel" @for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ - $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order; \ - done + for sparsity in $$(seq 1 $$(($$order - 1))); do \ + echo " --- Testing sparsity order = $$sparsity ---"; \ + $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ + done; \ + done # ~~~~~~~~~~~ # ~~~~~~~~~~~ @@ -416,7 +419,24 @@ run_tests_no_load_serial: -pc_air_improve_w_its 3 -ksp_max_it 3 -pc_air_a_drop 1e-3 -pc_air_inverse_type power @echo "Test improving Z with PC regenerated with no sparsity change with 1 iteration" ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -pc_air_one_point_classical_prolong 0 \ - -pc_air_improve_w_its 1 -ksp_max_it 3 -pc_air_a_drop 1e-3 -pc_air_inverse_type power + -pc_air_improve_w_its 1 -ksp_max_it 3 -pc_air_a_drop 1e-3 -pc_air_inverse_type power +# + @echo "" + @echo "Test Newton AIRG on advection for for different orders" + @for order in 0 1 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ + -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + done + @echo "Test Newton AIRG on advection for for different orders and fixed sparsity" + @for order in 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + for sparsity in $$(seq 1 $$(($$order - 1))); do \ + echo " --- Testing sparsity order = $$sparsity ---"; \ + ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ + -pc_air_inverse_sparsity_order $$sparsity -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + done; \ + done # # ~~~~~~~~~~~~~~~~~~~~~~~ # Include kokkos examples @@ -614,6 +634,23 @@ run_tests_no_load_parallel: @echo "Test improving W with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -pc_air_one_point_classical_prolong 0 \ -pc_air_improve_w_its 3 -ksp_max_it 3 -pc_air_a_drop 1e-3 
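+      ! Note on the shape chosen above: for the Newton basis, column 1 of coefficients holds
+      ! the real parts and column 2 the imaginary parts of the roots, and
+      ! calculate_gmres_polynomial_roots_newton may reallocate the array to a larger size when
+      ! it appends extra copies of roots, hence the heap allocation above rather than pointing
+      ! at the fixed-size stack array.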
-pc_air_inverse_type power +# + @echo "" + @echo "Test Newton AIRG on advection for for different orders in parallel" + @for order in 0 1 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ + -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + done + @echo "Test Newton AIRG on advection for for different orders and fixed sparsity in parallel" + @for order in 2 3 4 5 6; do \ + echo "--- Testing order = $$order ---"; \ + for sparsity in $$(seq 1 $$(($$order - 1))); do \ + echo " --- Testing sparsity order = $$sparsity ---"; \ + $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ + -pc_air_inverse_sparsity_order $$sparsity -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + done; \ + done # # ~~~~~~~~~~~~~~~~~~~~~~~ From 36750055281c171b56c52ced1224e91ce6d4e19a Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 22:13:50 +0000 Subject: [PATCH 21/41] Fix bug in Newton where too little memorywas being allocated for the coefficients --- src/Approx_Inverse_Setup.F90 | 7 ++++++- tests/Makefile | 12 +++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Approx_Inverse_Setup.F90 b/src/Approx_Inverse_Setup.F90 index f83f6e4..293f4e0 100644 --- a/src/Approx_Inverse_Setup.F90 +++ b/src/Approx_Inverse_Setup.F90 @@ -70,7 +70,12 @@ subroutine calculate_and_build_approximate_inverse(matrix, inverse_type, & allocate(coefficients(poly_order + 1, 1)) end if else - coefficients => coefficients_stack + if (inverse_type == PFLAREINV_NEWTON .OR. inverse_type == PFLAREINV_NEWTON_NO_EXTRA) then + ! Newton basis needs storage for real and imaginary roots + allocate(coefficients(poly_order + 1, 2)) + else + coefficients => coefficients_stack + end if end if ! 
This is diabolical - In petsc 3.22, they changed the way to test for diff --git a/tests/Makefile b/tests/Makefile index 797edf9..33391a6 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -93,6 +93,8 @@ run_tests_load_serial: ./ex6 -f data/mat_stream_2364 -pc_type pflareinv -ksp_max_it 21 @echo "Test single level GMRES polynomial preconditioning for hyperbolic streaming problem in C" ./ex6 -f data/mat_stream_2364 -pc_type pflareinv -pc_pflareinv_type power -ksp_max_it 21 + @echo "Test single level Newton GMRES polynomial preconditioning for hyperbolic streaming problem in C" + ./ex6 -f data/mat_stream_2364 -pc_type pflareinv -pc_pflareinv_type newton -ksp_max_it 21 # @echo "" @echo "Test single level GMRES polynomial preconditioning with the Newton basis matrix-free for hyperbolic streaming problem in C" @@ -320,8 +322,8 @@ run_tests_no_load_serial: ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 10 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change" ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 -# @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" -# ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 + @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change" + ./ex6f -m 10 -n 10 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 # @echo "" @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change with 0th order fixed sparsity" @@ -593,9 +595,9 @@ run_tests_no_load_parallel: @echo "Test AIRG Newton with 2nd order GMRES polynomials with PC regenerated with no sparsity change in parallel" $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 \ -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -# @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" -# $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ -# -pc_air_a_drop 1e-3 -pc_air_inverse_type newton + @echo "Test AIRG Newton with 2nd order fixed sparsity GMRES polynomials with PC regenerated with no sparsity change in parallel" + $(MPIEXEC) -n 2 ./ex6f -m 10 -n 10 -regen -pc_air_reuse_sparsity -ksp_max_it 3 -pc_air_inverse_sparsity_order 2 \ + -pc_air_a_drop 1e-3 -pc_air_inverse_type newton # @echo "" @echo "Test AIRG Newton with GMRES polynomials with PC reused with no sparsity change in parallel with 0th order fixed sparsity" From 88d2c3997f7907978e40773c8ed8dbdae3478d14 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 22:16:26 +0000 Subject: [PATCH 22/41] Add newton test for indefinite problem --- tests/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/Makefile b/tests/Makefile index 33391a6..534f1b4 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -123,6 +123,8 @@ run_tests_load_serial: @echo "" @echo "Test AIRG with GMRES polynomials in indefinite problem with zero diagonals" ./ex6 -f data/e05r0100_petsc -b_in_f 0 
-pc_air_a_drop 1e-3 -pc_air_inverse_type power -ksp_max_it 26 + @echo "Test AIRG with Newton GMRES polynomials in indefinite problem with zero diagonals" + ./ex6 -f data/e05r0100_petsc -b_in_f 0 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -ksp_max_it 26 # @echo "" @echo "Test AIRG with assembled Newton GMRES polynomials for hyperbolic streaming problem for 2nd order" From 54e02ff088d59542af94c8ac1dcfb628bd690416 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 22:36:48 +0000 Subject: [PATCH 23/41] Added 1138_bus test from matrix market and Loe's paper for Newton GMRES polynomial, both matrix-free and assembled --- tests/Makefile | 6 ++++++ tests/data/1138_bus | Bin 0 -> 35720 bytes tests/data/1138_bus.info | 1 + 3 files changed, 7 insertions(+) create mode 100644 tests/data/1138_bus create mode 100644 tests/data/1138_bus.info diff --git a/tests/Makefile b/tests/Makefile index 534f1b4..355816e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -146,6 +146,12 @@ run_tests_load_serial: ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ done; \ done +# + @echo "" + @echo "Test Newton GMRES polynomials matrix-free with added roots in market matrix problem 1138" + ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 60 -pc_pflareinv_matrix_free -ksp_norm_type unpreconditioned -ksp_max_it 6 + @echo "Test Newton GMRES polynomials with fixed sparsity with added roots in market matrix problem 1138" + ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 60 -ksp_norm_type unpreconditioned -ksp_max_it 6 # ~~~~~~~~~~~ # ~~~~~~~~~~~ diff --git a/tests/data/1138_bus b/tests/data/1138_bus new file mode 100644 index 0000000000000000000000000000000000000000..dc7455355f0809361de6af65a2a559a98a7ee368 GIT binary patch literal 35720 zcmb`vby!wg*Y-_WU{ES1D4}2>f{hG96f8ioMKQ6vySux)-Nv@vZW)T*-5uM`ZRdB6 zwdTB@>-D~$_xt|%JdU5|>^aw5^IB`hSlGDskF&9H=w{;RV`F1uuSgFS>_9q&161NA zOx(6sen~5SYb1^2wGI=vF4H<(mn(6)4dT{yX_Yu#hK|>&^I4}6w{ELW_uo{flQ_AS z-~AG<%awR3Q_$hsufud5k|&EwC+!x$#7V!1W>s$S>%98^>pC0@5lUyshC%M<-1W6Sz2{^>v(bNHtKRjb)F1x zOBy{^^|iSmOt(eiH9B7W*7r)6Cw1xitkX)o4wt+-PQtWX+MwGe;gU}KbsFv0YMti) z7G@nMd93qU`^By6)@ig#8P;jVuhagwIO{sK+gf#5|4ntd5~r_S_qR@OokqfR8zfxv z=zP|0omYotQu%aV3D<6G6}MoWUWZw$^*z+#GRCx9)cTqvPPbR5(ec)Pam)4VFj2`T z=x}{a)~eI#d^*m$PMu$;vCgCWOQ-q0eG%}YZbr5TZfC=x*sH7$`$l@5Vy2TtFFVkOo`Lu zL0^M}$$L-yx^Bs%<3)8@I$px8(ea{^M#|JkS-R~R;nry-Uc0U95Vv)lP9v(*NxW{0 zz7M+1x?UZx{gVH`l`CaRT2brrby|Hrx*d{6$LaJsueJVOpKgcFYaOrSbUVba%d)Ol zhyAWPzb;$*bscigwOgmrerpxClr3tFgjwg4`}lj9Zo75(?`26D5~lB&?py10;?{m^ z)#29qdmAOrx}Cbd|LwYTex1%bzb?nRK5^?j)~c_=I!xTw<%wU{W34)mbv-(*E)>9t#{q|t84tNs5iOqZwASeLESYt?buFL^W)W{vgS z!rCoq^fOxAx=dZagy}M^uUn_paXOvk|2<6me^*_GsLn5Gti!GISeLES>3H34QSG)a z$2v^Ht*=F%FFIbA_j~!);X19Tbvo;3pS~VZjimiOk1p3bjdeQf@^xD4FkR38*Es8R zXpZ!QT@X~lG5Ub*(GH=ly4xjT^$; zfwWE9E$-|f2gnIL{)=C=(YW3`h|df10l5}ye}4EqK?^JOQhwE5yTb6RzO*|8w>Pfq zFjTh(*JA-2YNQ>i`ue{azYKzBOPk*r&qEKr4Z=ARLqfl|gwB0V;q;Mw4!=m8UaSh-;3M5SG7q){ml!g9uK;JpYwMw~F6jh;p zfT)xo3#x${pcd!_B z%!n6vD%cOsATAy>0SQWLLz6%qP!|jYiOT&3`W1XfUPpl6f-QcBwgrI?@CCT%w$lHS zx1ORtGzD}6(pL=?jiBv;gm(wxZ>*3ueM4Cp0DfP#7~8hqK-Yu&V~cxXi+f;;ez%oA zmpo13mOPZMv9oc7K>|8%c*g zvfBcM+g|R2?z0|n4+Nq;fs7lWALtEaj0t^F<|k8)xc+cUngKxEqB5pLrMw}iufYGf zjkv+^3y3p*Pa~ik0Q$-J{S1dn*k~Z`$tdHI3L6O(MgfT*hqwpIJpnq#46}jT?latD z0o?W-fLuS)+l)7{uZS?*Ux~Mwi0iTi($ABC)cFs1f;8n!zrBn{+;15ZcK96_&#Gxq zVY*@lbS04A*#CY1;~=tDw@h{mPGVY^1#g)p4ugmUc_IQvM#W 
z7W@Sy-#Q@U=_-)6$u-YNG^sEoh!;0loX{sf!AW*~W`AH*#x{!2j0lXS;{u74Zc z(s$AyqS7Zfm0RkO>y$ndHv{R<{tfqGAoutHI0)n( zNxbxt#7Q|f00W7WuromVM6OfvN%}M(`7Z#S?g-q{)?-SgEOE=V3MYWRHff`jCw(L3 z-%%>XUi|T)Xi*)qN@Hq#Q}7(_e>MuHlhV=}X)@o4Z!}M2#^$rcl3w ztx6+(E#pdlf5JoX50J4ZZYfv$B@AWRJO$6uwjk&;({1w({wyFH@CN}ZO+M%g_#Hrg z5DYq^J#v3uf>-b(pH^EBxL+g84*CYX1s}k7gh6aG0auU%c!7ez8~6ZUfO>610D)*5 zGylKP_W*6Qfm_-rerczaE$MAOf=}S5m3~HCF8IGdA-3s(7pMr&SEwWO8&v8m3pd(m z^8=s^+q_DV(bgUyugx!L0f78Q`w(P2ucU9#R=H-|>_F0c0SQkBoPZlB2c!)dKxvQ> zNO%z-{et^pyo=Fh+dQBcki6($+oB*dkZ|$qKFkSsCUndn)%s%EJAVJ~QS5-cYGe(qbGK_umsL_sk1OU;KXm{17JNrw}Ntkb5d& zqPSMOolv7P;{0!W+XzDYk;29jU$;XWGWin{`+ z1SGwkqgHFob2r5XJAng;9Ku6FN zGy}829OZ5Y9SD+@4uUoZy+8{f{)wO^7z@UMR^T>h4X%MU=-=be6d>VuKwB^k;m4p- zM|;o#^Z-4Znr|l>R&*vd>Ui+AB0K1$4Z|-QHJePrMgUM zi@FBe7sxAp{Stfzui&?VzJ_LmzJX?fzJ)qM-+_P8Z$V(H>9&0j{QxBYS2LfTD>SDW zZkGgiGtd%w)*$SoN{4#v3d8*gNM0ay+GPRZ@M8=~dA8rccZ8u~Mm*YMSI`W%YX|or zl$+U1WBXI3%K*)eupdykC0yETzYy+UW|&;3oh`^?sy))%=Z0JAu?G$y9l&)+eLBqk zF4Emsap~dS3l76CbxQlCeFFN+pfci7k3IbM1rX*0+~K|op1_T^+m!)w-C2S39n^jf zSb_O&5fBJUf)AJMl;3EZ^f#U-_PaouN`t<( z$M4Gy?UUb^kw?@ENZ*KC;sqTh*WnHPfDe#s_5~q8!U}-^wKtF(inB( zUK!uhd%?f0#wFFMW~EfT#sEP5Cy`3 zj58S<1jTrevF0FS0?3#!%8G;=<4nr29|RU59AS2`0Od$MQnoRE@*@moTE>*zhbo{d z%9Z|t8o%w9&}kqB)B#dfB4`DwfdnAo)j>^A14vjK<*yH|1)766&T)i ztr92glRT1Fu34^6{Bj*BNGI`aK@Ttpv;*xGy`dOacAb&V2ig&I0YgA1AnAt!#M=!7 z64wuO1$~vf8&uMD2fcu#m%0XkVW20FaO5+_hq$F)$uIRtpXjps!wnqd{z`tSQ_7b9 zlC)B`lsO!X2Q$G4Fcx{dp(DX;xN}2CL7kza!AiI>_Uy)h#c=0`B9Gk!Fb*t4m^)O; zl(1P~E|?FNBQ75l;SM>W6Tu3&y`Yo86fg%&2B=5;M)_0WUIb8|F%FT(ZaU&}fdX*P z01_tGAZesdX}c#>%3BMh4Qs$Wgv&KadlrCIstjp&Gx((*gjt@M$m`$%q^>*w^%?1w zAk9*+4F2p&k;b@2SE#f{`U?H#P|$Q6<*tT52asz7`TIaBV6Yz? zP+WqhfrE-e(8J&YxCM@YW8kP#aUTZ~Cwc;$RGflJzSBz2C_St6oYM13CGABZWr<2# zE`uxJx|Lpq`VD)BeKO(6MigMYvs@Ce*h{(H)QANm+P01s7|_{IGKd{kjlhVT?f z{J-Fd@=Mq=@Em+l`W`Cz-+-6k6_Bz-UjvE%0wn&e@_&cE1K)s@FKGlRQ}TWSpA|o# zKY^6-RU!GL42k;%BpzyS1Mr`rw$ReRPK8Oh14svQ1B_pJ{}}%C&NyRb+!oUCzJ-xB*qZ5som2HVBh)B)`PF1Eg_i zZ@T3GjoQ~l!5 z1#U^_t&sFx;r0c=zz_5Sg}@L{7|aI#AOOr%S_C=)6a_uNdX$q4Ee47MiIcSAE&&3; zBBdpveTTl@Yhz6m`T?JYR zR0SH4BNgE5oK{X&{M1bm`35WzWfaKHlG=#emkUFGnDObvqa-yT8b2C z9FY4e_oyj|2X%qOC4hQB?ptk8A4r{bKzkthB(0>8I7uh@bXyw3El8WCd_nR{JnN6-RDSZg5R;%=qTVWMq-j2Ed_RQgKtw*%5f2@{nvB)pr_PEd)H`zd2k z+=9e+2WxDMd5m$dr@}8|rzaQ$76HT?<8m-`2#|3yAM^n;Kwr=gj8}ef_Xh*OFd$`! 
zdmxwtR--<-K9uhu*Q(2vvZnwka}yW}q&*9PlrtPi9ik%u(mF_Aj09uBC~zH&24lcF zAmxZ#;^zVh8wchADQhy2`lO8$z(mC)sKg1yZ;2}cjI2m*pRa1TBJXueY?R1rIK$WkiHW(0|r~bK_F#q1v|kWuowId(ty;z z9qa%S_m^@@zN6qYkbJ3N7mzek_dey8^t!ymaGwNHw!}*w@k?9w1Gx`Jl>3nKpMXj` zq&|t0IwkHnkn(icF}MW@mpp=$eF|I#(#|vBEVv1tDEB$#KM$337r;eu30wh>lw0Di zsj#ci>p;qpFme9_Zh*VsK9IOuirdgT3W>j`kaQ1$)bT*MwO_)d%*R05`V`~I4*Cr0 z2z?IC0DS??3VjJ)f%o7m_>S;&Dy@XS20y_Y@D_XlDxLigq|2tl-zoRMN~JvHb8rAs zUS^O5NSz;)euN_4{uB5GcP8j(6{hNO$cV6S0PQkBokn{ky$z6SQ+W-)J;H3waEWse zKd#$Q-Inz53+M-_)4>U#9>XnZ#V=U3S+3KVue(59K`!8?G%wU0XiJ^%3lF0?JNx%1BsV1 zByV{j?bT_dT=7eO0`Z_82nP|0C}?dU?H6|ps0^gODxeyO19C0muL%->lv5Q*yyS^h zh`+j`DYOQVw34qDXabUelqv101Ef4@gSZnx15g)8I>{?_OISnD$V%n1ubEkGyG60`!XK^u?)B)lzX2PA!a&;fJ=5+`9@Kv$aWl<&`CS?hAajEB}_Z_<== zyeN66SKhy=VC7H48|)2cr?7rC`|Yf236F16_u0{A^|-)d`wj~nCvsRJch_`#?KsdS zZ^!)36WFbvXSMwIBe_7Q$jWD4KBaiS-Ul21mB!(Ri1F(@heAWADs2V)rJ(?JY;&)wC(KXQEkbSh#sj=?iA@!(D`KQ+ki)V zN8c!tdhhGdE`K~pN0lNRI`oRY%URRqOs+h$bn2a}J@4Nr>cr)3e-*F#WHsgeb9vRy z4)arAe0`U7<@S=PADbMn=32v%a$KJH?efYQ7Jr2j(?ib3JWT!f+}3xF-9K@@4P7FW zz9zGG=WR`M{yiZ~g;U;XE+_04y-oeGwAZZ-ukLYn|7~t_T9hOICti6keObUtY2NVk zKDSEb;Yu40d@i$X8CM!`x97(n1-VkILXQg6YirRt^GjD=dVD{rbfnUeN@vj>-8?I_ zY?zOCB*gT&_~ShXg^h99a;*p#DLN`$O8!%{qfNS{$9XurpPw}CQp57>o*JB`#^J{7 zzI$ti!cU8_`>IjN8@uc$6;CRjR6OZ;mjPizJ!{3MJ~*}Lb>y4eR4}Z}x7TMgu-y-z zMNwbVvHfsQ`*W4ArvB?Re_eK$yPU4@hl=$Udy|SM6(6SJ!@k+Q*!Zl)&eVtYR}TBS z-bsCH^Xob#T(*R3|D$Xl9Iy4hoT`+n7mgYbcp(G(RJJeqVD$|SXs|Oi|DhmzYEe!bgZF@q4Jozw>twG#sr2NGsdr}da~zW^j1p42)OsJBn@cosd*UAV zhO79#EIi?{1LtV#P;5d#WeRL}Gt-biBT^q9rxVKpnp3C0?->2vbY)bVviSyawZ-R; z?3!|!`4j~I1eqA(cOi90|eYr`R)7{(W$8sBIb38XszPEYol0u}@h1RaKc*e*iR!WU? z+HczV)R~o1iRvS^-#i?~N~vre@w&58YUsGWbiZs#p7L4WGMBvbp{69#ep%CnbMopF%MR-Rq6)xM=nwHKw&_OKtT>*R{lVk(Y(??^3Q zrvEr2x`*X=U3|&jeaUBjQ8)L^@B07wi>sVC_Ugp9985)Ab8p*Olw-X7V^+2rLT#?P z_q$hRBe!~$N&1~S1cu3jmfgm6}LX>S)P=i`ef}={@b9b z9OWNdqezjTT(0ZSQNt@XuIDQf@PIukG?qC1~WDOF_v4^707( z*m|o+9bnf^5AS!ZYR4HpHY_f2V+u9zuy|juquaPrhRseV{>jG?#mDb&d!T})JzH00 z*oZ6St?KvKrQNRh=OaD2?w;bK&mY;%4QOS*J{=0u(B(~~1G#FZtHbvMM_IJ@%)RXf zeYrs+#wHr=h&xiFU5@;ei=znycY79ka@WRPZ@2v; zl9QEIHQUS8&2eMVk*c1~=J=*d&iI>ZcYEX?M3zy#YWlKq}kp6iW zKf7ZT7gD(ShdGOw!Ybhh<4hw8ciZfBZ~?dMaV^K_B6b}25KoiLAzZdpu@C9ae&t>X zG4aFJPNy!#E1U{hQ=jXd3C(b{S`rN}&~L-FCzaUuO#24|pO)ul-9|NtegBsI84*&%XW~X8v9*?a}QbHyH+fIlTSLuaQ5Sa9MRiekEf9>IpuP{9F@8U-5uHy&0jcePP*H_zTL5;OGVeB>ag;U+0 z_1U}LipT4Fw&c9iVsd|XIL7qu+n_1GW>ebq?#A;q??+5u>JDSyokinkFN?7}2MUMY z>g2!;IP0dv&TpP(rs}iy$$+IyuG1iugv$}2Sr?NxyQ_Bb1~L~eG;LDmg$ud2`Mx3LC*5yKY4WI{GGX8eDSP~n^j%E{ z!%H+S{2|S9J%>8CGM;x)FDiU0N^C4sB+o)M4CAkAS%)Iox^f#%Z+*l74?FrLGQ(UZiem#PPdwH@jH3IL8cIaAIJ1 z4{DM9sl0eNs#1wRI&2T&sI}oo3v@`PM&Iz79rTqW*5vQo=J+$J-~G+Y(2)z7>e=pU z{jh@N`PggQwDSH(`f;852Tn)FOyT&y%y`V&=6&%rQ?qkN)f3mn)mUbDEH@h#j6_oNzM3)`T@t zJSNlCKh`g<%Mm$7j-GyN4mF^4Ef0Jy%Pz})?D}#4HHWl4x$|PqD4MV%v|?$OVD|Sa zy!6g)%rBLu&Gs;!tNMBF`m=hxOS=h{`%(Y>%_c3zwC2d%rCz_CX-|!8zdi0SK1#p; zxI(KxFJ82(#$NSoXI*{&h1yp%p1YQLOpP}iGUA2L%?f0kXV=r0>W&DiKD>22`)~W2 z&b`7XicY(<=TDCu9JJJ?(lf95T-2kHN1bsCsNTxCm3O4|qKJm}2RAR8%OMj(M=qE! 
zor~9)kzs6dC|wx8(s=LsoBhl-yDwf1&XiZTTPvka>G6O3p3HV5age88)|HpGkR7#)kAZOo4><5azeEc0P=UQ7we_pbMMD#uDGV9v01Yo155 zQt~;2+t+73E2VJt{!mIqI>&FA_cK2$)$={wf7CpmyaubXQi`nLwy)r$+pLr-;_ZKK zXHHg1-c>h#3)tw-uFoG`dzg6yyRGb#w`;A_6w(xLVZWj5lg=x{ttdD4+fyRl>Wr_* z_jK2Bqpo;b(tB+F{y9rp8A`-dyi&2|T*Yxw9^5%D*NP;JIgJ?uM_y^tXK0@MdYz!x6V#-G@qFN#HLxt( z_K{20`FoFJLReB7C<0H%bsAnSH%3h5bD`fe{#nik}&wIjEK9!&1QyRCq^B{l= z9vSw*wRJGp%zx$ZxAQ%y-11Jvw;U|RMMFA&cWLsGD#bLahc_OVKGHd$Ve~@IpJCIp z(2qAM*2{eVMb4kU=Vqp%9N^aIlIx2O6t`nqU(EEma`fp}*(z>hw?TBYO3|!b@KVkd z$=!ER+ru(V_Sf$-ii!8GxqE4E_8%tG?s$%PgXKs17J$iitgcvuraM>Y^p^m4(7ZiYFkM5@xz92@cOPjCl#1QUIj9S z?)5IoN~!6D+vhP7IO@>jsdlLOB9WR;D5a?%&2@!u=MDM%Z+7EigBQz#n5Q=Gy7t@S zjvSEJ^^c){Jg2dTywlb8^WYJe5{>mzt(8%3iB%eMv7|;3l|O8xPIt@IcWz$OGAG5vdR`C`+Y%Qj9SHSeZ~O2el_ zTutMUzsoFMJIIZu9XKNkKpGp^VSMDwp;Xfe%b9Fnxbo{ShXe97=D;$>yop-Pl5LQZ zdYn`6>v5Ri+p_E3-Y2=tbP612?4OXDS5w8^Pbx(;e8qW_+7@z3e~^M6{(7Dvwm18< zx-hr%ni4dAY>5|jFP-H?k1KeAjne%{NvXzrmFje?ob|`CL$ttct1A*GO7zJ7KFO&j2R~o=p!TOTRJl&4{e$+rrLN1({TTiHi|aAV?LwaUmUWdX zkD8Bg>SG^$lPly}U#;7rTr@vax*ru}=s^6%7%gWv#xq@1+>)*5QtBmrXB^6G(yc?P5XQ<;BR0$$ah=Y&W6r$JXBZ zy{VPzPLl1QVD>887YmLSlyocHs}bks^b;aKe92+UL0>kNcRziQWoAz!; z`(*N5ztMi{)77k$R!zxd{EpkG=S$rLV_nNhw`97Jj_dip_&LA-JBvTy9)6YO*KpXr zZ|A#T`pdHZQ~Moy-@~%*(d(ap$-OtVOkbV@Pme6MB|{Ic>$$UigZ9%(%=Nxr zAL;eE>PKp_7_Yw}3%TZo11sv>9!&MVXK32_ycd`0+u~-`b{D8|jQPAQbL&_>_XmH| zu+sk1o$lV@E)Q$_<=nrIyQ}yA;Erbhc67P%e)RXSk{~XF`THeYq;RFgJUc@HcvpQVw8xmpGq#f5$GwfRWtJ z(VXAu_uqtWS@Ph?%VFK*HhEaqld*fB&&%2I2BjSzA_ItH&F7Pz?{FP+9Z%}MQI{su zZ_WN#gqx+GQtWP)JeK(2>sw@_i~aA6mkls#a|hG?&r9|7P_t(H>l_>ThFU+sbP4-W z>N&`@NUF8r#1ebP_^q2v%|<wW)m|1PcmB7(h!KlS3aPxL+$hZkSs+F;RD4s3G0P~qkuNv&Hce)*q24|Qxp zGb%O-9PfQ{gI}%9pO|;4!@?O_^u(2W`Fm9?SD+PBh;O zdOv_FOzP$JYN8A4=RuTBpNPQkpGoh3>EGXinnwN9?jn}y^H*_#npbqawPMZwI>)I? z>x4SZ*0m!2JW#)XYI+;Xz*XBV>yT|Tyre;-#sy6rzH=L%oHVhGv2LV})m#?b?fJ=~ zdVQ`G>kMQ6#j+36Wd4#bncDB+lIFUd64n~?RH}T$)TFtXc4I$Bt&gZh*`wR=rq$1T z8uPs|w(nd??-#0mrJ2)eV!Hhh9@lZ!L4Uxw95?avhR4%@?{=5U4U&+|qWT}e%*8~eDF z*yNwQc=J=MP-DN4^txs76j??WA^YezXYSRBWTjy*N-lOUdTUdv(y*`V);stX-k7SC z9PYRdP8#u+m4N%03PoH4VAAd5%G)O5rY4&1v!1xONi@?-TmH z#>I{=^lX-M4h7%a(HSQztbV@~w-Md{{1xZeI$bte*tf%92X@z(!OpibI<=`-%kmyB z9vi)|d&Da)_-24t=4^IUG55Gb`@_z0$(BVcE_%UKetD6^*Pgezbo-}xA+)CkOU?B` z(IQcgGAC{2;BCSC*KBBPDX&_L%|Dw?-q-sERQqtjzh11{ZJ8(5c{IokCF=cOs#x2Y z?@_+Ni=!@9Zo^8c*o6wty`#r)_QI9jN{s%4GxTlg*)pas=iWbfMAX7mDw%oIAH`0% zbJj5@VqZ*b%$|q348zV9Rs1?G-?!=G*yCjTsx^A8*XtOvjW^~stdxobV%uWDM9!XU z*SDVizwBkso5S8e+x=(grJAYtLtodtaxo?K@$3=VmtQVLRm}GxE2W@`Z}+`l@PU<* zS8j9sDW$LKt~M8Z9Dlm-%ceY7jsMcUjpq>ixs@q!sp4(+yU}sfi<~d0en8T# zbp9b+uzJHszn1U$Up2h{DA}lCw{3~5&xa3X-w}0-XZkssbF}I|;>LpRr1D!d>NR$; z$XU-hu@VO_As6wY;*g1YuDjZNag!o!&Ls? zm5Nq<`KVgeiTuh(@07nt(}JPf zjeP0uOjq?jq_2}oOo+nXg-aJ9$JC0^AxBcVe0{-Gcwaont+PR<;*d+)4|M*f(8x6!s<93++^+}mHC{!Dof=Sx?Ij&c+c?nFIKbDk$D?C zOzuey>@MO#^pe!?pM1I-zZ*)sw9nW-RnK#-lQ}*zwOdzCxPZs+>3MYc)iYz>RkgeM z{EE-{YFVarE4b=ibDyu#OYGA3t;7YdEqna&eJVSqAHVp<`LC%DH+ONYlj9ou7sd8? 
zrDs(3;Jrh`5;s%#H|G4dLyCFsrPlxdYk03E=5t=}6SO}fucdzUzlBFG%_zy&feMPAZ&+ecEKKZ-=P)fu7fPt830@+o|=t z4xdo#W07``FX{a{%RYVcH|BeN$bQ+5?#csFaw;Msr`KF?kU@ueYu>x zghn6xHN`#S8&z@l7Ut%HL<-mRW)nlsf4c>m^V)LD+}X1wpI?_KkJTmO!=>gRx- z-~H}ixpk8Kz9{~Jv2V#`*PG|Ze(6Rw(o|#Em~+5yVoB-yHC9U zE_din=eAL8NTsK=aZWf+OJL{Zs(4Vnr1x{gZBrR|ic(P7z6>cMLLO=+8w3{h1+k{ z)$c+5{-mg*M=Jdbmy&R!YA=SG1jp zQqt>Cz1~#z4tm~0!S0i<{V4sJl~UrR@_T1=X~mIhU!u&n98Jp2`o>C0g;R&Y#(5Oe z+yeF^4-ev?SxEMEIM;A7=jHkwOz($RZY#&O4Zde`v*q$FCRuS&Wy>#W7jrEDAk#FJs!jcR5e~rakO$-C*BKBi4)mg)Xh){7d$h4RHNJ zBZvCPdY}6_8ukV3vvD^p$o}9UWk;djcU)uUM>!y%aX*q|AD$|V-i`_VLTy*Z&ToC) zwx_lum5=I0951#b&t?v6zhK9R@y|KlJcp!}6^-XQ4T*%+M!EWy@ztfux9r=yKj*@- zJL9;DI=wN^3)Om@v!rAQ8Pp~#7b^9K$N8jN>|B3m(yBotsQuJrH>$bPVs}y5v}56t z?=$0v(w_9bng0F8zc$aW^!rbZKWe@c+Y}#vuxpw0PJjNjj&lsaV|w094qs;O<7#`k za+kmND>ux}vaj6J!Q3y@?W~>?r^^+OaiZEM*82%v8ugcrANFo{XM6fv?YQ8Gzvm2S zQJAw&DO2lntE>NGf1vgQ+m0~Lb7F6tl?j+Wr==l-jQ1(4_YOBsG0%(Ys`qxZI^WRe z5&myXJ4R}`tN+b6fm~6I$0Eb9eK}|q2lfBj@%51voXb2%Wk2&AiuL;`dG%3+Rvj2k#elxfExCLnIY^0 z>0!d-VoSX_uifp)mow+FU;Xu&$E+C4J}@$oefiSne(?W{ulogNVP{jfi;Db!naqxj ztdzQBGxyc>b3xzlx+5_=s<#IA5fe_HJb6mD3-)_C<-I})u75toNjJ4Qa6eb8A{VJ{ zxN+V`YQ1b(Hy3c8o7x~cjg?aPx=#;MS_38go+^>e$+O@Og^5(uPyZhukUcl=<_55m%hk9!Mqvyvp znsi+}c)}VEjF>BiOI$$hJ8FA}d`EX}EP2I^nqR~=W3JO2SK(@jD@k7LIez=yPv5_D zwAmkeok!(-cQ>E#w<`xuYMX9g&f(PHg0Vl%uB}T4B=u>;h0S%6<@r@qoxc^?RT~=+ zRoKfjDg5FNjH{|{wk=0wrG$R69L~xG-e-QAchUw5&RnKhYLmX4{T|z9?%a>sx`#bF zmM^>Br)E7bPyW|@K6*W05x0T&QP`D&1MSA;XQ!MAE>S1$Q{%?wev7-yw_ykV9?VK9 z$;sGXW#^ZDV>|86L;jT~cP+5K8nqm{|L>J93psc7w%b>Q&9l7Ee}AtPYuh$=&r_LM zDXIMi%f3UgoxATg`&pHhQjnUrDMY@arPH^r8lRheu0JXr zm*0;jWZH#kNmYyeMS(lR`^S2hx9p2Y|2VO?$c$@TY-#bsd!N*%#C$joJg|Z@BwQUl zrdMOu?<3FlKenCp&A>rrWI0!qoy_+n1*V#ISw+^)d+PJ9F!kF5v&^^a*gLKd4%qUt zYnsjcz;2T{pSrG`1D?H$-#>%e6*cz>L%+dnV$Lmne$Dy!-LA83Vg;OUkIa<1poL|< zkUJ@$;-=f%!k2iC&n@%1zd>Z-Z8OBPf&}7r@!DX?1wXAunTlT|B>@w^> z*n8^HXQ|(BQ2y}CXGUgu%Sy@Dd>^w?s&)0&<-66>vr;Pfw%qFd{l-#hSD5$J`^Ih! 
z9kbQwcZ+jF!Ytm$Oppxy-OfWmofj z-?G17=DC>v`Lg@>VL1+G_h#pvzG=4~4lqqI%D}e21CQ_{miG z)rS{Dj#OaZ`e9T5iGG{i(sGBQloQZs>fbAT}oUi+MF6~Htk~d zJh7zLsiQq>mEm0HclJK}_5&5LD|71P>MWdN*vQuDUe)6a_7~gxrH&?TpQ@B9PmCLF z>kPZx1p_aJhty&3w3xg3?zg3F=`wcP>~%<=%W&hp#{8Tr+#ZSrj|=DAJ^s_1HtE^L zw5#BfvyJZ?C~`WEC(B(X-=-7RUTNC$&yBToD_ZrYn#%36tvm2 z?|09z`Qsm5us`QsBdTs!Kh79_9uwe$r0rhybvAGcf9HCoJL&P2x&Mc(!M*&cS8KaT4r%jPore3K$-nD(|MGZnsj?Aiq`H5ISTEOSZigIo5)>&`XTQ)>N0>sqIX z;W*Va%o_@GsG6tu4DXux&za9y&v%CR$MMF?udLS>*}pa&as1K%R!Z&Ju)napx8l~V z+qLp^BTGNUR}-`P1g@6eJTGeFVeWUt&KU*+)^;?!skz^!?bMsJO)#0(vJr_5O!)}G!TB<&JM6Y)%S5VIwbHA;JIu8qSSn#3KhEdcjr);kjc95nurHB2>_)z*l&UoB=fM{(+GlE!sx<6-X!f@6r!-4d zN+3CR?yHjWQ~W9O9Mffa_`Pa(&a>B&^qZpBW#p{O_g-i?a*TeDacOg2 zPu^P=^groeF7@Z9^1n9Md_rmWdKz|%ZXb@E8auuOw^aS*UlpsPbg;KwfK&AUv4^$T zv+DhUh>UWa+0GK)W)N-{?6r0K5?4K4h9@-XbJjB5&2@tBVq+dgS}PPZ&yPLtG=3M8 z-&XH`Q%vg zlJAWrEB$89Vx`nzGOQs&Gjch1^Sxi@U!&iNros%p$w}&6Sf(w#*fk)&@zY0`f3KC` zmQ*YQzH&(K%u4@%_qUiU<{&<7muo<13N>Pno29mw3yP~~tFe*C#Ez;WhC4yot9`$S3%sqp{tkN1V4Vp8f|_v3|ZJRP~Tqw)O!m0r3?3>vw# zmvNv#?RFUBfwY}@Am0A?fPkW7FgqQ5lCu1A%yWO+ELKYHl}hX@wc#==C7(-)hl+Q* z&Pu5q;pq_(%}Ocvx^??A#TI3yl$JKd7@wgoH@h_s#d)flAL;$|xH)eccA0dWD=GWL zyeC6Wd-%1X`Z2p?VZjl$0atV1UQ2N?SY_4R!_GyVQsW*}p(OuUY=BnQ`$pJ*j5PM) zT|JL>c>dQ)D&Q-hZ}g%%M{o-3_Jd29=T8`x=y z%P&v}7&lhw%@|LSsHuH|7Y&Y+C>0Gmik|qEC3^PCW zdd3+mWgC1Z?K5XC61DWrkU&y?N*jxA#&=-)`QmzHc#b8BJy|Jvb($t0I8fC03Ddo< z&ESH`_s-{>+?G8m&z?GTp%3Snzw_%Q0%|l zDcA07>=19-zv=aQMAKar-<_F7-Mbj}73|*+=1JA(Q1~8O-?=XbP&r%kyA_>2F#@}Z zHg&kX+Sk-}XgLoyUHGBuOZKxj?yKdwGb3!49AH@9|3N2>eNQf6+T~N*BIfsO8G7ET z-s_x)rC*gDFy*S;J}S?yGiqdn;1oh0+$sS-yK5j*l+$mZyQ_Gw9oM~_anGmV3TYsCwJBBermo{%vgPO`K;}% zi#X%-Ed3!-ii-+DXl}9A1r$Xv_I|xfNpYLi#&l%X?gn2g~pOf;zX;<3y`} z2{Hff&Q7IrPx>^tFuPWM-Tce(#`wPQ)yy7Uo>OG@6>YBsmtgN{(GBahY|q}qVjk97 z5>MgxGW19py_O5tFu(Wp&cEHrw?6rL_ii*aFbAh+?^UNmhEw^#SlNis_T!wP&McpKZWT$} zlgEGT`)F~Ib^3gmUEdw8badK!avxtDA0(u*QmXx>?yN`SO0oJLOrNi@TfmfYeoeDc zr=>EjETzk#+Ix+CVb0rP$}#7`>n!CKZD86@IE}zlHeezvrBVUdrK@wBb02(@^=lcpdY>x_T0uwEp{g*^0(}qJ$WiCrFMbxIamnk{O#Q3fW?U$PczR| z>&@Joh!b6kJ2)sId#`a8`wq9H%e}kw{fEK>jqeLNK%IYi%sl??%9nU9W`6HMmG;M7 zfAG&CF7fEL9H3kF#lj=xvtE1F`$THpX<2{P9p4iZxmK3_1(nXS-qGI^w<$kqT-V#} z^nR`7enqP1i$2#$n>X(sR#aT}4XhkJig4j^=HF@1&z(WdV1_Z^mA1#As_o^on40=M zNSzXmeKNhiR!XC1o9CrT zHDD%x`+~)eu48WVyKenHE8XXeht2B46yN*y^hJMMwtP3OzvEB3Y3>JTyEDD6aKDsT z>Erf``dpfGCsxUs{cr_Jn`6#r0!=#u%X=&8skxs}`z>BKYcA`3Y|Fl8yP;FDu~C4M zqhaNY??WQgd3K(nBL@d2AEhaY_&odUBa0on*Yc^gs#e@g$}XMS1ep6x_09W9T7N&M zY}3)SkJtULzDpr}AL=cd+i+8#U;6uIN?SD3*bg8zuhZi`<%_ZZrq4mCPjs=~B_|)E z(l#PdR>oozpH&=7NfK74%nlr8hZE$4d;#+dswHKtZ+SaQWacA1+kF#r0$ zIsC!=)?Z3B;h+!3{D;aP%)0N+3Szfw_xFCuU5b5_{Yl;zB^NZTRGw;2F~3Jq--#3T zyC8;{tjE!ip}%lkIFQx2rTD4~7Y5fqMY{YJez5-OG>1!?_NE-N#phk2v;KN~Q*0k& z|BiJ2f!WRU`n;#)9`m~!54B#h$$D~Vjc)xom-!t#RqijxV>vmZdsxDEzk1|*C_#%f(f zT0`4lI=T56JLh$BYtYw&v{r0kJh%1tVC-D*&s6Me>))Ma+|)a`OAK?kU;$&?QdmjD z{+slCU7wrAmD?^K*phmFuzrp>o8L#XQqteCDgBRqn5N<8bHsVh$MN?+ZQ)W;rX7!u zX)k8^`!PjV8-KR}-$$3P#3cerrKhs(jdK}RN@+Wcb$;p>_iQh2Wtph$3^`X|S3O*@d{KC8PP>++cgv^LM5 zDaL%iG%@$LxxtFurSCt_%Q<0G(fI2hlwf|pV)>3GyoU3K+{ZSvQgTk2?G-U|HQPnq z+Mdw(VCoN#C7C{YXP^Z0_s2>}+fCN=`X+xkhSmF$v>mxps<#kdF^APA^_>{S&B5{4 z&@|5T@K(3dgTgsq+qO-nuAglAPTtQB)_VQNrhfaH{Sd_zz;@t?Y4}dWw6oUdH?0Sx+!(*1E0=dP?6oN7s`>qw z`nwoh@&v5@I_;Z^4f{ixW#v2t+BKHKoMWc}q-V!ZKP4g38t?Ur5g;q%lLPpEpZ z`S&F9seNT_k5p8BkFS){Rot|7XMzhWrTRJXx%Hvv9Ae&A_Vyk*yt(IQs>>g*N#Of&slE!_?sWQ1f^7e(|tbqW;E@H)b}owQd`Vv z269W&-i^BNHvc|{yMJtvg%$U5Vv_kCueMVympL^0&-S0$C2&Ok+b(mpT`Oz*);hyY zI|9AWTX0SO^?SFR;zAzSKAU-oUGH=LlqDCa(s}%>625cEb)P1``tXvoJSu%T@&0(q 
zn60&^$FXm$lq%aA-@lS-2kCl$-*-{vP~J=}n-3{;jjEo+SNn;LY48Zc{)|d&G3;lk za7X-klr5XMVE3?ykm5Ty!;`<)Mm9V~NpJ9HD_X%`RDGXcEFOQ(Cv80E*i(FP=W0*b z#T{l;D?d|2Im6C@iha5lv_0t^7o2xo4yq~F`c^N}9^|6r%%)woKA+5&)bLk8&LSLG z!T4T8pJ!9x&}Ue<9MtFdY`1;nzTCr_vu)A66EZ)|_aF9tC3XeK-}mC&<*~h9WFcv7 z^q2YfL|o%mJm|Y~00p=k_HdlTe(mNI*RtfAtN+*P$yZq^X}f_c9+i7vOYCadf70j1 zr7Q1U+ctMED&EWZo|HYzeFReNBi#==+_#bW98Z_NWsT9@;waz9p#053>#P( zb{xu1-?E>j_mA~=NNJPJ^@2Y4*Yn7-E&e%pXm(9jN_yW_&%d>npJMFyQI#Ap^LjVc KQjgk?{C@yzp%rcb literal 0 HcmV?d00001 diff --git a/tests/data/1138_bus.info b/tests/data/1138_bus.info new file mode 100644 index 0000000..270767f --- /dev/null +++ b/tests/data/1138_bus.info @@ -0,0 +1 @@ +-matload_block_size 1 From d7ba6834c9070964bb3a2c92ca64d4a0162c219b Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 22:39:13 +0000 Subject: [PATCH 24/41] Add higher order test that fails in Loe --- tests/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 355816e..e814af8 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -148,10 +148,14 @@ run_tests_load_serial: done # @echo "" - @echo "Test Newton GMRES polynomials matrix-free with added roots in market matrix problem 1138" + @echo "Test Newton GMRES polynomials order 60 matrix-free with added roots in market matrix problem 1138" ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 60 -pc_pflareinv_matrix_free -ksp_norm_type unpreconditioned -ksp_max_it 6 - @echo "Test Newton GMRES polynomials with fixed sparsity with added roots in market matrix problem 1138" - ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 60 -ksp_norm_type unpreconditioned -ksp_max_it 6 + @echo "Test Newton GMRES polynomials order 60 with fixed sparsity with added roots in market matrix problem 1138" + ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 60 -ksp_norm_type unpreconditioned -ksp_max_it 6 + @echo "Test Newton GMRES polynomials order 120 matrix-free with added roots in market matrix problem 1138" + ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 120 -pc_pflareinv_matrix_free -ksp_norm_type unpreconditioned -ksp_max_it 5 + @echo "Test Newton GMRES polynomials order 120 with fixed sparsity with added roots in market matrix problem 1138" + ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 120 -ksp_norm_type unpreconditioned -ksp_max_it 5 # ~~~~~~~~~~~ # ~~~~~~~~~~~ From 0f15e4cd849e5976bba40959a224f24ff91182ab Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 23:17:00 +0000 Subject: [PATCH 25/41] Add better comments for Newton GMRES polynomial --- src/Gmres_Poly_Newton.F90 | 217 +++++++++++++++----------------------- 1 file changed, 85 insertions(+), 132 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 66bc5c8..c389b08 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -139,7 +139,7 @@ end subroutine modified_leja subroutine cluster_eigenvalues_stable(real_roots, imag_roots, rel_tol, abs_tol) - ! Robust clustering of (possibly complex) harmonic Ritz values. + ! Clustering of (possibly complex) harmonic Ritz values. ! Numerically distinct clusters are moved to the front. ! Remaining entries are set to zero. ! Skips eigenvalues that are exactly zero (both real and imag parts). 
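For reference, the Newton-basis application that these rewritten comments describe can be sketched on a scalar: each real root theta_i adds prod/theta_i to the result and then updates prod <- (1 - lambda/theta_i)*prod, while a complex conjugate pair a +/- b*i is handled as a single step that adds (2*a - lambda)*prod/(a^2 + b^2). The standalone program below is only an illustration and is not part of this patch; the root values in it are made up purely for the example. It evaluates p(lambda) with that recurrence and compares it to 1/lambda, which the GMRES polynomial approximates when lambda sits near the roots.

    program newton_poly_scalar_sketch
       implicit none
       ! Illustrative roots only: one real root followed by a complex
       ! conjugate pair stored as consecutive entries, as in the library
       real(8), dimension(3) :: re_roots = [2.0d0, 1.5d0, 1.5d0]
       real(8), dimension(3) :: im_roots = [0.0d0, 0.5d0, -0.5d0]
       real(8) :: lambda, p, prod, a, b, sq
       integer :: i
       lambda = 1.8d0
       p = 0.0d0      ! running value of p(lambda), which approximates 1/lambda
       prod = 1.0d0   ! running residual product over the roots processed so far
       i = 1
       do while (i <= size(re_roots))
          if (im_roots(i) == 0.0d0) then
             ! Real root theta_i: add prod/theta_i, then update the product
             p = p + prod/re_roots(i)
             prod = prod*(1.0d0 - lambda/re_roots(i))
             i = i + 1
          else
             ! Complex conjugate pair a +/- b*i, processed two roots at a time
             a = re_roots(i)
             b = im_roots(i)
             sq = a*a + b*b
             p = p + (2.0d0*a - lambda)*prod/sq
             prod = prod*(1.0d0 - (2.0d0*a*lambda - lambda*lambda)/sq)
             i = i + 2
          end if
       end do
       print *, "p(lambda) =", p, " 1/lambda =", 1.0d0/lambda
    end program newton_poly_scalar_sketch

At every step the identity p*lambda + prod = 1 is preserved, so the final p equals (1 - r(lambda))/lambda with r the residual polynomial built from the roots; the matrix-free and assembled code paths build the same quantity with vectors and A in place of lambda.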
@@ -169,7 +169,7 @@ subroutine cluster_eigenvalues_stable(real_roots, imag_roots, rel_tol, abs_tol) n_unique = 0 ! --------------------------------------------------------- - ! All-pairs clustering (no sorting to preserve proximity) + ! All-pairs clustering ! --------------------------------------------------------- do i = 1, n @@ -249,7 +249,7 @@ subroutine compute_extra_roots(real_roots, imag_roots, real_roots_output, imag_r ! of roots that have large products (to improve polynomial stability) ! Only non-zero eigenvalues should be passed in ! real_roots_output, imag_roots_output are allocated and filled with the original - ! roots plus any extra copies, with perturbed values for the leja sort + ! roots plus any extra ! ~~~~~~ PetscReal, dimension(:), intent(inout) :: real_roots, imag_roots @@ -470,8 +470,9 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! What we find is that when use this to compute eigenvalues we find e-vals ! as we might expect up to the rank ! but then we have some eigenvalues that are numerically zero - ! We keep those and our application of the newton polynomial in - ! petsc_matvec_gmres_newton_mf and petsc_matvec_gmres_newton_mf_residual + ! Given the way the outside code is structured, we can't lower the poly_order + ! in this routine and return + ! Instead we keep the "zero" eigenvalues our application of the newton polynomial ! just skips them and hence we don't do any ! extra work in the application phase than we would have done with lower order @@ -543,16 +544,15 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots call MPI_Abort(MPI_COMM_WORLD, MPI_ERR_OTHER, errorcode) end if - ! print *, "coefficients r", coefficients(:, 1) - ! print *, "coefficients c", coefficients(:, 2) + ! ~~~~~~~~~~~~~~ + ! Now we have to check the output eigenvalues + ! ~~~~~~~~~~~~~~ ! These are the tolerances that control the clustering H_norm = norm2(H_n(1:m,1:m)) rel_tol = 1.0d0 * sqrt(epsilon(1.0d0)) abs_tol = epsilon(1.0d0) * max(H_norm, beta) - !print *, "H_norm", H_norm, "rel_tol", rel_tol, "abs_tol", abs_tol - ! In some cases with numerical rank deficiency, we can still ! end up with non-zero (or negative) eigenvalues that ! are trivially small - we set them explicitly to zero @@ -562,16 +562,18 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots coefficients(i_loc, 1) = 0d0 coefficients(i_loc, 2) = 0d0 end if - end do - - ! print *, "after zero coefficients r", coefficients(:, 1) - ! print *, "after zero coefficients c", coefficients(:, 2) + end do + ! ~~~~~~~~~~~~~~ ! Cluster close eigenvalues together to improve stability of the polynomial evaluation + ! For example when computing the e'vals of a constant diagonal matrix + ! the rank revealing factorisation above doesn't always report a rank of 1 given roundoff + ! Instead it returns multiple eigenvalues that are very close to each other, + ! and we want to cluster those together and treat them as one root + ! ~~~~~~~~~~~~~~ + + ! This places all exactly zero eigenvalues at the end of coefficients call cluster_eigenvalues_stable(coefficients(:, 1), coefficients(:, 2), rel_tol, abs_tol) - - ! print *, "after cluster coefficients r", coefficients(:, 1) - ! print *, "after cluster coefficients c", coefficients(:, 2) ! ~~~~~~~~~~~~~~ ! Extract the non-zero eigenvalues for root adding and leja ordering @@ -676,10 +678,7 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots end if - end if - - ! 
print *, "after root adding and leja coefficients r", coefficients(:, 1) - ! print *, "after root adding and leja coefficients c", coefficients(:, 2) + end if ! Cleanup do i_loc = 1, subspace_size+1 @@ -1109,25 +1108,32 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! In the mononomial case we just compute the matrix powers up to poly_sparsity_order ! and add them times the coefficients to cmat ! Here though we have to build the Newton basis polynomials - ! The complex conjugate roots are tricky as they build up two powers at a time - ! The powers higher than poly_sparsity_order can be done with only - ! a single bit of comms and is done below this + ! As a rule, the value input here into cmat is correct up to the power + ! of poly_sparsity_order, with all other terms being added in the fixed sparsity loops + ! below + ! + ! For complex conjugate roots, two terms are computed at a time, but if the sparsity order + ! falls in between those two roots, only part of the output cmat is included + ! Any remaining terms are output in either mat_sparsity_match or mat_product_save depending + ! on the case + ! mat_sparsity_match has either temp or prod in it depending on the case + ! mat_product_save only exists in some cases and stores prod from the previous term + ! mat_sparsity_match and mat_product_save are always output with the sparsity of sparsity order + ! + ! status_output is an array of length the number of roots + ! and has a 1 in each position if that term has been added to the output + ! It just helps us keep track of what has gone into cmat and what hasn't + ! It breaks up complex conjugate pairs into the first root (i) which has the same power as prod + ! tmp = 2 * a * prod + ! p = p + 1/(a^2 + b^2) * tmp + ! and the second root (i+1) which has the same power as prod * A: + ! tmp = -A * prod + ! p = p + 1/(a^2 + b^2) * tmp ! ~~~~~~~~~~ + output_first_complex = .FALSE. if (poly_sparsity_order == 1) then - ! If we've got first order sparsity, we want to build cmat up to first order - ! and then we add in higher order powers later - ! We can just pass in the first two roots to build the first order gmres polynomial - ! mat_sparsity_match gets out the parts of the product up to 1st order - ! for the real case this will be the equivalent of prod on line 5 of Alg 3 in Loe 2021 - ! I - 1/theta_1 A - ! whereas cmat will be 1/theta_1 + 1/theta_2 * (I - 1/theta_1 A) - ! For the complex case we instead pass out tmp from line 9 scaled by 1/(a^2 + b^2) - ! as this is the part of the product with sparsity up to A - ! This is because the prod for complex builds up the A^2 term for the next iteration - ! given it does two roots at a time - ! If we have a real first coefficient and a second complex ! we can't call build_gmres_polynomial_newton_inverse_1st_1st as it is only correct ! for valid coefficients up to 1st order (ie both real or both complex) @@ -1137,6 +1143,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & status_output, mat_product_save) + ! Valid 1st order polynomial, so this case is easy else ! Duplicate & copy the matrix, but ensure there is a diagonal present @@ -1149,30 +1156,15 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end if else - ! print *,"reals", coefficients(:,1) - ! print *,"imags", coefficients(:,2) - ! If we're any higher, then we build cmat up to that order ! 
But we have to be careful because the last root we want to explicitly - ! build up to here (ie the power of the matrix given by poly_sparsity_order) + ! build up to here (ie the power of the matrix given by sparsity_order) ! might be the first root of a complex conjugate pair - ! In that case cmat only contains part of the result up to poly_sparsity_order - ! Similarly mat_sparsity_match contains the product up to poly_sparsity_order - ! The rest gets added in below - ! output_first_complex records if poly_sparsity_order hits the first root - ! of a complex conjugate pair, as we need to know that below to add in the rest - ! of the poly_sparsity_order+1 term from that pair - ! before moving on to the rest of the higher order roots call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, & coefficients(1:poly_sparsity_order + 1, 1:2), & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & status_output, mat_product_save) - end if - - ! print *, "status output real", status_output(:, 1) - ! print *, "status output complex", status_output(:, 2) - - ! print *, "sum", sum(status_output, 2) + end if ! We know we will never have non-zero locations outside of the highest constrained sparsity power call MatSetOption(cmat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE, ierr) @@ -1423,40 +1415,30 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp term = poly_sparsity_order + 1 skip_add = .FALSE. ! If the fixed sparsity root is the second of a complex pair, we start one term earlier - ! so that we can compute the correct part of the product, we just make sure not to add + ! so that we can compute the correct part of the fixed sparsity product, we just make sure not to add + ! anything to cmat as it is already correct up to the fixed sparsity order if (coefficients(term,2) /= 0d0 .AND. .NOT. output_first_complex) then term = term - 1 skip_add = .TRUE. end if - !print *, "starting loop at term ", term, "skip_add ", skip_add - ! This loop skips the last coefficient do while (term .le. size(coefficients, 1) - 1) - !print *, "term ", term, "coeff ", coefficients(term,1), coefficients(term,2), skip_add - ! If real if (coefficients(term,2) == 0d0) then + ! Skips eigenvalues that are numerically zero - see + ! the comment in calculate_gmres_polynomial_roots_newton if (abs(coefficients(term,1)) < 1e-12) then term = term + 1 cycle end if - !print *, "REAL CASE assembly", term - ! ~~~~~~~~~~~ ! Now can add the value to our matrix - ! Can skip this if coeff is zero, but still need to compute A^(term-1) - ! for the next time through - ! Also we skip the first one if we're real as that value has already been added to the - ! matrix by the build_gmres_polynomial_newton_inverse_full (as we had to build the product up - ! to that order) ! ~~~~~~~~~~~ if (ncols /= 0 .AND. status_output(term, 1) /= 1) then - - !print *, "ADDING IN REAL TERM ", term call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_previous_power_temp(1:ncols), ADD_VALUES, ierr) end if @@ -1464,16 +1446,13 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Initialize with previous product before the A*prod subtraction vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) - !print *, "DOING REAL PRODCUT for term ", term - - ! Have to finish all the columns before we move onto the next coefficient + ! This is the 1/theta_i * A * prod but where A * prod has fixed sparsity do j_loc = 1, ncols ! 
If we have no matching columns cycle this row if (.NOT. associated(symbolic_ones(j_loc)%ptr)) cycle ! symbolic_vals(j_loc)%ptr has the matching values of A in it - ! This is the (I - A_ff/theta_k) * prod vals_power_temp(symbolic_ones(j_loc)%ptr) = vals_power_temp(symbolic_ones(j_loc)%ptr) - & 1d0/coefficients(term, 1) * & symbolic_vals(j_loc)%ptr * vals_previous_power_temp(j_loc) @@ -1484,23 +1463,23 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! If complex else + ! Skips eigenvalues that are numerically zero - see + ! the comment in calculate_gmres_polynomial_roots_newton if (coefficients(term,1)**2 + coefficients(term,2)**2 < 1e-12) then term = term + 2 cycle end if - !print *, "COMPLEX CASE assembly", term - square_sum = 1d0/(coefficients(term,1)**2 + coefficients(term,2)**2) + + ! If our fixed sparsity order falls on the first of a complex conjugate pair if (.NOT. skip_add) then ! We skip the 2 * a * prod from the first root of a complex pair if that has already - ! been included in the inv_matrix from build_gmres_polynomial_newton_inverse_full + ! been included in the cmat from build_gmres_polynomial_newton_inverse_full if (status_output(term, 2) /= 1) then - !print *, term, "adding in 2a prod" temp(1:ncols) = 2 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) else - !print *, term, "skipping adding in 2a prod" temp(1:ncols) = 0d0 end if @@ -1521,69 +1500,57 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp square_sum * temp(1:ncols), ADD_VALUES, ierr) end if - ! for (r, c, c) - ! problem here is 2 *a * prod has been added to inv_matrix but we need to have added - ! 2aprod/a^2+b^2 - ! for (c, c, r) mat product is output without the 1/a^2+b^2 but that is fine as we - ! compensate for that in the product + ! Here we need to go back in and ensure 2 *a * prod is in temp if we skipped it + ! above. We know it is already in cmat, but it has to be in temp when we + ! do the next product if (status_output(term, 2) == 1) then if (output_first_complex) then - !print *, "ADDING IN 2a prod second time for term ", term temp(1:ncols) = temp(1:ncols) + 2d0 * coefficients(term, 1) * vals_previous_power_temp(1:ncols) end if end if - ! First time through complex pair + ! If our fixed sparsity order falls on the second of a complex conjugate pair else - !print *, "SKIP ADDING IN COMPLEX TERM ", term - !@@@ for the case where we have (r, c, c, ....) and second order sparsity - ! i think the problem is that we have to skip adding anything to p as inverse_matrix - ! already has the correct values in it, as we computed tmp which will have 2nd order terms - ! in it, but we skipped the product in the full, which is correct as that would compute 3rd order - ! terms. so the thing that gets output in mat_prod_or_tmp is tmp - ! + ! In this case we have already included both 2*a*prod - A * prod into cmat + ! But we still have to compute the product for the next term + ! The problem here is that mat_sparsity_match has temp in it in this case, not + ! the old prod from whatever the previous loop is + ! In that case build_gmres_polynomial_newton_inverse_full also outputs + ! mat_product_save which is the old value of prod but with the sparsity of + ! mat_sparsity_match (with zeros if needed) - ! If we're skipping the add, then vals_previous_power_temp has all the correct - ! values in it for temp - ! All we have to do is compute prod for the next time through + ! This case only occurs once for each row, so once we've hit this + ! 
we will always have our correct prod skip_add = .FALSE. - !@@@@ so then this line sets temp to be tmp + ! temp is output into mat_sparsity_match in this case temp(1:ncols) = vals_previous_power_temp(1:ncols) - ! @@@ have to be careful here! - ! If we've gone back a term, we don't have anything in prod - ! prod is I when term = 1 - ! @@@@ if we're doing this for the first time, we know product is I - ! so we just set prod to be I - ! @@@@ the problem is if we're not doing this for the first time - ! we need to know what prod had in it from the previous time, as our full - ! is only outputting prod or temp, not both, because at lower order when we output - ! temp in this case we knew prod was I so we didn't have to store both - ! in the (r, c, c) case prod will have been I - 1/theta_1 A_ff from the r - ! but for it to work with the loop below vals_previous_power_temp has to contain that but - ! over the sparsity of the 2nd order term. + ! If sparsity order is 1, the previous product will have been the identity + ! and we don't output it into mat_product_save because that is a trivial case + ! we can do ourselves if (term == 1) then vals_previous_power_temp(1:ncols) = 0d0 if (diag_index /= -1) then vals_previous_power_temp(diag_index) = 1d0 end if + ! In the case the mat_product_save is not the identity, we need to pull it's value out - ! We only do this once for the first term in this case else call MatGetRow(mat_product_save, i_loc - 1 + global_row_start, ncols_two, & cols_two_ptr, vals_two_ptr, ierr) - ! We have guaranteed in the full version that mat_product_save has fixed sparsity + ! We have guaranteed in the build_gmres_polynomial_newton_inverse_full + ! version that mat_product_save has fixed sparsity vals_previous_power_temp(1:ncols_two) = vals_two_ptr(1:ncols_two) call MatRestoreRow(mat_product_save, i_loc - 1 + global_row_start, ncols_two, & cols_two_ptr, vals_two_ptr, ierr) - end if end if + ! Now we compute the next product if (term .le. size(coefficients, 1)- 2) then vals_power_temp(1:ncols) = vals_previous_power_temp(1:ncols) @@ -1605,18 +1572,20 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp end if - ! This should now have the value of A^(term-1) in it + ! This should now have the value of prod in it vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) end do ! Final step if last root is real if (coefficients(term,2) == 0d0) then if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then - !print *, "adding REAL final term ", term, " coeff ", coefficients(term,1) call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if end if + + ! ~~~~~~~~~~~~~~~ + do j_loc = 1, ncols if (associated(symbolic_ones(j_loc)%ptr)) then deallocate(symbolic_ones(j_loc)%ptr) @@ -1646,6 +1615,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Delete temporaries call MatDestroy(mat_sparsity_match, ierr) + !call MatDestroy(mat_product_save, ierr) if (deallocate_submatrices) then deallocate(reuse_submatrices) reuse_submatrices => null() @@ -2104,9 +2074,10 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi status_output, mat_product_save) ! No constrained sparsity by default - ! If you pass in mat_prod_or_temp, poly_sparsity_order, output_first_complex + ! If you pass in mat_prod_or_temp, poly_sparsity_order, output_first_complex, status_output and + ! mat_product_save ! 
then it will build part of the terms, up to poly_sparsity_order, and return the product - ! in mat_prod_or_temp that you need to compute the rest of the fixed sparsity terms + ! in mat_prod_or_temp and mat_product_save that you need to compute the rest of the fixed sparsity terms ! ~~~~~~ type(tMat), intent(in) :: matrix @@ -2158,9 +2129,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! where ^r means a purely real root and ^c means a complex root ! want poly_sparsity_order = 1, we can't process all the way up to theta_3^c as that would ! compute up to an A^2 term which is beyond our sparsity constraint - ! So we just check if the last root also has it's complex conjugate present - ! This will never happen in any context except when we are outputting the product - ! as part of a fixed sparsity multiply ! i_sparse tells us how many roots we are going to process ! Normally this would just be size(coefficients, 1) and the loop below goes up @@ -2184,8 +2152,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi i_sparse = size(coefficients, 1) first_complex = .FALSE. - !print *, "size coeffs", size(coefficients, 1), "coeffs", coefficients(:, 1), coefficients(:, 2) - if (output_product) then output_first_complex = .FALSE. @@ -2217,8 +2183,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi first_complex = output_first_complex end if - !print *, "i_sparse", i_sparse, "output_first_complex", output_first_complex - ! ~~~~~~~~~~~~ ! Iterate over the i ! This is basically the same as the MF application but we have to build the powers @@ -2228,8 +2192,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We're always building up the next product do while (i .le. i_sparse - 1) - !print *, "i = ", i - ! Duplicate & copy the matrix, but ensure there is a diagonal present ! temp_mat_A is going to store things with the sparsity of A if (PetscObjectIsNull(temp_mat_A)) then @@ -2242,8 +2204,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! If real this is easy if (coefficients(i,2) == 0d0) then - !print *, "real", "i_sparse", i_sparse - ! Skips eigenvalues that are numerically zero ! We still compute the entries as as zero because we need the sparsity ! to be correct for the next iteration @@ -2286,7 +2246,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then - !print *, "outputting product in real case", "i_sparse", i_sparse, "i", i call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2295,8 +2254,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Complex else - !print *, "complex", first_complex - ! Skips eigenvalues that are numerically zero if (coefficients(i,1)**2 + coefficients(i,2)**2 < 1e-12) then square_sum = 0 @@ -2346,9 +2303,9 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi MAT_INITIAL_MATRIX, 1.5d0, temp_mat_two, ierr) end if - ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply + ! We copy out the last part of the old product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. 
i > i_sparse - 2) then - !print *, "outputting TEMP in complex case", "i_sparse", i_sparse, "i", i + call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) ! If i == 1 then we know mat_product is the identity and we don't bother ! to write it out, we just have some custom code in the product given its trivial @@ -2374,8 +2331,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi if (i .le. i_sparse - 2) then - !print *, "doing complex matmult step" - ! temp_mat_three = matrix * temp_mat_two call MatMatMult(matrix, temp_mat_two, & MAT_INITIAL_MATRIX, 1.5d0, temp_mat_three, ierr) @@ -2396,7 +2351,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. .NOT. first_complex) then - !print *, "outputting product in complex case", "i_sparse", i_sparse, "i", i call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2419,7 +2373,6 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Skips eigenvalues that are numerically zero if (abs(coefficients(i,1)) > 1e-12) then - !print *, "doing last real step, adding in term", i, "coeff", coefficients(i,1) if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) From 85015cfd08ff3df183229eaec03ee5f2a10bc215 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Mon, 9 Feb 2026 23:47:05 +0000 Subject: [PATCH 26/41] Fix memory leaks --- src/Gmres_Poly_Newton.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index c389b08..aecb590 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -1615,7 +1615,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! Delete temporaries call MatDestroy(mat_sparsity_match, ierr) - !call MatDestroy(mat_product_save, ierr) + call MatDestroy(mat_product_save, ierr) if (deallocate_submatrices) then deallocate(reuse_submatrices) reuse_submatrices => null() @@ -2236,6 +2236,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! mat_product_k_plus_1 = mat_product * temp_mat_A if (i == 1) then ! If i == 1 then we know mat_product is identity so we can just copy + call MatDestroy(mat_product, ierr) call MatConvert(temp_mat_A, MATSAME, MAT_INITIAL_MATRIX, mat_product, ierr) else call MatMatMult(temp_mat_A, mat_product, & From 4b0d5dda6edd0707d6bcdc1e7581fb2f3c1a818f Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 00:11:56 +0000 Subject: [PATCH 27/41] Add specific test with diagonal matrix for different GMRES polynomials to test they can handle solving a problem where a lower order polynomial is an exact solution. 
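A quick way to see what this test checks, using the same values that appear in mat_diag.F90 below: for a diagonal matrix with the two distinct entries theta_1 = 1.5 and theta_2 = 2.5, the first-order Newton form p(lambda) = 1/theta_1 + (1/theta_2)*(1 - lambda/theta_1) already interpolates 1/lambda exactly at both eigenvalues, since p(1.5) = 1/1.5 and p(2.5) = 2/3 + (1/2.5)*(1 - 2.5/1.5) = 2/3 - 4/15 = 2/5 = 1/2.5. For the identity or a constant diagonal a single root suffices. That is why the Makefile runs these solves with -ksp_max_it 1 and relies on the setup recognising that only one or two non-zero roots are needed.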
--- Makefile | 2 +- tests/Makefile | 10 +++- tests/mat_diag.F90 | 119 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 tests/mat_diag.F90 diff --git a/Makefile b/Makefile index 87fe1a6..87f3926 100644 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ OBJS := $(OBJS) $(SRCDIR)/PETSc_Helper.o \ $(SRCDIR)/PCPFLAREINV.o # Define a variable containing all the tests -export TEST_TARGETS = ex12f ex6f ex6f_getcoeffs ex6 adv_1d adv_diff_2d ex6_cf_splitting adv_diff_cg_supg matrandom matrandom_check_reset ex12f_gmres_poly +export TEST_TARGETS = ex12f ex6f ex6f_getcoeffs ex6 adv_1d adv_diff_2d ex6_cf_splitting adv_diff_cg_supg matrandom matrandom_check_reset ex12f_gmres_poly mat_diag # Include kokkos examples ifeq ($(PETSC_HAVE_KOKKOS),1) export TEST_TARGETS := $(TEST_TARGETS) adv_1dk diff --git a/tests/Makefile b/tests/Makefile index e814af8..a1b773d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -450,7 +450,15 @@ run_tests_no_load_serial: ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ -pc_air_inverse_sparsity_order $$sparsity -ksp_norm_type unpreconditioned -ksp_max_it 5; \ done; \ - done + done +# + @echo "" + @echo "Test Newton GMRES polynomial on a diagonal matrix" + ./mat_diag -ksp_max_it 1 + @echo "Test Arnoldi GMRES polynomial on a diagonal matrix" + ./mat_diag -pc_pflareinv_type arnoldi -ksp_max_it 1 + @echo "Test Power GMRES polynomial on a diagonal matrix" + ./mat_diag -pc_pflareinv_type power -ksp_max_it 1 # # ~~~~~~~~~~~~~~~~~~~~~~~ # Include kokkos examples diff --git a/tests/mat_diag.F90 b/tests/mat_diag.F90 new file mode 100644 index 0000000..14da617 --- /dev/null +++ b/tests/mat_diag.F90 @@ -0,0 +1,119 @@ +#include +#include "finclude/pflare.h" + use petscksp + implicit none + + ! Test that the gmres polynomials can handle small solve of diagonal matrix + ! We leave the polynomial order here as the default (which is 6), despite + ! the fact that much lower polynomial order is an exact solve in this case + ! This tests that the various gmres polynomial methods correctly + ! identify we only need up to lower order + + PetscErrorCode :: ierr + Mat :: A + PetscInt :: m, n, nnzs + PetscInt, parameter :: one = 1, two = 2, three = 3 + Vec :: x,b + KSP :: ksp + PC :: pc + PetscBool :: flg + KSPConvergedReason reason + PetscRandom rctx + + call PetscInitialize(PETSC_NULL_CHARACTER,ierr) + ! Register the pflare types + call PCRegister_PFLARE() + + m = 10 + n = 10 + call PetscOptionsGetInt(PETSC_NULL_OPTIONS,PETSC_NULL_CHARACTER,'-m',m,flg,ierr) + call PetscOptionsGetInt(PETSC_NULL_OPTIONS,PETSC_NULL_CHARACTER,'-n',n,flg,ierr) + + ! Create matrix + call MatCreate(PETSC_COMM_WORLD,A,ierr) + call MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n,ierr) + call MatSetFromOptions(A,ierr) + nnzs = m; + call MatSeqAIJSetPreallocation(A, nnzs, PETSC_NULL_INTEGER_ARRAY, ierr) + call MatMPIAIJSetPreallocation(A, nnzs, PETSC_NULL_INTEGER_ARRAY, nnzs, PETSC_NULL_INTEGER_ARRAY, ierr) + call MatSetUp(A,ierr) + call MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY,ierr) + call MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY,ierr) + call MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE, ierr) + + call MatCreateVecs(A,b,x,ierr) + call VecSet(x, 0d0, ierr) + + ! Random rhs + call PetscRandomCreate(PETSC_COMM_WORLD, rctx, ierr) + call PetscRandomSetFromOptions(rctx, ierr) + call VecSetRandom(b, rctx, ierr) + call PetscRandomDestroy(rctx, ierr) + + ! ~~~~~~~~~~~~~~ + ! 
Set constant diagonal values in matrix + ! In Newton form should only need a single root + ! (ie a 0th order polynomial) for an exact solve + ! Starting with the identity, the inverse should also be the identity + ! ~~~~~~~~~~~~~~ + call MatShift(A, 1d0, ierr) + + call KSPCreate(PETSC_COMM_WORLD,ksp,ierr) + call KSPSetOperators(ksp,A,A,ierr) + call KSPGetPC(ksp,pc,ierr) + ! Set newton gmres polynomial as PC + call PCSetType(pc, PCPFLAREINV, ierr) + call PCPFLAREINVSetType(pc, PFLAREINV_NEWTON, ierr) + call KSPSetPC(ksp, pc, ierr) + call KSPSetFromOptions(ksp,ierr) + call KSPSetUp(ksp,ierr) + + ! Do the solve + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp, reason, ierr) + if (reason%v < 0) then + error stop 1 + end if + + ! ~~~~~~~~~~~~~~ + ! Instead now set the diagonal to 1.5 + ! In Newton form should only need a single root + ! ~~~~~~~~~~~~~~ + call MatShift(A, 0.5d0, ierr) + call MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY,ierr) + call MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY,ierr) + call VecSet(x, 0d0, ierr) + + ! Do another solve - this will automatically trigger the setup as the matrix + ! has changed + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp, reason, ierr) + if (reason%v < 0) then + error stop 1 + end if + + ! ~~~~~~~~~~~~~~ + ! Instead now have two different constant values in the diagonal + ! In Newton form should only need two roots + ! (ie a 1st order polynomial) for an exact solve + ! ~~~~~~~~~~~~~~ + ! Set one of the values to 2.5 + call MatSetValue(A, 0, 0, 2.5d0, INSERT_VALUES, ierr) + call MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY,ierr) + call MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY,ierr) + call VecSet(x, 0d0, ierr) + + ! Do another solve - this will automatically trigger the setup as the matrix + ! has changed + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp, reason, ierr) + if (reason%v < 0) then + error stop 1 + end if + + call MatDestroy(A, ierr) + call VecDestroy(b, ierr) + call VecDestroy(x, ierr) + call KSPDestroy(ksp, ierr) + call PetscFinalize(ierr) + end \ No newline at end of file From ffccf78cfc4d2cd60bc4a36c541741b25463175b Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 00:26:20 +0000 Subject: [PATCH 28/41] 64-bit fixes --- src/Gmres_Poly_Newton.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index aecb590..40d90d5 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -1055,9 +1055,9 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp PetscInt :: local_rows, local_cols, global_rows, global_cols PetscInt :: global_row_start, global_row_end_plus_one, row_index_into_submatrix PetscInt :: global_col_start, global_col_end_plus_one, n, ncols, ncols_two, ifree, max_nnzs - PetscInt :: i_loc, j_loc, row_size, rows_ao, cols_ao, rows_ad, cols_ad, shift = 0 + PetscInt :: i_loc, j_loc, row_size, rows_ao, cols_ao, rows_ad, cols_ad, shift = 0, diag_index integer :: errorcode, match_counter, term - integer :: comm_size, diag_index + integer :: comm_size PetscErrorCode :: ierr integer, dimension(:), allocatable :: cols_index_one, cols_index_two PetscInt, dimension(:), allocatable :: col_indices_off_proc_array, ad_indices, cols @@ -1149,7 +1149,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! 
Duplicate & copy the matrix, but ensure there is a diagonal present call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, cmat) - call build_gmres_polynomial_newton_inverse_1st_1st(matrix, one, & + call build_gmres_polynomial_newton_inverse_1st_1st(matrix, 1, & coefficients(1:poly_sparsity_order + 1, 1:2), & cmat, mat_sparsity_match, & status_output) From 725e974f4ce77fb73250080d3a6d27eaf0cc0e2f Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 00:33:51 +0000 Subject: [PATCH 29/41] 64-bit fix for new diagonal test --- tests/mat_diag.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/mat_diag.F90 b/tests/mat_diag.F90 index 14da617..1d6a54a 100644 --- a/tests/mat_diag.F90 +++ b/tests/mat_diag.F90 @@ -12,7 +12,7 @@ PetscErrorCode :: ierr Mat :: A PetscInt :: m, n, nnzs - PetscInt, parameter :: one = 1, two = 2, three = 3 + PetscInt, parameter :: one = 1, two = 2, three = 3, zero = 0 Vec :: x,b KSP :: ksp PC :: pc @@ -98,7 +98,7 @@ ! (ie a 1st order polynomial) for an exact solve ! ~~~~~~~~~~~~~~ ! Set one of the values to 2.5 - call MatSetValue(A, 0, 0, 2.5d0, INSERT_VALUES, ierr) + call MatSetValue(A, zero, zero, 2.5d0, INSERT_VALUES, ierr) call MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY,ierr) call MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY,ierr) call VecSet(x, 0d0, ierr) From 385ea448bc508b3578c2abcacdc07ffe2254da1f Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 02:42:49 +0000 Subject: [PATCH 30/41] Fix coefficients access out of bounds in some circumstances --- src/Gmres_Poly_Newton.F90 | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 40d90d5..12e50f7 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -553,6 +553,8 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots rel_tol = 1.0d0 * sqrt(epsilon(1.0d0)) abs_tol = epsilon(1.0d0) * max(H_norm, beta) + print *, "coeffs", coefficients(:,1), "imag", coefficients(:,2) + ! In some cases with numerical rank deficiency, we can still ! end up with non-zero (or negative) eigenvalues that ! are trivially small - we set them explicitly to zero @@ -562,7 +564,9 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots coefficients(i_loc, 1) = 0d0 coefficients(i_loc, 2) = 0d0 end if - end do + end do + + print *, "coeffs after zero", coefficients(:,1), "imag", coefficients(:,2) ! ~~~~~~~~~~~~~~ ! Cluster close eigenvalues together to improve stability of the polynomial evaluation @@ -575,6 +579,8 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! This places all exactly zero eigenvalues at the end of coefficients call cluster_eigenvalues_stable(coefficients(:, 1), coefficients(:, 2), rel_tol, abs_tol) + print *, "coeffs after cluster", coefficients(:,1), "imag", coefficients(:,2) + ! ~~~~~~~~~~~~~~ ! Extract the non-zero eigenvalues for root adding and leja ordering ! Zero eigenvalues will be appended at the end @@ -613,6 +619,8 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! 
Resize coefficients to hold non-zero roots (with extras) + zero roots at end deallocate(coefficients) + print *, "size(real_roots_added)", size(real_roots_added), "poly_order + 1", poly_order + 1, "numerical_order", numerical_order + print *, "new size", size(real_roots_added) + (poly_order + 1 - numerical_order) allocate(coefficients(size(real_roots_added) + (poly_order + 1 - numerical_order), 2)) coefficients = 0d0 @@ -685,6 +693,8 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots call VecDestroy(V_n(i_loc), ierr) end do call VecDestroy(w_j, ierr) + + print *, "coeffs after leja", coefficients(:,1), "imag", coefficients(:,2) end subroutine calculate_gmres_polynomial_roots_newton @@ -799,11 +809,11 @@ subroutine petsc_matvec_gmres_newton_mf(mat, x, y) if (mat_ctx%imag_roots(size(mat_ctx%real_roots)) == 0d0) then ! Skips eigenvalues that are numerically zero - if (abs(mat_ctx%real_roots(i)) > 1e-12) then + if (abs(mat_ctx%real_roots(size(mat_ctx%real_roots))) > 1e-12) then ! y = y + theta_i * MF_VEC_TEMP call VecAXPBY(y, & - 1d0/mat_ctx%real_roots(i), & + 1d0/mat_ctx%real_roots(size(mat_ctx%real_roots)), & 1d0, & mat_ctx%mf_temp_vec(MF_VEC_TEMP), ierr) end if @@ -1575,12 +1585,12 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! This should now have the value of prod in it vals_previous_power_temp(1:ncols) = vals_power_temp(1:ncols) end do - + ! Final step if last root is real - if (coefficients(term,2) == 0d0) then - if (ncols /= 0 .AND. abs(coefficients(term,1)) > 1e-12) then + if (coefficients(size(coefficients, 1),2) == 0d0) then + if (ncols /= 0 .AND. abs(coefficients(size(coefficients, 1),1)) > 1e-12) then call MatSetValues(cmat, one, [global_row_start + i_loc-1], ncols, cols, & - 1d0/coefficients(term, 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) + 1d0/coefficients(size(coefficients, 1), 1) * vals_power_temp(1:ncols), ADD_VALUES, ierr) end if end if @@ -1926,8 +1936,8 @@ subroutine build_gmres_polynomial_inverse_0th_order_sparsity_newton(matrix, poly ! Add in the final term multiplied by 1/theta_poly_order ! Skips eigenvalues that are numerically zero - if (abs(coefficients(i,1)) > 1e-12) then - call VecAXPY(inv_vec, 1d0/coefficients(i,1), product_vec, ierr) + if (abs(coefficients(size(coefficients, 1),1)) > 1e-12) then + call VecAXPY(inv_vec, 1d0/coefficients(size(coefficients, 1),1), product_vec, ierr) end if end if @@ -2372,16 +2382,16 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Add in the final term multiplied by 1/theta_poly_order ! Skips eigenvalues that are numerically zero - if (abs(coefficients(i,1)) > 1e-12) then + if (abs(coefficients(i_sparse,1)) > 1e-12) then if (reuse_triggered) then ! If doing reuse we know our nonzeros are a subset - call MatAXPY(inv_matrix, 1d0/coefficients(i,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) + call MatAXPY(inv_matrix, 1d0/coefficients(i_sparse,1), mat_product, SUBSET_NONZERO_PATTERN, ierr) else ! 
Have to use the DIFFERENT_NONZERO_PATTERN here - call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i,1), mat_product) + call MatAXPYWrapper(inv_matrix, 1d0/coefficients(i_sparse,1), mat_product) end if - if (output_product) status_output(i, 1) = 1 + if (output_product) status_output(i_sparse, 1) = 1 end if end if end if From 10328e9ca2f44e62bbe9cce044e47664654317cb Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 03:05:14 +0000 Subject: [PATCH 31/41] Need to respect contiguous keyword when passing in coefficients below the poly order --- src/Gmres_Poly_Newton.F90 | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 12e50f7..cd37029 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -553,8 +553,6 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots rel_tol = 1.0d0 * sqrt(epsilon(1.0d0)) abs_tol = epsilon(1.0d0) * max(H_norm, beta) - print *, "coeffs", coefficients(:,1), "imag", coefficients(:,2) - ! In some cases with numerical rank deficiency, we can still ! end up with non-zero (or negative) eigenvalues that ! are trivially small - we set them explicitly to zero @@ -565,9 +563,7 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots coefficients(i_loc, 2) = 0d0 end if end do - - print *, "coeffs after zero", coefficients(:,1), "imag", coefficients(:,2) - + ! ~~~~~~~~~~~~~~ ! Cluster close eigenvalues together to improve stability of the polynomial evaluation ! For example when computing the e'vals of a constant diagonal matrix @@ -579,8 +575,6 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! This places all exactly zero eigenvalues at the end of coefficients call cluster_eigenvalues_stable(coefficients(:, 1), coefficients(:, 2), rel_tol, abs_tol) - print *, "coeffs after cluster", coefficients(:,1), "imag", coefficients(:,2) - ! ~~~~~~~~~~~~~~ ! Extract the non-zero eigenvalues for root adding and leja ordering ! Zero eigenvalues will be appended at the end @@ -619,8 +613,6 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots ! Resize coefficients to hold non-zero roots (with extras) + zero roots at end deallocate(coefficients) - print *, "size(real_roots_added)", size(real_roots_added), "poly_order + 1", poly_order + 1, "numerical_order", numerical_order - print *, "new size", size(real_roots_added) + (poly_order + 1 - numerical_order) allocate(coefficients(size(real_roots_added) + (poly_order + 1 - numerical_order), 2)) coefficients = 0d0 @@ -692,10 +684,7 @@ subroutine calculate_gmres_polynomial_roots_newton(matrix, poly_order, add_roots do i_loc = 1, subspace_size+1 call VecDestroy(V_n(i_loc), ierr) end do - call VecDestroy(w_j, ierr) - - print *, "coeffs after leja", coefficients(:,1), "imag", coefficients(:,2) - + call VecDestroy(w_j, ierr) end subroutine calculate_gmres_polynomial_roots_newton @@ -1091,6 +1080,7 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp logical :: output_first_complex, skip_add PetscReal :: square_sum integer, dimension(poly_order + 1, 2) :: status_output + PetscReal, dimension(poly_sparsity_order+1,2) :: coeffs_contig ! ~~~~~~~~~~ @@ -1158,9 +1148,10 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! 
Duplicate & copy the matrix, but ensure there is a diagonal present call mat_duplicate_copy_plus_diag(matrix, reuse_triggered, cmat) - + ! Have to be careful to pass in a contiguous piece of memory here + coeffs_contig = coefficients(1:poly_sparsity_order + 1, 1:2) call build_gmres_polynomial_newton_inverse_1st_1st(matrix, 1, & - coefficients(1:poly_sparsity_order + 1, 1:2), & + coeffs_contig, & cmat, mat_sparsity_match, & status_output) end if @@ -1170,8 +1161,9 @@ subroutine mat_mult_powers_share_sparsity_newton_cpu(matrix, poly_order, poly_sp ! But we have to be careful because the last root we want to explicitly ! build up to here (ie the power of the matrix given by sparsity_order) ! might be the first root of a complex conjugate pair + coeffs_contig = coefficients(1:poly_sparsity_order + 1, 1:2) call build_gmres_polynomial_newton_inverse_full(matrix, poly_order, & - coefficients(1:poly_sparsity_order + 1, 1:2), & + coeffs_contig, & cmat, mat_sparsity_match, poly_sparsity_order, output_first_complex, & status_output, mat_product_save) end if From 1da7e42c76de2cad57c8cb36361b2e2857d8a34b Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 03:40:10 +0000 Subject: [PATCH 32/41] Failures in for loops in tests/Makefile weren't propogating through to the CI --- tests/Makefile | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index a1b773d..ed5d6cc 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -134,12 +134,12 @@ run_tests_load_serial: # @echo "" @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders" - @for order in 0 1 2 3 4 5 6; do \ + @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ done @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity" - @for order in 2 3 4 5 6; do \ + @set -e; for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ for sparsity in $$(seq 1 $$(($$order - 1))); do \ echo " --- Testing sparsity order = $$sparsity ---"; \ @@ -200,16 +200,16 @@ run_tests_load_parallel: # @echo "" @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders in parallel" - @for order in 0 1 2 3 4 5 6; do \ + @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$order; \ done @echo "Test difference between GMRES polynomial forms for hyperbolic streaming problem for different orders and fixed sparsity in parallel" - @for order in 2 3 4 5 6; do \ + @set -e; for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ for sparsity in $$(seq 1 $$(($$order - 1))); do \ echo " --- Testing sparsity order = $$sparsity ---"; \ - $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ + $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ done; \ done @@ -437,13 +437,13 @@ run_tests_no_load_serial: # @echo "" @echo "Test Newton AIRG on advection for for different orders" - @for order in 0 1 2 3 4 5 6; do \ + @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order 
= $$order ---"; \ ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ done @echo "Test Newton AIRG on advection for for different orders and fixed sparsity" - @for order in 2 3 4 5 6; do \ + @set -e; for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ for sparsity in $$(seq 1 $$(($$order - 1))); do \ echo " --- Testing sparsity order = $$sparsity ---"; \ @@ -659,13 +659,13 @@ run_tests_no_load_parallel: # @echo "" @echo "Test Newton AIRG on advection for for different orders in parallel" - @for order in 0 1 2 3 4 5 6; do \ + @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ done @echo "Test Newton AIRG on advection for for different orders and fixed sparsity in parallel" - @for order in 2 3 4 5 6; do \ + @set -e; for order in 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ for sparsity in $$(seq 1 $$(($$order - 1))); do \ echo " --- Testing sparsity order = $$sparsity ---"; \ @@ -694,7 +694,7 @@ run_tests_medium_serial: # @echo "" - @for size in $$(seq 100 20 200); do \ + @set -e; for size in $$(seq 100 20 200); do \ echo "--- Testing size = $$size x $$size ---"; \ ./adv_diff_2d -da_grid_x $$size -da_grid_y $$size -pc_type air -ksp_pc_side right \ -ksp_rtol 1e-10 -ksp_atol 1e-50 -pc_air_a_lump -pc_air_a_drop 1e-5 -pc_air_strong_threshold 0.99 -ksp_max_it 6; \ @@ -702,7 +702,7 @@ run_tests_medium_serial: # @echo "" @echo "--- Running scaling study on adv_diff_2d ---" - @for size in 100 200 400 800; do \ + @set -e; for size in 100 200 400 800; do \ echo "--- Testing size = $$size x $$size ---"; \ ./adv_diff_2d -da_grid_x $$size -da_grid_y $$size -pc_type air -ksp_pc_side right \ -ksp_rtol 1e-10 -ksp_atol 1e-50 -pc_air_a_lump -pc_air_a_drop 1e-5 -pc_air_strong_threshold 0.99 -ksp_max_it 6; \ @@ -714,7 +714,7 @@ run_tests_medium_parallel: # @echo "" - @for size in $$(seq 100 20 200); do \ + @set -e; for size in $$(seq 100 20 200); do \ echo "--- Testing size = $$size x $$size ---"; \ $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x $$size -da_grid_y $$size -pc_type air -ksp_pc_side right \ -ksp_rtol 1e-10 -ksp_atol 1e-50 -pc_air_a_lump -pc_air_a_drop 1e-5 -pc_air_strong_threshold 0.99 -ksp_max_it 6; \ @@ -722,7 +722,7 @@ run_tests_medium_parallel: # @echo "" @echo "--- Running scaling study on adv_diff_2d in parallel ---" - @for size in 100 200 400 800; do \ + @set -e; for size in 100 200 400 800; do \ echo "--- Testing size = $$size x $$size ---"; \ $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x $$size -da_grid_y $$size -pc_type air -ksp_pc_side right \ -ksp_rtol 1e-10 -ksp_atol 1e-50 -pc_air_a_lump -pc_air_a_drop 1e-5 -pc_air_strong_threshold 0.99 -ksp_max_it 6; \ From 30354dc2c3c7144627d4144e868dd23857ac1759 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 14:09:20 +0000 Subject: [PATCH 33/41] Detection of complex conjugate pairs was broken when imaginary parts were very close --- src/Gmres_Poly_Newton.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index cd37029..23c3209 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -2174,7 +2174,7 @@ subroutine 
build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! Check if the distance between the fixed sparsity root and the one before ! If > zero then they are not complex conjugates and hence we are on the first of the pair - if (abs(coefficients(i_sparse,1) - coefficients(i_sparse-1,1))/coefficients(i_sparse,1) > 1e-14 .AND. & + if (abs(coefficients(i_sparse,1) - coefficients(i_sparse-1,1))/coefficients(i_sparse,1) > 1e-14 .OR. & abs(coefficients(i_sparse,2) + coefficients(i_sparse-1,2))/coefficients(i_sparse,2) > 1e-14) then output_first_complex = .TRUE. i_sparse = i_sparse + 1 From 38fba34be009dedcb1015896c96ad42f59fcc844 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 14:54:59 +0000 Subject: [PATCH 34/41] Fix memory leak --- src/Gmres_Poly_Newton.F90 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Gmres_Poly_Newton.F90 b/src/Gmres_Poly_Newton.F90 index 23c3209..1761c90 100644 --- a/src/Gmres_Poly_Newton.F90 +++ b/src/Gmres_Poly_Newton.F90 @@ -2249,6 +2249,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i == i_sparse - 1) then + call MatDestroy(mat_prod_or_temp, ierr) call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2282,6 +2283,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then + call MatDestroy(mat_prod_or_temp, ierr) call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if @@ -2309,11 +2311,13 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the old product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. i > i_sparse - 2) then + call MatDestroy(mat_prod_or_temp, ierr) call MatConvert(temp_mat_two, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) ! If i == 1 then we know mat_product is the identity and we don't bother ! to write it out, we just have some custom code in the product given its trivial if (i /= 1) then ! This ensures it has the matching sparsity + call MatDestroy(mat_product_save, ierr) call MatConvert(mat_prod_or_temp, MATSAME, MAT_INITIAL_MATRIX, mat_product_save, ierr) ! This zeros mat_product_save and then puts mat_product into the sparsity pattern ! of mat_prod_or_temp @@ -2354,6 +2358,7 @@ subroutine build_gmres_polynomial_newton_inverse_full(matrix, poly_order, coeffi ! We copy out the last part of the product if we're doing this as part of a fixed sparsity multiply if (output_product .AND. .NOT. 
first_complex) then + call MatDestroy(mat_prod_or_temp, ierr) call MatConvert(mat_product, MATSAME, MAT_INITIAL_MATRIX, mat_prod_or_temp, ierr) end if From 994a0514eff6229d0083cab95d0b96efe2193c71 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 15:33:19 +0000 Subject: [PATCH 35/41] Iteration count change for CI --- tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index ed5d6cc..947b1fd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -440,7 +440,7 @@ run_tests_no_load_serial: @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ - -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 6; \ done @echo "Test Newton AIRG on advection for for different orders and fixed sparsity" @set -e; for order in 2 3 4 5 6; do \ From 450d9bb76ddbf924104c0e804ece8f904d751b04 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 16:36:36 +0000 Subject: [PATCH 36/41] Intel CI is still broken with power basis, disable power basis comparison in ex12f_gmres_poly in parallel --- dockerfiles/Dockerfile_intel | 3 ++ tests/ex12f_gmres_poly.F90 | 61 +++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/dockerfiles/Dockerfile_intel b/dockerfiles/Dockerfile_intel index ea51ed6..b7a2ae3 100644 --- a/dockerfiles/Dockerfile_intel +++ b/dockerfiles/Dockerfile_intel @@ -28,6 +28,9 @@ RUN source /opt/intel/oneapi/setvars.sh && \ sed -i '/^run_tests_load_parallel:/,/^run_tests_no_load_serial:/s/-pc_pflareinv_type power/-pc_pflareinv_type arnoldi/g' tests/Makefile && \ sed -i '/^run_tests_no_load_parallel:/,/^run_tests_medium_serial:/s/-pc_air_inverse_type power/-pc_air_inverse_type arnoldi/g' tests/Makefile && \ sed -i '/^run_tests_no_load_parallel:/,/^run_tests_medium_serial:/s/-pc_pflareinv_type power/-pc_pflareinv_type arnoldi/g' tests/Makefile && \ + echo "Disabling power basis in parallel test ex12f_gmres_poly in tests/Makefile" && \ + sed -i '/^run_tests_load_parallel:/,/^run_tests_no_load_serial:/s/ex12f_gmres_poly/ex12f_gmres_poly -no_power/g' tests/Makefile && \ + sed -i '/^run_tests_no_load_parallel:/,/^run_tests_medium_serial:/s/ex12f_gmres_poly/ex12f_gmres_poly -no_power/g' tests/Makefile && \ make -j2 && make -j2 check && \ make -j2 tests diff --git a/tests/ex12f_gmres_poly.F90 b/tests/ex12f_gmres_poly.F90 index dc597bb..c0d188e 100644 --- a/tests/ex12f_gmres_poly.F90 +++ b/tests/ex12f_gmres_poly.F90 @@ -9,7 +9,7 @@ program main PetscErrorCode ierr PetscInt m,n,mlocal,nlocal - PetscBool flg + PetscBool flg, check, no_power PetscReal norm_power, norm_rhs, norm_arnoldi, norm_newton PetscReal :: norm_diff_one, norm_diff_two Vec x,b,u, b_diff_type @@ -33,6 +33,12 @@ program main & PETSC_NULL_CHARACTER,'-f',f,flg,ierr) call PetscViewerBinaryOpen(PETSC_COMM_WORLD,f,FILE_MODE_READ, & & fd,ierr) + no_power = PETSC_FALSE + ! Our CI has an intel pipeline and the intel MPI breaks with the power basis + ! so we can disable the power basis test with a command line option + call PetscOptionsGetBool(PETSC_NULL_OPTIONS,PETSC_NULL_CHARACTER, & + '-no_power', check,flg,ierr) + if (flg) no_power = check call MatCreate(PETSC_COMM_WORLD,A,ierr) call MatLoad(A,fd,ierr) @@ -85,16 +91,15 @@ program main call VecNorm(b,NORM_2,norm_rhs,ierr) ! 
~~~~~~~~~~~~~ - ! Do a solve with the power basis + ! Do a solve with the Arnoldi basis ! ~~~~~~~~~~~~~ call KSPCreate(PETSC_COMM_WORLD,ksp,ierr) call KSPSetOperators(ksp,A,A,ierr) call KSPGetPC(ksp, pc, ierr) call PCSetType(pc, PCAIR, ierr) - call PCAIRSetInverseType(pc, PFLAREINV_POWER, ierr) + call PCAIRSetInverseType(pc, PFLAREINV_ARNOLDI, ierr) call KSPSetPC(ksp, pc, ierr) call KSPSetFromOptions(ksp,ierr) - call VecSet(x, 0d0, ierr) call KSPSolve(ksp,b,x,ierr) call KSPGetConvergedReason(ksp,reason,ierr) @@ -104,25 +109,27 @@ program main ! Compute the residual call MatMult(A,x,u,ierr) call VecAXPY(u,-1d0,b,ierr) - call VecNorm(u,NORM_2,norm_power,ierr) - norm_power = norm_power/norm_rhs + call VecNorm(u,NORM_2,norm_arnoldi,ierr) + norm_arnoldi = norm_arnoldi/norm_rhs ! ~~~~~~~~~~~~~ - ! Now do a solve with the Arnoldi basis + ! Now do a solve with the Power basis ! ~~~~~~~~~~~~~ - call PCAIRSetInverseType(pc, PFLAREINV_ARNOLDI, ierr) - - call VecSet(x, 0d0, ierr) - call KSPSolve(ksp,b,x,ierr) - call KSPGetConvergedReason(ksp,reason,ierr) - if (reason%v < 0) then - error stop 1 + if (.NOT. no_power) then + call PCAIRSetInverseType(pc, PFLAREINV_POWER, ierr) + + call VecSet(x, 0d0, ierr) + call KSPSolve(ksp,b,x,ierr) + call KSPGetConvergedReason(ksp,reason,ierr) + if (reason%v < 0) then + error stop 1 + end if + ! Compute the residual + call MatMult(A,x,u,ierr) + call VecAXPY(u,-1d0,b,ierr) + call VecNorm(u,NORM_2,norm_power,ierr) + norm_power = norm_power/norm_rhs end if - ! Compute the residual - call MatMult(A,x,u,ierr) - call VecAXPY(u,-1d0,b,ierr) - call VecNorm(u,NORM_2,norm_arnoldi,ierr) - norm_arnoldi = norm_arnoldi/norm_rhs ! ~~~~~~~~~~~~~ ! Now do a solve with the Newton basis @@ -155,13 +162,15 @@ program main print *, "Arnoldi basis residual: ", norm_arnoldi error stop 1 end if - norm_diff_two = abs(norm_arnoldi - norm_power)/norm_arnoldi - if (norm_diff_two > 1e-9) then - print *, "Residuals differ between polynomial bases!", norm_diff_two - print *, "Power basis residual: ", norm_power - print *, "Arnoldi basis residual: ", norm_arnoldi - error stop 1 - end if + if (.NOT. 
no_power) then + norm_diff_two = abs(norm_arnoldi - norm_power)/norm_arnoldi + if (norm_diff_two > 1e-9) then + print *, "Residuals differ between polynomial bases!", norm_diff_two + print *, "Power basis residual: ", norm_power + print *, "Arnoldi basis residual: ", norm_arnoldi + error stop 1 + end if + end if call VecDestroy(b,ierr) call VecDestroy(x,ierr) From dd15fcbcda2455567c0e6ecdb4d15696d0a6b21e Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 22:07:51 +0000 Subject: [PATCH 37/41] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 13a5cff..2719d0d 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,8 @@ PCPFLAREINV contains methods for computing approximate inverses, most of which c | ------------- | -- | ------------- | -- | | power | PFLAREINV_POWER | GMRES polynomial, applied as a mononomial, with coefficients computed with a power basis | Yes | | arnoldi | PFLAREINV_ARNOLDI | GMRES polynomial, applied as a mononomial, with coefficients computed with an Arnoldi method | Yes | - | newton | PFLAREINV_NEWTON | GMRES polynomial, applied as a Newton polynomial, with roots computed with an Arnoldi method and with extra roots added for stability | Yes | - | newton_no_extra | PFLAREINV_NEWTON_NO_EXTRA | GMRES polynomial, applied as a Newton polynomial, with roots computed with an Arnoldi method and with no extra roots added | Yes | + | newton | PFLAREINV_NEWTON | GMRES polynomial, applied as a Newton polynomial, with roots computed with an Arnoldi method and with extra roots added for stability | Matrix-free: Yes Assembled: No | + | newton_no_extra | PFLAREINV_NEWTON_NO_EXTRA | GMRES polynomial, applied as a Newton polynomial, with roots computed with an Arnoldi method and with no extra roots added | Matrix-free: Yes Assembled: No | | neumann | PFLAREINV_NEUMANN | Neumann polynomial | Yes | | sai | PFLAREINV_SAI | Sparse approximate inverse | No | | isai | PFLAREINV_ISAI | Incomplete sparse approximate inverse (equivalent to a one-level RAS) | No | From 85a3ab9787c2a7b09cecfe84d965bf62c4a200ea Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 22:09:08 +0000 Subject: [PATCH 38/41] Some of the new tests exposed a bug in the Kokkos GMRES polynomial implementation (unconnected to the Newton form). 
If using higher-order fixed sparsity, the matching during the fixed sparsity matrix-matrix product was not correct for non-local columns of local rows --- src/Gmres_Polyk.kokkos.cxx | 142 +++++++++++++++++++++++++++++-------- 1 file changed, 111 insertions(+), 31 deletions(-) diff --git a/src/Gmres_Polyk.kokkos.cxx b/src/Gmres_Polyk.kokkos.cxx index 071e646..a1279ee 100644 --- a/src/Gmres_Polyk.kokkos.cxx +++ b/src/Gmres_Polyk.kokkos.cxx @@ -67,14 +67,18 @@ PETSC_INTERN void mat_mult_powers_share_sparsity_kokkos(Mat *input_mat, const in Mat *submatrices; // Pull out the nonlocal parts of the input mat we need + const PetscInt *colmap_input_mat; + PetscInt cols_ao_input = 0; if (mpi) { - PetscCallVoid(MatMPIAIJGetSeqAIJ(*input_mat, &mat_local_input, &mat_nonlocal_input, NULL)); + PetscCallVoid(MatMPIAIJGetSeqAIJ(*input_mat, &mat_local_input, &mat_nonlocal_input, &colmap_input_mat)); PetscCallVoid(MatMPIAIJGetSeqAIJ(*mat_sparsity_match, &mat_local_sparsity, &mat_nonlocal_sparsity, &colmap_mat_sparsity_match)); PetscCallVoid(MatGetSize(mat_nonlocal_sparsity, &rows_ao, &cols_ao)); PetscCallVoid(MatGetSize(mat_local_sparsity, &rows_ad, &cols_ad)); - + PetscInt rows_ao_input; + PetscCallVoid(MatGetSize(mat_nonlocal_input, &rows_ao_input, &cols_ao_input)); + // We need to pull out all the columns in the sparsity mat // and the nonlocal rows that correspond to the nonlocal columns // from the input mat @@ -175,6 +179,71 @@ PETSC_INTERN void mat_mult_powers_share_sparsity_kokkos(Mat *input_mat, const in PetscCallVoid(MatSeqAIJGetCSRAndMemType(mat_local_output, &device_local_i_output, &device_local_j_output, &device_local_vals_output, &mtype)); if (mpi) PetscCallVoid(MatSeqAIJGetCSRAndMemType(mat_nonlocal_output, &device_nonlocal_i_output, &device_nonlocal_j_output, &device_nonlocal_vals_output, &mtype)); + // ~~~~~~~~~~~~~~ + // Build a mapping from the input matrix's nonlocal column indices to the + // sparsity matrix's column space ("local" submat column space), which is defined as: + // [0..cols_ad-1] for local columns, [cols_ad..cols_ad+cols_ao-1] for sparsity colmap columns + // + // When doing the matrix-matrix product: + // 1. We need to compare local cols from local rows + // We need to access the local input matrix and we + // can do that directly given local indices are the same + // + // 2. We need to compare nonlocal cols from non-local rows + // We need to access submat for this which now only has the non-local rows in it + // Those will have a "local" column index that matches col_indices_off_proc_array given + // we create it with MatCreateSubMatrices + // + // 3. We need to compare nonlocal cols from local rows + // We need to access the input_matrix for this + // But (for higher order fixed sparsity) the colmap of the input matrix is not the same + // as the colmap of the sparsity matrix + // So below we create a mapping that converts from the input matrix's nonlocal column indices + // to the "local" column indices of the submat (which correspond to the sparsity matrix's column space) for the nonlocal columns + // If there are not matching entries in the sparsity colmap, we use a large sentinel value that will never + // match any col_orig and preserves sorted order. + // + // This mapping is needed because the input matrix and sparsity matrix may have + // different colmaps when poly_sparsity_order >= 2. 
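+    // As a concrete, purely illustrative example (hypothetical values): if cols_ad = 4,
+    // colmap_input_mat = {10, 42, 97} and colmap_mat_sparsity_match = {10, 42, 57, 97},
+    // then the mapping built below is {4, 5, 7}; any input column with no match in the
+    // sparsity colmap would map to the COLMAP_NOT_FOUND sentinel and be skipped later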
+ // ~~~~~~~~~~~~~~ + + // Use a sentinel larger than any valid column index + const PetscInt COLMAP_NOT_FOUND = cols_ad + cols_ao + 1; + + auto input_nonlocal_to_submat_col_d = PetscIntKokkosView("input_nonlocal_to_submat_col_d", mpi ? cols_ao_input : 1); + if (mpi && cols_ao_input > 0) + { + // Build the mapping on the host + // Both colmaps are sorted, so we can do a merge-style scan + auto input_nonlocal_to_submat_col_h = Kokkos::create_mirror_view(input_nonlocal_to_submat_col_d); + PetscInt sparsity_colmap_idx = 0; + for (PetscInt k = 0; k < cols_ao_input; k++) + { + PetscInt global_col = colmap_input_mat[k]; + // Advance the sparsity colmap index (both are sorted) + while (sparsity_colmap_idx < cols_ao && colmap_mat_sparsity_match[sparsity_colmap_idx] < global_col) + { + sparsity_colmap_idx++; + } + if (sparsity_colmap_idx < cols_ao && colmap_mat_sparsity_match[sparsity_colmap_idx] == global_col) + { + input_nonlocal_to_submat_col_h(k) = cols_ad + sparsity_colmap_idx; + } + else + { + // Not found — use sentinel value that preserves sort order + // Since colmap_input is sorted and colmap_sparsity is sorted, + // if an entry is missing it's between two found entries, + // so we assign a value that maintains monotonicity + input_nonlocal_to_submat_col_h(k) = COLMAP_NOT_FOUND; + } + } + Kokkos::deep_copy(input_nonlocal_to_submat_col_d, input_nonlocal_to_submat_col_h); + // Log copy with petsc + bytes = input_nonlocal_to_submat_col_h.extent(0) * sizeof(PetscInt); + PetscCallVoid(PetscLogCpuToGpu(bytes)); + } + // ~~~~~~~~~~~~~~ // Find maximum non-zeros per row for sizing scratch memory // ~~~~~~~~~~~~~~ @@ -358,8 +427,11 @@ PETSC_INTERN void mat_mult_powers_share_sparsity_kokkos(Mat *input_mat, const in while (idx_col_of_row_i < ncols_row_i && idx_col_of_row_j < ncols_row_of_col_j) { // The col_target is the column we are trying to match in the row of column j - // We always convert it to the "local" indexing as if it were in the columns of the submat, ie - // the column indexing of [local cols; local cols + 0:cols_ao-1] + // We convert everything to the submat "local" column space for comparison, ie + // the column indexing of [0..cols_ad-1 for local cols; cols_ad+k for sparsity colmap[k]] + // When the input matrix and sparsity matrix have different colmaps + // (poly_sparsity_order >= 2), we use the input_nonlocal_to_submat_col_d mapping + // to convert the input matrix's nonlocal column indices to the sparsity colmap space PetscInt col_target; if (row_of_col_j_local) { @@ -369,8 +441,11 @@ PETSC_INTERN void mat_mult_powers_share_sparsity_kokkos(Mat *input_mat, const in } else { - // Convert to "local" column index of submat by adding cols_ad - col_target = device_nonlocal_j_input[device_nonlocal_i_input[row_of_col_j] + idx_col_of_row_j - local_cols_row_of_col_j] + cols_ad; + // This is the case where we need to access non-local columns in local rows of input_matrix + // and hence we need our mapping + // Convert nonlocal column index from input matrix's colmap space + // to the to "local" column index of submat + col_target = input_nonlocal_to_submat_col_d(device_nonlocal_j_input[device_nonlocal_i_input[row_of_col_j] + idx_col_of_row_j - local_cols_row_of_col_j]); } } else @@ -390,40 +465,45 @@ PETSC_INTERN void mat_mult_powers_share_sparsity_kokkos(Mat *input_mat, const in // Convert to "local" column index of submat by adding cols_ad col_orig = device_nonlocal_j_sparsity[device_nonlocal_i_sparsity[i] + (idx_col_of_row_i - local_cols_row_i)] + cols_ad; } - - if (col_orig < 
col_target) { - // Original column is smaller, move to next original column - idx_col_of_row_i++; - } else if (col_orig > col_target) { - // Target column is smaller, move to next target column + + // Skip entries where the input column doesn't exist in the sparsity pattern + if (col_target == COLMAP_NOT_FOUND) { idx_col_of_row_j++; - // We've found a matching index and hence we can do our compute } else { - - PetscReal val_target; - if (row_of_col_j_local) - { - if (idx_col_of_row_j < local_cols_row_of_col_j) + if (col_orig < col_target) { + // Original column is smaller, move to next original column + idx_col_of_row_i++; + } else if (col_orig > col_target) { + // Target column is smaller, move to next target column + idx_col_of_row_j++; + // We've found a matching index and hence we can do our compute + } else { + + PetscReal val_target; + if (row_of_col_j_local) { - val_target = device_local_vals_input[device_local_i_input[row_of_col_j] + idx_col_of_row_j]; + if (idx_col_of_row_j < local_cols_row_of_col_j) + { + val_target = device_local_vals_input[device_local_i_input[row_of_col_j] + idx_col_of_row_j]; + } + else + { + val_target = device_nonlocal_vals_input[device_nonlocal_i_input[row_of_col_j] + idx_col_of_row_j - local_cols_row_of_col_j]; + } } else { - val_target = device_nonlocal_vals_input[device_nonlocal_i_input[row_of_col_j] + idx_col_of_row_j - local_cols_row_of_col_j]; + val_target = device_submat_vals[device_submat_i[row_of_col_j] + idx_col_of_row_j]; } - } - else - { - val_target = device_submat_vals[device_submat_i[row_of_col_j] + idx_col_of_row_j]; - } - // Has to be atomic! Potentially lots of contention so maybe not - // the most performant way to do this - Kokkos::atomic_add(&vals_temp[idx_col_of_row_i], vals_prev[j] * val_target); + // Has to be atomic! 
Potentially lots of contention so maybe not + // the most performant way to do this + Kokkos::atomic_add(&vals_temp[idx_col_of_row_i], vals_prev[j] * val_target); - // Move forward in both arrays - idx_col_of_row_i++; - idx_col_of_row_j++; + // Move forward in both arrays + idx_col_of_row_i++; + idx_col_of_row_j++; + } } } }); From 9ddf8ee7f43347147cbbb46061d24a543363a708 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 22:22:18 +0000 Subject: [PATCH 39/41] Iteration count for test CI --- tests/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Makefile b/tests/Makefile index 947b1fd..51f7921 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -662,7 +662,7 @@ run_tests_no_load_parallel: @set -e; for order in 0 1 2 3 4 5 6; do \ echo "--- Testing order = $$order ---"; \ $(MPIEXEC) -n 2 ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order $$order \ - -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 5; \ + -pc_air_inverse_sparsity_order $$order -ksp_norm_type unpreconditioned -ksp_max_it 6; \ done @echo "Test Newton AIRG on advection for for different orders and fixed sparsity in parallel" @set -e; for order in 2 3 4 5 6; do \ From d8f00222cab6bb079af3ea6a7eaa52ed04177754 Mon Sep 17 00:00:00 2001 From: sdargavi Date: Tue, 10 Feb 2026 23:53:41 +0000 Subject: [PATCH 40/41] Test that both matrix-free smoothing and assembled (to compute the restrictor) work with Newton form --- tests/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/Makefile b/tests/Makefile index 51f7921..f86f49c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -156,6 +156,9 @@ run_tests_load_serial: ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 120 -pc_pflareinv_matrix_free -ksp_norm_type unpreconditioned -ksp_max_it 5 @echo "Test Newton GMRES polynomials order 120 with fixed sparsity with added roots in market matrix problem 1138" ./ex6 -b_in_f 0 -f data/1138_bus -pc_type pflareinv -pc_pflareinv_type newton -pc_pflareinv_order 120 -ksp_norm_type unpreconditioned -ksp_max_it 5 +# + @echo "Test Newton AIRG with GMRES polynomials for hyperbolic streaming problem, matrix-free smoothing" + ./ex12f -f data/mat_stream_2364 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -pc_air_matrix_free_polys -ksp_max_it 5 # ~~~~~~~~~~~ # ~~~~~~~~~~~ @@ -212,6 +215,9 @@ run_tests_load_parallel: $(MPIEXEC) -n 2 ./ex12f_gmres_poly -f data/mat_stream_2364 -pc_air_poly_order $$order -pc_air_inverse_sparsity_order $$sparsity; \ done; \ done +# + @echo "Test Newton AIRG with GMRES polynomials for hyperbolic streaming problem, matrix-free smoothing in parallel" + $(MPIEXEC) -n 2 ./ex12f -f data/mat_stream_2364 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -pc_air_matrix_free_polys -ksp_max_it 5 # ~~~~~~~~~~~ # ~~~~~~~~~~~ From eb88540dcdba7eeb754f701be0c5f3e39e1d9ebf Mon Sep 17 00:00:00 2001 From: sdargavi Date: Wed, 11 Feb 2026 00:08:04 +0000 Subject: [PATCH 41/41] Enable diagonal detection for Newton GMRES polynomial --- src/AIR_MG_Setup.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/AIR_MG_Setup.F90 b/src/AIR_MG_Setup.F90 index f35bdfa..5893e45 100644 --- a/src/AIR_MG_Setup.F90 +++ b/src/AIR_MG_Setup.F90 @@ -426,8 +426,6 @@ subroutine setup_air_pcmg(amat, pmat, air_data, pcmg_input) ! Convert Aff to a matdiagonal type ! Haven't rewritten some inverse types to take advantage of matdiagonal if (aff_diag .AND. 
& - inverse_type_aff /= PFLAREINV_NEWTON .AND. & - inverse_type_aff /= PFLAREINV_NEWTON_NO_EXTRA .AND. & inverse_type_aff /= PFLAREINV_SAI .AND. & inverse_type_aff /= PFLAREINV_ISAI) then
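
A quick way to exercise both code paths enabled by this series, mirroring the test targets added above (illustrative invocations only; the grid sizes, matrix files and iteration counts are simply the ones those tests use):

  # Assembled Newton-basis GMRES polynomial with fixed sparsity
  ./adv_diff_2d -da_grid_x 50 -da_grid_y 50 -pc_type air -pc_air_inverse_type newton -pc_air_poly_order 2 -pc_air_inverse_sparsity_order 2 -ksp_norm_type unpreconditioned -ksp_max_it 6

  # Matrix-free Newton-basis polynomial smoothing on the hyperbolic streaming problem
  ./ex12f -f data/mat_stream_2364 -pc_air_a_drop 1e-3 -pc_air_inverse_type newton -pc_air_matrix_free_polys -ksp_max_it 5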