diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..a00ad8b
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,141 @@
+---
+# Rust-style formatting for C++17 (ExaConstit)
+Language: Cpp
+Standard: c++17
+
+# Line length matching rustfmt default
+ColumnLimit: 100
+
+# Indentation - Rust uses 4 spaces everywhere
+IndentWidth: 4
+TabWidth: 4
+UseTab: Never
+ContinuationIndentWidth: 4
+AccessModifierOffset: -4
+IndentCaseLabels: false
+IndentCaseBlocks: false
+IndentGotoLabels: false
+IndentPPDirectives: None
+IndentExternBlock: NoIndent
+IndentWrappedFunctionNames: false
+
+# Braces - Rust style (same line for functions/structs)
+BreakBeforeBraces: Custom
+BraceWrapping:
+ AfterCaseLabel: false
+ AfterClass: false
+ AfterControlStatement: Never
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterStruct: false
+ AfterUnion: false
+ AfterExternBlock: false
+ BeforeCatch: false
+ BeforeElse: false
+ BeforeLambdaBody: false
+ BeforeWhile: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+
+# Function and control flow formatting
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AllowShortBlocksOnASingleLine: Never
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortLambdasOnASingleLine: Empty
+
+# Alignment - keep things clean like Rust
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: None
+AlignConsecutiveDeclarations: None
+AlignConsecutiveMacros: None
+AlignEscapedNewlines: Left
+AlignOperands: Align
+AlignTrailingComments: true
+
+# Pointer and reference alignment (Rust style: left-aligned with type)
+PointerAlignment: Left
+ReferenceAlignment: Left
+DerivePointerAlignment: false
+
+# Spacing
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpaceBeforeSquareBrackets: false
+
+# Line breaking
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: Yes
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: BeforeColon
+BreakInheritanceList: BeforeColon
+BreakStringLiterals: true
+
+# Bin packing and arguments
+BinPackArguments: false
+BinPackParameters: false
+
+# Constructor initializer and inheritance
+ConstructorInitializerIndentWidth: 4
+CompactNamespaces: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MaxEmptyLinesToKeep: 1
+
+# Include sorting - ExaConstit headers first, then TPLs, then stdlib
+# NOTE: clang-format will alphabetize within each priority group
+SortIncludes: CaseSensitive
+IncludeBlocks: Regroup
+IncludeCategories:
+ # ExaConstit project headers (quoted includes from project directories)
+ - Regex: '^"(boundary_conditions|fem_operators|mfem_expt|models|options|postprocessing|sim_state|solvers|system_driver|umats|utilities)/'
+ Priority: 1
+ - Regex: '^"(system_driver)'
+ Priority: 1
+ # Third-party library headers (RAJA, mfem, ecmech, snls, caliper)
+ - Regex: '^"(RAJA|mfem|ecmech|snls|caliper)'
+ Priority: 2
+ # Standard C++ library (angle brackets, no file extension)
+ - Regex: '^<[a-z_]+>$'
+ Priority: 3
+ # C standard library (angle brackets with .h extension)
+ - Regex: '^<[a-z_]+\.h>$'
+ Priority: 3
+ # Catch-all for anything else
+ - Regex: '.*'
+ Priority: 99
+
+# Namespace formatting
+NamespaceIndentation: None
+FixNamespaceComments: true
+
+# Penalties (for breaking decisions)
+PenaltyBreakAssignment: 100
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+
+# Misc
+ReflowComments: true
+SortUsingDeclarations: true
\ No newline at end of file
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..77413db
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1,3 @@
+# .git-blame-ignore-revs
+# First clang-format run so lots of changes...
+fec3a715e3c8c9e86a92c03f8e6501ad58bbe68f
\ No newline at end of file
diff --git a/.github/workflows/build-ecmech/action.yml b/.github/workflows/build-ecmech/action.yml
index 7b605f4..f876fbe 100644
--- a/.github/workflows/build-ecmech/action.yml
+++ b/.github/workflows/build-ecmech/action.yml
@@ -17,7 +17,7 @@ runs:
steps:
- name: Install ECMech
run: |
- git clone --single-branch --branch v0.4.1 --depth 1 ${{ inputs.ecmech-repo }} ${{ inputs.ecmech-dir }};
+ git clone --single-branch --branch v0.4.3 --depth 1 ${{ inputs.ecmech-repo }} ${{ inputs.ecmech-dir }};
cd ${{ inputs.ecmech-dir }};
git submodule init;
git submodule update;
diff --git a/.github/workflows/build-mfem/action.yml b/.github/workflows/build-mfem/action.yml
index 8a2c2f8..7eaa887 100644
--- a/.github/workflows/build-mfem/action.yml
+++ b/.github/workflows/build-mfem/action.yml
@@ -39,6 +39,7 @@ runs:
-DMFEM_USE_CUDA=OFF \
-DMFEM_USE_OPENMP=OFF \
-DMFEM_USE_RAJA=ON -DRAJA_DIR=${{ inputs.raja-dir }} \
+ -DCMAKE_CXX_STANDARD=17 \
-DCMAKE_BUILD_TYPE=Release
make -j3;
make install;
diff --git a/.github/workflows/build-raja/action.yml b/.github/workflows/build-raja/action.yml
index 81e597d..9964924 100644
--- a/.github/workflows/build-raja/action.yml
+++ b/.github/workflows/build-raja/action.yml
@@ -14,7 +14,7 @@ runs:
steps:
- name: Install RAJA
run: |
- git clone --single-branch --branch v2024.07.0 --depth 1 ${{ inputs.raja-repo }} ${{ inputs.raja-dir }};
+ git clone --single-branch --branch v2025.09.1 --depth 1 ${{ inputs.raja-repo }} ${{ inputs.raja-dir }};
cd ${{ inputs.raja-dir }};
git submodule init;
git submodule update;
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 723c1e9..6ff47f4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -53,7 +53,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install numpy
+ pip install numpy pandas toml
# Only get MPI if defined for the job.
# TODO: It would be nice to have only one step, e.g. with a dedicated
@@ -61,9 +61,12 @@ jobs:
- name: get MPI (Linux)
if: matrix.mpi == 'parallel' && matrix.os == 'ubuntu-latest'
run: |
- sudo apt-get install mpich libmpich-dev
- export MAKE_CXX_FLAG="MPICXX=mpic++"
-
+ sudo apt-get install openmpi-bin libopenmpi-dev
+ # Set MPI oversubscription globally for the job
+ echo "OMPI_MCA_rmaps_base_oversubscribe=1" >> $GITHUB_ENV
+ echo "OMPI_MCA_btl_vader_single_copy_mechanism=none" >> $GITHUB_ENV
+ echo "OMPI_MCA_btl_base_warn_component_unused=0" >> $GITHUB_ENV
+ # export MAKE_CXX_FLAG="MPICXX=mpic++"
# Get RAJA through cache, or build it.
# Install will only run on cache miss.
- name: cache raja
@@ -72,7 +75,7 @@ jobs:
uses: actions/cache@v4
with:
path: ${{ env.RAJA_TOP_DIR }}
- key: ${{ runner.os }}-build-${{ env.RAJA_TOP_DIR }}-v2.02
+ key: ${{ runner.os }}-build-${{ env.RAJA_TOP_DIR }}-v2.03
- name: get raja
if: matrix.mpi == 'parallel' && steps.raja-cache.outputs.cache-hit != 'true'
@@ -88,7 +91,7 @@ jobs:
uses: actions/cache@v4
with:
path: ${{ env.ECMECH_TOP_DIR }}
- key: ${{ runner.os }}-build-${{ env.ECMECH_TOP_DIR }}-v2.02
+ key: ${{ runner.os }}-build-${{ env.ECMECH_TOP_DIR }}-v2.04
- name: get ecmech
if: matrix.mpi == 'parallel' && steps.ecmech-cache.outputs.cache-hit != 'true'
@@ -140,7 +143,7 @@ jobs:
uses: actions/cache@v4
with:
path: ${{ env.MFEM_TOP_DIR }}
- key: ${{ runner.os }}-build-${{ env.MFEM_TOP_DIR }}-v2.04
+ key: ${{ runner.os }}-build-${{ env.MFEM_TOP_DIR }}-v2.06
- name: install mfem
if: matrix.mpi == 'parallel' && steps.mfem-cache.outputs.cache-hit != 'true'
@@ -164,4 +167,13 @@ jobs:
- name: cmake unit tests
if: matrix.build-system == 'cmake'
run: |
- cd ${{ github.workspace }}/build/ && ctest --output-on-failure
\ No newline at end of file
+ cd ${{ github.workspace }}/build/
+ # Ensure MPI settings are active
+ export OMPI_MCA_rmaps_base_oversubscribe=1
+ export OMPI_MCA_btl_vader_single_copy_mechanism=none
+ # Run tests with better output
+ ctest --output-on-failure --verbose
+ env:
+ OMPI_MCA_rmaps_base_oversubscribe: 1
+ OMPI_MCA_btl_vader_single_copy_mechanism: none
+ OMPI_MCA_btl_base_warn_component_unused: 0
\ No newline at end of file
diff --git a/README.md b/README.md
index c64e1c9..37d2d5a 100644
--- a/README.md
+++ b/README.md
@@ -1,112 +1,444 @@
-# ExaConstit App
+
-Updated: Feb. 6, 2025
+```
+███████╗██╗ ██╗ █████╗ ██████╗ ██████╗ ███╗ ██╗███████╗████████╗██╗████████╗
+██╔════╝╚██╗██╔╝██╔══██╗██╔════╝██╔═══██╗████╗ ██║██╔════╝╚══██╔══╝██║╚══██╔══╝
+█████╗ ╚███╔╝ ███████║██║ ██║ ██║██╔██╗ ██║███████╗ ██║ ██║ ██║
+██╔══╝ ██╔██╗ ██╔══██║██║ ██║ ██║██║╚██╗██║╚════██║ ██║ ██║ ██║
+███████╗██╔╝ ██╗██║ ██║╚██████╗╚██████╔╝██║ ╚████║███████║ ██║ ██║ ██║
+╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝ ╚═╝ ╚═╝ ╚═╝
+```
+
+**High-Performance Crystal Plasticity & Micromechanics Simulation**
+
+*Velocity-based finite element framework for polycrystalline materials*
+
+[Quick Start](#quick-start) • [Documentation](#documentation) • [Examples](#examples) • [Contributing](#contributing)
+
+
+
+---
+
+## What is ExaConstit?
+
+ExaConstit is a **velocity-based finite element code** for high-fidelity simulation of polycrystalline materials. Built on LLNL's MFEM library, it targets high-performance crystal plasticity and micromechanics modeling on leadership-class HPC systems.
+
+### Key Applications
+- **Crystal Plasticity Simulations** - Grain-level deformation analysis
+- **Bulk Constitutive Properties** - Homogenization of polycrystalline materials
+- **Additive Manufacturing** - Process-structure-property relationships
+- **Experimental Validation** - Lattice strain calculations for diffraction experiments
+
+## Features
+
+### **Advanced Finite Element Framework**
+- **Velocity-Based Formulation** - Updated Lagrangian with superior convergence
+- **Multi-Material Support** - Heterogeneous material regions
+- **Adaptive Time Stepping** - Automatic timestep control for robustness
+
+### **Crystal Plasticity Modeling**
+- **ExaCMech Integration** - Advanced crystal plasticity constitutive models
+- **Multi-Crystal Support** - BCC, FCC, and HCP crystal structures
+- **Grain-Level Resolution** - Individual grain orientations and properties
+- **State Variable Evolution** - Full history-dependent material behavior
+
+### **High-Performance Computing**
+- **GPU Acceleration** - CUDA and HIP support for maximum performance
+- **MPI Parallelization** - Scales to tens of thousands of processors
+- **Memory Efficiency** - Matrix-free partial assembly algorithms
+- **Performance Portability** - RAJA framework for unified CPU/GPU code
+
+### **Material Model Flexibility**
+- **ExaCMech Library** - State-of-the-art crystal plasticity models
+- **UMAT Interface** - Abaqus-compatible user material subroutines
+- **Custom Models** - Extensible architecture for new constitutive laws
+- **Multi-Model Regions** - Different materials in different regions
+
+### **Advanced Post-Processing**
+- **Visualization Output** - VisIt, ParaView, and ADIOS2 support
+- **Volume Averaging** - Macroscopic stress-strain behavior and other useful parameters
+- **Lattice Strain Analysis** - In-situ diffraction experiment simulation
+- **Python Tools** - Comprehensive analysis and plotting scripts
+
+## Quick Start
+
+### Prerequisites
+```bash
+# Essential dependencies
+MPI implementation (OpenMPI, MPICH, Intel MPI)
+MFEM (v4.8+) with parallel/GPU support
+ExaCMech (v0.4.3+) crystal plasticity library
+RAJA (≥2024.07.x) performance portability
+CMake (3.24+)
+```
+
+### Installation
-Version 0.8.0
+ExaConstit provides automated installation scripts for different platforms. For detailed instructions, see [Installation Guide](docs/install.md).
-# Description:
-A principal purpose of this code app is to probe the deformation response of polycrystalline materials; for example, in homogenization to obtain bulk constitutive properties of metals. This is a nonlinear quasi-static, implicit solid mechanics code built on the MFEM library based on an updated Lagrangian formulation (velocity based).
-
-Currently, only Dirichlet boundary conditions (homogeneous and inhomogeneous by degree-of-freedom component) have been implemented. Neumann (traction) boundary conditions and a body force are not implemented. These Dirichlet boundary conditions can applied per surface/boundary using either the use of applied velocity or applied velocity gradients (constant strain) boundary conditions. One can also mix and match the use of those two boundary condition types across the various boundaries of the problem in order to more complicated material deformations such as pure torsion. Additionally, we support changing boundary conditions. So, it's possible to run cyclic, strain-rate jump tests, or a number of other type simulations.
+#### Install Scripts
-On the material modelling front of things, ExaConstit can easily handle various material models. We provide a base class, `ExaModel`, to build each material model or class of material models. We currently support two very different material model libraries/interfaces through UMATs or ExaCMech. Crystal plasticity model capabilities are primarily provided through the ExaCMech library.
+**Linux (Intel CPU)**
+```bash
+./scripts/install/unix_cpu_intel_install.sh
+```
+
+**macOS**
+```bash
+./scripts/install/unix_cpu_mac_install.sh
+```
+
+**NVIDIA GPU (CUDA)**
+```bash
+./scripts/install/unix_gpu_cuda_install.sh
+```
+
+**AMD GPU (HIP/ROCm)**
+```bash
+./scripts/install/unix_gpu_hip_install.sh
+```
+
+#### Before First Run
+
+⚠️ **You must customize the build configuration for your system.**
+
+Edit the appropriate config file in `scripts/install/configs/` and update:
+- Compiler paths and versions
+- MPI installation location
+- Python executable path
+- Module load commands (HPC systems)
-Through the ExaCMech library, we are able to offer a range of crystal plasticity models that can run on the GPU. The current models that are available are a power law slip kinetic model with both nonlinear and linear variations of a voce hardening law for BCC and FCC materials, and a single Kocks-Mecking dislocation density hardening model with balanced thermally activated slip kinetics with phonon drag effects for BCC, FCC, and HCP materials. Any future model types to the current list are a simple addition within ExaConstit, but they will need to be implemented within ExaCMech. Given the templated structure of ExaCMech, some additions would be comparatively straightforward.
+See the [Installation Guide](docs/install.md) for detailed setup instructions.
-The code is capable of running on the GPU by making use of either a partial assembly formulation (no global matrix formed) or element assembly (only element assembly formed) of our typical FEM code. These methods currently only implement a simple matrix-free jacobi preconditioner. The MFEM team is currently working on other matrix-free preconditioners. Additionally, ExaConstit can be built to run with either CUDA or HIP-support in-order to run on most GPU-capable machines out there.
+#### Build Options
+```bash
+# Clean rebuild
+REBUILD=ON ./scripts/install/unix_gpu_cuda_install.sh
-The code supports constant time steps, user-supplied variable time steps, or automatically calculated time steps. Boundary conditions are supplied for the velocity field on a surface. The code supports a number of different preconditioned Krylov iterative solvers (PCG, GMRES, MINRES) for either symmetric or nonsymmetric positive-definite systems. We also support either a newton raphson or newton raphson with a line search for the nonlinear solve. We might eventually look into supporting a nonlinear solver such as L-BFGS as well.
+# Target specific GPU architecture
+CMAKE_GPU_ARCHITECTURES=80 ./scripts/install/unix_gpu_cuda_install.sh
-Finally, we support being able to make use of full integration or BBar type integration schemes to be used with various models. The default feature is to perform full integration of the element at the quadrature point. The BBar integration performs full integration of the deviatoric response with an element average integration for the volume response. The BBar method is based on the work given in [this paper](https://doi.org/10.1002/nme.1620150914) and more specifically we make use of Eq 23. It should be noted that currently we don't support a partial assembly formulation for the BBar integrations.
+# Adjust parallel jobs
+MAKE_JOBS=16 ./scripts/install/unix_cpu_intel_install.sh
+```
+**Note for MI300A users:** Set `HSA_XNACK=1` before running simulations.
-## Remark:
-This code is still very much actively being developed. It should be expected that breaking changes can and will occur. So, we make no guarantees about stability at this point in time. Any available release should be considered stable but may be lacking several features of interest that are found in the ```exaconstit-dev``` branch.
+For troubleshooting, manual builds, and advanced configuration, see the [Installation Guide](docs/install.md).
-Currently, the code has been tested using monotonic and cyclic loading with either an auto-generated mesh that has been instantiated with grain data from some voxel data set or meshes formed from ```MFEM v1.0```. Meshes produced from Neper can also be used but do require some additional post-processing. See the ```Script``` section for ways to accomplishing this.
+#### **Manual Build**
+```bash
+# Clone and prepare
+git clone https://github.com/LLNL/blt.git cmake/blt
+mkdir build && cd build
-ExaCMech models are capable of running on the GPU. However, we currently have no plans for doing the same for UMAT-based kernels. The ExaCMech material class can be used as a guide for how to do the necessary set-up, material kernel, and post-processing step if a user would like to expand the UMAT features and submit a pull request to add the capabilities into ExaConstit.
+# Configure
+cmake .. \
+ -DENABLE_MPI=ON \
+ -DMFEM_DIR=${MFEM_INSTALL_DIR} \
+ -DECMECH_DIR=${EXACMECH_INSTALL_DIR} \
+ -DRAJA_DIR=${RAJA_INSTALL_DIR}
-See the included ```options.toml``` to see all of the various different options that are allowable in this code and their default values.
+# Build
+make -j $(nproc)
+```
-A TOML parser has been included within this directory, since it has an MIT license. The repository for it can be found at: https://github.com/ToruNiina/toml11/.
+### First Simulation
+```bash
+# Run a crystal plasticity example
+cd test/data
+mpirun -np 4 ../../build/mechanics -opt voce_full.toml
-Example UMATs maybe obtained from https://web.njit.edu/~sac3/Software.html . We have not included them due to a question of licensing. The ones that have been run and are known to work are the linear elasticity model and the neo-Hookean material. The ```umat_tests``` subdirectory in the ```src``` directory can be used as a guide for how to convert your own UMATs over to one with which ExaConstit can interface.
+# Generate stress-strain plots
+python ../../scripts/postprocessing/macro_stress_strain_plot.py
+```
-Note: the grain.txt, props.txt and state.txt files are expected inputs for crystal-plasticity problems. If a mesh is provided it should be in the MFEM or cubit format which has the grains IDs already assigned to the element attributes.
+## Examples
-# Scripts
-Useful scripts are provided within the ```scripts``` directory. The ```mesh_generator``` executable when generated can create an ```MFEM v1.0``` mesh for auto-generated mesh when provided a grain ID file. It is also capable of taking in a ```vtk``` mesh file that MFEM is capable of reading, and then it will generate the appropriate ```MFEM v1.0``` file format with the boundary element attributes being generated in the same way ExaConstit expects them. The ```vtk``` mesh currently needs to be a rectilinear mesh in order to work. All of the options for ```mesh_generator``` can be viewed by running ```./mesh_generator --help```
+### **Crystal Plasticity Simulation**
+```toml
+# options.toml - Crystal plasticity configuration
+grain_file = "grain.txt"
+orientation_file = "orientations.txt"
-If you have version 4 of ```Neper``` then you can make use of the `-faset 'faces'` option while meshing and output things as a `gmsh` v2.2 file. Afterwards, you can make use of the `neper_v4_mesh.py` cli script in `scripts/meshing` to automatically use the faset information and autogenerate the boundary attributes that `MFEM\ExaConstit` can understand and use. Although, you will need to check and see which face corresponds to what boundary attribute, so you can correctly apply boundary conditions to the body. Further information is provided in the top level comment of the script for how to do this.
+[Mesh]
+filename = "polycrystal.mesh"
-For older versions of neper v2-v3, an additional python script is provided called ```fepx2mfem_mesh.py``` that provides a method to convert from a mesh generated using Neper v3.5.2 in the FEpX format into the ```vtk``` format that can now be converted over to the ```MFEM v1.0``` format using the ```mesh_generator``` script.
+[Materials]
+[[Materials.regions]]
+material_name = "titanium_alloy"
+mech_type = "ExaCMech"
-# Examples
+[Materials.regions.model.ExaCMech]
+shortcut = "evptn_HCP_A"
-Several small examples that you can run are found in the ```test/data``` directory. These examples cover a wide range of different use cases of the code, but the `toml` file for each test case may not be representative of all the options as found in the `src/options.toml` file.
+```
-# Postprocessing
+### **Post-Processing Workflow**
+```bash
+# Extract stress-strain data
+python scripts/postprocessing/macro_stress_strain_plot.py output/
-The ```scripts/postprocessing``` directory contains several useful post-processing tools. The ```macro_stress_strain_plot.py``` file can be used to generate macroscopic stress strain plots. An example script ```adios2_example.py``` is provided as example for how to make use of the ```ADIOS2``` post-processing files if ```MFEM``` was compiled with ```ADIOS2``` support. It's highly recommended to install ```MFEM``` with this library if you plan to be doing a lot of post-processing of data in python.
+# Calculate lattice strains (experimental validation)
+python scripts/postprocessing/calc_lattice_strain.py \
+ --config lattice_strain_config.json
-A set of scripts to perform lattice strain calculations similar to those found in powder diffraction type experiments can be found in the ```scripts/postprocessing``` directory. The appropriate python scripts are: `adios2_extraction.py`, `strain_Xtal_to_Sample.py`, and `calc_lattice_strain.py`. In order to use these scripts, one needs to run with the `light_up=true` option set in the `Visualization` table of your simulation option file. Alternatively, if you just use the `light_up` option and provide the necessary parameters as defined in the `src/options.toml` file you a set of insitu lattice strain calculations will be done. The cost of these insitu calculations is fairly nominal and are generally advisable to use when performing large scale simulations where this data is desireable.
+# Generate visualization files
+python scripts/postprocessing/adios2_example.py results.bp
+```
-# Workflow Examples
+## Output and Visualization
+
+### **Version 0.9 Output Updates**
+ExaConstit v0.9 introduces significant improvements to output management and file organization:
+
+#### **Modern Configuration Support**
+- **Legacy compatibility**: Previous option file formats continue to work
+- **Conversion utility**: Use our conversion script to migrate to the modern TOML format:
+ ```bash
+ python scripts/exaconstit_old2new_options.py old_options.toml -o new_options.toml
+ ```
+
+#### **Enhanced Output Files**
+- **Headers included**: All simulation output files now contain descriptive headers
+- **Time and volume data**: Automatically included in all output files, so the separate auto_dt_file has been removed
+- **Improved format**: Enhanced data organization (note: the format differs from previous versions)
+- **Basename-based directories**: Output location is determined by the `basename` and `Postprocessing.Projections.output_directory` settings in the options file
+ ```toml
+  # If not provided, defaults to the option file's name
+ basename = "exaconstit" # Creates output sub-directory: exaconstit/
+ ```
+
+#### **Advanced Visualization Control**
+- **Backward compatibility**: Visualization files remain compatible with previous versions
+- **User-friendly naming**: Visualization variable names updated for better clarity
+- **Selective field output**: Specify exactly which fields to save (new capability):
+ ```toml
+ [PostProcessing.projections]
+ # Some of these values are only compatible with ExaCMech
+ enabled_projections = ["stress", "von_mises", "volume", "centroid", "dpeff", "elastic_strain"]
+  # If set to true, all compatible projections are enabled by default
+ auto_enable_compatible = false
+ ```
+
+### **Migration Guide for Existing Users**
+- **Existing simulations**: Previous option files work without modification
+- **Output processing**: Update post-processing scripts to handle new file headers
+- **Directory structure**: Account for new basename-based output organization
+- **Visualization workflows**: Existing VisIt/ParaView workflows remain functional
+
+## Advanced Features
+
+### **Mesh Generation & Processing**
+- **Auto-Generated Meshes** - From grain ID files
+- **Neper Integration** - v4 mesh processing with boundary detection
+- **Format Conversion** - VTK to MFEM
+- **Boundary Attributes** - Automatic boundary labelling
+
+#### **Mesh Generator Utility**
+The `mesh_generator` executable provides flexible mesh creation and conversion:
+```bash
+# Create MFEM mesh from grain ID file
+./mesh_generator --grain_file grains.txt --output polycrystal.mesh
+
+# Convert VTK mesh to MFEM format with boundary attributes
+./mesh_generator --vtk_input mesh.vtk --output converted.mesh
+
+# View all options
+./mesh_generator --help
+```
-We've provided several different useful workflows in the `workflows` directory. One is an optimization set of scripts that makes use of a genetic algorithm to optimize material parameters based on experimental results. Internally, it makes use of either a simple workflow manager for something like a workstation or it can leverage the python bindings to the Flux job queue manager created initially by LLNL to run on large HPC systems.
+**Capabilities**:
+- **Auto-generated meshes** from grain ID files
+- **VTK to MFEM conversion** with automatic boundary attribute generation
+- **Boundary attributes** generated to be compatible with ExaConstit's requirements
-The other workflow is based on a UQ workflow for metal additive manufacturing that was developed as part of the ExaAM project. You can view the open short workshop paper for an overview of the ExaAM project's workflow and the results https://doi.org/10.1145/3624062.3624103 . This workflow connects microstructures provided by an outside code such as LLNL's ExaCA code (https://github.com/LLNL/ExaCA) or other sources such as nf-HEDM methods to local properties to be used by a part scale application code. The goal here is to utilize ExaConstit to run a ton of simulations rather than experiments in order to obtain data that can be used to parameterize macroscopic material models such as an anisotropic yield surface.
+#### **Neper Integration**
+**For Neper v4 users**:
+```bash
+# Generate mesh with face information
+neper -M n100-id1.tess -faset 'faces' -format gmsh2.2
-# Installing Notes:
+# Convert to ExaConstit format
+python scripts/meshing/neper_v4_mesh.py input.msh output.mesh
+```
-* git clone the LLNL BLT library into cmake directory. It can be obtained at https://github.com/LLNL/blt.git
-* MFEM will need to be built with hypre v2.26.0-v2.30.0; metis5; RAJA v2022.x+; and optionally Conduit, ADIOS2, or ZLIB.
- * Conduit and ADIOS2 supply output support. ZLIB allows MFEM to read in gzip mesh files or save data as being compressed.
- * You'll need to use the exaconstit-dev branch of MFEM found on this fork of MFEM: https://github.com/rcarson3/mfem.git
- * We do plan on upstreaming the necessary changes needed for ExaConstit into the master branch of MFEM, so you'll no longer be required to do this
- * Version 0.8.0 of ExaConstit is compatible with the following mfem hash:
- 31b42daa3cdddeff04ce3f59befa769b262facd7
- or
- 29a8e15382682babe0f5c993211caa3008e1ec96
- * Version 0.7.0 of Exaconstit is compatible with the following mfem hash 78a95570971c5278d6838461da6b66950baea641
- * Version 0.6.0 of ExaConstit is compatible with the following mfem hash 1b31e07cbdc564442a18cfca2c8d5a4b037613f0
- * Version 0.5.0 of ExaConstit required 5ebca1fc463484117c0070a530855f8cbc4d619e
- * ExaCMech is required for ExaConstit to be built and can be obtained at https://github.com/LLNL/ExaCMech.git and now requires the develop branch. ExaCMech depends internally on SNLS, from https://github.com/LLNL/SNLS.git. We depend on v0.4.1 of ExaCMech as of this point in time.
- * GPU-builds of ExaCMech >= v0.4.1 and thus ExaConstit now require the RAJA Portability Suite (RAJA, Umpire, and CHAI) to compile and run on the GPU. We currently leverage the `v2024.07.0` tag for all of the RAJA Portability Suite repos.
- * For versions of ExaCMech >= 0.3.3, you'll need to add `-DENABLE_SNLS_V03=ON` to the cmake commands as a number of cmake changes were made to that library and SNLS.
-* RAJA is required for ExaConstit to be built and should be the same one that ExaCMech and MFEM are built with. It can be obtained at https://github.com/LLNL/RAJA. Currently, RAJA >= 2022.10.x is required for ExaConstit due to a dependency update in MFEMv4.5.
-* An example install bash script for unix systems can be found in ```scripts/install/unix_install_example.sh```. This is provided as an example of how to install ExaConstit and its dependencies, but it is not guaranteed to work on every system. A CUDA version of that script is also included in that folder (`unix_gpu_cuda_install_example.sh`), and only minor modifications are required if using a version of Cmake >= 3.18.*. In those cases ```CUDA_ARCH``` has been changed to ```CMAKE_CUDA_ARCHITECTURES```. You'll also need to look up what you're CUDA architecture compute capability is set to and modify that within the script. Currently, it is set to ```sm_70``` which is associated with the Volta architecture. We also have a HIP version included in that folder (`unix_gpu_cuda_install_example.sh`). It's based on a LLNL El Capitan-like system build of things so things might need tweaking for other AMD GPU machines.
+**For Neper v2-v3 users**:
+```bash
+# Convert FEpX format to VTK
+python scripts/meshing/fepx2mfem_mesh.py fepx_mesh.txt vtk_mesh.vtk
+# Then use mesh_generator for final conversion
+./mesh_generator --vtk_input vtk_mesh.vtk --output final.mesh
+```
-* Create a build directory and cd into there
-* Run ```cmake .. -DENABLE_MPI=ON -DENABLE_FORTRAN=OFF -DMFEM_DIR{mfem's installed cmake location} -DBLT_SOURCE_DIR=${BLT cloned location if not located in cmake directory} -DECMECH_DIR=${ExaCMech installed cmake location} -DRAJA_DIR={RAJA installed location} -DSNLS_DIR={SNLS installed cmake location}```
-* Run ```make -j 4```
+#### **Required Input Files for Crystal Plasticity**
+When setting up crystal plasticity simulations, you need the following input files (the file names themselves are configurable):
+##### **Essential Files**
+- **`grain.txt`**: Element-to-grain ID mapping (one ID per element)
+- **`props.txt`**: Material parameters for each grain type/material
+- **`state.txt`**: Initial internal state variables (typically zeros)
+- **`orientations.txt`**: Crystal orientations (quaternions)
+- **`regions.txt`**: Grain-to-region ID mapping
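+
+The orientations file stores one unit quaternion per grain. As a point of reference, below is a minimal sketch of turning such a quaternion into a rotation matrix; the component ordering (scalar-first here) and the active/passive convention are assumptions, so check them against `src/utilities/rotations.hpp` before relying on this.
+
+```cpp
+#include <array>
+
+// Unit quaternion (w, x, y, z) -> 3x3 rotation matrix.
+// Sketch only; ExaConstit's own quaternion conventions may differ.
+std::array<std::array<double, 3>, 3> quat_to_rotation(const std::array<double, 4>& q) {
+    const double w = q[0], x = q[1], y = q[2], z = q[3];
+    return {{
+        {1 - 2 * (y * y + z * z), 2 * (x * y - w * z),     2 * (x * z + w * y)},
+        {2 * (x * y + w * z),     1 - 2 * (x * x + z * z), 2 * (y * z - w * x)},
+        {2 * (x * z - w * y),     2 * (y * z + w * x),     1 - 2 * (x * x + y * y)}
+    }};
+}
+```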
-# Future Implemenations Notes:
-
-* Multiple phase materials
-* Commonly used post-processing tools either through Python or C++ code
-# Contributors:
-* Robert A. Carson (Principal Developer)
- * carson16@llnl.gov
+##### **Mesh Requirements**
+- **Format**: MFEM v1.0 or Cubit format
+- **Grain IDs**: Must be assigned to element attributes in the mesh
+- **Boundary attributes**: Required for boundary condition application
-* Nathan Barton
+### **Experimental Integration**
+- **Lattice Strain Calculations** - Powder diffraction simulation
+- **In-Situ Analysis** - Real-time lattice strain monitoring
+- **Microstructure Coupling** - Integration with ExaCA and other tools
-* Steven R. Wopschall (initial contributions)
+#### **Stress-Strain Analysis**
+```bash
+# Generate macroscopic stress-strain plots
+python scripts/postprocessing/macro_stress_strain_plot.py
+```
-* Jamie Bramwell (initial contributions)
+#### **Lattice Strain Analysis**
+Simulate powder diffraction experiments with in-situ lattice strain calculations:
+```bash
+# Extract lattice strain data from ADIOS2 files
+python scripts/postprocessing/adios2_extraction.py
-# CONTRIBUTING
+# Transform crystal strains to sample coordinates
+python scripts/postprocessing/strain_Xtal_to_Sample.py
-ExaConstit is distributed under the terms of the BSD-3-Clause license. All new contributions must be made under this license.
+# Calculate lattice strains for specific HKL directions
+python scripts/postprocessing/calc_lattice_strain.py
+```
-# Citation
-If you're using ExaConstit and would like to cite us please use the below `bibtex` entry. Additionally, we would love to be able to point to ExaConstit's use in the literature and elsewhere so feel free to message us with a link to your work as Google Scholar does not always pick up the below citation. We can then list your work among the others that have used our code.
+**Enable lattice strain output** in your simulation:
+```toml
+[Visualizations]
+light_up = true # Enables in-situ lattice strain calculations
+# Configure specific HKL directions and parameters in options.toml
```
+
+##### **ADIOS2 Integration**
+For large-scale data analysis (recommended for extensive post-processing):
+```bash
+# Example ADIOS2 data processing
+python scripts/postprocessing/adios2_example.py
+
+# Requires MFEM built with ADIOS2 support
+```
+
+### **Materials Science Workflows**
+
+#### **Parameter Optimization**
+Multi-objective genetic algorithm-based optimization for material parameter identification:
+```bash
+# Optimize material parameters against experimental data
+cd workflows/optimization/
+python ExaConstit_NSGA3.py
+```
+
+**Features**:
+- **Flux integration**: Leverage LLNL's Flux job manager for HPC systems
+- **Workstation support**: Simple workflow manager for desktop systems
+- **Multi-objective optimization**: Fit multiple experimental datasets simultaneously
+
+#### **Uncertainty Quantification (UQ)**
+ExaAM integration for additive manufacturing applications:
+```bash
+# UQ workflow for process-structure-property relationships
+cd workflows/Stage3/pre_main_post_script
+python chal_prob_full.py
+```
+
+**Applications**:
+- **Microstructure-property linkage**: Connect ExaCA microstructures to mechanical properties
+- **Part-scale modeling**: Generate data for macroscopic material model parameterization
+- **Process optimization**: Optimize additive manufacturing parameters
+- **Anisotropic yield surface**: Development from polycrystal simulations
+
+**Academic Reference**: [ExaAM UQ Workflow Paper](https://doi.org/10.1145/3624062.3624103)
+
+## Documentation
+
+### **Getting Started**
+- [Developer's Guide](developers_guide.md) - Complete development documentation
+- [Configuration Reference](src/options.toml) - All available simulation options
+
+### **Scientific Background**
+- **Crystal Plasticity Theory** - Micromechanics fundamentals
+- **Finite Element Implementation** - Velocity-based formulation details
+- **GPU Acceleration** - Performance optimization strategies
+
+### **Tutorials & Examples**
+- **Basic Simulations** - Simple deformation tests
+- **Complex Loading** - Cyclic and multiaxial loading
+- **Multi-Material Problems** - Composite and layered materials
+- **Experimental Validation** - Lattice strain analysis
+
+## Ecosystem & Integration
+
+### **Related LLNL Projects**
+- **[ExaCMech](https://github.com/LLNL/ExaCMech)** - Crystal plasticity constitutive models
+- **[ExaCA](https://github.com/LLNL/ExaCA)** - Cellular automata code for alloy nucleation and solidification
+- **[MFEM](https://mfem.org)** - Finite element methods library
+- **ExaAM** - Exascale Computing Project effort on additive manufacturing for process-structure-property calculations
+
+### **Third-Party Tools**
+- **Neper** - Polycrystal mesh generation
+- **VisIt/ParaView** - Visualization and analysis
+- **ADIOS2** - High-performance I/O
+- **Python Ecosystem** - NumPy, SciPy, Matplotlib integration
+
+## Performance & Scalability
+
+### **Benchmarks**
+- **CPU Performance** - Scales to 1000+ MPI processes
+- **GPU Acceleration** - 15-25x speedup on V100 or MI250x/MI300a systems
+- **Memory Efficiency** - Matrix-free algorithms reduce memory footprint
+- **I/O Performance** - ADIOS2 integration for petascale data management
+
+### **Optimization Features**
+- **Partial Assembly** - Matrix-free operator evaluation
+- **Device Memory Management** - Automatic host/device transfers
+- **Communication Optimization** - Minimal MPI collective operations
+
+## Contributing
+
+We welcome contributions from the materials science and computational mechanics communities!
+
+### **Development**
+```bash
+# Fork the repository and create a feature branch
+git checkout -b feature/amazing-new-capability
+
+# Make your changes with comprehensive tests
+# Follow our C++17 coding standards
+
+# Submit a pull request with detailed description
+```
+
+### **Contribution Areas**
+- **Material Models** - New constitutive relationships
+- **Boundary Conditions** - Extended loading capabilities such as Neumann BCs or periodic BCs
+- **Post-Processing** - Analysis and visualization tools
+- **Performance** - GPU optimization and scalability
+- **Documentation** - Tutorials and examples
+
+### **Getting Help**
+- **GitHub Issues** - Bug reports and feature requests
+- **Discussions** - Technical questions and community support
+- **Documentation** - Comprehensive guides and API reference
+
+## License & Citation
+
+ExaConstit is distributed under the **BSD-3-Clause license**. All contributions must be made under this license.
+
+### **Citation**
+If you use ExaConstit in your research, please cite the entry below. We would also love to point to ExaConstit's use in the literature and elsewhere, so feel free to message us with a link to your work, as Google Scholar does not always pick up this citation. We can then list your work among the others that have used our code.
+
+```bibtex
@misc{ exaconstit,
title = {{ExaConstit}},
author = {Carson, Robert A. and Wopschall, Steven R. and Bramwell, Jamie A.},
@@ -123,10 +455,28 @@ annote = {
}
```
-# LICENSE
+### LICENSE
License is under the BSD-3-Clause license. See [LICENSE](LICENSE) file for details. And see also the [NOTICE](NOTICE) file.
`SPDX-License-Identifier: BSD-3-Clause`
``LLNL-CODE-793434``
+
+## Core Team
+
+### **Lawrence Livermore National Laboratory**
+- **Robert A. Carson** (Principal Developer) - carson16@llnl.gov
+- **Nathan Barton** - Initial Development
+- **Steven R. Wopschall** - Initial Development
+- **Jamie Bramwell** - Initial Development
+
+---
+
+
+
+**Built at Lawrence Livermore National Laboratory**
+
+*Advancing materials science through high-performance computing*
+
+
\ No newline at end of file
diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake
index 6e36490..6473f8d 100644
--- a/cmake/CMakeBasics.cmake
+++ b/cmake/CMakeBasics.cmake
@@ -4,7 +4,7 @@
set(PACKAGE_BUGREPORT "carson16@llnl.gov")
set(EXACONSTIT_VERSION_MAJOR 0)
-set(EXACONSTIT_VERSION_MINOR 8)
+set(EXACONSTIT_VERSION_MINOR 9)
set(EXACONSTIT_VERSION_PATCH \"0\")
set(HEADER_INCLUDE_DIR
diff --git a/cmake/ExaConstitOptions.cmake b/cmake/ExaConstitOptions.cmake
index a9908d6..0ba1d29 100644
--- a/cmake/ExaConstitOptions.cmake
+++ b/cmake/ExaConstitOptions.cmake
@@ -10,12 +10,3 @@ option(ENABLE_CUDA "Enable CUDA" OFF)
option(ENABLE_HIP "Enable HIP" OFF)
option(ENABLE_OPENMP "Enable OpenMP" OFF)
-
-option(ENABLE_SNLS_V03 "Enable building library with v0.3.0+ of SNLS" OFF)
-
-# Force atleast static if user turns off both
-# if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
-# message("Both static and shared libaries were disabled."
-# "Building static libraries re-enabled.")
-# set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static libraries" FORCE)
-# endif(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
diff --git a/cmake/blt b/cmake/blt
index fb4246b..e783e30 160000
--- a/cmake/blt
+++ b/cmake/blt
@@ -1 +1 @@
-Subproject commit fb4246b8bae74c3d7291bef9698fd38863844680
+Subproject commit e783e30f2823ee1a208f7f90741b41c1f5a08063
diff --git a/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake b/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake
index d38ab81..bf0df07 100644
--- a/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake
+++ b/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake
@@ -8,7 +8,8 @@ set(_tpls
snls
exacmech
mfem
- caliper)
+ caliper
+ threads)
foreach(_tpl ${_tpls})
string(TOUPPER ${_tpl} _uctpl)
@@ -138,4 +139,69 @@ if (DEFINED CALIPER_DIR)
endif()
else()
message("Caliper support disabled")
+endif()
+
+################################
+# Threads (platform-specific)
+################################
+
+set(EXACONSTIT_THREADS_EXPLICIT_LINK FALSE CACHE INTERNAL "Whether explicit thread linking is required")
+
+if(UNIX AND NOT APPLE)
+ find_package(Threads REQUIRED)
+ include(CheckCXXSourceCompiles)
+
+ # Test 1: Basic thread support without any flags
+ set(CMAKE_REQUIRED_LIBRARIES_SAVE ${CMAKE_REQUIRED_LIBRARIES})
+ set(CMAKE_REQUIRED_LIBRARIES "")
+
+ check_cxx_source_compiles("
+    #include <atomic>
+    #include <condition_variable>
+    #include <mutex>
+    #include <thread>
+    int main() {
+        std::atomic<int> counter{0};
+        std::mutex m;
+        std::condition_variable cv;
+
+        std::thread t([&]{
+            std::unique_lock<std::mutex> lock(m);
+            counter++;
+            cv.notify_one();
+        });
+
+        t.join();
+        return counter.load();
+    }" THREADS_IMPLICIT_LINK)
+
+ # Test 2: If implicit didn't work, verify explicit works
+ if(NOT THREADS_IMPLICIT_LINK)
+ set(CMAKE_REQUIRED_LIBRARIES Threads::Threads)
+ check_cxx_source_compiles("
+        #include <thread>
+ int main() {
+ std::thread t([]{});
+ t.join();
+ return 0;
+ }" THREADS_EXPLICIT_WORKS)
+
+ if(NOT THREADS_EXPLICIT_WORKS)
+ message(FATAL_ERROR "Threading support not functional even with explicit linking!")
+ endif()
+ endif()
+
+ # Restore
+ set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES_SAVE})
+
+ # Register if needed
+ if(NOT THREADS_IMPLICIT_LINK)
+ message(STATUS " Result: Explicit pthread linking REQUIRED")
+ set(EXACONSTIT_THREADS_EXPLICIT_LINK TRUE CACHE INTERNAL "Whether explicit thread linking is required")
+ else()
+ message(STATUS " Result: pthread implicitly linked (no action needed)")
+ endif()
+
+elseif(APPLE)
+ message(STATUS "Threads support built-in on macOS")
endif()
\ No newline at end of file
diff --git a/developers_guide.md b/developers_guide.md
new file mode 100644
index 0000000..ca152e6
--- /dev/null
+++ b/developers_guide.md
@@ -0,0 +1,784 @@
+# ExaConstit Developer's Guide
+
+## Table of Contents
+1. [Introduction](#introduction)
+2. [Prerequisites](#prerequisites)
+3. [Installation](#installation)
+4. [Dependency Version Compatibility](#dependency-version-compatibility)
+5. [Codebase Overview](#codebase-overview)
+6. [Source Directory Structure](#source-directory-structure)
+7. [Key Components](#key-components)
+8. [Configuration System](#configuration-system)
+9. [Advanced Solver Configuration](#advanced-solver-configuration)
+10. [Building and Testing](#building-and-testing)
+11. [Development Workflow](#development-workflow)
+12. [UMAT Development Resources](#umat-development-resources)
+13. [Contributing Guidelines](#contributing-guidelines)
+
+## Introduction
+
+ExaConstit is a high-performance, velocity-based, updated Lagrangian finite element code for nonlinear solid mechanics problems with a focus on micromechanics modeling. Built on the MFEM library, it specializes in crystal plasticity simulations and bulk constitutive property determination for polycrystalline materials.
+
+**Key Features:**
+- Velocity-based updated Lagrangian formulation
+- Crystal plasticity and micromechanics modeling
+- GPU acceleration with CUDA/HIP support
+- MPI parallelization for HPC systems
+- Integration with ExaCMech material library
+- UMAT interface support
+- Advanced post-processing capabilities
+
+## Prerequisites
+
+### Required Knowledge
+- **C++17**: Modern C++ standards and best practices
+- **Finite Element Method (FEM)**: Theory and implementation
+- **Solid Mechanics**: Nonlinear mechanics, crystal plasticity
+- **Numerical Methods**: Newton-Raphson, Krylov iterative solvers
+- **Parallel Computing**: MPI, OpenMP, GPU programming concepts
+
+### System Requirements
+- C++17 compatible compiler (GCC 7+, Clang 5+, Intel 19+)
+- MPI implementation (OpenMPI, MPICH, Intel MPI)
+- CMake 3.24 or higher
+- Git for version control
+
+## Installation
+
+### Quick Start
+For detailed installation instructions, refer to the build scripts in `scripts/install/`:
+
+- **Linux/Unix**: `scripts/install/unix_install_example.sh`
+- **GPU (CUDA)**: `scripts/install/unix_gpu_cuda_install_example.sh`
+- **GPU (HIP/AMD)**: `scripts/install/unix_gpu_hip_install_example.sh`
+
+### Dependencies
+
+**Core Dependencies:**
+- **MFEM** (v4.8+): Finite element library with parallel/GPU support
+- **ExaCMech** (v0.4.3+): Crystal plasticity constitutive model library
+- **RAJA** (≥2024.07.x): Performance portability framework
+- **UMPIRE** (≥2024.07.x): (GPU-only) Memory management library
+- **CHAI** (≥2024.07.x): (GPU-only) Managed array abstraction library
+- **BLT**: LLNL build system
+- **SNLS**: Nonlinear solver library
+
+**Optional Dependencies:**
+- **ADIOS2**: (MFEM-based) High-performance I/O for visualization
+- **Caliper**: Performance profiling
+
+### Basic Build Process
+```bash
+git submodule init && git submodule update
+
+# Create build directory
+mkdir build && cd build
+
+# Configure
+cmake .. \
+ -DENABLE_MPI=ON \
+ -DENABLE_FORTRAN=OFF \
+ -DMFEM_DIR=${MFEM_INSTALL_DIR} \
+ -DECMECH_DIR=${EXACMECH_INSTALL_DIR} \
+ -DRAJA_DIR=${RAJA_INSTALL_DIR} \
+ -DSNLS_DIR=${SNLS_INSTALL_DIR}
+
+# Build
+make -j 4
+```
+
+## Dependency Version Compatibility
+
+### **MFEM Requirements**
+ExaConstit requires a specific MFEM development branch with ExaConstit-specific features:
+
+#### **Current Requirements**
+- **Repository**: https://github.com/rcarson3/mfem.git
+- **Branch**: `exaconstit-dev`
+- **Version Dependencies**:
+  - **v0.9.0**: Compatible with MFEM hash `a6bb7b7c2717e991b52ad72460f212f7aec1173e`
+ - **v0.8.0**: Compatible with MFEM hashes `31b42daa3cdddeff04ce3f59befa769b262facd7` or `29a8e15382682babe0f5c993211caa3008e1ec96`
+ - **v0.7.0**: Compatible with MFEM hash `78a95570971c5278d6838461da6b66950baea641`
+ - **v0.6.0**: Compatible with MFEM hash `1b31e07cbdc564442a18cfca2c8d5a4b037613f0`
+ - **v0.5.0**: Required MFEM hash `5ebca1fc463484117c0070a530855f8cbc4d619e`
+
+#### **MFEM Build Requirements**
+```bash
+# Required dependencies for MFEM
+cmake .. \
+ -DMFEM_USE_MPI=ON \
+ -DMFEM_USE_METIS_5=ON \
+ -DMFEM_USE_HYPRE=ON \ # v2.26.0-v2.30.0
+ -DMFEM_USE_RAJA=ON \ # v2022.x+
+ -DMFEM_USE_ADIOS2=ON \ # Optional: high-performance I/O
+ -DMFEM_USE_ZLIB=ON # Optional: compressed mesh support
+```
+
+**Note**: Future releases will integrate these changes into the MFEM master branch, eliminating the need for the development fork.
+
+### **ExaCMech Version Requirements**
+- **Repository**: https://github.com/LLNL/ExaCMech.git
+- **Branch**: `develop` (required)
+- **Version**: v0.4.3+ required
+- **SNLS Dependency**: https://github.com/LLNL/SNLS.git
+
+### **RAJA Portability Suite**
+For GPU builds of ExaCMech >= v0.4.3:
+
+#### **Required Components**
+- **RAJA**: Performance portability framework
+- **Umpire**: Memory management
+- **CHAI**: Array abstraction
+
+#### **Version Requirements**
+- **Tag**: `v2024.07.0` for all RAJA Portability Suite repositories
+- **Important**: All RAJA suite components (RAJA, Umpire, CHAI) must use matching versions
+- **Minimum RAJA**: v2024.07.0
+- **Note**: Version mismatch between RAJA components can cause build failures or runtime errors
+
+### **Additional Dependencies**
+- **HYPRE**: v2.26.0 - v2.30.0 (algebraic multigrid / various preconditioners)
+- **METIS**: Version 5 (mesh partitioning)
+- **ADIOS2**: Optional (high-performance parallel I/O)
+- **ZLIB**: Optional (compressed mesh and data support)
+
+## Codebase Overview
+
+ExaConstit follows a modular architecture designed for extensibility and performance:
+
+```
+ExaConstit/
+├── src/ # Main source code
+├── test/ # Test cases and examples
+├── scripts/ # Build scripts and utilities
+├── workflows/ # Optimization and UQ workflows
+└── cmake/ # Build system configuration
+```
+
+### Design Philosophy
+- **Modularity**: Clear separation of concerns between FEM operators, material models, and solvers
+- **Performance**: GPU acceleration and memory-efficient algorithms
+- **Extensibility**: Plugin architecture for material models and boundary conditions
+- **Standards**: Modern C++17 practices and comprehensive documentation
+
+## Source Directory Structure
+
+The `src/` directory contains the core ExaConstit implementation organized into modular components:
+
+### Primary Files
+- **`mechanics_driver.cpp`**: Main application entry point and simulation orchestration
+- **`system_driver.hpp/cpp`**: Core driver class managing the Newton-Raphson solution process
+- **`userumat.h`**: Interface definitions for UMAT material model integration
+
+### Key Directories
+
+#### `boundary_conditions/`
+**Purpose**: Boundary condition management and enforcement
+- **`BCData.hpp/cpp`**: Data structures for boundary condition storage
+- **`BCManager.hpp/cpp`**: Boundary condition application and management
+
+**Key Features**:
+- Dirichlet velocity and velocity gradient boundary conditions
+- Time-dependent BC scaling and ramping
+- Component-wise BC application for selective spatial directions
+- Support for multiple BC regions with different behaviors
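+
+As a concrete illustration of component-wise application, here is a minimal sketch that constrains only one velocity component on one boundary via MFEM's public `GetEssentialTrueDofs` API; the surrounding object names are illustrative, not ExaConstit's actual call sites.
+
+```cpp
+#include "mfem.hpp"
+
+// Sketch: constrain only the z-component (index 2) of the velocity field on
+// boundary attribute 3. Object names here are illustrative.
+void mark_component_bcs(mfem::ParFiniteElementSpace& fes,
+                        mfem::Array<int>& ess_tdof_list) {
+    mfem::Array<int> ess_bdr(fes.GetParMesh()->bdr_attributes.Max());
+    ess_bdr = 0;
+    ess_bdr[2] = 1; // boundary attribute 3 (attribute numbering is 1-based)
+    // The optional third argument restricts the constraint to one component.
+    fes.GetEssentialTrueDofs(ess_bdr, ess_tdof_list, 2);
+}
+```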
+
+#### `fem_operators/`
+**Purpose**: Finite element operators and integration routines
+- **`mechanics_operator.hpp/cpp`**: Nonlinear mechanics operator implementation
+- **`mechanics_operator_ext.hpp/cpp`**: Extended operator functionality
+- **`mechanics_integrators.hpp/cpp`**: Element-level integration kernels
+
+**Key Features**:
+- Element assembly (EA) and partial assembly (PA) modes
+- B-bar integration for near-incompressible materials
+- GPU-optimized kernel implementations
+- Matrix-free operator evaluation
+
+#### `models/`
+**Purpose**: Material constitutive model interface and implementations
+- **`mechanics_model.hpp/cpp`**: Abstract base class `ExaModel` interface
+- **`mechanics_ecmech.hpp/cpp`**: ExaCMech crystal plasticity integration
+- **`mechanics_umat.hpp/cpp`**: UMAT interface implementation
+- **`mechanics_multi_model.hpp/cpp`**: Multi-region material management
+
+**Supported Models**:
+- ExaCMech crystal plasticity (FCC, BCC, HCP)
+- User-defined UMAT subroutines
+- Multi-material region support
+
+#### `options/`
+**Purpose**: Configuration file parsing and option management
+- **`option_parser_v2.hpp/cpp`**: Modern TOML-based parser
+- **`option_material.cpp`**: Material configuration parsing
+- **`option_mesh.cpp`**: Mesh and geometry options
+- **`option_boundary_conditions.cpp`**: BC configuration parsing
+- **`option_solvers.cpp`**: Linear and nonlinear solver settings
+- **`option_time.cpp`**: Time-stepping parameters
+- **`option_post_processing.cpp`**: Post-processing configuration
+- **`option_enum.cpp`**: Enumeration type conversions
+- **`option_util.hpp`**: Utility functions for option parsing
+
+**Features**:
+- Backward compatibility with legacy formats
+- Hierarchical configuration structure
+- Comprehensive validation and error reporting
+
+#### `postprocessing/`
+**Purpose**: Output management and field calculations
+- **`postprocessing_driver.hpp/cpp`**: Main post-processing orchestration
+- **`postprocessing_file_manager.hpp`**: File I/O and directory management
+- **`projection_class.hpp/cpp`**: Field projection operations
+- **`mechanics_lightup.hpp/cpp`**: Lattice strain calculations
+
+**Capabilities**:
+- Volume-averaged stress/strain/deformation gradient
+- Lattice strain calculations for diffraction experiments
+- Visualization output (VisIt, ParaView, ADIOS2)
+- Structured output file organization
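+
+For orientation, volume averaging reduces to a quadrature-weighted sum. A minimal sketch follows, with an assumed flat Voigt storage layout rather than ExaConstit's actual data structures:
+
+```cpp
+#include <array>
+#include <cstddef>
+#include <vector>
+
+// Volume-weighted average of a symmetric stress field stored per quadrature
+// point. `stress` holds 6 Voigt components per point; `det_jxw` holds each
+// point's quadrature weight times Jacobian determinant (its volume share).
+std::array<double, 6> average_stress(const std::vector<double>& stress,
+                                     const std::vector<double>& det_jxw) {
+    std::array<double, 6> avg{};
+    double volume = 0.0;
+    for (std::size_t q = 0; q < det_jxw.size(); ++q) {
+        volume += det_jxw[q];
+        for (int c = 0; c < 6; ++c) {
+            avg[c] += stress[6 * q + c] * det_jxw[q];
+        }
+    }
+    for (int c = 0; c < 6; ++c) { avg[c] /= volume; }
+    return avg;
+}
+```
+
+In a parallel run, the partial sums would additionally be reduced across MPI ranks before the final division.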
+
+#### `sim_state/`
+**Purpose**: Simulation state management and field storage
+- **`simulation_state.hpp/cpp`**: Central state container class
+
+**Manages**:
+- Finite element spaces and mesh data
+- Solution vectors (displacement, velocity)
+- Material properties and state variables
+- Time-stepping information
+
+#### `solvers/`
+**Purpose**: Linear and nonlinear solver implementations
+- **`mechanics_solver.hpp/cpp`**: Newton-Raphson solver and variants
+
+**Features**:
+- Standard Newton-Raphson
+- Newton with line search
+- Adaptive step size control
+- Device-aware implementations
+
+#### `umat_tests/`
+**Purpose**: Example UMAT implementations for testing
+- **`umat.f`**: Example Fortran UMAT implementation
+- **`umat.cxx`**: Example C++ UMAT implementation
+- **`userumat.cxx`**: UMAT loader example
+- **`userumat.h`**: UMAT interface definitions
+
+#### `utilities/`
+**Purpose**: Helper functions and utility classes
+- **`mechanics_log.hpp`**: Logging and performance monitoring
+- **`unified_logger.hpp/cpp`**: Unified logging system for all components
+- **`mechanics_kernels.hpp/cpp`**: Computational kernels for mechanics operations
+- **`assembly_ops.hpp`**: Assembly operation utilities
+- **`rotations.hpp`**: Rotation and orientation utilities
+- **`strain_measures.hpp`**: Strain computation utilities
+- **`dynamic_umat_loader.hpp/cpp`**: Runtime UMAT library loading
+
+**Provides**:
+- Performance profiling integration
+- Mathematical operations for mechanics
+- Debugging and diagnostic tools
+- Dynamic loading of user material models
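+
+Runtime loading of UMAT libraries follows the usual POSIX `dlopen`/`dlsym` pattern. A minimal sketch; the symbol name `umat_` and the truncated function signature are placeholders, with the real interface defined in `userumat.h`:
+
+```cpp
+#include <dlfcn.h>
+#include <stdexcept>
+#include <string>
+
+// Illustrative UMAT entry-point type; the real signature is in userumat.h.
+using UmatFn = void (*)(double* stress, double* statev, double* ddsdde /* ... */);
+
+UmatFn load_umat(const std::string& lib_path) {
+    void* handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+    if (!handle) {
+        throw std::runtime_error(std::string("dlopen failed: ") + dlerror());
+    }
+    auto fn = reinterpret_cast<UmatFn>(dlsym(handle, "umat_"));
+    if (!fn) {
+        throw std::runtime_error("symbol 'umat_' not found in " + lib_path);
+    }
+    return fn; // A real loader would also keep `handle` around for dlclose.
+}
+```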
+
+#### `mfem_expt/`
+**Purpose**: MFEM extensions and experimental features
+- **`partial_qspace.hpp/cpp`**: Partial quadrature space implementations
+- **`partial_qfunc.hpp/cpp`**: Partial quadrature function utilities
+
+**Features**:
+- Experimental finite element enhancements
+- Performance optimizations for specific use cases
+- Research and development components
+
+### Organization Principles
+- **Modular Design**: Clear separation between components
+- **Header/Implementation Pairs**: Consistent `.hpp/.cpp` organization
+- **Device Portability**: GPU-aware implementations throughout
+- **Template Usage**: Modern C++17 templates for performance
+- **Namespace Structure**: `exaconstit::` for internal components
+
+## Key Components
+
+### SystemDriver Class
+The `SystemDriver` class orchestrates the entire simulation workflow, managing the Newton-Raphson solution process and coordinating between components.
+
+**Responsibilities**:
+- Newton-Raphson nonlinear solution management
+- Linear solver and preconditioner setup
+- Boundary condition enforcement and updates
+- Material model coordination
+- Solution advancement
+
+**Key Methods**:
+```cpp
+void SystemDriver::Solve(); // Main Newton-Raphson solution
+void SystemDriver::SolveInit(); // Initial corrector step for BC changes
+void SystemDriver::UpdateEssBdr(); // Update essential boundary conditions
+void SystemDriver::UpdateVelocity(); // Apply velocity boundary conditions
+void SystemDriver::UpdateModel(); // Update material models after convergence
+```
+
+### NonlinearMechOperator Class
+The finite element operator extending MFEM's NonlinearForm that provides:
+- Residual evaluation for Newton-Raphson iterations
+- Jacobian computation and assembly
+- Essential DOF management
+- Support for different assembly strategies (PA/EA/FULL)
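+
+Conceptually, the operator only has to satisfy MFEM's `Operator` contract: a residual evaluation and a Jacobian action. A stripped-down sketch (illustrative, not the actual `NonlinearMechOperator`):
+
+```cpp
+#include "mfem.hpp"
+
+// The Newton solver only needs Mult (residual) and GetGradient (Jacobian).
+class MechOperatorSketch : public mfem::Operator {
+public:
+    explicit MechOperatorSketch(int true_size) : mfem::Operator(true_size) {}
+
+    // Residual: y = R(x), assembled from element-level material evaluations.
+    void Mult(const mfem::Vector& x, mfem::Vector& y) const override {
+        y = 0.0;
+        // ... element loop: kinematics -> material model -> internal forces ...
+    }
+
+    // Jacobian action dR/dx, returned as an Operator so PA/EA modes can stay
+    // matrix-free while FULL assembly can hand back a sparse matrix.
+    mfem::Operator& GetGradient(const mfem::Vector& x) const override {
+        return const_cast<MechOperatorSketch&>(*this); // placeholder
+    }
+};
+```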
+
+### SimulationState Class
+Central container managing all simulation data and providing unified access to:
+- Mesh and finite element spaces
+- Solution fields (velocity, displacement)
+- Material properties and state variables
+- Quadrature functions for field data
+- Time-stepping information
+
+### Material Model Interface
+Base class `ExaModel` defines the constitutive model interface:
+```cpp
+// Main execution method for material model computations
+// (parameter types shown are abridged from the ExaModel header in models/)
+virtual void ModelSetup(const int nqpts, const int nelems, const int space_dim,
+                        const int nnodes, const mfem::Vector& jacobian,
+                        const mfem::Vector& loc_grad, const mfem::Vector& vel) = 0;
+
+// Update state variables after a converged solution
+virtual void UpdateModelVars() = 0;
+
+// Get material properties for this region
+const std::vector<double>& GetMaterialProperties() const;
+```
+
+### MultiExaModel Class
+Manages multiple material regions within a single simulation:
+- Coordinates material model execution across regions
+- Routes region-specific data from SimulationState
+- Handles heterogeneous material configurations
+
+### PostProcessingDriver Class
+Manages all output and post-processing operations:
+- Volume averaging calculations (stress, strain, etc.)
+- Field projections for visualization
+- File output management
+- Support for VisIt, ParaView, and ADIOS2
+
+### BCManager Class
+Singleton pattern manager for boundary conditions:
+- Tracks time-dependent boundary condition changes
+- Manages multiple BC types (velocity, velocity gradient)
+- Provides BC data to SystemDriver and operators
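+
+As a singleton, the manager is reached through a static accessor rather than constructed directly. A hedged usage sketch (the accessor name is assumed; check the BCManager header for the exact spelling):
+
+```cpp
+// Assumed accessor name; verify against the BCManager header.
+auto& bc_manager = BCManager::getInstance();
+// Query or update time-dependent BC data for the current step through bc_manager.
+```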
+
+## Configuration System
+
+ExaConstit uses TOML-based configuration files for all simulation parameters:
+
+### Main Configuration File (`options.toml`)
+```toml
+basename = "simulation_name"
+version = "0.9.0"
+
+[Mesh]
+filename = "mesh.mesh"
+refine_serial = 0
+
+[Time.Fixed]
+dt = 1.0e-3
+t_final = 1.0
+
+[Solvers]
+assembly = "ea"
+
+[Solvers.Krylov]
+rel_tol = 1.0e-12
+abs_tol = 1.0e-30
+linear_solver = "CG"
+
+[Materials]
+# Material definitions...
+
+[BCs]
+# Boundary condition specifications...
+```
+
+### Modular Configuration
+- **External material files**: `materials = ["material1.toml", "material2.toml"]`
+- **Grain data files**: `grain_file = "grain.txt"`, `orientation_file = "orientations.txt"`
+
+## Advanced Solver Configuration
+
+### **Assembly Methods**
+ExaConstit supports multiple finite element assembly strategies optimized for different hardware:
+
+#### **Partial Assembly (PA)**
+```toml
+[Solvers]
+assembly = "PA"
+```
+- **Memory efficient**: No global matrix formation
+- **GPU optimized**: Well suited to GPU acceleration, though it typically only pays off at very high orders of p-refinement
+- **Matrix-free**: Jacobian actions computed on-the-fly
+- **Preconditioning**: Currently limited to Jacobi preconditioning
+
+#### **Element Assembly (EA)**
+```toml
+[Solvers]
+assembly = "EA"
+```
+- **Element-level**: Only element matrices formed
+- **Memory balanced**: Modest memory requirements for quadratic-order or lower elements
+- **GPU compatible**: Supports GPU execution
+- **Flexibility**: Suitable for complex material models
+- **Preconditioning**: Currently limited to Jacobi preconditioning
+
+#### **Full Assembly**
+```toml
+[Solvers]
+assembly = "FULL"
+```
+- **Traditional**: Complete global matrix assembly
+- **Preconditioning**: Full preconditioner options available
+- **Memory intensive**: Global sparse matrix storage makes memory the limiting factor for large problems
+- **CPU optimized**: Best initial setup when investigating new material models
+
+### **Integration Schemes**
+
+#### **Default Integration**
+```toml
+[Solvers]
+integ_model = "DEFAULT"
+```
+- **Full integration**: Complete quadrature point evaluation
+- **Standard**: Traditional finite element approach
+- **Most materials**: Suitable for general material models
+
+#### **B-Bar Integration**
+```toml
+[Solvers]
+integ_model = "BBAR"
+```
+- **Mixed formulation**: Fully integrated deviatoric response with an element-averaged volumetric contribution (see the sketch after this list)
+- **Near-incompressible**: Prevents volumetric locking
+- **Advanced**: Based on the Hughes-Brezzi formulation (Equation 23 therein)
+- **Limitation**: Not compatible with partial assembly
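+
+For B-bar-type elements generally (not necessarily the exact operator ExaConstit assembles), the strain-displacement operator is split and its volumetric part replaced by an element average:
+
+$$
+\bar{B}_e = B_e^{\mathrm{dev}} + \frac{1}{V_e} \int_{\Omega_e} B_e^{\mathrm{vol}} \, d\Omega
+$$
+
+Every quadrature point in element $e$ then sees the same volumetric contribution, which is what relieves the locking.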
+
+### **Linear Solver Options**
+
+#### **Krylov Methods**
+```toml
+[Solvers.Krylov]
+linear_solver = "GMRES" # or "CG", "MINRES"
+rel_tol = 1.0e-6
+abs_tol = 1.0e-10
+max_iter = 1000
+```
+
+**GMRES**: Generalized minimal residual
+- **Nonsymmetric systems**: Handles general Jacobian matrices
+- **Memory**: Requires restart for memory management
+- **Robust**: Suitable for challenging material models
+
+**Conjugate Gradient (CG)**:
+- **Symmetric positive definite**: Requires symmetric Jacobian
+- **Memory efficient**: Minimal memory requirements
+- **Fast convergence**: Optimal for appropriate problems
+
+**MINRES**: Minimal residual for symmetric indefinite
+- **Symmetric indefinite**: Handles saddle point problems
+- **Specialized**: Useful for constrained problems
+
+#### **Preconditioner Options**
+```toml
+[Solvers.Krylov]
+preconditioner = "JACOBI" # Assembly-dependent options
+```
+
+**Assembly-Dependent Availability:**
+
+For **PA/EA Assembly**, the options are limited to:
+- **JACOBI**: Diagonal scaling, GPU-compatible (automatic selection)
+
+For **FULL Assembly** (all options available):
+- **JACOBI**: Diagonal scaling
+- **AMG**: Algebraic multigrid
+- **ILU**: Incomplete LU factorization
+- **L1GS**: ℓ¹-scaled Gauss-Seidel
+- **CHEBYSHEV**: Polynomial smoother
+
+**Preconditioner Details:**
+
+**JACOBI** (Diagonal Scaling):
+- **Characteristics**: Simple and fast, works everywhere but slow convergence
+- **Assembly**: Works with PA, EA, and FULL
+- **GPU**: Fully GPU-compatible
+- **Use case**: Default for PA/EA assembly, baseline option
+
+**AMG** (Algebraic Multigrid):
+- **Characteristics**: Fewer iterations but expensive setup, can fail on some problems
+- **Implementation**: HYPRE BoomerAMG
+- **Configuration**: Pre-tuned for 3D elasticity
+- **Use case**: Large-scale problems with single materials
+
+**ILU** (Incomplete LU Factorization):
+- **Characteristics**: Good middle-ground option
+- **Implementation**: HYPRE Euclid
+- **Use case**: Particularly useful for multi-material systems
+- **Try this**: If JACOBI convergence is too slow
+
+**L1GS** (ℓ¹-Scaled Gauss-Seidel):
+- **Characteristics**: Advanced smoother
+- **Implementation**: HYPRE smoother
+- **Use case**: Multi-material systems with contrasting properties
+- **Try this**: When materials have very different stiffness values
+
+**CHEBYSHEV** (Chebyshev Polynomial):
+- **Characteristics**: Polynomial smoother
+- **Implementation**: HYPRE smoother
+- **Use case**: Problems with multiple material scales
+- **Try this**: For heterogeneous material distributions
+
+**Practical Selection Guidelines:**
+
+For **Single Material Problems**:
+- Start with JACOBI (simple, predictable)
+- Try AMG if convergence is slow
+- Use ILU as a reliable alternative
+
+For **Multi-Material Systems**:
+- Start with ILU (good middle-ground)
+- Try L1GS for contrasting material properties
+- Use CHEBYSHEV for multiple material scales
+- AMG may struggle with material interfaces
+
+**Performance Tips**:
+- PA/EA assembly automatically uses JACOBI
+- If JACOBI convergence is too slow with FULL assembly, try ILU → L1GS → CHEBYSHEV
+- AMG has high setup cost but fewer iterations
+- Multi-material systems often benefit from experimenting with different preconditioners
+
+### **Nonlinear Solver Configuration**
+
+#### **Newton-Raphson Variants**
+```toml
+[Solvers.NR]
+nonlinear_solver = "NR" # or "NRLS"
+rel_tol = 1.0e-5
+abs_tol = 1.0e-10
+max_iter = 25
+```
+
+**Standard Newton-Raphson**:
+- **Full steps**: Always takes complete Newton step
+- **Fast convergence**: Quadratic convergence near solution
+- **Robustness**: May fail for poor initial guesses
+
+**Newton with Line Search** (see the sketch below):
+- **Globalization**: Backtracking line search for robustness
+- **Convergence**: Improved convergence from poor starting points
+- **Cost**: Additional function evaluations per iteration
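+
+The backtracking idea in isolation, as an illustrative sketch rather than ExaConstit's exact implementation; `residual_norm` is a hypothetical callback that evaluates the residual norm along the Newton direction, i.e. ||R(x + alpha*dx)||:
+
+```cpp
+#include <functional>
+
+// Returns a damping factor alpha for the Newton update x += alpha * dx.
+double backtracking_alpha(const std::function<double(double)>& residual_norm,
+                          double r0,           // ||R(x)|| at the current iterate
+                          double c = 1.0e-4,   // sufficient-decrease constant
+                          double shrink = 0.5, // step reduction factor
+                          int max_cuts = 10) {
+    double alpha = 1.0; // always try the full Newton step first
+    for (int i = 0; i < max_cuts; ++i) {
+        if (residual_norm(alpha) <= (1.0 - c * alpha) * r0) {
+            return alpha; // sufficient decrease achieved
+        }
+        alpha *= shrink; // cut the step and retry
+    }
+    return alpha; // fall back to the smallest trial step
+}
+```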
+
+## Building and Testing
+
+### Build Configuration Options
+```bash
+# Enable GPU support
+-DENABLE_CUDA=ON
+-DENABLE_HIP=ON
+
+# Enable specific features
+-DENABLE_CALIPER=ON
+```
+
+### Running Tests
+```bash
+# Run example simulations
+cd test/data
+mpirun -np 4 ../../build/mechanics_driver -opt example.toml
+```
+
+### Example Workflows
+The `test/data/` directory contains various example cases:
+- **Crystal plasticity simulations**
+- **Multi-material problems**
+- **Complex boundary condition examples**
+- **GPU acceleration tests**
+
+## Development Workflow
+
+### Code Organization Best Practices
+1. **Header-only utilities**: Place in `utilities/` directory
+2. **New material models**: Extend `ExaModel` base class in `models/`
+3. **Post-processing features**: Add to `postprocessing/` directory
+4. **Configuration options**: Update corresponding `option_*.cpp` files
+
+### Adding New Features
+
+#### New Material Model
+1. Create header/source in `models/mechanics_newmodel.hpp/cpp`
+2. Inherit from `ExaModel` base class
+3. Implement required virtual methods (see the skeleton below)
+4. Add configuration parsing support
+5. Update `CMakeLists.txt`
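+
+A minimal skeleton under the interface shown earlier; the class and file names are hypothetical, and the `ModelSetup` parameter list is abridged to match the illustrative signature above (check the `ExaModel` header in `models/` for the authoritative one):
+
+```cpp
+// models/mechanics_newmodel.hpp (hypothetical names)
+#include "models/mechanics_model.hpp" // assumed location of the ExaModel base class
+
+class NewModel : public ExaModel {
+public:
+    // Evaluate stress and the material tangent at every quadrature point
+    void ModelSetup(const int nqpts, const int nelems, const int space_dim,
+                    const int nnodes, const mfem::Vector& jacobian,
+                    const mfem::Vector& loc_grad, const mfem::Vector& vel) override;
+
+    // Commit state variables once the Newton loop has converged
+    void UpdateModelVars() override;
+};
+```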
+
+#### New Boundary Condition Type
+1. Extend `BCManager` class
+2. Add parsing support in `option_boundary_conditions.cpp`
+3. Update documentation and examples
+
+### Performance Considerations
+- **GPU kernels**: Use RAJA for performance portability
+- **Memory management**: Follow MFEM memory patterns
+- **MPI communication**: Minimize collective operations
+- **Assembly strategy**: Choose PA vs EA based on problem size
+
+### Debugging and Profiling
+- **Caliper integration**: Built-in performance profiling
+- **MFEM debugging**: Use MFEM's debugging capabilities
+- **GPU debugging**: CUDA/HIP debugging tools
+- **MPI debugging**: TotalView, DDT support
+
+## UMAT Development Resources
+
+### **Interface Requirements**
+While UMAT interfaces are traditionally described using Fortran signatures, ExaConstit supports implementation in **Fortran, C++, or C**:
+
+#### **Standard UMAT Signature** (Fortran style)
+```fortran
+SUBROUTINE UMAT(STRESS,STATEV,DDSDDE,SSE,SPD,SCD,
+ 1 RPL,DDSDDT,DRPLDE,DRPLDT,
+ 2 STRAN,DSTRAN,TIME,DTIME,TEMP,DTEMP,PREDEF,DPRED,CMNAME,
+ 3 NDI,NSHR,NTENS,NSTATV,PROPS,NPROPS,COORDS,DROT,PNEWDT,
+ 4 CELENT,DFGRD0,DFGRD1,NOEL,NPT,LAYER,KSPT,KSTEP,KINC)
+```
+
+#### **C++ Implementation Example**
+```cpp
+extern "C" void umat_(double* stress, double* statev, double* ddsdde,
+ double* sse, double* spd, double* scd,
+ // ... additional parameters
+ int* ndi, int* nshr, int* ntens, int* nstatv,
+ double* props, int* nprops,
+ // ... remaining parameters
+ );
+```
+
+### **UMAT Development Best Practices**
+
+#### **Memory Management**
+- **ExaConstit handles**: State variable allocation and persistence
+- **UMAT responsible**: Local variable management within subroutine
+- **No dynamic allocation**: Avoid malloc/new within UMAT calls
+
+#### **Thread Safety**
+- **No global variables**: UMATs must be thread-safe
+- **Local computations**: All calculations using passed parameters
+- **State persistence**: Only through provided state variable arrays
+
+#### **Error Handling**
+- **Convergence issues**: Set the appropriate flag so the host Newton-Raphson loop can react (see the `PNEWDT` example below)
+- **Material failure**: Handle through state variables or stress reduction
+- **Numerical stability**: Check for divide-by-zero and overflow conditions
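+
+In the standard Abaqus UMAT convention, the usual way to signal convergence trouble is through `PNEWDT`. A hedged C++ fragment, assuming `pnewdt` is the pointer passed into `umat_` and that the host code honors the convention:
+
+```cpp
+// Inside the UMAT: ask for a smaller time increment rather than returning
+// a bad stress state. PNEWDT < 1.0 requests dt_new = PNEWDT * dt
+// (standard Abaqus convention; confirm ExaConstit honors it for your case).
+if (!converged) {
+    *pnewdt = 0.5; // retry the increment at half the current time step
+}
+```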
+
+#### **Performance Considerations**
+- **CPU execution only**: UMATs currently run on the CPU only, though GPU support may become possible in future updates
+- **Vectorization**: Ensure compiler optimization is possible
+- **Minimal function calls**: Reduce computational overhead within UMAT
+
+### **Development Resources**
+
+#### **Reference Implementations**
+- **`src/umat_tests/`**: Example UMAT implementations and conversion guides
+- **Template UMATs**: Starting points for custom development
+
+#### **External Resources**
+- **NJIT UMAT Collection**: https://web.njit.edu/~sac3/Software.html
+- **Academic examples**: Various constitutive models available
+- **License considerations**: Verify licensing before use
+
+#### **Build System Integration**
+```bash
+# Compile UMAT to shared library (Fortran)
+gfortran -shared -fPIC -o my_umat.so my_umat.f90
+
+# Compile UMAT (C++)
+g++ -shared -fPIC -o my_umat.so my_umat.cpp
+
+# Compile UMAT (C)
+gcc -shared -fPIC -o my_umat.so my_umat.c
+```
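+
+After building, it's worth confirming the library actually exports the entry point the loader will look for (name mangling and trailing underscores are the usual culprits):
+
+```bash
+# Linux: list dynamic symbols and look for the umat entry point
+nm -D my_umat.so | grep -i umat
+```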
+
+#### **Configuration Integration**
+```toml
+[Materials.regions.model.UMAT]
+library_path = "/path/to/my_umat.so"
+num_props = 8
+num_state_vars = 12
+props = [
+ 210000.0, # Young's modulus
+ 0.3, # Poisson's ratio
+ # ... additional parameters
+]
+```
+
+## Contributing Guidelines
+
+### Code Standards
+- **C++17 compliance**: Use modern C++ features
+- **Documentation**: Doxygen-style comments for all public interfaces
+- **Testing**: Include test cases for new features
+- **Performance**: Maintain GPU and MPI scalability
+- **Name Formatting**:
+  - Function names should be in `PascalCase` for any file except those related to IO (src/options/* and src/utilities/unified_loggers.*), which use `snake_case`.
+  - Class / enum names should be in `PascalCase`
+  - Enum values should be `UPPER_CASE`
+  - Class member variables going forward should be `snake_case` and preferably have a `m_` prefix. However, the `m_` prefix is **not** required if it makes things harder to understand. We're still converting variables over from previously inconsistent naming conventions, so if you spot something that needs fixing please do so.
+  - Local / function variables going forward should be `snake_case`. As above, we are slowly converting old code over to this new format, so feel free to help out if you can.
+  - If making formatting changes, split those into their own commits so it's easier to track changes. Additionally, try not to change the whole world at once; doing things piecemeal makes it easier to track down where a bug might have been introduced during a rename.
+- **Code Formatting**: We have a `.clang-format` file that we use to enforce a unified coding style across the code base. An example of how to run the formatter: `find src -type f \( -name "*.cpp" -o -name "*.hpp" -o -name "*.h" \) ! -path "*/TOML_Reader/*" -exec $CLANG_FORMAT -i {} +` . Note: if you see any changes in the `src/TOML_Reader` directory, revert them, as that is a TPL we include directly in the repo and do not want to update unless deliberately bringing in changes from its upstream repo.
+
+### Pull Request Process
+1. Fork the repository (if non-LLNL employee)
+2. Create feature branch from `exaconstit-dev`
+3. Implement changes with tests
+4. Ensure all existing tests pass
+5. Submit pull request with detailed description
+
+### Licensing
+- **BSD-3-Clause license**: All contributions must use this license
+- **Third-party code**: Ensure compatible licensing for external dependencies
+
+### Getting Help
+- **Primary Developer**: Robert A. Carson (carson16@llnl.gov)
+- **GitHub Issues**: Report bugs and feature requests
+- **Documentation**: Refer to MFEM and ExaCMech documentation for underlying libraries
+
+## Additional Resources
+
+### Related Projects
+- **ExaCMech**: Crystal plasticity library (https://github.com/LLNL/ExaCMech)
+- **MFEM**: Finite element library (https://mfem.org)
+- **ExaCA**: Cellular automata for microstructure generation
+
+### Workflows and Applications
+- **Optimization workflows**: Multi-objective genetic algorithm parameter optimization
+- **UQ workflows**: Uncertainty quantification for additive manufacturing
+- **Post-processing tools**: Python scripts for data analysis
+
+### Citation
+If using ExaConstit in your research, please cite:
+```bibtex
+@misc{exaconstit,
+  title = {{ExaConstit}},
+  author = {Carson, Robert A. and Wopschall, Steven R. and Bramwell, Jamie A.},
+  abstractNote = {The principal purpose of this code is to determine bulk constitutive properties and response of polycrystalline materials. This is a nonlinear quasi-static, implicit solid mechanics code built on the MFEM library based on an updated Lagrangian formulation (velocity based). Within this context, there is flexibility in the type of constitutive model employed, with the code allowing for various UMATs to be interfaced within the code framework or for the use of the ExaCMech library. Using crystal-mechanics-based constitutive models, the code can be used, for example, to compute homogenized response behavior over a polycrystal.},
+  howpublished = {[Computer Software] \url{https://doi.org/10.11578/dc.20191024.2}},
+  url = {https://github.com/LLNL/ExaConstit},
+  doi = {10.11578/dc.20191024.2},
+  year = {2019},
+  month = {Aug},
+  annote = {
+    https://www.osti.gov//servlets/purl/1571640
+    https://www.osti.gov/biblio/1571640-exaconstit
+  }
+}
+```
+
+---
+
+This guide provides a foundation for new developers to understand and contribute to ExaConstit. For specific implementation details, refer to the extensive inline documentation throughout the codebase and the example configurations in `test/data/`.
\ No newline at end of file
diff --git a/doc/install.md b/doc/install.md
new file mode 100644
index 0000000..1471ffb
--- /dev/null
+++ b/doc/install.md
@@ -0,0 +1,813 @@
+# ExaConstit Installation Guide
+
+ExaConstit provides a modular build system with automated installation scripts for different platforms and backends. The build system automatically handles all dependencies including RAJA, MFEM, ExaCMech, Hypre, and METIS.
+
+---
+
+## Table of Contents
+
+- [Quick Start](#quick-start)
+- [Build System Architecture](#build-system-architecture)
+- [First-Time Setup](#first-time-setup)
+- [Advanced Configuration](#advanced-configuration)
+- [Build Locations and Output](#build-locations-and-output)
+- [Troubleshooting](#troubleshooting)
+- [Manual Build](#manual-build-advanced-users)
+
+---
+
+## Quick Start
+
+### **1. Download the Repository**
+```bash
+# Clone the repository
+git clone https://github.com/LLNL/ExaConstit.git
+cd ExaConstit
+
+# Create a separate build directory (recommended)
+cd ..
+mkdir exaconstit_builds
+cd exaconstit_builds
+```
+
+### **2. Choose Your Platform**
+
+#### **Intel CPU Systems (Linux)**
+```bash
+../ExaConstit/scripts/install/unix_cpu_intel_install.sh
+```
+
+#### **MacOS Systems**
+```bash
+../ExaConstit/scripts/install/unix_cpu_mac_install.sh
+```
+
+#### **NVIDIA GPU Systems (CUDA)**
+```bash
+../ExaConstit/scripts/install/unix_gpu_cuda_install.sh
+```
+
+#### **AMD GPU Systems (HIP/ROCm)**
+```bash
+../ExaConstit/scripts/install/unix_gpu_hip_install.sh
+```
+
+**Note for MI300A users:** Set `HSA_XNACK=1` in your environment before running simulations. This is required due to unified memory requirements and current limitations in MFEM's HIP backend.
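+
+For example, in your shell or job script (launcher and binary path per your build):
+
+```bash
+# Required on MI300A before launching any simulation
+export HSA_XNACK=1
+mpirun -np 4 ./mechanics_driver -opt options.toml
+```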
+
+---
+
+## Build System Architecture
+
+The installation framework is organized into three components:
+```
+scripts/install/
+├── common/
+│ ├── dependency_versions.sh # Centralized version control
+│ ├── preflight_checks.sh # Validation and utilities
+│ └── build_functions.sh # Shared build logic
+├── configs/
+│ ├── cpu_intel_config.sh # Intel compiler configuration
+│ ├── cpu_mac_config.sh # macOS configuration
+│ ├── gpu_cuda_config.sh # NVIDIA CUDA configuration
+│ └── gpu_hip_config.sh # AMD HIP configuration
+└── unix_*_install.sh # Platform-specific entry points
+```
+
+- **common/**: Shared build logic used across all platforms
+- **configs/**: Platform-specific compiler paths, flags, and settings
+- **Entry scripts**: Simple launchers that source the appropriate config and common functions
+
+---
+
+## First-Time Setup
+
+Before running an install script, you'll need to customize the configuration file for your system.
+
+### **Step 1: Edit the Configuration File**
+
+Navigate to the ExaConstit repository and open the appropriate config file in `scripts/install/configs/` with your editor of choice (a built-in terminal editor or something like VSCode):
+```bash
+cd ExaConstit
+
+# For Intel CPU builds
+code scripts/install/configs/cpu_intel_config.sh
+
+# For CUDA GPU builds
+code scripts/install/configs/gpu_cuda_config.sh
+
+# For HIP/AMD GPU builds
+code scripts/install/configs/gpu_hip_config.sh
+
+# For macOS builds
+code scripts/install/configs/cpu_mac_config.sh
+```
+
+### **Step 2: Update Compiler Paths and Versions**
+
+Each config file has a clearly marked section at the top for system-specific paths. Update these for your environment:
+
+#### **Intel CPU Configuration Example**
+```bash
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+INTEL_VERSION="2023.2.1-magic" # Update to your Intel version
+COMPILER_VERSION="intel-${INTEL_VERSION}"
+INTEL_BASE="/usr/tce/packages/intel/${COMPILER_VERSION}" # Update to your path
+
+MPI_IMPL="mvapich2" # Or openmpi, mpich, etc.
+MPI_VERSION="2.3.7" # Update to your MPI version
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}" # update to your path
+
+PYTHON_VERSION="3.12.2" # Update to your Python version
+PYTHON_BASE="/usr/apps/python-${PYTHON_VERSION}" # Update to your path
+```
+
+**How to find your paths:**
+```bash
+# Find your compilers
+which icc # Intel C compiler
+which icpc # Intel C++ compiler
+which mpicc # MPI C wrapper
+which mpicxx # MPI C++ wrapper
+which python3 # Python executable
+
+# Get version information
+icc --version
+mpicc --version
+python3 --version
+```
+
+#### **CUDA GPU Configuration Example**
+```bash
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+# Host Compiler
+CLANG_VERSION="ibm-14.0.5" # Update to your Clang version
+COMPILER_VERSION="clang-${CLANG_VERSION}"
+CLANG_BASE="/usr/tce/packages/clang/${COMPILER_VERSION}"
+
+# CUDA
+CUDA_VERSION="11.8.0" # Update to your CUDA version
+CUDA_BASE="/usr/tce/packages/cuda/cuda-${CUDA_VERSION}" # Update to your CUDA Path
+
+# MPI
+MPI_IMPL="spectrum-mpi" # Update to your MPI implementation / version / path
+MPI_VERSION="rolling-release"
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}"
+
+# Python
+PYTHON_VERSION="3.8.2" # As noted above, update to your version / path
+PYTHON_BASE="/usr/tce/packages/python/python-${PYTHON_VERSION}"
+```
+
+**How to find CUDA paths:**
+```bash
+# Find CUDA installation
+which nvcc
+nvcc --version
+
+# CUDA is typically at /usr/local/cuda or /usr/local/cuda-11.8
+echo $CUDA_HOME # May already be set
+```
+
+#### **HIP/AMD GPU Configuration Example**
+```bash
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+# ROCm Compiler
+# Update all of the below to your own relevant versions / paths / anything specific to your
+# system
+ROCM_VERSION="6.4.2"
+ROCM_MAGIC_SUFFIX="magic"
+COMPILER_VERSION="rocmcc-${ROCM_VERSION}-${ROCM_MAGIC_SUFFIX}"
+ROCM_BASE="/usr/tce/packages/rocmcc/${COMPILER_VERSION}"
+
+# MPI - Cray MPICH
+MPI_IMPL="cray-mpich"
+MPI_VERSION="9.0.1"
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}"
+
+# Python
+PYTHON_VERSION="3.9.12"
+PYTHON_BASE="/usr/tce/packages/python/python-${PYTHON_VERSION}"
+```
+
+**How to find ROCm paths:**
+```bash
+# Find ROCm installation
+which amdclang
+which hipcc
+hipcc --version
+
+# ROCm is typically at /opt/rocm or /opt/rocm-6.4.2
+echo $ROCM_PATH # May already be set
+```
+
+#### **macOS Configuration Example**
+```bash
+###########################################
+# User-Configurable Paths
+###########################################
+# Homebrew location
+HOMEBREW_PREFIX="${HOMEBREW_PREFIX:-/opt/homebrew}" # or /usr/local for Intel Macs
+
+# System Clang (usually fine as-is)
+CLANG_BASE="/usr/bin"
+
+# MPI installation (REQUIRED: Update this!)
+MPI_BASE="${HOME}/local/bin" # Update to your MPI location
+# Common locations:
+# Homebrew: /opt/homebrew/bin
+# MacPorts: /opt/local/bin
+# Anaconda: ${HOME}/anaconda3/bin
+
+# Python location (REQUIRED: Update this!)
+PYTHON_BASE="${HOME}/anaconda3/bin" # Update to your Python location
+# Common locations:
+# Homebrew: /opt/homebrew/bin
+# System: /usr/bin
+```
+
+**How to find paths on macOS:**
+```bash
+# Check architecture
+uname -m # arm64 for Apple Silicon, x86_64 for Intel
+
+# Find MPI (install if missing: brew install open-mpi)
+which mpicc
+which mpicxx
+
+# Find Python
+which python3
+python3 --version # Should be 3.8 or newer
+
+# Check Homebrew prefix
+brew --prefix
+```
+
+### **Step 3: Update Module Commands (HPC Systems Only)**
+
+If you're on an HPC system with a module system, update the `module load` commands to match your system:
+```bash
+###########################################
+# Module Loading
+###########################################
+module load intel/2023.2.1-magic # Update to match your system's modules
+module load CMake/3.26.3
+module load python/3.12
+module list
+```
+
+**How to find available modules:**
+```bash
+module avail # List all available modules
+module avail intel # Search for Intel modules
+module avail cuda # Search for CUDA modules
+module list # Show currently loaded modules
+```
+
+If your system doesn't use modules (like most macOS or personal Linux systems), you can comment out or remove the module commands.
+
+### **Step 4: Run the Install Script**
+
+Once you've customized the config file, run the appropriate install script from your build directory:
+```bash
+cd ../exaconstit_builds # Or wherever you want to build
+../ExaConstit/scripts/install/unix_cpu_intel_install.sh # Or appropriate script
+```
+
+The script will:
+1. Validate your configuration
+2. Display a build summary
+3. Download and build all dependencies
+4. Build ExaConstit
+5. Save detailed logs in each component's build directory
+
+**Expected build time:** 10-45 minutes, depending on the system, build parallelism, and whether GPU support is enabled.
+
+---
+
+## Advanced Configuration
+
+### **Updating Dependency Versions**
+
+All dependency versions are centralized in `common/dependency_versions.sh`:
+```bash
+# Edit version file
+code ExaConstit/scripts/install/common/dependency_versions.sh
+```
+```bash
+# Portability libraries
+export CAMP_VER="v2025.09.2" # Update to newer version
+export RAJA_VER="v2025.09.1"
+export UMPIRE_VER="v2025.09.0"
+export CHAI_VER="v2025.09.1"
+
+# Material models
+export EXACMECH_REPO="https://github.com/LLNL/ExaCMech.git"
+export EXACMECH_BRANCH="develop" # Change to different branch if needed
+
+# FEM infrastructure
+export HYPRE_VER="v2.32.0" # Update to newer version
+export METIS_VER="5.1.0"
+
+export MFEM_REPO="https://github.com/rcarson3/mfem.git"
+export MFEM_BRANCH="exaconstit-dev" # Change branch if needed
+
+# Main application
+export EXACONSTIT_REPO="https://github.com/llnl/ExaConstit.git"
+export EXACONSTIT_BRANCH="exaconstit-dev" # Change branch if needed
+
+# Build standards
+export CMAKE_CXX_STANDARD="17"
+export CMAKE_BUILD_TYPE="Release"
+```
+
+After updating versions, **all** build scripts will automatically use the new versions. No other changes needed.
+
+### **Changing GPU Architecture**
+
+Override the default GPU architecture at runtime:
+```bash
+# CUDA: Target Ampere A100 instead of default Volta V100
+CMAKE_GPU_ARCHITECTURES=80 ./unix_gpu_cuda_install.sh
+
+# HIP: Target MI250X instead of default MI300A
+CMAKE_GPU_ARCHITECTURES=gfx90a ./unix_gpu_hip_install.sh
+```
+
+Common GPU architectures:
+
+**NVIDIA CUDA:**
+- `60` - Pascal (P100)
+- `70` - Volta (V100)
+- `75` - Turing (RTX 20xx, T4)
+- `80` - Ampere (A100)
+- `86` - Ampere (RTX 30xx, A40)
+- `89` - Ada Lovelace (RTX 40xx, L40)
+- `90` - Hopper (H100)
+
+**AMD HIP:**
+- `gfx906` - MI50
+- `gfx908` - MI100
+- `gfx90a` - MI200 series (MI210, MI250)
+- `gfx940` - MI300X (compute-only)
+- `gfx942` - MI300A (APU)
+- `gfx942:xnack+` - MI300A with unified memory support
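+
+If you're unsure what your node reports, both vendors ship query tools (the `compute_cap` query requires a reasonably recent NVIDIA driver):
+
+```bash
+# NVIDIA: compute capability (e.g., 8.0 -> CMAKE_GPU_ARCHITECTURES=80)
+nvidia-smi --query-gpu=compute_cap --format=csv,noheader
+
+# AMD: gfx target (e.g., gfx90a, gfx942)
+rocminfo | grep -m1 -o 'gfx[0-9a-z:+]*'
+```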
+
+### **Build Control Options**
+
+Control the build behavior with environment variables:
+```bash
+# Clean rebuild (removes all build directories and rebuilds from scratch)
+REBUILD=ON ./unix_gpu_hip_install.sh
+
+# Force submodule updates (syncs and updates all git submodules)
+SYNC_SUBMODULES=ON ./unix_gpu_cuda_install.sh
+
+# Adjust parallel build jobs (default is 4)
+MAKE_JOBS=16 ./unix_cpu_intel_install.sh
+
+# Combine multiple options
+REBUILD=ON MAKE_JOBS=8 CMAKE_GPU_ARCHITECTURES=80 ./unix_gpu_cuda_install.sh
+```
+
+**Available environment variables:**
+- `REBUILD` - `ON` to clean and rebuild, `OFF` to reuse existing builds (default: `OFF`)
+- `SYNC_SUBMODULES` - `ON` to force submodule sync, `OFF` to skip (default: `OFF`)
+- `MAKE_JOBS` - Number of parallel build jobs (default: `4`)
+- `CMAKE_GPU_ARCHITECTURES` - GPU architecture target (default varies by platform)
+- `MFEM_HIP_ARCHITECTURES` - MFEM-specific HIP arch (HIP only, default: `gfx942`)
+- `OPENMP_ON` - Enable OpenMP (default: `OFF`)
+- `ENABLE_TESTS_EXACONSTIT` - Build tests (default: `ON`)
+
+### **Using Different Repositories or Branches**
+
+To use a fork or different branch, edit `common/dependency_versions.sh`:
+```bash
+# Use your fork of MFEM
+export MFEM_REPO="https://github.com/YOUR_USERNAME/mfem.git"
+export MFEM_BRANCH="my-custom-feature"
+
+# Use development branch of ExaConstit
+export EXACONSTIT_BRANCH="develop"
+
+# Use a different ExaCMech repository
+export EXACMECH_REPO="https://github.com/YOUR_USERNAME/ExaCMech.git"
+export EXACMECH_BRANCH="custom-material-models"
+```
+
+To use a specific commit instead of a branch:
+```bash
+# The build scripts will clone the repo, then manually checkout the commit:
+cd mfem && git checkout abc123def456
+cd ..  # back to your build directory
+# Re-run the build script with REBUILD=ON
+```
+
+### **Custom Compiler Flags**
+
+You can add custom flags by editing the config file:
+```bash
+# In your config file (e.g., configs/gpu_cuda_config.sh)
+
+# Add optimization flags
+export CMAKE_CXX_FLAGS="-fPIC -std=c++17 --gcc-toolchain=${GCC_BASE} -O3 -march=native"
+
+# Add debugging symbols
+export CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -g"
+
+# Add preprocessor definitions
+export CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DMY_CUSTOM_DEFINE"
+```
+
+---
+
+## Build Locations and Output
+
+By default, all builds install to your build directory:
+```
+your_build_directory/
+├── camp/
+│ ├── build_${BUILD_SUFFIX}/ # Build artifacts
+│ └── install_${BUILD_SUFFIX}/ # Installed library
+├── RAJA/
+│ ├── build_${BUILD_SUFFIX}/
+│ └── install_${BUILD_SUFFIX}/
+├── Umpire/ # GPU builds only
+│ ├── build_${BUILD_SUFFIX}/
+│ └── install_${BUILD_SUFFIX}/
+├── CHAI/ # GPU builds only
+│ ├── build_${BUILD_SUFFIX}/
+│ └── install_${BUILD_SUFFIX}/
+├── ExaCMech/
+│ ├── build_${BUILD_SUFFIX}/
+│ └── install_${BUILD_SUFFIX}/
+├── hypre/
+│ ├── build_${BUILD_SUFFIX}/
+│ └── src/hypre_${BUILD_SUFFIX}/ # Installed library
+├── metis-5.1.0/
+│ └── install_${BUILD_SUFFIX}/
+├── mfem/
+│ ├── build_${BUILD_SUFFIX}/
+│ └── install_${BUILD_SUFFIX}/
+└── ExaConstit/
+ ├── build_${BUILD_SUFFIX}/ # Build artifacts
+ └── install_dir/ # Final installation
+```
+
+Where `${BUILD_SUFFIX}` is:
+- `cpu` for CPU builds
+- `cuda` for CUDA/NVIDIA builds
+- `hip` for HIP/AMD builds
+
+### **Build Logs**
+
+Build logs are saved in each component's build directory with standardized names:
+- `my_<component>_config` - CMake configuration output
+- `my_<component>_build` - Compilation output
+- `my_<component>_install` - Installation output
+
+Example: To check why RAJA failed to build:
+```bash
+cd RAJA/build_cuda
+less my_raja_build # View the build log
+```
+
+### **Disk Space Requirements**
+
+Typical disk space usage:
+- **CPU build**: ~1 GB total
+- **GPU build (CUDA/HIP)**: ~2 GB total
+ - Includes additional Umpire and CHAI libraries
+ - GPU architectures add to binary sizes
+
+---
+
+## Troubleshooting
+
+### **Configuration Issues**
+
+#### **"Module not found" errors**
+```
+ERROR: Unable to locate a modulefile for 'intel/2023.2.1-magic'
+```
+
+**Solution:**
+- Check available modules: `module avail intel`
+- Update the module version in your config file
+- If not using a module system, comment out the `module load` commands
+
+#### **"Compiler not found" errors**
+```
+CMake Error: CMAKE_C_COMPILER not found
+```
+
+**Solution:**
+- Verify the compiler path: `ls -la /path/to/compiler`
+- Check that the executable exists: `which icc` or `which clang`
+- Update the `*_BASE` variable in your config file
+- Ensure the compiler is in your `PATH`
+
+#### **"Python not found" errors**
+```
+Could NOT find Python3 (missing: Python3_EXECUTABLE)
+```
+
+**Solution:**
+- Verify Python installation: `which python3` or `which python`
+- Check Python version (must be 3.8+): `python3 --version`
+- Update `PYTHON_BASE` in your config file
+- If using Anaconda: `conda activate your_env` before building
+
+#### **MPI errors**
+```
+Could not find MPI compiler wrappers
+```
+
+**Solution:**
+- Verify MPI installation: `which mpicc && which mpicxx`
+- Update `MPI_BASE` in your config file
+- Test MPI: `mpicc --version`
+- On macOS, install with: `brew install open-mpi`
+
+### **Build Failures**
+
+#### **Out of memory during compilation**
+```
+c++: fatal error: Killed signal terminated program cc1plus
+```
+
+**Solution:**
+- Reduce parallel jobs: `MAKE_JOBS=2 ./unix_*_install.sh`
+- Close other applications
+- Add swap space if building on a resource-constrained system
+
+#### **Disk space errors**
+```
+No space left on device
+```
+
+**Solution:**
+- Check available space: `df -h .`
+- Clean previous builds: `REBUILD=ON ./unix_*_install.sh`
+- Build in a location with more space
+- Remove old build directories
+
+#### **Dependency build fails partway through**
+
+**Solution:**
+1. Check the specific log file in the failing component's build directory
+2. Common issues:
+ - Missing system libraries (install via package manager)
+ - Version incompatibilities (check `dependency_versions.sh`)
+ - Network issues during git clone (retry with `SYNC_SUBMODULES=ON`)
+3. Try a clean rebuild: `REBUILD=ON ./unix_*_install.sh`
+4. Build dependencies individually to isolate the issue
+
+#### **Git submodule errors**
+```
+fatal: unable to access 'https://github.com/...': Failed to connect
+```
+
+**Solution:**
+- Check network connectivity
+- If behind a firewall, configure git proxy
+- Clone repositories manually if needed
+- Force submodule sync: `SYNC_SUBMODULES=ON ./unix_*_install.sh`
+
+### **Platform-Specific Issues**
+
+#### **macOS: "xcrun: error: invalid active developer path"**
+
+**Solution:**
+```bash
+xcode-select --install
+```
+
+#### **macOS: Missing dependencies**
+
+**Solution:**
+```bash
+# Install Homebrew if not already installed
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+
+# Install required tools
+brew install cmake
+brew install open-mpi
+brew install python3
+```
+
+#### **macOS: Architecture mismatch (Apple Silicon vs Intel)**
+
+**Solution:**
+- Ensure all dependencies are built for the same architecture
+- For Apple Silicon, use ARM-native tools: `arch -arm64 brew install ...`
+- For Intel compatibility on Apple Silicon: `arch -x86_64 brew install ...`
+
+#### **HPC: Module conflicts**
+
+**Solution:**
+```bash
+# Clear all modules and reload
+# Some HPC systems automatically swap dependent modules for you,
+# so only purge if your HPC center advises you to do so
+module purge
+module load intel/2023.2.1-magic
+module load cmake/3.26.3
+# ... load other required modules
+```
+
+#### **HPC: Quota exceeded**
+
+**Solution:**
+- Build in your scratch space instead of home directory
+- Clean old builds regularly
+- Check quota: `quota -s` or `lfs quota -h $HOME`
+
+### **GPU-Specific Issues**
+
+#### **CUDA: "nvcc not found"**
+
+**Solution:**
+- Verify CUDA installation: `which nvcc`
+- Update `CUDA_BASE` in `gpu_cuda_config.sh`
+- Load CUDA module if on HPC: `module load cuda`
+- Set `CUDA_HOME` environment variable
+
+#### **CUDA: Architecture mismatch**
+```
+nvcc fatal: Unsupported gpu architecture 'compute_XX'
+```
+
+**Solution:**
+- Check your GPU architecture: `nvidia-smi`
+- Update `CMAKE_GPU_ARCHITECTURES` to match your hardware
+- Common fix: `CMAKE_GPU_ARCHITECTURES=70 ./unix_gpu_cuda_install.sh`
+
+#### **HIP: "amdclang not found"**
+
+**Solution:**
+- Verify ROCm installation: `which amdclang`
+- Update `ROCM_BASE` in `gpu_hip_config.sh`
+- Load ROCm modules: `module load rocm`
+- Set `ROCM_PATH` environment variable
+
+#### **HIP: MI300A memory issues**
+
+**Solution:**
+- Set unified memory flag: `export HSA_XNACK=1`
+- Verify architecture: `CMAKE_GPU_ARCHITECTURES=gfx942:xnack+`
+- Check system: `rocminfo | grep xnack`
+
+### **Runtime Issues**
+
+#### **Segmentation fault on startup**
+
+**Possible causes:**
+1. Library path issues - Ensure `LD_LIBRARY_PATH` includes all dependency lib directories
+2. ABI incompatibility - Rebuild with consistent compiler versions
+3. Missing runtime dependencies - Check with `ldd` on the executable
+
+#### **MPI initialization failures**
+
+**Solution:**
+```bash
+# Test MPI installation
+mpirun -np 2 hostname
+
+# Verify MPI library paths
+ldd ExaConstit/build_*/mechanics_driver | grep mpi
+
+# Check module environment
+module list
+```
+
+### **Getting Help**
+
+If you encounter issues not covered here:
+
+1. **Check the build logs**
+ - Navigate to the failing component's build directory
+   - Review `my_<component>_config`, `my_<component>_build`, or `my_<component>_install`
+ - Look for specific error messages
+
+2. **Verify your configuration**
+ - Confirm all paths in your config file are correct
+ - Test each tool independently: `which compiler`, `mpicc --version`, etc.
+
+3. **Search existing issues**
+ - Check the [GitHub Issues](https://github.com/LLNL/ExaConstit/issues) page
+ - Search for similar error messages
+
+4. **Open a new issue**
+ - Go to [GitHub Issues](https://github.com/LLNL/ExaConstit/issues/new)
+ - Include:
+ - Your platform and OS version (`uname -a`, `lsb_release -a`, etc.)
+ - The config file you're using
+ - Relevant sections from error logs
+ - Steps you've already tried
+ - Output of `module list` (if applicable)
+
+---
+
+## Manual Build (Advanced Users)
+
+If you prefer to build manually or need more control over the build process:
+
+### **Prerequisites**
+
+You'll need to manually build all dependencies first:
+1. **CAMP** (v2025.09.2)
+2. **RAJA** (v2025.09.1)
+3. **Umpire** (v2025.09.0) - GPU builds only
+4. **CHAI** (v2025.09.1) - GPU builds only
+5. **ExaCMech** (develop branch)
+6. **Hypre** (v2.32.0)
+7. **METIS** (5.1.0)
+8. **MFEM** (`exaconstit-smart-ptrs` branch; note the scripted builds instead use `MFEM_BRANCH` from `dependency_versions.sh`)
+
+See `scripts/install/common/build_functions.sh` for the exact build commands and CMake options.
+
+### **Building ExaConstit**
+
+Once all dependencies are built:
+```bash
+# 1. Clone ExaConstit
+git clone https://github.com/LLNL/ExaConstit.git
+cd ExaConstit
+git checkout the_great_refactoring
+git submodule update --init --recursive
+
+# 2. Create build directory
+mkdir build && cd build
+
+# 3. Configure (CPU example)
+cmake .. \
+ -DCMAKE_CXX_COMPILER=mpicxx \
+ -DCMAKE_C_COMPILER=mpicc \
+ -DENABLE_TESTS=ON \
+ -DENABLE_OPENMP=OFF \
+ -DENABLE_FORTRAN=OFF \
+ -DPYTHON_EXECUTABLE=/usr/bin/python3 \
+ -DMFEM_DIR=${MFEM_INSTALL_DIR}/lib/cmake/mfem/ \
+ -DECMECH_DIR=${EXACMECH_INSTALL_DIR}/ \
+ -DSNLS_DIR=${EXACMECH_INSTALL_DIR}/ \
+ -DRAJA_DIR=${RAJA_INSTALL_DIR}/lib/cmake/raja/ \
+ -Dcamp_DIR=${CAMP_INSTALL_DIR}/lib/cmake/camp/ \
+ -DCMAKE_BUILD_TYPE=Release
+
+# 4. Build
+make -j $(nproc)
+
+# 5. Test
+ctest
+```
+
+### **GPU Build Options**
+
+For CUDA builds, add:
+```bash
+cmake .. \
+ ... (all the above options) ... \
+ -DCMAKE_CXX_COMPILER=${CUDA_ROOT}/bin/nvcc \
+ -DCMAKE_CUDA_COMPILER=${CUDA_ROOT}/bin/nvcc \
+ -DCMAKE_CUDA_HOST_COMPILER=${HOST_CXX_COMPILER} \
+ -DCMAKE_CUDA_ARCHITECTURES=80 \
+ -DENABLE_CUDA=ON \
+ -DFMT_DIR=${UMPIRE_INSTALL_DIR}/lib64/cmake/fmt \
+ -DUMPIRE_DIR=${UMPIRE_INSTALL_DIR}/lib64/cmake/umpire \
+ -DCHAI_DIR=${CHAI_INSTALL_DIR}/lib/cmake/chai
+```
+
+For HIP builds, add:
+```bash
+cmake .. \
+ ... (all the above options) ... \
+ -DCMAKE_CXX_COMPILER=${ROCM_ROOT}/bin/amdclang++ \
+ -DCMAKE_HIP_COMPILER=${ROCM_ROOT}/bin/amdclang++ \
+ -DCMAKE_HIP_ARCHITECTURES=gfx942 \
+ -DENABLE_HIP=ON \
+ -DFMT_DIR=${UMPIRE_INSTALL_DIR}/lib64/cmake/fmt \
+ -DUMPIRE_DIR=${UMPIRE_INSTALL_DIR}/lib64/cmake/umpire \
+ -DCHAI_DIR=${CHAI_INSTALL_DIR}/lib/cmake/chai
+```
+
+---
+
+## Next Steps
+
+After successful installation:
+
+- **Run the test suite**: `cd ExaConstit/build_*/` then `ctest` or `make test`
+- **Try example problems**: See `examples/` directory
+- **Read the documentation**: Check the `doc/` folder for detailed usage guides
+- **Join the community**: Open issues or discussions on GitHub
+
+For questions about using ExaConstit after installation, see the main [README](../README.md) and documentation in the `doc/` folder.
\ No newline at end of file
diff --git a/mechanics.bash b/mechanics.bash
deleted file mode 100755
index 7d174a6..0000000
--- a/mechanics.bash
+++ /dev/null
@@ -1,3 +0,0 @@
-#The options.toml file contains all of the options that drive the simulations
-
-srun -ppdebug -n1 ./mechanics_driver -opt options.toml
diff --git a/scripts/install/common/build_functions.sh b/scripts/install/common/build_functions.sh
new file mode 100644
index 0000000..c7e8001
--- /dev/null
+++ b/scripts/install/common/build_functions.sh
@@ -0,0 +1,562 @@
+#!/usr/bin/env bash
+# Common build functions for all ExaConstit dependencies
+
+# Logging wrapper
+run_with_log() {
+    local log="$1"; shift
+    # Tee output to the log while preserving the wrapped command's exit status
+    "$@" |& tee "$log"
+    return "${PIPESTATUS[0]}"
+}
+
+# Clone repository only if missing, initialize submodules on first clone
+clone_if_missing() {
+ local repo="$1" branch="$2" dest="$3"
+ if [ ! -d "$dest/.git" ]; then
+ echo "Cloning ${dest}..."
+ git clone --branch "$branch" "$repo" "$dest"
+ cd "$dest"
+ if [ -f .gitmodules ]; then
+ git submodule update --init --recursive
+ fi
+ cd "$BASE_DIR"
+ else
+ echo "${dest} already exists, skipping clone."
+ fi
+}
+
+# Optional: force submodule sync when explicitly requested
+sync_submodules() {
+ local dest="$1"
+ if [ "${SYNC_SUBMODULES}" = "ON" ] && [ -f "$dest/.gitmodules" ]; then
+ echo "Syncing submodules in ${dest}..."
+ cd "$dest"
+ git submodule sync --recursive
+ git submodule update --init --recursive
+ cd "$BASE_DIR"
+ fi
+}
+
+# Respect REBUILD flag when preparing build directories
+prepare_build_dir() {
+ local dir="$1"
+ if [ "${REBUILD}" = "ON" ]; then
+ mkdir -p "$dir"
+ rm -rf "$dir"/*
+ echo "Cleaned build directory: ${dir}"
+ else
+ if [ ! -d "$dir" ]; then
+ mkdir -p "$dir"
+ echo "Created build directory: ${dir}"
+ else
+ echo "Reusing existing build directory: ${dir}"
+ fi
+ fi
+}
+
+###########################################
+# CAMP
+###########################################
+build_camp() {
+ echo "=========================================="
+ echo "Building CAMP"
+ echo "=========================================="
+
+ clone_if_missing "https://github.com/LLNL/camp.git" "${CAMP_VER}" "${BASE_DIR}/camp"
+ sync_submodules "${BASE_DIR}/camp"
+
+ prepare_build_dir "${BASE_DIR}/camp/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/camp/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DENABLE_TESTS=OFF
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+ )
+
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_${GPU_BACKEND}=ON
+ )
+ fi
+
+ run_with_log my_camp_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_camp_build make -j "${MAKE_JOBS}"
+ run_with_log my_camp_install make install
+
+ CAMP_ROOT="${BASE_DIR}/camp/install_${BUILD_SUFFIX}"
+ export CAMP_ROOT
+ echo "CAMP installed to: ${CAMP_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# RAJA
+###########################################
+build_raja() {
+ echo "=========================================="
+ echo "Building RAJA"
+ echo "=========================================="
+
+ clone_if_missing "https://github.com/LLNL/RAJA.git" "${RAJA_VER}" "${BASE_DIR}/RAJA"
+ sync_submodules "${BASE_DIR}/RAJA"
+
+ prepare_build_dir "${BASE_DIR}/RAJA/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/RAJA/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DENABLE_TESTS=OFF
+ -DRAJA_ENABLE_TESTS=OFF
+ -DRAJA_ENABLE_EXAMPLES=OFF
+ -DRAJA_ENABLE_BENCHMARKS=OFF
+ -DRAJA_ENABLE_REPRODUCERS=OFF
+ -DRAJA_ENABLE_EXERCISES=OFF
+ -DRAJA_ENABLE_VECTORIZATION=OFF
+ -DRAJA_ENABLE_DOCUMENTATION=OFF
+ -DRAJA_USE_DOUBLE=ON
+ -DRAJA_TIMER=chrono
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+ -Dcamp_DIR="${CAMP_ROOT}/lib/cmake/camp"
+ )
+
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_${GPU_BACKEND}=ON
+ )
+ if [ "${GPU_BACKEND}" = "CUDA" ]; then
+ CMAKE_ARGS+=(
+ -DRAJA_USE_BARE_PTR=ON
+ )
+ fi
+ fi
+
+ run_with_log my_raja_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_raja_build make -j "${MAKE_JOBS}"
+ run_with_log my_raja_install make install
+
+ RAJA_ROOT="${BASE_DIR}/RAJA/install_${BUILD_SUFFIX}"
+ export RAJA_ROOT
+ echo "RAJA installed to: ${RAJA_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# Umpire (GPU only)
+###########################################
+build_umpire() {
+ if [ "${BUILD_TYPE}" = "cpu" ]; then
+ echo "Skipping Umpire (not needed for CPU builds)"
+ return 0
+ fi
+
+ echo "=========================================="
+ echo "Building Umpire"
+ echo "=========================================="
+
+ clone_if_missing "https://github.com/LLNL/Umpire.git" "${UMPIRE_VER}" "${BASE_DIR}/Umpire"
+ sync_submodules "${BASE_DIR}/Umpire"
+
+ prepare_build_dir "${BASE_DIR}/Umpire/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/Umpire/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DENABLE_TESTS=OFF
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DENABLE_MPI=OFF
+ -DUMPIRE_ENABLE_C=OFF
+ -DENABLE_FORTRAN=OFF
+ -DENABLE_GMOCK=OFF
+ -DUMPIRE_ENABLE_IPC_SHARED_MEMORY=OFF
+ -DUMPIRE_ENABLE_TOOLS=ON
+ -DUMPIRE_ENABLE_BACKTRACE=ON
+ -DUMPIRE_ENABLE_BACKTRACE_SYMBOLS=ON
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_${GPU_BACKEND}=ON
+ -Dcamp_DIR="${CAMP_ROOT}/lib/cmake/camp"
+ )
+
+ run_with_log my_umpire_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_umpire_build make -j "${MAKE_JOBS}"
+ run_with_log my_umpire_install make install
+
+ UMPIRE_ROOT="${BASE_DIR}/Umpire/install_${BUILD_SUFFIX}"
+ export UMPIRE_ROOT
+
+ # Find fmt directory
+ FMT_DIR_CMAKE=$(find "${UMPIRE_ROOT}" -name 'fmtConfig.cmake' -print -quit || true)
+ if [ -n "${FMT_DIR_CMAKE}" ]; then
+ FMT_DIR=$(dirname "${FMT_DIR_CMAKE}")
+ else
+ FMT_DIR="${UMPIRE_ROOT}"
+ fi
+ export FMT_DIR
+
+ echo "Umpire installed to: ${UMPIRE_ROOT}"
+ echo "fmt found at: ${FMT_DIR}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# CHAI (GPU only)
+###########################################
+build_chai() {
+ if [ "${BUILD_TYPE}" = "cpu" ]; then
+ echo "Skipping CHAI (not needed for CPU builds)"
+ return 0
+ fi
+
+ echo "=========================================="
+ echo "Building CHAI"
+ echo "=========================================="
+
+ clone_if_missing "https://github.com/LLNL/CHAI.git" "${CHAI_VER}" "${BASE_DIR}/CHAI"
+ sync_submodules "${BASE_DIR}/CHAI"
+
+ prepare_build_dir "${BASE_DIR}/CHAI/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/CHAI/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DENABLE_TESTS=OFF
+ -DENABLE_EXAMPLES=OFF
+ -DENABLE_DOCS=OFF
+ -DENABLE_GMOCK=OFF
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DENABLE_MPI=OFF
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_${GPU_BACKEND}=ON
+ -DCHAI_ENABLE_RAJA_PLUGIN=ON
+ -DCHAI_ENABLE_RAJA_NESTED_TEST=OFF
+ -DCHAI_THIN_GPU_ALLOCATE="${CHAI_THIN_GPU_ALLOCATE}"
+ -DCHAI_ENABLE_PINNED="${CHAI_ENABLE_PINNED}"
+ -DCHAI_DISABLE_RM="${CHAI_DISABLE_RM}"
+ -DCHAI_ENABLE_PICK="${CHAI_ENABLE_PICK}"
+ -DCHAI_DEBUG="${CHAI_DEBUG}"
+ -DCHAI_ENABLE_GPU_SIMULATION_MODE="${CHAI_ENABLE_GPU_SIMULATION_MODE}"
+ -DCHAI_ENABLE_UM="${CHAI_ENABLE_UM}"
+ -DCHAI_ENABLE_MANAGED_PTR="${CHAI_ENABLE_MANAGED_PTR}"
+ -DCHAI_ENABLE_MANAGED_PTR_ON_GPU="${CHAI_ENABLE_MANAGED_PTR_ON_GPU}"
+ -Dfmt_DIR="${FMT_DIR}"
+ -Dumpire_DIR="${UMPIRE_ROOT}"
+ -DRAJA_DIR="${RAJA_ROOT}"
+ -Dcamp_DIR="${CAMP_ROOT}"
+ )
+
+ run_with_log my_chai_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_chai_build make -j "${MAKE_JOBS}"
+ run_with_log my_chai_install make install
+
+ CHAI_ROOT="${BASE_DIR}/CHAI/install_${BUILD_SUFFIX}"
+ export CHAI_ROOT
+ echo "CHAI installed to: ${CHAI_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# ExaCMech
+###########################################
+build_exacmech() {
+ echo "=========================================="
+ echo "Building ExaCMech"
+ echo "=========================================="
+
+ clone_if_missing "${EXACMECH_REPO}" "${EXACMECH_BRANCH}" "${BASE_DIR}/ExaCMech"
+ sync_submodules "${BASE_DIR}/ExaCMech"
+
+ prepare_build_dir "${BASE_DIR}/ExaCMech/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/ExaCMech/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DENABLE_TESTS=OFF
+ -DENABLE_MINIAPPS=OFF
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DBUILD_SHARED_LIBS=OFF
+ -DCMAKE_CXX_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DRAJA_DIR="${RAJA_ROOT}/lib/cmake/raja"
+ -DCAMP_DIR="${CAMP_ROOT}/lib/cmake/camp"
+ )
+
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_${GPU_BACKEND}=ON
+ -DFMT_DIR="${FMT_DIR}"
+ -DUMPIRE_DIR="${UMPIRE_ROOT}/lib64/cmake/umpire"
+ -DCHAI_DIR="${CHAI_ROOT}/lib/cmake/chai"
+ )
+ fi
+
+ run_with_log my_ecmech_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_ecmech_build make -j "${MAKE_JOBS}"
+ run_with_log my_ecmech_install make install
+
+ ECMECH_ROOT="${BASE_DIR}/ExaCMech/install_${BUILD_SUFFIX}"
+ export ECMECH_ROOT
+ echo "ExaCMech installed to: ${ECMECH_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# Hypre
+###########################################
+build_hypre() {
+ echo "=========================================="
+ echo "Building Hypre"
+ echo "=========================================="
+
+ if [ ! -d "${BASE_DIR}/hypre" ]; then
+ git clone https://github.com/hypre-space/hypre.git --branch "${HYPRE_VER}" --single-branch "${BASE_DIR}/hypre"
+ fi
+
+ prepare_build_dir "${BASE_DIR}/hypre/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/hypre/build_${BUILD_SUFFIX}"
+
+ run_with_log my_hypre_config cmake ../src \
+ -DCMAKE_INSTALL_PREFIX=../src/hypre_${BUILD_SUFFIX}/ \
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}" \
+ -DMPI_CXX_COMPILER="${MPI_CXX_COMPILER}" \
+ -DMPI_C_COMPILER="${MPI_C_COMPILER}" \
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+
+ run_with_log my_hypre_build make -j "${MAKE_JOBS}"
+ run_with_log my_hypre_install make install
+
+ HYPRE_ROOT="${BASE_DIR}/hypre/src/hypre_${BUILD_SUFFIX}"
+ export HYPRE_ROOT
+ echo "Hypre installed to: ${HYPRE_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# METIS
+###########################################
+build_metis() {
+ echo "=========================================="
+ echo "Building METIS"
+ echo "=========================================="
+
+ if [ ! -d "${BASE_DIR}/metis-${METIS_VER}" ]; then
+ curl -o metis-${METIS_VER}.tar.gz "${METIS_URL}"
+ tar -xzf metis-${METIS_VER}.tar.gz
+ rm metis-${METIS_VER}.tar.gz
+ fi
+
+ cd "${BASE_DIR}/metis-${METIS_VER}"
+
+ # METIS doesn't have a proper incremental build, so always clean
+ make distclean 2>/dev/null || true
+
+ prepare_build_dir "${BASE_DIR}/metis-${METIS_VER}/install_${BUILD_SUFFIX}"
+
+ run_with_log my_metis_config make config \
+ prefix="${BASE_DIR}/metis-${METIS_VER}/install_${BUILD_SUFFIX}" \
+ CC="${CMAKE_C_COMPILER}" \
+ CXX="${CMAKE_CXX_COMPILER}"
+
+ run_with_log my_metis_build make -j "${MAKE_JOBS}"
+ run_with_log my_metis_install make install
+
+ METIS_ROOT="${BASE_DIR}/metis-${METIS_VER}/install_${BUILD_SUFFIX}"
+ export METIS_ROOT
+ echo "METIS installed to: ${METIS_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# MFEM
+###########################################
+build_mfem() {
+ echo "=========================================="
+ echo "Building MFEM"
+ echo "=========================================="
+
+ clone_if_missing "${MFEM_REPO}" "${MFEM_BRANCH}" "${BASE_DIR}/mfem"
+ # Don't sync submodules for MFEM to preserve local changes
+
+ prepare_build_dir "${BASE_DIR}/mfem/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/mfem/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DMFEM_USE_MPI=YES
+ -DMFEM_USE_SIMD=NO
+ -DMETIS_DIR="${METIS_ROOT}"
+ -DHYPRE_DIR="${HYPRE_ROOT}"
+ -DMFEM_USE_RAJA=YES
+ -DRAJA_DIR="${RAJA_ROOT}"
+ -DRAJA_REQUIRED_PACKAGES="camp"
+ -DMFEM_USE_CAMP=ON
+ -Dcamp_DIR="${CAMP_ROOT}/lib/cmake/camp"
+ -DMFEM_USE_OPENMP="${OPENMP_ON}"
+ -DMFEM_USE_ZLIB=YES
+ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX}/
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ )
+
+ if [ "${BUILD_TYPE}" = "cpu" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_CXX_COMPILER="${MPI_CXX_COMPILER}"
+ )
+ else
+ CMAKE_ARGS+=(
+ -DCMAKE_CXX_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DMPI_CXX_COMPILER="${MPI_CXX_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DMFEM_USE_${GPU_BACKEND}=ON
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ )
+
+ if [ "${GPU_BACKEND}" = "CUDA" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_CUDA_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_CUDA_HOST_COMPILER="${CMAKE_CXX_COMPILER}"
+ -DCMAKE_CUDA_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DCMAKE_CUDA_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DENABLE_CUDA=ON
+ )
+ elif [ "${GPU_BACKEND}" = "HIP" ]; then
+ CMAKE_ARGS+=(
+ -DHIP_ARCH="${MFEM_HIP_ARCHITECTURES}"
+ -DCMAKE_HIP_ARCHITECTURES="${MFEM_HIP_ARCHITECTURES}"
+ )
+ fi
+ fi
+
+ run_with_log my_mfem_config cmake ../ "${CMAKE_ARGS[@]}"
+ run_with_log my_mfem_build make -j "${MAKE_JOBS}"
+ run_with_log my_mfem_install make install
+
+ MFEM_ROOT="${BASE_DIR}/mfem/install_${BUILD_SUFFIX}"
+ export MFEM_ROOT
+ echo "MFEM installed to: ${MFEM_ROOT}"
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# ExaConstit
+###########################################
+build_exaconstit() {
+ echo "=========================================="
+ echo "Building ExaConstit"
+ echo "=========================================="
+
+ clone_if_missing "${EXACONSTIT_REPO}" "${EXACONSTIT_BRANCH}" "${BASE_DIR}/ExaConstit"
+ sync_submodules "${BASE_DIR}/ExaConstit"
+
+ prepare_build_dir "${BASE_DIR}/ExaConstit/build_${BUILD_SUFFIX}"
+ cd "${BASE_DIR}/ExaConstit/build_${BUILD_SUFFIX}"
+
+ local CMAKE_ARGS=(
+ -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD}"
+ -DCMAKE_C_COMPILER="${CMAKE_C_COMPILER}"
+ -DMPI_CXX_COMPILER="${MPI_CXX_COMPILER}"
+ -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}"
+ -DENABLE_TESTS="${ENABLE_TESTS_EXACONSTIT}"
+ -DENABLE_OPENMP="${OPENMP_ON}"
+ -DENABLE_FORTRAN=OFF
+ -DENABLE_SNLS_V03=ON
+ -DCMAKE_INSTALL_PREFIX=../install_dir/
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -DMFEM_DIR="${MFEM_ROOT}/lib/cmake/mfem"
+ -DECMECH_DIR="${ECMECH_ROOT}"
+ -DSNLS_DIR="${ECMECH_ROOT}"
+ -DRAJA_DIR="${RAJA_ROOT}/lib/cmake/raja"
+ -DCAMP_DIR="${CAMP_ROOT}/lib/cmake/camp"
+ )
+
+ if [ "${BUILD_TYPE}" = "cpu" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_CXX_COMPILER="${MPI_CXX_COMPILER}"
+ )
+ else
+ CMAKE_ARGS+=(
+ -DCMAKE_CXX_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
+ -DCMAKE_${GPU_BACKEND}_COMPILER="${CMAKE_GPU_COMPILER}"
+ -DCMAKE_${GPU_BACKEND}_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES}"
+ -DENABLE_${GPU_BACKEND}=ON
+ -DFMT_DIR="${FMT_DIR}"
+ -DUMPIRE_DIR="${UMPIRE_ROOT}/lib64/cmake/umpire"
+ -DCHAI_DIR="${CHAI_ROOT}/lib/cmake/chai"
+ )
+
+ if [ "${GPU_BACKEND}" = "CUDA" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_CUDA_FLAGS="${CMAKE_GPU_FLAGS}"
+ -DBLT_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
+ )
+ elif [ "${GPU_BACKEND}" = "HIP" ]; then
+ CMAKE_ARGS+=(
+ -DCMAKE_HIP_FLAGS="${CMAKE_GPU_FLAGS}"
+ )
+ fi
+ fi
+
+    run_with_log my_exaconstit_config cmake ../ "${CMAKE_ARGS[@]}"
+    run_with_log my_exaconstit_build make -j "${MAKE_JOBS}"
+
+    # NOTE: ExaConstit is built but not installed here; run `make install` in the
+    # build directory if you want the prefix below populated.
+    EXACONSTIT_ROOT="${BASE_DIR}/ExaConstit/install_dir"
+ export EXACONSTIT_ROOT
+ echo "=========================================="
+ echo "ExaConstit build complete!"
+ echo "Install prefix: ${EXACONSTIT_ROOT}"
+ echo "=========================================="
+ cd "${BASE_DIR}"
+}
+
+###########################################
+# Main orchestration function
+###########################################
+build_all_dependencies() {
+ build_camp
+ build_raja
+ build_umpire
+ build_chai
+ build_exacmech
+ build_hypre
+ build_metis
+ build_mfem
+ build_exaconstit
+}
\ No newline at end of file
diff --git a/scripts/install/common/dependency_versions.sh b/scripts/install/common/dependency_versions.sh
new file mode 100644
index 0000000..7ddb73d
--- /dev/null
+++ b/scripts/install/common/dependency_versions.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+# Central version control for all dependencies
+
+# Portability libraries
+export CAMP_VER="v2025.09.2"
+export RAJA_VER="v2025.09.1"
+export UMPIRE_VER="v2025.09.0"
+export CHAI_VER="v2025.09.1"
+
+# Material models
+export EXACMECH_REPO="https://github.com/LLNL/ExaCMech.git"
+export EXACMECH_BRANCH="develop"
+
+# FEM infrastructure
+export HYPRE_VER="v2.32.0"
+export METIS_VER="5.1.0"
+export METIS_URL="https://mfem.github.io/tpls/metis-${METIS_VER}.tar.gz"
+
+export MFEM_REPO="https://github.com/rcarson3/mfem.git"
+export MFEM_BRANCH="exaconstit-dev"
+
+# Main application
+export EXACONSTIT_REPO="https://github.com/llnl/ExaConstit.git"
+export EXACONSTIT_BRANCH="exaconstit-dev"
+
+# Build standards
+export CMAKE_CXX_STANDARD="17"
+export CMAKE_BUILD_TYPE="Release"
\ No newline at end of file
diff --git a/scripts/install/common/preflight_checks.sh b/scripts/install/common/preflight_checks.sh
new file mode 100644
index 0000000..b6867ae
--- /dev/null
+++ b/scripts/install/common/preflight_checks.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# Preflight checks and utility functions
+
+# Resolve BASE_DIR portably across systems
+resolve_base_dir() {
+ if command -v readlink >/dev/null 2>&1 && readlink -f "$0" >/dev/null 2>&1; then
+ SCRIPT=$(readlink -f "$0")
+ BASE_DIR=$(dirname "$SCRIPT")
+ else
+ # Mac-compatible fallback
+ SCRIPT="$0"
+ BASE_DIR=$(cd "$(dirname "$SCRIPT")"; pwd -P)
+ fi
+ export BASE_DIR
+ cd "$BASE_DIR"
+}
+
+# Check for required executables and paths
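+# Paths containing /bin/ must be executable; everything else need only exist.
+# Illustrative usage (paths are examples, not real requirements):
+#   check_required_paths "/usr/tce/bin/cmake" "${HOME}/workspace"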
+check_required_paths() {
+ local missing=0
+ for p in "$@"; do
+ if [[ "$p" == */bin/* ]]; then
+ if [ ! -x "$p" ]; then
+ echo "ERROR: Missing executable: $p" >&2
+ missing=1
+ fi
+ else
+ if [ ! -e "$p" ]; then
+ echo "ERROR: Missing path: $p" >&2
+ missing=1
+ fi
+ fi
+ done
+ if [ "$missing" -ne 0 ]; then
+ echo "ERROR: Required paths missing. Exiting." >&2
+ exit 1
+ fi
+}
+
+# Check for required commands
+check_required_commands() {
+ local missing=0
+ for cmd in "$@"; do
+ if ! command -v "$cmd" >/dev/null 2>&1; then
+ echo "ERROR: Required command not found: $cmd" >&2
+ missing=1
+ fi
+ done
+ if [ "$missing" -ne 0 ]; then
+ echo "ERROR: Required commands missing. Exiting." >&2
+ exit 1
+ fi
+}
+
+# Print build configuration summary
+print_build_summary() {
+ echo "=========================================="
+ echo "ExaConstit Build Configuration"
+ echo "=========================================="
+ echo "BASE_DIR: ${BASE_DIR}"
+ echo "BUILD_TYPE: ${BUILD_TYPE}"
+ echo "BUILD_SUFFIX: ${BUILD_SUFFIX}"
+ echo "REBUILD: ${REBUILD}"
+ echo "SYNC_SUBMODULES: ${SYNC_SUBMODULES}"
+ echo ""
+ echo "Compilers:"
+ echo " C: ${CMAKE_C_COMPILER}"
+ echo " CXX: ${CMAKE_CXX_COMPILER}"
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ echo " GPU: ${CMAKE_GPU_COMPILER}"
+ echo " GPU Arch: ${CMAKE_GPU_ARCHITECTURES}"
+ fi
+ echo ""
+ echo "MPI Wrappers:"
+ echo " mpicc: ${MPI_C_COMPILER}"
+ echo " mpicxx: ${MPI_CXX_COMPILER}"
+ echo " mpifort: ${MPI_Fortran_COMPILER}"
+ echo ""
+ echo "Flags:"
+ echo " CXX: ${CMAKE_CXX_FLAGS}"
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ echo " GPU: ${CMAKE_GPU_FLAGS}"
+ fi
+ echo " Linker: ${CMAKE_EXE_LINKER_FLAGS}"
+ echo ""
+ echo "Key Versions:"
+ echo " CAMP: ${CAMP_VER}"
+ echo " RAJA: ${RAJA_VER}"
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ echo " Umpire: ${UMPIRE_VER}"
+ echo " CHAI: ${CHAI_VER}"
+ fi
+ echo " Hypre: ${HYPRE_VER}"
+ echo " MFEM: ${MFEM_BRANCH}"
+ echo " ExaCMech: ${EXACMECH_BRANCH}"
+ echo " ExaConstit: ${EXACONSTIT_BRANCH}"
+ echo "=========================================="
+}
+
+# Validate configuration before proceeding
+validate_configuration() {
+ echo "Validating configuration..."
+
+ # Check compilers exist
+ check_required_paths "${CMAKE_C_COMPILER}" "${CMAKE_CXX_COMPILER}"
+
+ if [ "${BUILD_TYPE}" != "cpu" ]; then
+ check_required_paths "${CMAKE_GPU_COMPILER}"
+ fi
+
+ # Check MPI wrappers
+ check_required_paths "${MPI_C_COMPILER}" "${MPI_CXX_COMPILER}" "${MPI_Fortran_COMPILER}"
+
+ # Check required commands
+ check_required_commands git cmake make curl tar
+
+ echo "Configuration validation complete."
+}
\ No newline at end of file
diff --git a/scripts/install/configs/cpu_intel_config.sh b/scripts/install/configs/cpu_intel_config.sh
new file mode 100644
index 0000000..063b41e
--- /dev/null
+++ b/scripts/install/configs/cpu_intel_config.sh
@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Configuration for Intel CPU builds
+
+# Build type identification
+export BUILD_TYPE="cpu"
+export BUILD_SUFFIX="cpu"
+
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+INTEL_VERSION="2023.2.1-magic"
+COMPILER_VERSION="intel-${INTEL_VERSION}"
+INTEL_BASE="/usr/tce/packages/intel/${COMPILER_VERSION}"
+
+MPI_IMPL="mvapich2"
+MPI_VERSION="2.3.7"
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}"
+
+PYTHON_VERSION="3.12.2"
+PYTHON_BASE="/usr/apps/python-${PYTHON_VERSION}"
+
+###########################################
+# Module Loading
+###########################################
+module load intel/${INTEL_VERSION}
+module load CMake/3.26.3
+module load python/3.12
+module list
+
+###########################################
+# Compilers
+###########################################
+export CMAKE_C_COMPILER="${INTEL_BASE}/bin/icx"
+export CMAKE_CXX_COMPILER="${INTEL_BASE}/bin/icpx"
+
+###########################################
+# MPI Wrappers
+###########################################
+export MPI_C_COMPILER="${MPI_BASE}/bin/mpicc"
+export MPI_CXX_COMPILER="${MPI_BASE}/bin/mpicxx"
+export MPI_Fortran_COMPILER="${MPI_BASE}/bin/mpifort"
+
+###########################################
+# Python
+###########################################
+export PYTHON_EXECUTABLE="${PYTHON_BASE}/bin/python"
+
+###########################################
+# Build Flags
+###########################################
+export CMAKE_CXX_FLAGS="-fPIC"
+export CMAKE_C_FLAGS="-fPIC"
+export CMAKE_EXE_LINKER_FLAGS=""
+
+###########################################
+# Build Options
+###########################################
+export OPENMP_ON="OFF"
+export ENABLE_TESTS_EXACONSTIT="ON"
+export MAKE_JOBS="${MAKE_JOBS:-4}"
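+# Illustrative override at invocation time: MAKE_JOBS=16 ./unix_cpu_intel_install.sh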
+
+###########################################
+# GPU Settings (Not Applicable)
+###########################################
+export GPU_BACKEND="NONE"
+export CMAKE_GPU_COMPILER=""
+export CMAKE_GPU_ARCHITECTURES=""
+export CMAKE_GPU_FLAGS=""
+
+###########################################
+# CHAI Options (Not Used in CPU Build)
+###########################################
+export CHAI_DISABLE_RM="OFF"
+export CHAI_THIN_GPU_ALLOCATE="OFF"
+export CHAI_ENABLE_PINNED="OFF"
+export CHAI_ENABLE_PICK="OFF"
+export CHAI_DEBUG="OFF"
+export CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
+export CHAI_ENABLE_UM="OFF"
+export CHAI_ENABLE_MANAGED_PTR="OFF"
+export CHAI_ENABLE_MANAGED_PTR_ON_GPU="OFF"
\ No newline at end of file
diff --git a/scripts/install/configs/cpu_mac_config.sh b/scripts/install/configs/cpu_mac_config.sh
new file mode 100644
index 0000000..b2598c8
--- /dev/null
+++ b/scripts/install/configs/cpu_mac_config.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# Configuration for Mac CPU builds (Apple Silicon or Intel)
+
+# Build type identification
+export BUILD_TYPE="cpu"
+export BUILD_SUFFIX="cpu"
+
+###########################################
+# User-Configurable Paths
+###########################################
+# IMPORTANT: Update these paths for your local Mac environment
+# These are example paths - you MUST customize them for your system
+
+# Homebrew location (typical paths shown)
+# Apple Silicon: /opt/homebrew
+# Intel Mac: /usr/local
+HOMEBREW_PREFIX="${HOMEBREW_PREFIX:-/opt/homebrew}"
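+# Illustrative override for an Intel Mac: HOMEBREW_PREFIX=/usr/local ./unix_cpu_mac_install.sh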
+
+# System Clang (or specify Homebrew LLVM if preferred)
+# System Clang is typically fine for macOS
+CLANG_BASE="/usr/bin"
+
+# MPI installation location
+# Options:
+# - Homebrew: ${HOMEBREW_PREFIX}/bin
+# - MacPorts: /opt/local/bin
+# - Custom build: ${HOME}/local/bin
+# - Anaconda: ${HOME}/anaconda3/bin
+MPI_BASE="${HOME}/local/bin"
+
+# Python bin directory (PYTHON_EXECUTABLE below is set to ${PYTHON_BASE}/python)
+# Options:
+#   - Homebrew: ${HOMEBREW_PREFIX}/bin (interpreter is named python3)
+#   - Anaconda: ${HOME}/anaconda3/bin (interpreter is named python)
+#   - System: /usr/bin (interpreter is named python3)
+# Adjust PYTHON_EXECUTABLE below if your interpreter is python3 rather than python
+PYTHON_BASE="${HOME}/anaconda3/bin"
+
+###########################################
+# Compiler Detection
+###########################################
+# Note: No module system on Mac, so we rely on PATH and explicit settings
+
+# Check if we're on Apple Silicon or Intel
+if [[ $(uname -m) == "arm64" ]]; then
+ MAC_ARCH="arm64"
+ echo "Detected Apple Silicon (ARM64)"
+else
+ MAC_ARCH="x86_64"
+ echo "Detected Intel Mac (x86_64)"
+fi
+
+###########################################
+# Compilers
+###########################################
+export CMAKE_C_COMPILER="${CLANG_BASE}/clang"
+export CMAKE_CXX_COMPILER="${CLANG_BASE}/clang++"
+
+###########################################
+# MPI Wrappers
+###########################################
+export MPI_C_COMPILER="${MPI_BASE}/mpicc"
+export MPI_CXX_COMPILER="${MPI_BASE}/mpicxx"
+export MPI_Fortran_COMPILER="${MPI_BASE}/mpifort"
+
+###########################################
+# Python
+###########################################
+export PYTHON_EXECUTABLE="${PYTHON_BASE}/python"
+
+###########################################
+# Build Flags
+###########################################
+# Mac-specific: may need to handle SDK location
+# Homebrew libraries are in ${HOMEBREW_PREFIX}/lib
+export CMAKE_CXX_FLAGS="-fPIC"
+export CMAKE_C_FLAGS="-fPIC"
+export CMAKE_EXE_LINKER_FLAGS=""
+
+# Optional: Add Homebrew library paths if needed
+# export CMAKE_EXE_LINKER_FLAGS="-L${HOMEBREW_PREFIX}/lib -Wl,-rpath,${HOMEBREW_PREFIX}/lib"
+
+###########################################
+# Build Options
+###########################################
+export OPENMP_ON="OFF"
+export ENABLE_TESTS_EXACONSTIT="ON"
+
+# Mac-specific: defaults to all cores; set MAKE_JOBS lower if thermal throttling is a concern
+export MAKE_JOBS="${MAKE_JOBS:-$(sysctl -n hw.ncpu)}"
+
+###########################################
+# GPU Settings (Not Applicable)
+###########################################
+export GPU_BACKEND="NONE"
+export CMAKE_GPU_COMPILER=""
+export CMAKE_GPU_ARCHITECTURES=""
+export CMAKE_GPU_FLAGS=""
+
+###########################################
+# CHAI Options (Not Used in CPU Build)
+###########################################
+export CHAI_DISABLE_RM="OFF"
+export CHAI_THIN_GPU_ALLOCATE="OFF"
+export CHAI_ENABLE_PINNED="OFF"
+export CHAI_ENABLE_PICK="OFF"
+export CHAI_DEBUG="OFF"
+export CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
+export CHAI_ENABLE_UM="OFF"
+export CHAI_ENABLE_MANAGED_PTR="OFF"
+export CHAI_ENABLE_MANAGED_PTR_ON_GPU="OFF"
+
+###########################################
+# Mac-Specific Notes
+###########################################
+echo "=========================================="
+echo "Mac Build Configuration Notes"
+echo "=========================================="
+echo "Architecture: ${MAC_ARCH}"
+echo "Homebrew prefix: ${HOMEBREW_PREFIX}"
+echo ""
+echo "IMPORTANT: Verify these paths are correct for your system:"
+echo " Compilers: ${CLANG_BASE}"
+echo " MPI: ${MPI_BASE}"
+echo " Python: ${PYTHON_BASE}"
+echo ""
+echo "If builds fail, common issues:"
+echo " 1. MPI not installed: brew install open-mpi"
+echo " 2. CMake too old: brew install cmake"
+echo " 3. Wrong Python: Set PYTHON_BASE in this config"
+echo " 4. Path issues: Ensure MPI/Python are in your PATH"
+echo "=========================================="
\ No newline at end of file
diff --git a/scripts/install/configs/gpu_cuda_config.sh b/scripts/install/configs/gpu_cuda_config.sh
new file mode 100644
index 0000000..1ca82d6
--- /dev/null
+++ b/scripts/install/configs/gpu_cuda_config.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+# Configuration for CUDA GPU builds
+
+# Build type identification
+export BUILD_TYPE="cuda"
+export BUILD_SUFFIX="cuda"
+export GPU_BACKEND="CUDA"
+
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+# Host Compiler
+CLANG_VERSION="ibm-14.0.5"
+COMPILER_VERSION="clang-${CLANG_VERSION}"
+CLANG_BASE="/usr/tce/packages/clang/${COMPILER_VERSION}"
+
+# GCC for toolchain
+GCC_VERSION="11.2.1"
+GCC_BASE="/usr/tce/packages/gcc/gcc-${GCC_VERSION}"
+GCC_ARCH_SUBDIR="ppc64le-redhat-linux/11" # Architecture-specific lib path
+
+# CUDA
+CUDA_VERSION="11.8.0"
+CUDA_BASE="/usr/tce/packages/cuda/cuda-${CUDA_VERSION}"
+
+# MPI
+MPI_IMPL="spectrum-mpi"
+MPI_VERSION="rolling-release"
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}"
+
+# Python
+PYTHON_VERSION="3.8.2"
+PYTHON_BASE="/usr/tce/packages/python/python-${PYTHON_VERSION}"
+
+###########################################
+# Module Loading
+###########################################
+module load clang/${CLANG_VERSION}
+module load cmake/3.29.2
+module load cuda/${CUDA_VERSION}
+module list
+
+###########################################
+# Compilers
+###########################################
+export CMAKE_C_COMPILER="${CLANG_BASE}/bin/clang"
+export CMAKE_CXX_COMPILER="${CLANG_BASE}/bin/clang++"
+export CMAKE_GPU_COMPILER="${CUDA_BASE}/bin/nvcc"
+
+###########################################
+# MPI Wrappers
+###########################################
+export MPI_C_COMPILER="${MPI_BASE}/bin/mpicc"
+export MPI_CXX_COMPILER="${MPI_BASE}/bin/mpicxx"
+export MPI_Fortran_COMPILER="${MPI_BASE}/bin/mpifort"
+
+###########################################
+# Python
+###########################################
+export PYTHON_EXECUTABLE="${PYTHON_BASE}/bin/python3"
+
+###########################################
+# GPU Architecture (Configurable)
+###########################################
+# Defaults to Volta (SM_70); override via the environment variable below
+# Common options: 60 (Pascal), 70 (Volta), 75 (Turing), 80 (Ampere A100), 86 (Ampere GA10x), 90 (Hopper)
+export CMAKE_GPU_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES:-70}"
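+# Illustrative override for an A100 node: CMAKE_GPU_ARCHITECTURES=80 ./unix_gpu_cuda_install.sh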
+
+###########################################
+# Build Flags
+###########################################
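+# -restrict and --expt-extended-lambda are standard nvcc options (assume
+# restrict-qualified pointers, allow extended device lambdas); -Xcompiler and
+# -Xnvlink forward arguments to the host compiler and device linker respectively.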
+export CMAKE_CXX_FLAGS="-fPIC -std=c++17 --gcc-toolchain=${GCC_BASE}"
+export CMAKE_C_FLAGS="-fPIC"
+export CMAKE_GPU_FLAGS="-restrict --expt-extended-lambda -Xcompiler --gcc-toolchain=${GCC_BASE} -Xnvlink --suppress-stack-size-warning -std=c++17"
+
+# Linker flags for GCC toolchain integration
+GCC_LIB_PATH="${GCC_BASE}/rh/usr/lib/gcc/${GCC_ARCH_SUBDIR}"
+export CMAKE_EXE_LINKER_FLAGS="-L${GCC_LIB_PATH} -Wl,-rpath,${GCC_LIB_PATH}"
+
+# BLT-specific flags (used by some dependencies)
+export BLT_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
+
+###########################################
+# Build Options
+###########################################
+export OPENMP_ON="OFF"
+export ENABLE_TESTS_EXACONSTIT="ON"
+export MAKE_JOBS="${MAKE_JOBS:-4}"
+
+###########################################
+# CHAI Options
+###########################################
+# Conservative settings for V100 GPUs
+export CHAI_DISABLE_RM="OFF" # Keep resource manager enabled
+export CHAI_THIN_GPU_ALLOCATE="OFF" # Use full allocations for stability
+export CHAI_ENABLE_PINNED="ON"
+export CHAI_ENABLE_PICK="ON"
+export CHAI_DEBUG="OFF"
+export CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
+export CHAI_ENABLE_UM="ON"
+export CHAI_ENABLE_MANAGED_PTR="ON"
+export CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON"
+
+###########################################
+# CUDA-Specific Build Options
+###########################################
+# Ensure NVCC uses the correct host compiler
+export CUDAHOSTCXX="${CMAKE_CXX_COMPILER}"
+export CUDA_TOOLKIT_ROOT_DIR="${CUDA_BASE}"
\ No newline at end of file
diff --git a/scripts/install/configs/gpu_hip_config.sh b/scripts/install/configs/gpu_hip_config.sh
new file mode 100644
index 0000000..89e8470
--- /dev/null
+++ b/scripts/install/configs/gpu_hip_config.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Configuration for HIP GPU builds (AMD GPUs)
+
+# Build type identification
+export BUILD_TYPE="hip"
+export BUILD_SUFFIX="hip"
+export GPU_BACKEND="HIP"
+
+###########################################
+# Compiler Versions and Base Paths
+###########################################
+# ROCm Compiler
+ROCM_VERSION="6.4.2"
+ROCM_MAGIC_SUFFIX="magic"
+COMPILER_VERSION="rocmcc-${ROCM_VERSION}-${ROCM_MAGIC_SUFFIX}"
+ROCM_BASE="/usr/tce/packages/rocmcc/${COMPILER_VERSION}"
+
+# MPI - Cray MPICH
+MPI_IMPL="cray-mpich"
+MPI_VERSION="9.0.1"
+MPI_COMPILER_VERSION="${MPI_IMPL}-${MPI_VERSION}"
+MPI_BASE="/usr/tce/packages/${MPI_IMPL}/${MPI_COMPILER_VERSION}-${COMPILER_VERSION}"
+
+# Cray PE paths for linking
+CRAY_MPICH_VERSION="${MPI_VERSION}"
+CRAY_LIBFABRIC_VERSION="2.1"
+CRAY_PMI_VERSION="6.1.16"
+CRAY_PALS_VERSION="1.2.12"
+
+# Python
+PYTHON_VERSION="3.9.12"
+PYTHON_BASE="/usr/tce/packages/python/python-${PYTHON_VERSION}"
+
+###########################################
+# Module Loading
+###########################################
+module load cmake/3.29.2
+module load rocmcc/${ROCM_VERSION}-${ROCM_MAGIC_SUFFIX}
+module load rocm/${ROCM_VERSION}
+module load ${MPI_IMPL}/${MPI_VERSION}
+module list
+
+###########################################
+# Compilers
+###########################################
+export CMAKE_C_COMPILER="${ROCM_BASE}/bin/amdclang"
+export CMAKE_CXX_COMPILER="${ROCM_BASE}/bin/amdclang++"
+export CMAKE_GPU_COMPILER="${ROCM_BASE}/bin/amdclang++"
+
+###########################################
+# MPI Wrappers
+###########################################
+export MPI_C_COMPILER="${MPI_BASE}/bin/mpicc"
+export MPI_CXX_COMPILER="${MPI_BASE}/bin/mpicxx"
+export MPI_Fortran_COMPILER="${MPI_BASE}/bin/mpifort"
+
+###########################################
+# Python
+###########################################
+export PYTHON_EXECUTABLE="${PYTHON_BASE}/bin/python3"
+
+###########################################
+# GPU Architectures (Configurable)
+###########################################
+# Default to MI300A with xnack+ for unified memory
+# Common options:
+# gfx908 (MI100)
+# gfx90a (MI200 series)
+# gfx940 (MI300X - compute only)
+# gfx942 (MI300A - APU with xnack support)
+# gfx942:xnack+ (MI300A with unified memory)
+export CMAKE_GPU_ARCHITECTURES="${CMAKE_GPU_ARCHITECTURES:-gfx942:xnack+}"
+
+# MFEM's build has issues with the xnack+ suffix, so use the base arch for MFEM
+export MFEM_HIP_ARCHITECTURES="${MFEM_HIP_ARCHITECTURES:-gfx942}"
+
+# Also set AMDGPU_TARGETS for completeness
+export AMDGPU_TARGETS="${CMAKE_GPU_ARCHITECTURES}"
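+# Illustrative override for an MI250X node:
+#   CMAKE_GPU_ARCHITECTURES=gfx90a MFEM_HIP_ARCHITECTURES=gfx90a ./unix_gpu_hip_install.sh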
+
+###########################################
+# Build Flags
+###########################################
+export CMAKE_CXX_FLAGS="-fPIC -std=c++17 -munsafe-fp-atomics"
+export CMAKE_C_FLAGS="-fPIC"
+export CMAKE_GPU_FLAGS="-munsafe-fp-atomics -fgpu-rdc"
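+# -munsafe-fp-atomics permits hardware FP atomics that may not be strictly
+# IEEE-compliant; -fgpu-rdc builds relocatable device code so device functions
+# can be called across translation units.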
+
+###########################################
+# MPI Linking Flags (Cray-Specific)
+###########################################
+# Cray MPICH requires explicit linking to GTL and OFI libraries
+MPICH_GTL_LIB="/opt/cray/pe/mpich/${CRAY_MPICH_VERSION}/gtl/lib"
+MPICH_OFI_AMD_LIB="/opt/cray/pe/mpich/${CRAY_MPICH_VERSION}/ofi/amd/6.0/lib"
+
+# Runtime library paths for Cray PE
+CRAY_LIBFABRIC_LIB="/opt/cray/libfabric/${CRAY_LIBFABRIC_VERSION}/lib64"
+CRAY_PMI_LIB="/opt/cray/pe/pmi/${CRAY_PMI_VERSION}/lib"
+CRAY_PALS_LIB="/opt/cray/pe/pals/${CRAY_PALS_VERSION}/lib"
+ROCM_LLVM_LIB="/opt/rocm-${ROCM_VERSION}/llvm/lib"
+
+# Construct the full MPI linking flags
+MPI_CRAY_RPATH_FLAGS="-Wl,-rpath,${CRAY_LIBFABRIC_LIB}:${CRAY_PMI_LIB}:${CRAY_PALS_LIB}:${ROCM_LLVM_LIB}"
+MPI_CRAY_LINK_FLAGS="-lxpmem" # Cray xpmem for shared memory
+
+export CMAKE_EXE_LINKER_FLAGS="-lroctx64 -Wl,-rpath,${MPICH_OFI_AMD_LIB} ${MPI_CRAY_RPATH_FLAGS} -L${MPICH_GTL_LIB} -lmpi_gtl_hsa -Wl,-rpath,${MPICH_GTL_LIB} ${MPI_CRAY_LINK_FLAGS}"
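+# NOTE: with -fgpu-rdc, the final link may additionally need --hip-link; append
+# it above if the device linker reports unresolved symbols.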
+
+###########################################
+# Build Options
+###########################################
+export OPENMP_ON="OFF"
+export ENABLE_TESTS_EXACONSTIT="ON"
+export MAKE_JOBS="${MAKE_JOBS:-4}"
+
+###########################################
+# CHAI Options (MI300-Specific Tuning)
+###########################################
+# Aggressive settings optimized for MI300A APU architecture
+# CHAI_DISABLE_RM=ON: Disable resource manager for APU unified memory
+# CHAI_THIN_GPU_ALLOCATE=ON: Use thin allocations for better APU performance
+export CHAI_DISABLE_RM="ON"
+export CHAI_THIN_GPU_ALLOCATE="ON"
+export CHAI_ENABLE_PINNED="ON"
+export CHAI_ENABLE_PICK="ON"
+export CHAI_DEBUG="OFF"
+export CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
+export CHAI_ENABLE_UM="ON"
+export CHAI_ENABLE_MANAGED_PTR="ON"
+export CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON"
+
+###########################################
+# HIP-Specific Build Options
+###########################################
+export ROCM_PATH="${ROCM_BASE}"
+export HIP_PLATFORM="amd"
+export HIP_COMPILER="clang"
\ No newline at end of file
diff --git a/scripts/install/unix_cpu_intel_install.sh b/scripts/install/unix_cpu_intel_install.sh
new file mode 100644
index 0000000..b2eb274
--- /dev/null
+++ b/scripts/install/unix_cpu_intel_install.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# ExaConstit CPU build with Intel compilers
+
+set -Eeuo pipefail
+trap 'echo "Build failed at line $LINENO while running: $BASH_COMMAND" >&2' ERR
+
+# Resolve script directory
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Source common infrastructure
+source "${SCRIPT_DIR}/common/dependency_versions.sh"
+source "${SCRIPT_DIR}/common/preflight_checks.sh"
+source "${SCRIPT_DIR}/common/build_functions.sh"
+
+# Resolve BASE_DIR and change to it
+resolve_base_dir
+
+# Source configuration
+source "${SCRIPT_DIR}/configs/cpu_intel_config.sh"
+
+# User-controllable options
+export REBUILD="${REBUILD:-OFF}"
+export SYNC_SUBMODULES="${SYNC_SUBMODULES:-OFF}"
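+# Illustrative invocation: REBUILD=ON SYNC_SUBMODULES=ON ./unix_cpu_intel_install.sh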
+
+# Validate and summarize
+validate_configuration
+print_build_summary
+
+# Build everything
+build_all_dependencies
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
\ No newline at end of file
diff --git a/scripts/install/unix_cpu_intel_install_example.sh b/scripts/install/unix_cpu_intel_install_example.sh
deleted file mode 100644
index 6ad5e0a..0000000
--- a/scripts/install/unix_cpu_intel_install_example.sh
+++ /dev/null
@@ -1,280 +0,0 @@
-#!/usr/bin/bash
-# For ease all of this should be run in its own directory
-# Build and run this in $SCRATCH/csm3_builds/
-
-SCRIPT=$(readlink -f "$0")
-BASE_DIR=$(dirname "$SCRIPT")
-
-# On macs the above two lines won't work but can be replaced with this line
-# BASE_DIR=$(cd "$(dirname "$0")"; pwd -P)
-
-module load intel/2023.2.1-magic
-module load CMake/3.26.3
-module load python/3.12
-module list
-
-CC="/usr/tce/packages/intel/intel-2023.2.1-magic/bin/icx"
-CXX="/usr/tce/packages/intel/intel-2023.2.1-magic/bin/icpx"
-MPICXX="/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-2023.2.1-magic/bin/mpicxx"
-MPICC="/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-2023.2.1-magic/bin/mpicc"
-PYTHON_EXE="/usr/apps/python-3.12.2/bin/python"
-
-#Build raja
-if [ ! -d "camp" ]; then
- git clone https://github.com/LLNL/camp.git -b v2024.07.0
- cd ${BASE_DIR}/camp
- git submodule init
- git submodule update
-
- if [ ! -d "build" ]; then
- mkdir build
- cd ${BASE_DIR}/camp/build
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DRAJA_TIMER=chrono \
- -DENABLE_OPENMP=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DENABLE_CUDA=OFF |& tee my_camp_config
- make -j 2 |& tee my_camp_build
- make install |& tee my_camp_install
- fi
-fi
-
-OLCF_CAMP_ROOT=${BASE_DIR}/camp/install_dir/
-
-cd ${BASE_DIR}
-
-#exit
-if [ ! -d "RAJA" ]; then
- git clone https://github.com/LLNL/RAJA.git -b v2024.07.0
- cd ${BASE_DIR}/RAJA
- git submodule init
- git submodule update
- cd ${BASE_DIR}/RAJA
- if [ ! -d "build" ]; then
- mkdir build
- cd ${BASE_DIR}/RAJA/build
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DRAJA_ENABLE_TESTS=OFF \
- -DRAJA_ENABLE_EXAMPLES=OFF \
- -DRAJA_ENABLE_BENCHMARKS=OFF \
- -DRAJA_TIMER=chrono \
- -DENABLE_OPENMP=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DENABLE_CUDA=OFF \
- -Dcamp_DIR=${OLCF_CAMP_ROOT} |& tee my_raja_config
- make -j 4 |& tee my_raja_build
- make install |& tee my_raja_install
- fi
-fi
-
-OLCF_RAJA_ROOT=${BASE_DIR}/RAJA/install_dir/
-
-echo ${OLCF_RAJA_ROOT}
-
-cd ${BASE_DIR}
-if [ ! -d "ExaCMech" ]; then
- # Clone the repo
- git clone https://github.com/LLNL/ExaCMech.git
- cd ${BASE_DIR}/ExaCMech
- # Checkout the branch that has the HIP features on it
- git checkout develop
- # Update all the various submodules
- git submodule init && git submodule update
- if [ ! -d "${BASE_DIR}/ExaCMech/build" ]; then
- mkdir build
- cd ${BASE_DIR}/ExaCMech/build
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_MINIAPPS=OFF \
- -DENABLE_OPENMP=OFF \
- -DRAJA_DIR=${OLCF_RAJA_ROOT}/lib/cmake/raja/ \
- -DBUILD_SHARED_LIBS=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DENABLE_CUDA=OFF \
- -Dcamp_DIR=${OLCF_CAMP_ROOT}/lib/cmake/camp |& tee my_exacmech_config
-
- make -j 4 |& tee my_exacmech_build
- make install |& tee my_exacmech_install
- fi
-fi
-cd ${BASE_DIR}
-
-# Now to build our MFEM dependencies
-# First let's install Hypre v2.23.0
-cd ${BASE_DIR}
-if [ ! -d "hypre" ]; then
-
- git clone https://github.com/hypre-space/hypre.git --branch v2.30.0 --single-branch
- cd ${BASE_DIR}/hypre/
- mkdir build
- cd ${BASE_DIR}/hypre/build
- rm -rf *
- # Based on their install instructions
- # This should work on most systems
- # Hypre's default suggestions of just using configure don't always work
- cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre/ \
- -DWITH_MPI=TRUE \
- -DCMAKE_C_COMPILER=${MPICC} \
- -DCMAKE_CXX_COMPILER=${MPICXX} \
- -DCMAKE_Fortran_COMPILER=${MPIFORT} \
- -DCMAKE_BUILD_TYPE=Release \
- |& tee my_hypre_config
-
- make -j 4 |& tee my_hypre_build
- make install |& tee my_hypre_install
-
- cd ${BASE_DIR}/hypre/src/hypre
- OLCF_HYPRE_ROOT="$(pwd)"
-
-else
-
- echo " hypre already built "
- OLCF_HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre
-
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "metis-5.1.0" ]; then
-
- curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz
- tar -xzf metis-5.1.0.tar.gz
- rm metis-5.1.0.tar.gz
- cd metis-5.1.0
- mkdir install_dir
- make config prefix=${BASE_DIR}/metis-5.1.0/install_dir/ CC=${CC} CXX=${CXX} |& tee my_metis_config
- make -j 4 |& tee my_metis_build
- make install |& tee my_metis_install
- cd ${BASE_DIR}/metis-5.1.0/install_dir/
- OLCF_METIS_ROOT="$(pwd)"
-else
-
- echo " metis-5.1.0 already built "
- OLCF_METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir/
-
-fi
-
-cd ${BASE_DIR}
-if [ ! -d "ADIOS2" ]; then
- # Clone the repo
- git clone https://github.com/ornladios/ADIOS2.git
- cd ${BASE_DIR}/ADIOS2
- # Checkout the branch that has the HIP features on it
- git checkout v2.10.1
- # Update all the various submodules
- git submodule init && git submodule update
-
- cd ${BASE_DIR}
- if [ ! -d "${BASE_DIR}/ADIOS2/build" ]; then
- cd ${BASE_DIR}/ADIOS2
- mkdir build
- cd ${BASE_DIR}/ADIOS2/build
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DADIOS2_USE_MPI=ON \
- -DADIOS2_USE_Blosc2=OFF \
- -DADIOS2_USE_BZip2=OFF \
- -DADIOS2_USE_ZeroMQ=OFF \
- -DADIOS2_USE_Endian_Reverse=OFF \
- -DADIOS2_USE_Fortran=OFF \
- -DADIOS2_USE_Python=OFF \
- -DADIOS2_USE_HDF5=OFF \
- -DADIOS2_USE_MPI=ON \
- -DADIOS2_USE_PNG=OFF \
- -DBUILD_SHARED_LIBS=ON \
- -DADIOS2_USE_SZ=OFF \
- -DADIOS2_USE_ZFP=OFF
-
-
- make -j 4 |& tee my_adios2_build
- make install |& tee my_adios2_install
- fi
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "mfem" ]; then
- git clone https://github.com/rcarson3/mfem.git
- cd ${BASE_DIR}/mfem/
- git checkout exaconstit-dev
- if [ ! -d "build" ]; then
- mkdir build
- fi
- cd ${BASE_DIR}/mfem/build
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM"
- #All the options
- cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO\
- -DCMAKE_CXX_COMPILER=${MPICXX} \
- -DMETIS_DIR=${OLCF_METIS_ROOT} \
- -DHYPRE_DIR=${OLCF_HYPRE_ROOT} \
- -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DMFEM_USE_OPENMP=OFF \
- -DMFEM_USE_RAJA=YES \
- -DRAJA_DIR:PATH=${OLCF_RAJA_ROOT} \
- -DMFEM_USE_ZLIB=YES \
- -DMFEM_USE_ADIOS2=ON \
- -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DRAJA_REQUIRED_PACKAGES="camp" \
- -DMFEM_USE_CAMP=ON \
- -Dcamp_DIR:PATH=${OLCF_CAMP_ROOT}/lib/cmake/camp/ \
- -DCMAKE_CXX_STANDARD=14 \
- -DCMAKE_BUILD_TYPE=Release \
- |& tee my_mfem_config
-
- make -j 4 |& tee my_mfem_build
- make install |& tee my_mfem_install
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "ExaConstit" ]; then
- git clone https://github.com/llnl/ExaConstit.git
- cd ${BASE_DIR}/ExaConstit/
- git checkout exaconstit-dev
- git submodule init && git submodule update
-
- cd ${BASE_DIR}/ExaConstit/
- if [ ! -d "build" ]; then
- mkdir build
- fi
-
- cd ${BASE_DIR}/ExaConstit/build && rm -rf *
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM"
-
- cmake ../ -DCMAKE_C_COMPILER=${MPICC} \
- -DCMAKE_CXX_COMPILER=${MPICXX} \
- -DENABLE_TESTS=ON \
- -DENABLE_OPENMP=OFF \
- -DENABLE_FORTRAN=OFF \
- -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
- -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/lib/cmake/mfem/ \
- -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir/ \
- -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir/ \
- -DENABLE_SNLS_V03=ON \
- -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DRAJA_DIR:PATH=${OLCF_RAJA_ROOT}/lib/cmake/raja/ \
- -DCMAKE_BUILD_TYPE=Release \
- -Dcamp_DIR=${OLCF_CAMP_ROOT}/lib/cmake/camp |& tee my_exconstit_config
-
- make -j 4|& tee my_exconstit_build
-
-fi
diff --git a/scripts/install/unix_cpu_mac_install.sh b/scripts/install/unix_cpu_mac_install.sh
new file mode 100644
index 0000000..307b027
--- /dev/null
+++ b/scripts/install/unix_cpu_mac_install.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# ExaConstit CPU build for macOS
+
+set -Eeuo pipefail
+trap 'echo "Build failed at line $LINENO while running: $BASH_COMMAND" >&2' ERR
+
+# Resolve script directory (Mac-compatible)
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Source common infrastructure
+source "${SCRIPT_DIR}/common/dependency_versions.sh"
+source "${SCRIPT_DIR}/common/preflight_checks.sh"
+source "${SCRIPT_DIR}/common/build_functions.sh"
+
+# Resolve BASE_DIR and change to it
+resolve_base_dir
+
+# Source configuration
+source "${SCRIPT_DIR}/configs/cpu_mac_config.sh"
+
+# User-controllable options
+export REBUILD="${REBUILD:-OFF}"
+export SYNC_SUBMODULES="${SYNC_SUBMODULES:-OFF}"
+
+# Validate and summarize
+validate_configuration
+print_build_summary
+
+# Build everything
+build_all_dependencies
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
\ No newline at end of file
diff --git a/scripts/install/unix_gpu_cuda_install.sh b/scripts/install/unix_gpu_cuda_install.sh
new file mode 100644
index 0000000..2ea5520
--- /dev/null
+++ b/scripts/install/unix_gpu_cuda_install.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# ExaConstit CUDA build
+
+set -Eeuo pipefail
+trap 'echo "Build failed at line $LINENO while running: $BASH_COMMAND" >&2' ERR
+
+# Resolve script directory
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Source common infrastructure
+source "${SCRIPT_DIR}/common/dependency_versions.sh"
+source "${SCRIPT_DIR}/common/preflight_checks.sh"
+source "${SCRIPT_DIR}/common/build_functions.sh"
+
+# Resolve BASE_DIR and change to it
+resolve_base_dir
+
+# Source configuration
+source "${SCRIPT_DIR}/configs/gpu_cuda_config.sh"
+
+# User-controllable options
+export REBUILD="${REBUILD:-OFF}"
+export SYNC_SUBMODULES="${SYNC_SUBMODULES:-OFF}"
+
+# Validate and summarize
+validate_configuration
+print_build_summary
+
+# Build everything
+build_all_dependencies
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
\ No newline at end of file
diff --git a/scripts/install/unix_gpu_cuda_install_example.sh b/scripts/install/unix_gpu_cuda_install_example.sh
deleted file mode 100644
index b9edc10..0000000
--- a/scripts/install/unix_gpu_cuda_install_example.sh
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/usr/bin/bash
-# For ease all of this should be run in its own directory
-
-SCRIPT=$(readlink -f "$0")
-BASE_DIR=$(dirname "$SCRIPT")
-
-echo $BASH_VERSION
-
-# This is a bit system dependent but for El Capitan-like systems the below should work
-# You should be able to modify it to work for your own system easily enough.
-# Most of the options are defined by the first set of bash variables defined
-# below. You'll likely need to modify the ROCM_BASE, MPIHOME, and then the various
-# MPI/linker flags
-# While this is largely targeted towards AMD GPU builds, you can probably update
-# it easily enough for a NVidia GPU build of things...
-module load cmake/3.29.2 clang/ibm-14.0.5 cuda/11.8.0
-
-CLANG_BASE="/usr/tce/packages/clang/clang-ibm-14.0.5/"
-NVCC_BASE=""
-CC="${CLANG_BASE}/bin/clang"
-CXX="${CLANG_BASE}/bin/clang++"
-
-
-GCC_HOME="/usr/tce/packages/gcc/gcc-11.2.1"
-CUDA_VER="11.8.0"
-CUDA_TOOLKIT_ROOT_DIR="/usr/tce/packages/cuda/cuda-${CUDA_VER}"
-NVCC="${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc"
-
-BLT_EXE_LINKER_FLAGS="-L${GCC_HOME}/rh/usr/lib/gcc/ppc64le-redhat-linux/11 -Wl,-rpath,${GCC_HOME}/rh/usr/lib/gcc/ppc64le-redhat-linux/11"
-
-MPIHOME="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-ibm-14.0.5/"
-MPICXX="${MPIHOME}/bin/mpicxx"
-MPICC="${MPIHOME}/bin/mpicc"
-MPIFORT="${MPIHOME}/bin/mpifort"
-CUDAON="ON"
-OPENMP_ON="OFF"
-GPU_TARGETS="70"
-CXX_FLAGS="-fPIC -std=c++17 --gcc-toolchain=${GCC_HOME}"
-CUDA_FLAGS="-restrict --expt-extended-lambda -Xcompiler --gcc-toolchain=${GCC_HOME} -Xnvlink --suppress-stack-size-warning -std=c++17"
-
-PYTHON_EXE="/usr/tce/packages/python/python-3.8.2/bin/python3"
-# Various build options for our various libaries
-UMPIRE_ENABLE_TOOLS="ON"
-UMPIRE_ENABLE_BACKTRACE="ON"
-UMPIRE_ENABLE_BACKTRACE_SYMBOLS="ON"
-# On V100s turn this off
-CHAI_DISABLE_RM="OFF"
-# Only for MI300a s other systems we need to turn this off
-CHAI_THIN_GPU_ALLOCATE="OFF"
-CHAI_ENABLE_PINNED="ON"
-CHAI_ENABLE_PICK="ON"
-CHAI_DEBUG="OFF"
-CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
-CHAI_ENABLE_UM="ON"
-CHAI_ENABLE_MANAGED_PTR="ON"
-CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON"
-
-#Build camp
-if [ ! -d "camp" ]; then
- git clone https://github.com/LLNL/camp.git -b v2024.07.0
- cd ${BASE_DIR}/camp
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/camp/build_cuda" ]; then
- cd ${BASE_DIR}/camp
- mkdir build_cuda
- cd ${BASE_DIR}/camp/build_cuda
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DENABLE_CUDA=${CUDAON}
- make -j 2
- make install
-fi
-
-CAMP_ROOT=${BASE_DIR}/camp/install_dir_cuda/
-echo ${CAMP_ROOT}
-cd ${BASE_DIR}
-
-#exit
-if [ ! -d "RAJA" ]; then
- git clone https://github.com/LLNL/RAJA.git -b v2024.07.0
- cd ${BASE_DIR}/RAJA
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/RAJA/build_cuda" ]; then
- cd ${BASE_DIR}/RAJA
- mkdir build_cuda
- cd ${BASE_DIR}/RAJA/build_cuda
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DRAJA_ENABLE_TESTS=OFF \
- -DRAJA_ENABLE_EXAMPLES=OFF \
- -DRAJA_ENABLE_BENCHMARKS=OFF \
- -DRAJA_ENABLE_REPRODUCERS=OFF \
- -DRAJA_ENABLE_EXERCISES=OFF \
- -DRAJA_ENABLE_VECTORIZATION=OFF \
- -DRAJA_ENABLE_DOCUMENTATION=OFF \
- -DRAJA_USE_DOUBLE=ON \
- -DRAJA_USE_BARE_PTR=ON \
- -DRAJA_TIMER=chrono \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -Dcamp_DIR=${CAMP_ROOT}
- make -j 4
- make install
-fi
-
-RAJA_ROOT=${BASE_DIR}/RAJA/install_dir_cuda/
-echo ${RAJA_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "Umpire" ]; then
- git clone https://github.com/LLNL/Umpire.git -b v2024.07.0
- cd ${BASE_DIR}/Umpire
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/Umpire/build_cuda" ]; then
- cd ${BASE_DIR}/Umpire
- mkdir build_cuda
- cd ${BASE_DIR}/Umpire/build_cuda
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DENABLE_MPI=OFF \
- -DUMPIRE_ENABLE_C=OFF \
- -DENABLE_FORTRAN=OFF \
- -DENABLE_GMOCK=OFF \
- -DUMPIRE_ENABLE_IPC_SHARED_MEMORY=OFF \
- -DUMPIRE_ENABLE_TOOLS=${UMPIRE_ENABLE_TOOLS} \
- -DUMPIRE_ENABLE_BACKTRACE=${UMPIRE_ENABLE_BACKTRACE} \
- -DUMPIRE_ENABLE_BACKTRACE_SYMBOLS=${UMPIRE_ENABLE_BACKTRACE_SYMBOLS} \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -Dcamp_DIR=${CAMP_ROOT}
-
- make -j 4
- make install
-fi
-
-UMPIRE_ROOT=${BASE_DIR}/Umpire/install_dir_cuda/
-echo ${UMPIRE_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "CHAI" ]; then
- git clone https://github.com/LLNL/CHAI.git -b v2024.07.0
- cd ${BASE_DIR}/CHAI
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/CHAI/build_cuda" ]; then
- cd ${BASE_DIR}/CHAI
- mkdir build_cuda
- cd ${BASE_DIR}/CHAI/build_cuda
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_EXAMPLES=OFF \
- -DENABLE_DOCS=OFF \
- -DENABLE_GMOCK=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DENABLE_MPI=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -DCHAI_ENABLE_RAJA_PLUGIN=ON \
- -DCHAI_ENABLE_RAJA_NESTED_TEST=OFF \
- -DCHAI_ENABLE_PINNED=${CHAI_ENABLE_PINNED} \
- -DCHAI_DISABLE_RM=${CHAI_DISABLE_RM} \
- -DCHAI_THIN_GPU_ALLOCATE=${CHAI_THIN_GPU_ALLOCATE} \
- -DCHAI_ENABLE_PICK=${CHAI_ENABLE_PICK} \
- -DCHAI_DEBUG=${CHAI_DEBUG} \
- -DCHAI_ENABLE_GPU_SIMULATION_MODE=${CHAI_ENABLE_GPU_SIMULATION_MODE} \
- -DCHAI_ENABLE_UM=${CHAI_ENABLE_UM} \
- -DCHAI_ENABLE_MANAGED_PTR=${CHAI_ENABLE_MANAGED_PTR} \
- -DCHAI_ENABLE_MANAGED_PTR_ON_GPU=${CHAI_ENABLE_MANAGED_PTR_ON_GPU} \
- -Dfmt_DIR=${UMPIRE_ROOT} \
- -Dumpire_DIR=${UMPIRE_ROOT} \
- -DRAJA_DIR=${RAJA_ROOT} \
- -Dcamp_DIR=${CAMP_ROOT}
- make -j 4
- make install
-fi
-
-CHAI_ROOT=${BASE_DIR}/CHAI/install_dir_cuda/
-echo ${CHAI_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "ExaCMech" ]; then
- # Clone the repo
- git clone https://github.com/LLNL/ExaCMech.git
- cd ${BASE_DIR}/ExaCMech
- # Checkout the branch that has the HIP features on it
- git checkout develop
- # Update all the various submodules
- git submodule init && git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/ExaCMech/build_cuda" ]; then
- cd ${BASE_DIR}/ExaCMech
- mkdir build_cuda
- cd ${BASE_DIR}/ExaCMech/build_cuda
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_MINIAPPS=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DBUILD_SHARED_LIBS=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \
- -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \
- -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \
- -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \
- -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp
-
- make -j 4
- make install
-fi
-
-ECMECH_ROOT=${BASE_DIR}/ExaCMech/install_dir_cuda/
-echo ${ECMECH_ROOT}
-cd ${BASE_DIR}
-
-# Now to build our MFEM dependencies
-# First let's install Hypre v2.23.0
-cd ${BASE_DIR}
-if [ ! -d "hypre" ]; then
- git clone https://github.com/hypre-space/hypre.git --branch v2.32.0 --single-branch
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/hypre/build_cuda" ]; then
- cd ${BASE_DIR}/hypre/
- mkdir build_cuda
- cd ${BASE_DIR}/hypre/build_cuda
- rm -rf *
- # Based on their install instructions
- # This should work on most systems
- # Hypre's default suggestions of just using configure don't always work
- cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre_hip/ \
- -DCMAKE_C_COMPILER=${CC} \
- -DMPI_CXX_COMPILER=${MPICXX} \
- -DMPI_C_COMPILER=${MPICC} \
- -DCMAKE_BUILD_TYPE=Release \
- |& tee my_hypre_config
-
- make -j 4 |& tee my_hypre_build
- make install |& tee my_hypre_install
-
- cd ${BASE_DIR}/hypre/src/hypre_hip
- HYPRE_ROOT="$(pwd)"
-
-else
-
- echo " hypre already built "
- HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre_hip
-
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "metis-5.1.0" ]; then
-
- curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz
- tar -xzf metis-5.1.0.tar.gz
- rm metis-5.1.0.tar.gz
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/metis-5.1.0/install_dir_cuda" ]; then
- cd ${BASE_DIR}/metis-5.1.0
- mkdir install_dir_cuda
- make distclean
- make config prefix=${BASE_DIR}/metis-5.1.0/install_dir_cuda/ CC=${CC} CXX=${CXX} |& tee my_metis_config
- make -j 4 |& tee my_metis_build
- make install |& tee my_metis_install
- cd ${BASE_DIR}/metis-5.1.0/install_dir_cuda/
- METIS_ROOT="$(pwd)"
-else
- echo " metis-5.1.0 already built "
- METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir_cuda/
-fi
-
-# cd ${BASE_DIR}
-# if [ ! -d "ADIOS2" ]; then
-# # Clone the repo
-# git clone https://github.com/ornladios/ADIOS2.git
-# cd ${BASE_DIR}/ADIOS2
-# # Checkout the branch that has the HIP features on it
-# git checkout v2.10.0
-# # Update all the various submodules
-# git submodule init && git submodule update
-# fi
-# cd ${BASE_DIR}
-# if [ ! -d "${BASE_DIR}/ADIOS2/build_cuda" ]; then
-# cd ${BASE_DIR}/ADIOS2
-# mkdir build_cuda
-# cd ${BASE_DIR}/ADIOS2/build_cuda
-# rm -rf *
-
-# cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
-# -DCMAKE_BUILD_TYPE=Release \
-# -DCMAKE_C_COMPILER=${CC} \
-# -DCMAKE_CXX_COMPILER=${CXX} \
-# -DADIOS2_USE_MPI=ON \
-# -DADIOS2_USE_Blosc2=OFF \
-# -DADIOS2_USE_BZip2=OFF \
-# -DADIOS2_USE_ZeroMQ=OFF \
-# -DADIOS2_USE_Endian_Reverse=OFF \
-# -DADIOS2_USE_Fortran=OFF \
-# -DADIOS2_USE_Python=ON \
-# -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
-# -DADIOS2_USE_HDF5=OFF \
-# -DADIOS2_USE_MPI=ON \
-# -DADIOS2_USE_PNG=OFF \
-# -DBUILD_SHARED_LIBS=ON \
-# -DADIOS2_USE_SZ=OFF \
-# -DADIOS2_USE_ZFP=OFF
-
-
-# make -j 16 |& tee my_adios2_build
-# make install |& tee my_adios2_install
-# fi
-
-
-cd ${BASE_DIR}
-
-if [ ! -d "mfem" ]; then
- git clone https://github.com/rcarson3/mfem.git
- cd ${BASE_DIR}/mfem/
- git checkout exaconstit-dev
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "${BASE_DIR}/mfem/build_cuda" ]; then
- mkdir ${BASE_DIR}/mfem/build_cuda
- cd ${BASE_DIR}/mfem/build_cuda
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM"
- #All the options
- cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO\
- -DMETIS_DIR=${METIS_ROOT} \
- -DHYPRE_DIR=${HYPRE_ROOT} \
- -DMFEM_USE_RAJA=YES \
- -DRAJA_DIR:PATH=${RAJA_ROOT} \
- -DRAJA_REQUIRED_PACKAGES="camp" \
- -DMFEM_USE_CAMP=ON \
- -Dcamp_DIR:PATH=${CAMP_ROOT}/lib/cmake/camp/ \
- -DMFEM_USE_OPENMP=${OPENMP_ON} \
- -DMFEM_USE_ZLIB=YES \
- -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \
- -DCMAKE_CXX_STANDARD=17 \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -DMFEM_USE_CUDA=${CUDAON} \
- -DCMAKE_BUILD_TYPE=Release \
- |& tee my_mfem_config
- # -DMFEM_USE_MAGMA=ON \
- # -DMAGMA_DIR=${BASE_DIR}/magma/install_dir/ \
- # -DMFEM_USE_ADIOS2=ON \
- # -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir_cuda/ \
-
- make -j 16 |& tee my_mfem_build
- make install |& tee my_mfem_install
-fi
-
-cd ${BASE_DIR}
-
-# : << 'END_COMMENT'
-if [ ! -d "ExaConstit" ]; then
- git clone https://github.com/llnl/ExaConstit.git
- cd ${BASE_DIR}/ExaConstit/
- git checkout exaconstit-dev
- git submodule init && git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/ExaConstit/build_cuda" ]; then
- cd ${BASE_DIR}/ExaConstit/
- mkdir build_cuda
-
- cd ${BASE_DIR}/ExaConstit/build_cuda #&& rm -rf *
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM"
-
- cmake ../ \
- -DCMAKE_CXX_STANDARD=17 \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${CXX} \
- -DMPI_CXX_COMPILER=${MPICXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \
- -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \
- -DCMAKE_CUDA_COMPILER=${NVCC} \
- -DCMAKE_CUDA_HOST_COMPILER=${CXX} \
- -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \
- -DENABLE_CUDA=${CUDAON} \
- -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
- -DENABLE_TESTS=ON \
- -DENABLE_OPENMP=OFF \
- -DENABLE_FORTRAN=OFF \
- -DENABLE_SNLS_V03=ON \
- -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DRAJA_DIR:PATH=${RAJA_ROOT}/lib/cmake/raja/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DMFEM_DIR=${BASE_DIR}/mfem/install_dir_cuda/lib/cmake/mfem/ \
- -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir_cuda/ \
- -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir_cuda/ \
- -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \
- -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \
- -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \
- -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \
- -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp |& tee my_exconstit_config
-
- make -j 4|& tee my_exconstit_build
-fi
-###END_COMMENT
diff --git a/scripts/install/unix_gpu_hip_install.sh b/scripts/install/unix_gpu_hip_install.sh
new file mode 100644
index 0000000..22495ac
--- /dev/null
+++ b/scripts/install/unix_gpu_hip_install.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# ExaConstit HIP build for AMD GPUs
+
+set -Eeuo pipefail
+trap 'echo "Build failed at line $LINENO while running: $BASH_COMMAND" >&2' ERR
+
+# Resolve script directory
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# Source common infrastructure
+source "${SCRIPT_DIR}/common/dependency_versions.sh"
+source "${SCRIPT_DIR}/common/preflight_checks.sh"
+source "${SCRIPT_DIR}/common/build_functions.sh"
+
+# Resolve BASE_DIR and change to it
+resolve_base_dir
+
+# Source configuration
+source "${SCRIPT_DIR}/configs/gpu_hip_config.sh"
+
+# User-controllable options
+export REBUILD="${REBUILD:-OFF}"
+export SYNC_SUBMODULES="${SYNC_SUBMODULES:-OFF}"
+
+# Validate and summarize
+validate_configuration
+print_build_summary
+
+# Build everything
+build_all_dependencies
+
+echo ""
+echo "=========================================="
+echo "Build complete!"
+echo "=========================================="
\ No newline at end of file
diff --git a/scripts/install/unix_gpu_hip_install_example.sh b/scripts/install/unix_gpu_hip_install_example.sh
deleted file mode 100644
index fa15503..0000000
--- a/scripts/install/unix_gpu_hip_install_example.sh
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/bash
-# For ease all of this should be run in its own directory
-
-SCRIPT=$(readlink -f "$0")
-BASE_DIR=$(dirname "$SCRIPT")
-
-echo $BASH_VERSION
-
-# This is a bit system dependent but for El Capitan-like systems the below should work
-# You should be able to modify it to work for your own system easily enough.
-# Most of the options are defined by the first set of bash variables defined
-# below. You'll likely need to modify the ROCM_BASE, MPIHOME, and then the various
-# MPI/linker flags
-# While this is largely targeted towards AMD GPU builds, you can probably update
-# it easily enough for a NVidia GPU build of things...
-module load cmake/3.29.2 rocmcc/6.3.1-magic rocm/6.3.1 cray-mpich/8.1.31
-
-ROCM_BASE="/usr/tce/packages/rocmcc/rocmcc-6.3.1-magic/"
-CC="${ROCM_BASE}/bin/amdclang"
-CXX="${ROCM_BASE}/bin/amdclang++"
-HIPCC="${ROCM_BASE}/bin/hipcc"
-MPIHOME="/usr/tce/packages/cray-mpich/cray-mpich-8.1.31-rocmcc-6.3.1-magic/"
-MPILIBHOME="/opt/cray/pe/mpich/8.1.31/gtl/lib"
-MPIAMDHOME="/opt/cray/pe/mpich/8.1.31/ofi/amd/6.0/lib"
-MPICRAYFLAGS="-Wl,-rpath,/opt/cray/libfabric/2.1/lib64:/opt/cray/pe/pmi/6.1.15/lib:/opt/cray/pe/pals/1.2.12/lib:/opt/rocm-6.3.1/llvm/lib -lxpmem"
-MPICXX="$MPIHOME/bin/mpicxx"
-MPICC="$MPIHOME/bin/mpicc"
-MPIFORT="$MPIHOME/bin/mpifort"
-ROCMON="ON"
-OPENMP_ON="OFF"
-LOC_ROCM_ARCH="gfx942"
-GPU_TARGETS="gfx942"
-AMDGPU_TARGETS="gfx942"
-CXX_FLAGS="-fPIC -std=c++17 -munsafe-fp-atomics"
-
-EXE_LINK_FLAGS="--hip-link -lroctx64 -Wl,-rpath,${MPIAMDHOME} ${MPICRAYFLAGS} -L${MPILIBHOME} -lmpi_gtl_hsa -Wl,-rpath,${MPILIBHOME}"
-PYTHON_EXE="/usr/tce/packages/python/python-3.9.12/bin/python3"
-# Various build options for our various libaries
-UMPIRE_ENABLE_TOOLS="ON"
-UMPIRE_ENABLE_BACKTRACE="ON"
-UMPIRE_ENABLE_BACKTRACE_SYMBOLS="ON"
-# On V100s turn this off
-CHAI_DISABLE_RM="ON"
-# Only for MI300a s other systems we need to turn this off
-CHAI_THIN_GPU_ALLOCATE="ON"
-CHAI_ENABLE_PINNED="ON"
-CHAI_ENABLE_PICK="ON"
-CHAI_DEBUG="OFF"
-CHAI_ENABLE_GPU_SIMULATION_MODE="OFF"
-CHAI_ENABLE_UM="ON"
-CHAI_ENABLE_MANAGED_PTR="ON"
-CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON"
-
-#Build camp
-if [ ! -d "camp" ]; then
- git clone https://github.com/LLNL/camp.git -b v2024.07.0
- cd ${BASE_DIR}/camp
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/camp/build_hip" ]; then
- cd ${BASE_DIR}/camp
- mkdir build_hip
- cd ${BASE_DIR}/camp/build_hip
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_OPENMP=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DENABLE_HIP=$ROCMON
- make -j 2
- make install
-fi
-
-CAMP_ROOT=${BASE_DIR}/camp/install_dir_hip/
-echo ${CAMP_ROOT}
-cd ${BASE_DIR}
-
-#exit
-if [ ! -d "RAJA" ]; then
- git clone https://github.com/LLNL/RAJA.git -b v2024.07.0
- cd ${BASE_DIR}/RAJA
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/RAJA/build_hip" ]; then
- cd ${BASE_DIR}/RAJA
- mkdir build_hip
- cd ${BASE_DIR}/RAJA/build_hip
- rm -rf *
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DRAJA_ENABLE_TESTS=OFF \
- -DRAJA_ENABLE_EXAMPLES=OFF \
- -DRAJA_ENABLE_BENCHMARKS=OFF \
- -DRAJA_ENABLE_REPRODUCERS=OFF \
- -DRAJA_ENABLE_EXERCISES=OFF \
- -DRAJA_ENABLE_VECTORIZATION=OFF \
- -DRAJA_ENABLE_DOCUMENTATION=OFF \
- -DRAJA_USE_DOUBLE=ON \
- -DRAJA_USE_BARE_PTR=ON \
- -DRAJA_TIMER=chrono \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DENABLE_HIP=${ROCMON} \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- -Dcamp_DIR=${CAMP_ROOT}
- make -j 4
- make install
-fi
-
-RAJA_ROOT=${BASE_DIR}/RAJA/install_dir_hip/
-echo ${RAJA_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "Umpire" ]; then
- git clone https://github.com/LLNL/Umpire.git -b v2024.07.0
- cd ${BASE_DIR}/Umpire
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/Umpire/build_hip" ]; then
- cd ${BASE_DIR}/Umpire
- mkdir build_hip
- cd ${BASE_DIR}/Umpire/build_hip
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DENABLE_MPI=OFF \
- -DUMPIRE_ENABLE_C=OFF \
- -DENABLE_FORTRAN=OFF \
- -DENABLE_GMOCK=OFF \
- -DUMPIRE_ENABLE_IPC_SHARED_MEMORY=OFF \
- -DUMPIRE_ENABLE_TOOLS=${UMPIRE_ENABLE_TOOLS} \
- -DUMPIRE_ENABLE_BACKTRACE=${UMPIRE_ENABLE_BACKTRACE} \
- -DUMPIRE_ENABLE_BACKTRACE_SYMBOLS=${UMPIRE_ENABLE_BACKTRACE_SYMBOLS} \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DENABLE_HIP=${ROCMON} \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- -Dcamp_DIR=${CAMP_ROOT}
-
- make -j 4
- make install
-fi
-
-UMPIRE_ROOT=${BASE_DIR}/Umpire/install_dir_hip/
-echo ${UMPIRE_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "CHAI" ]; then
- git clone https://github.com/LLNL/CHAI.git -b v2024.07.0
- cd ${BASE_DIR}/CHAI
- git submodule init
- git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/CHAI/build_hip" ]; then
- cd ${BASE_DIR}/CHAI
- mkdir build_hip
- cd ${BASE_DIR}/CHAI/build_hip
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_EXAMPLES=OFF \
- -DENABLE_DOCS=OFF \
- -DENABLE_GMOCK=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DENABLE_MPI=OFF \
- -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DENABLE_HIP=${ROCMON} \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- -DCHAI_ENABLE_RAJA_PLUGIN=ON \
- -DCHAI_ENABLE_RAJA_NESTED_TEST=OFF \
- -DCHAI_ENABLE_PINNED=${CHAI_ENABLE_PINNED} \
- -DCHAI_DISABLE_RM=${CHAI_DISABLE_RM} \
- -DCHAI_THIN_GPU_ALLOCATE=${CHAI_THIN_GPU_ALLOCATE} \
- -DCHAI_ENABLE_PICK=${CHAI_ENABLE_PICK} \
- -DCHAI_DEBUG=${CHAI_DEBUG} \
- -DCHAI_ENABLE_GPU_SIMULATION_MODE=${CHAI_ENABLE_GPU_SIMULATION_MODE} \
- -DCHAI_ENABLE_UM=${CHAI_ENABLE_UM} \
- -DCHAI_ENABLE_MANAGED_PTR=${CHAI_ENABLE_MANAGED_PTR} \
- -DCHAI_ENABLE_MANAGED_PTR_ON_GPU=${CHAI_ENABLE_MANAGED_PTR_ON_GPU} \
- -Dfmt_DIR=${UMPIRE_ROOT} \
- -Dumpire_DIR=${UMPIRE_ROOT} \
- -DRAJA_DIR=${RAJA_ROOT} \
- -Dcamp_DIR=${CAMP_ROOT}
- make -j 4
- make install
-fi
-
-CHAI_ROOT=${BASE_DIR}/CHAI/install_dir_hip/
-echo ${CHAI_ROOT}
-cd ${BASE_DIR}
-
-if [ ! -d "ExaCMech" ]; then
- # Clone the repo
- git clone https://github.com/LLNL/ExaCMech.git
- cd ${BASE_DIR}/ExaCMech
- # Checkout the branch that has the HIP features on it
- git checkout develop
- # Update all the various submodules
- git submodule init && git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/ExaCMech/build_hip" ]; then
- cd ${BASE_DIR}/ExaCMech
- mkdir build_hip
- cd ${BASE_DIR}/ExaCMech/build_hip
- rm -rf *
-
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=OFF \
- -DENABLE_MINIAPPS=OFF \
- -DENABLE_OPENMP=${OPENMP_ON} \
- -DBUILD_SHARED_LIBS=OFF \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DENABLE_HIP=$ROCMON \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \
- -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \
- -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \
- -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \
- -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp
-
- make -j 4
- make install
-fi
-
-ECMECH_ROOT=${BASE_DIR}/ExaCMech/install_dir_hip/
-echo ${ECMECH_ROOT}
-cd ${BASE_DIR}
-
-# Now to build our MFEM dependencies
-# First let's install Hypre v2.32.0
-cd ${BASE_DIR}
-if [ ! -d "hypre" ]; then
- git clone https://github.com/hypre-space/hypre.git --branch v2.32.0 --single-branch
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/hypre/build_hip" ]; then
- cd ${BASE_DIR}/hypre/
- mkdir build_hip
- cd ${BASE_DIR}/hypre/build_hip
- rm -rf *
- # Based on their install instructions
- # This should work on most systems
- # Hypre's default suggestions of just using configure don't always work
- cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre_hip/ \
- -DCMAKE_C_COMPILER=${CC} \
- -DMPI_CXX_COMPILER=${MPICXX} \
- -DMPI_C_COMPILER=${MPICC} \
- -DCMAKE_BUILD_TYPE=Release \
- |& tee my_hypre_config
-
- make -j 4 |& tee my_hypre_build
- make install |& tee my_hypre_install
-
- cd ${BASE_DIR}/hypre/src/hypre_hip
- HYPRE_ROOT="$(pwd)"
-
-else
-
- echo " hypre already built "
- HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre_hip
-
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "metis-5.1.0" ]; then
-
- curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz
- tar -xzf metis-5.1.0.tar.gz
- rm metis-5.1.0.tar.gz
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/metis-5.1.0/install_dir_hip" ]; then
- cd ${BASE_DIR}/metis-5.1.0
- mkdir install_dir_hip
- make distclean
- make config prefix=${BASE_DIR}/metis-5.1.0/install_dir_hip/ CC=${CC} CXX=${CXX} |& tee my_metis_config
- make -j 4 |& tee my_metis_build
- make install |& tee my_metis_install
- cd ${BASE_DIR}/metis-5.1.0/install_dir_hip/
- METIS_ROOT="$(pwd)"
-else
- echo " metis-5.1.0 already built "
- METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir_hip/
-fi
-
-# cd ${BASE_DIR}
-# if [ ! -d "ADIOS2" ]; then
-# # Clone the repo
-# git clone https://github.com/ornladios/ADIOS2.git
-# cd ${BASE_DIR}/ADIOS2
-# # Checkout the branch that has the HIP features on it
-# git checkout v2.10.0
-# # Update all the various submodules
-# git submodule init && git submodule update
-# fi
-# cd ${BASE_DIR}
-# if [ ! -d "${BASE_DIR}/ADIOS2/build_hip" ]; then
-# cd ${BASE_DIR}/ADIOS2
-# mkdir build_hip
-# cd ${BASE_DIR}/ADIOS2/build_hip
-# rm -rf *
-
-# cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
-# -DCMAKE_BUILD_TYPE=Release \
-# -DCMAKE_C_COMPILER=${CC} \
-# -DCMAKE_CXX_COMPILER=${CXX} \
-# -DADIOS2_USE_MPI=ON \
-# -DADIOS2_USE_Blosc2=OFF \
-# -DADIOS2_USE_BZip2=OFF \
-# -DADIOS2_USE_ZeroMQ=OFF \
-# -DADIOS2_USE_Endian_Reverse=OFF \
-# -DADIOS2_USE_Fortran=OFF \
-# -DADIOS2_USE_Python=ON \
-# -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
-# -DADIOS2_USE_HDF5=OFF \
-# -DADIOS2_USE_MPI=ON \
-# -DADIOS2_USE_PNG=OFF \
-# -DBUILD_SHARED_LIBS=ON \
-# -DADIOS2_USE_SZ=OFF \
-# -DADIOS2_USE_ZFP=OFF
-
-
-# make -j 16 |& tee my_adios2_build
-# make install |& tee my_adios2_install
-# fi
-
-
-cd ${BASE_DIR}
-
-if [ ! -d "mfem" ]; then
- git clone https://github.com/rcarson3/mfem.git
- cd ${BASE_DIR}/mfem/
- git checkout exaconstit-dev
-fi
-
-cd ${BASE_DIR}
-
-if [ ! -d "${BASE_DIR}/mfem/build_hip" ]; then
- mkdir ${BASE_DIR}/mfem/build_hip
- cd ${BASE_DIR}/mfem/build_hip
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM"
- #All the options
- cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO \
- -DMETIS_DIR=${METIS_ROOT} \
- -DHYPRE_DIR=${HYPRE_ROOT} \
- -DMFEM_USE_RAJA=YES \
- -DRAJA_DIR:PATH=${RAJA_ROOT} \
- -DRAJA_REQUIRED_PACKAGES="camp" \
- -DMFEM_USE_CAMP=ON \
- -Dcamp_DIR:PATH=${CAMP_ROOT}/lib/cmake/camp/ \
- -DMFEM_USE_OPENMP=${OPENMP_ON} \
- -DMFEM_USE_ZLIB=YES \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DMPI_CXX_COMPILER=${MPICXX} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \
- -DCMAKE_CXX_STANDARD=17 \
- -DMFEM_USE_HIP=${ROCMON} \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DHIP_ARCH=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- |& tee my_mfem_config
- # -DMFEM_USE_MAGMA=ON \
- # -DMAGMA_DIR=${BASE_DIR}/magma/install_dir/ \
- # -DMFEM_USE_ADIOS2=ON \
- # -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir_hip/ \
-
- make -j 16 |& tee my_mfem_build
- make install |& tee my_mfem_install
-fi
-
-cd ${BASE_DIR}
-
-# : << 'END_COMMENT'
-if [ ! -d "ExaConstit" ]; then
- git clone https://github.com/llnl/ExaConstit.git
- cd ${BASE_DIR}/ExaConstit/
- git checkout exaconstit-dev
- git submodule init && git submodule update
-fi
-cd ${BASE_DIR}
-if [ ! -d "${BASE_DIR}/ExaConstit/build_hip" ]; then
- cd ${BASE_DIR}/ExaConstit/
- mkdir build_hip
-
- cd ${BASE_DIR}/ExaConstit/build_hip #&& rm -rf *
- LOCAL_CMAKE_MFEM="$(which cmake)"
- echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM"
-
- cmake ../ -DCMAKE_C_COMPILER=${CC} \
- -DCMAKE_CXX_COMPILER=${HIPCC} \
- -DMPI_CXX_COMPILER=${MPICXX} \
- -DHIP_CXX_COMPILER=${HIPCC} \
- -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
- -DCMAKE_EXE_LINKER_FLAGS="${EXE_LINK_FLAGS}" \
- -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
- -DENABLE_TESTS=ON \
- -DENABLE_OPENMP=OFF \
- -DENABLE_FORTRAN=OFF \
- -DENABLE_HIP=${ROCMON} \
- -DENABLE_SNLS_V03=ON \
- -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DRAJA_DIR:PATH=${RAJA_ROOT}/lib/cmake/raja/ \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \
- -DGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DAMDGPU_TARGETS=${LOC_ROCM_ARCH} \
- -DMFEM_DIR=${BASE_DIR}/mfem/install_dir_hip/lib/cmake/mfem/ \
- -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir_hip/ \
- -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir_hip/ \
- -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \
- -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \
- -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \
- -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \
- -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp |& tee my_exaconstit_config
-
- make -j 4 |& tee my_exaconstit_build
-fi
-###END_COMMENT
diff --git a/scripts/install/unix_install_example.sh b/scripts/install/unix_install_example.sh
deleted file mode 100644
index f31a759..0000000
--- a/scripts/install/unix_install_example.sh
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/bin/bash
-# The below is a bash script that should work on most UNIX systems to download all of ExaConstit and its dependencies
-# and then install them.
-#
-# For ease all of this should be run in its own directory
-SCRIPT=$(readlink -f "$0")
-BASE_DIR=$(dirname "$SCRIPT")
-
-# Set this to your location of python
-# for example PYTHON_EXE for an anaconda build of python
-# on a mac might be somewhere like:
-PYTHON_EXE="/Users/USER/anaconda3/bin/python"
-
-# If you are using SPACK or have another module-like system to set up your developer environment
-# you'll want to load up the necessary compiler and dev environments.
-# In other words, make sure whatever MPI you want is loaded; that the C, C++, and Fortran compilers
-# are loaded; and that you have a CMake version between 3.12 and 3.18.
-
-# Build raja
-if [ ! -d "raja" ]; then
- git clone --recursive https://github.com/llnl/raja.git --branch v2024.07.0 --single-branch
- cd ${BASE_DIR}/raja
- # Instantiate all the submodules
- git submodule init
- git submodule update
- # Build everything
- mkdir build
- cd ${BASE_DIR}/raja/build/
- # GPU build
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DENABLE_OPENMP=OFF \
- -DRAJA_TIMER=chrono \
- -DENABLE_TESTS=OFF \
- -DCMAKE_BUILD_TYPE=Release
- make -j 4
- make install
-else
-
- echo " RAJA already built "
-
-fi
-
-# Now to build ExaCMech
-cd ${BASE_DIR}
-
-if [ ! -d "ExaCMech" ]; then
-
- git clone https://github.com/LLNL/ExaCMech.git --single-branch
- cd ${BASE_DIR}/ExaCMech
- # Instantiate all the submodules
- git submodule init
- git submodule update
- # Build everything
- mkdir build
- cd ${BASE_DIR}/ExaCMech/build
- # GPU build
- cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DRAJA_DIR=${BASE_DIR}/raja/install_dir/lib/cmake/raja/ \
- -DENABLE_OPENMP=OFF \
- -DENABLE_TESTS=OFF \
- -DENABLE_MINIAPPS=OFF \
- -DCMAKE_BUILD_TYPE=Release \
- -DBUILD_SHARED_LIBS=OFF
- make -j 4
- make install
-else
-
- echo " ExaCMech already built "
-
-fi
-
-# Now to build our MFEM dependencies
-# First let's install Hypre v2.30.0
-cd ${BASE_DIR}
-if [ ! -d "hypre" ]; then
-
- git clone https://github.com/hypre-space/hypre.git --branch v2.30.0 --single-branch
- cd ${BASE_DIR}/hypre/src
- # Based on their install instructions
- # This should work on most systems
- # Hypre's default suggestions of just using configure don't always work
- ./configure CC=mpicc CXX=mpicxx FC=mpif90
- make -j 4
- make install
- cd hypre
- HYPRE_DIR="$(pwd)"
-
-else
-
- echo " hypre already built "
- HYPRE_DIR=${BASE_DIR}/hypre/src/hypre
-
-fi
-
-# Now to install metis-5.1.0
-# It appears that there are some minor differences in performance between metis-4 and metis-5
-# If you'd like to install metis-4 instead here's the commands needed
-# uncomment the below and then comment the metis-5 commands
-# cd ${BASE_DIR}
-# curl -o metis-4.0.3.tar.gz http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/OLD/metis-4.0.3.tar.gz
-# tar -xzf metis-4.0.3.tar.gz
-# rm metis-4.0.3.tar.gz
-# cd metis-4.0.3
-# make
-# METIS_DIR="$(pwd)"
-# metis-5 install down below
-cd ${BASE_DIR}
-
-if [ ! -d "metis-5.1.0" ]; then
-
- curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz
- tar -xzf metis-5.1.0.tar.gz
- rm metis-5.1.0.tar.gz
- cd metis-5.1.0
- mkdir install_dir
- make config prefix=${BASE_DIR}/metis-5.1.0/install_dir/
- make -j 4
- make install
- cd ${BASE_DIR}/metis-5.1.0/install_dir/
- METIS_DIR="$(pwd)"
-else
-
- echo " metis-5.1.0 already built "
- METIS_DIR=${BASE_DIR}/metis-5.1.0/install_dir/
-
-fi
-
-# If you want any other MFEM options installed, like Conduit or ADIOS2, install them now
-# We can now install MFEM with relevant data for ExaConstit
-
-cd ${BASE_DIR}
-
-if [ ! -d "mfem" ]; then
-
- git clone https://github.com/rcarson3/mfem.git --branch exaconstit-dev --single-branch
- cd ${BASE_DIR}/mfem/
- mkdir build
- cd ${BASE_DIR}/mfem/build/
- # All the options
- cmake ../ -DMFEM_USE_MPI=ON -DMFEM_USE_SIMD=OFF \
- -DMETIS_DIR=${METIS_DIR} \
- -DHYPRE_DIR=${HYPRE_DIR} \
- -DCMAKE_INSTALL_PREFIX=../install_dir/ \
- -DMFEM_USE_OPENMP=OFF \
- -DMFEM_USE_RAJA=ON -DRAJA_DIR=${BASE_DIR}/raja/install_dir/ \
- -DCMAKE_BUILD_TYPE=Release
- # The below are the relevant lines needed for ADIOS2 and conduit. You'll want to put them
- # before the -DCMAKE_BUILD_TYPE call
- # -DMFEM_USE_ADIOS2=ON -DADIOS2_DIR=${ADIOS2_DIR} \
- # -DMFEM_USE_CONDUIT=ON -DConduit_REQUIRED_PACKAGES=HDF5 -DCONDUIT_DIR=${CONDUIT_DIR} \
- # -DHDF5_ROOT:PATH=${HDF5_DIR} \
- make -j 4
- make install
-
-else
-
- echo " MFEM already built "
-
-fi
-
-#We can finally install ExaConstit
-cd ${BASE_DIR}
-
-if [ ! -d "ExaConstit" ]; then
-
- git clone https://github.com/LLNL/ExaConstit.git
- cd ${BASE_DIR}/ExaConstit/
- # Instantiate all the submodules
- git submodule init
- git submodule update
- # Build everything
- mkdir build
- cd ${BASE_DIR}/ExaConstit/build/
-
- cmake ../ -DENABLE_MPI=ON -DENABLE_FORTRAN=ON \
- -DPYTHON_EXECUTABLE=${PYTHON_EXE} \
- -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/lib/cmake/mfem/ \
- -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir/ \
- -DRAJA_DIR=${BASE_DIR}/raja/install_dir/lib/cmake/raja/ \
- -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir/ \
- -DENABLE_SNLS_V03=ON \
- -DCMAKE_BUILD_TYPE=Release \
- -DENABLE_TESTS=ON
- # Sometimes the cmake systems can be a bit difficult and not properly find the MFEM installed location
- # using the above. If that's the case the below should work:
- # -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/ \
-
- make -j 4
- # Check and make sure everything installed correctly by running the test suite
- make test
-
-else
-
- echo " ExaConstit already built "
-
-fi
-
-# ExaConstit is now installed
diff --git a/scripts/meshing/mesh_generator.cpp b/scripts/meshing/mesh_generator.cpp
index 52ab16d..507ae81 100644
--- a/scripts/meshing/mesh_generator.cpp
+++ b/scripts/meshing/mesh_generator.cpp
@@ -160,7 +160,8 @@ void setElementGrainIDs(Mesh *mesh, const Vector grainMap, int ncols, int offset
// loop over elements
for (int i = 0; i < mesh->GetNE(); ++i) {
- mesh->SetAttribute(i, data[ncols * i + offset]);
+ const int grainID = static_cast<int>(data[ncols * i + offset]);
+ mesh->SetAttribute(i, grainID);
}
return;
diff --git a/scripts/postprocessing/adios2_example.py b/scripts/postprocessing/adios2_example.py
index 36d4ef5..71d7ba2 100755
--- a/scripts/postprocessing/adios2_example.py
+++ b/scripts/postprocessing/adios2_example.py
@@ -88,7 +88,7 @@
istep = 0
for fstep in fh:
for i in range(nranks):
- arr = fstep.read('ElementVolume', block_id=i)
+ arr = fstep.read('Element Volumes', block_id=i)
ev[index[i, 0]:index[i, 1], istep] = arr[con1d[i]]
istep = istep + 1
@@ -109,12 +109,12 @@
# Note this method requires us to define start and count. We can't just
# set step_start and step_count. Also, note the transpose at the end to work
# in the same way as the previous method
- arr = fh.read('HydrostaticStress', start=[0], count=[isize], step_start=0, step_count=steps-1, block_id=i).T
+ arr = fh.read('Hydrostatic Stress', start=[0], count=[isize], step_start=0, step_count=steps-1, block_id=i).T
hss[index[i, 0]:index[i, 1], :] = arr[con1d[i], :]
- arr = fh.read('VonMisesStress', start=[0], count=[isize], step_start=0, step_count=steps-1, block_id=i).T
+ arr = fh.read('Von Mises Stress', start=[0], count=[isize], step_start=0, step_count=steps-1, block_id=i).T
vm[index[i, 0]:index[i, 1], :] = arr[con1d[i], :]
- arr1 = fstep.read('LatticeOrientation', start=[0, 0], count=[isize, 4], step_start=0, step_count=steps-1, block_id=i)
+ arr1 = fh.read('Crystal Orientations', start=[0, 0], count=[isize, 4], step_start=0, step_count=steps-1, block_id=i)
quats[:, index[i, 0]:index[i, 1], :] = np.swapaxes(arr1[:, con1d[i], :], 0, 2)
#%%
# Always make sure to close the file when you're finished loading data from it
diff --git a/scripts/postprocessing/adios2_extraction.py b/scripts/postprocessing/adios2_extraction.py
index d3f7987..52eda81 100644
--- a/scripts/postprocessing/adios2_extraction.py
+++ b/scripts/postprocessing/adios2_extraction.py
@@ -55,12 +55,12 @@
# different variables are stored in different ways - not all variables are supported by this script
# this script should work for any variables that are saved off for every element - some examples of working variables are given below
vars_out = [
- 'DpEff' ,
- 'ElementVolume' ,
- 'LatticeOrientation' ,
- 'ShearRate' ,
- 'Stress' ,
- 'XtalElasticStrain'
+ 'Equivalent Plastic Strain Rate' ,
+ 'Element Volumes' ,
+ 'Crystal Orientations' ,
+ 'Shearing Rate' ,
+ 'Cauchy Stress' ,
+ 'Elastic Strains'
] #!!!
#%% Open ADIOS2 file and explore variables. (USER INPUTS HERE)
diff --git a/scripts/postprocessing/macro_stress_strain_plot.py b/scripts/postprocessing/macro_stress_strain_plot.py
index 2269d56..cbd55c6 100755
--- a/scripts/postprocessing/macro_stress_strain_plot.py
+++ b/scripts/postprocessing/macro_stress_strain_plot.py
@@ -23,42 +23,50 @@
fig, ax = plt.subplots(1)
-#number of time steps
nsteps = 40
-
# uncomment the below when the fileLoc is valid
#data = np.loadtxt(fileLoc+'avg_stress.txt', comments='%')
# only here to have something that'll plot
-data = np.ones((nsteps, 6))
+data = np.ones((nsteps, 8))
+# First two columns are time and volume
+# Next 6 columns are the Cauchy stress in Voigt notation
+sig = data[:,4]
+vol = data[:,1]
+time = data[:,0]
+nsteps = data.shape[0] + 1
-epsdot = 1e-3
+sig = np.r_[0, sig]
+vol = np.r_[1, vol]
+time = np.r_[0, time]
+# If set up for it, you can also request either the Eulerian strain or the deformation gradient,
+# in which case most of the below can be ignored
-sig = data[:,2]
-# uncomment the below when the fileLoc is valid
-#time = np.loadtxt(fileLoc+'custom_dt.txt')
+epsdot = 1e-3
# only here to have something that'll plot
-time = np.ones(nsteps)
-
eps = np.zeros(nsteps)
for i in range(0, nsteps):
- dtime = time[i]
+ if (i == 0):
+ dtime = time[i]
+ else:
+ dtime = time[i] - time[i - 1]
+ # Stress is not always monotonically increasing so this is not always the
+ # best assumption
if sig[i] - sig[i - 1] >= 0:
eps[i] = eps[i - 1] + epsdot * dtime
else:
eps[i] = eps[i - 1] - epsdot * dtime
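+# Note: the loop above integrates epsdot * dt each step, with the sign taken from the
+# stress increment, so eps approximates the accumulated engineering strain at the applied rate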
-ax.plot(eps, sig, 'r')
-
+# For true strain, np.log(1 + eps) provides the correct conversion
+ax.plot(np.log(1.0 + eps), sig, 'r')
ax.grid()
-# change this to fit your data
+# change this to fit your data
# ax.axis([0, 0.01, 0, 0.3])
-ax.set_ylabel('Macroscopic engineering stress [GPa]')
-ax.set_xlabel('Macroscopic engineering strain [-]')
+ax.set_ylabel('Macroscopic true stress [GPa]')
+ax.set_xlabel('Macroscopic true strain [-]')
-plt.close()
fig.show()
plt.show()
\ No newline at end of file
diff --git a/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py b/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py
index b0075eb..ec820d6 100644
--- a/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py
+++ b/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py
@@ -118,7 +118,7 @@ def fiber_calc_ranks(args):
s_dir = np.asarray([0.0,0.0,1.0])
- top = fh.read('ElementVolume' , block_id = 0)
+ top = fh.read('Element Volumes' , block_id = 0)
# If we want per element quantities then uncomment below block
# elem_vols = np.empty((steps, conshape[0]))
@@ -142,20 +142,20 @@ def fiber_calc_ranks(args):
isize = con1d[ii].shape[0] * conshape[1]
# Read all of the data in
- ev_local = np.ascontiguousarray(fh.read('ElementVolume', start = [0], count = [isize], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize))[:, con1d[ii]])
+ ev_local = np.ascontiguousarray(fh.read('Element Volumes', start = [0], count = [isize], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize))[:, con1d[ii]])
# Provide info later related to RVE size so we can see how many elements are
# actually used in the fiber calculations
total_volume += np.sum(ev_local, axis=1)
- xtal_oris_local = arr = np.ascontiguousarray(fh.read('LatticeOrientation', start = [0, 0], count = [isize, 4], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 4))[:, con1d[ii], :])
+ xtal_oris_local = np.ascontiguousarray(fh.read('Crystal Orientations', start = [0, 0], count = [isize, 4], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 4))[:, con1d[ii], :])
- elas_strain_local = np.ascontiguousarray(fh.read('XtalElasticStrain', start = [0, 0], count = [isize, 6], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 6))[:, con1d[ii], :])
+ elas_strain_local = np.ascontiguousarray(fh.read('Elastic Strains', start = [0, 0], count = [isize, 6], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 6))[:, con1d[ii], :])
- stress_local = np.ascontiguousarray(fh.read('Stress', start = [0, 0], count = [isize, 6], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, 6))[:, con1d[ii], :])
+ stress_local = np.ascontiguousarray(fh.read('Cauchy Stress', start = [0, 0], count = [isize, 6], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, 6))[:, con1d[ii], :])
- top = fh.read('ShearRate' , block_id = 0)
- gdots_local = np.ascontiguousarray(fh.read('ShearRate', start = [0, 0], count = [isize, top.shape[1]], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, top.shape[1]))[:, con1d[ii], :])
+ top = fh.read('Shearing Rate' , block_id = 0)
+ gdots_local = np.ascontiguousarray(fh.read('Shearing Rate', start = [0, 0], count = [isize, top.shape[1]], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, top.shape[1]))[:, con1d[ii], :])
in_fibers_local = np.zeros((hkl.shape[0], steps, elas_strain_local.shape[1]), dtype=bool)
@@ -164,7 +164,8 @@ def fiber_calc_ranks(args):
ev_local1 = np.ascontiguousarray(ev_local[1:steps,:])
# All of our local calculations
- xlup.strain_lattice2sample(xtal_oris_local, elas_strain_local)
+ # We're already in the sample frame, as ExaConstit (as of v0.9) automatically converts it for us
+ # xlup.strain_lattice2sample(xtal_oris_local, elas_strain_local)
xlup.calc_lattice_strains(elas_strain_local, s_dir, ev_local, in_fibers_local, lattice_strains, lattice_vols, True)
xlup.calc_directional_stiffness_lattice_fiber(stress_local, elas_strain_local[1:steps,:,:], lattice_dir_stiff, ev_local1, in_fiber_local1, True)
xlup.calc_taylor_factors_lattice_fiber(gdots_local, lattice_tay_fact, lattice_eps_rate, ev_local1, in_fiber_local1, True)
@@ -222,7 +223,7 @@ def fiber_calc_ranks(args):
# s.write("Strains", strains, shape=strains.shape, start=[0,0,0], count=strains.shape)
# s.write("DirectionalModulus", direct_stiffness, shape=direct_stiffness.shape, start=[0,0], count=direct_stiffness.shape)
# s.write("TaylorFactor", tay_fact, shape=tay_fact.shape, start=[0,0], count=tay_fact.shape)
- # s.write("DpEff", eps_rate, shape=eps_rate.shape, start=[0,0], count=eps_rate.shape)
+ # s.write("EquivalentPlasticStrainRate", eps_rate, shape=eps_rate.shape, start=[0,0], count=eps_rate.shape)
tf_total = time.time()
print('%.3f seconds to process %s.' % (tf_total - ts_total, "all items"))
diff --git a/src/BCData.cpp b/src/BCData.cpp
deleted file mode 100644
index 02cc510..0000000
--- a/src/BCData.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-
-#include "mfem.hpp"
-#include "BCData.hpp"
-
-using namespace mfem;
-
-BCData::BCData()
-{
- // TODO constructor stub
-}
-
-BCData::~BCData()
-{
- // TODO destructor stub
-}
-
-void BCData::setDirBCs(Vector& y)
-{
- // When doing the velocity based methods we only
- // need to do the below.
- y = 0.0;
- y[0] = essVel[0] * scale[0];
- y[1] = essVel[1] * scale[1];
- y[2] = essVel[2] * scale[2];
-}
-
-void BCData::setScales()
-{
- switch (compID) {
- case 7:
- scale[0] = 1.0;
- scale[1] = 1.0;
- scale[2] = 1.0;
- break;
- case 1:
- scale[0] = 1.0;
- scale[1] = 0.0;
- scale[2] = 0.0;
- break;
- case 2:
- scale[0] = 0.0;
- scale[1] = 1.0;
- scale[2] = 0.0;
- break;
- case 3:
- scale[0] = 0.0;
- scale[1] = 0.0;
- scale[2] = 1.0;
- break;
- case 4:
- scale[0] = 1.0;
- scale[1] = 1.0;
- scale[2] = 0.0;
- break;
- case 5:
- scale[0] = 0.0;
- scale[1] = 1.0;
- scale[2] = 1.0;
- break;
- case 6:
- scale[0] = 1.0;
- scale[1] = 0.0;
- scale[2] = 1.0;
- break;
- case 0:
- scale[0] = 0.0;
- scale[1] = 0.0;
- scale[2] = 0.0;
- break;
- }
-}
-
-void BCData::getComponents(int id, Array<bool> &component)
-{
- switch (id) {
- case 0:
- component[0] = false;
- component[1] = false;
- component[2] = false;
- break;
-
- case 1:
- component[0] = true;
- component[1] = false;
- component[2] = false;
- break;
- case 2:
- component[0] = false;
- component[1] = true;
- component[2] = false;
- break;
- case 3:
- component[0] = false;
- component[1] = false;
- component[2] = true;
- break;
- case 4:
- component[0] = true;
- component[1] = true;
- component[2] = false;
- break;
- case 5:
- component[0] = false;
- component[1] = true;
- component[2] = true;
- break;
- case 6:
- component[0] = true;
- component[1] = false;
- component[2] = true;
- break;
- case 7:
- component[0] = true;
- component[1] = true;
- component[2] = true;
- break;
- }
-}
diff --git a/src/BCData.hpp b/src/BCData.hpp
deleted file mode 100644
index 360a40b..0000000
--- a/src/BCData.hpp
+++ /dev/null
@@ -1,26 +0,0 @@
-
-#ifndef BCDATA
-#define BCDATA
-
-#include "mfem.hpp"
-#include "mfem/linalg/vector.hpp"
-#include
-
-class BCData
-{
- public:
- BCData();
- ~BCData();
-
- // scales for nonzero Dirichlet BCs
- double essVel[3];
- double scale[3];
- int compID;
-
- void setDirBCs(mfem::Vector& y);
-
- void setScales();
-
- static void getComponents(int id, mfem::Array<bool> &component);
-};
-#endif
diff --git a/src/BCManager.cpp b/src/BCManager.cpp
deleted file mode 100644
index 1151614..0000000
--- a/src/BCManager.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-
-
-#include "mfem.hpp"
-#include "BCManager.hpp"
-#include <cmath>
-
-using namespace mfem;
-
-
-void BCManager::updateBCData(std::unordered_map<std::string, mfem::Array<int>> & ess_bdr,
- mfem::Array2D<double> & scale,
- mfem::Vector & vgrad,
- std::unordered_map<std::string, mfem::Array2D<bool>> & component)
-{
- ess_bdr["total"] = 0;
- scale = 0.0;
-
- auto ess_comp = map_ess_comp["total"].find(step)->second;
- auto ess_id = map_ess_id["total"].find(step)->second;
-
- Array<bool> cmp_row;
- cmp_row.SetSize(3);
-
- component["total"] = false;
- cmp_row = false;
-
- for (std::uint32_t i = 0; i < ess_id.size(); ++i) {
- // set the active boundary attributes
- if (ess_comp[i] != 0) {
- const int bcID = ess_id[i] - 1;
- ess_bdr["total"][bcID] = 1;
- BCData::getComponents(std::abs(ess_comp[i]), cmp_row);
-
- component["total"](bcID, 0) = cmp_row[0];
- component["total"](bcID, 1) = cmp_row[1];
- component["total"](bcID, 2) = cmp_row[2];
- }
- }
-
- updateBCData(ess_bdr["ess_vel"], scale, component["ess_vel"]);
- updateBCData(ess_bdr["ess_vgrad"], vgrad, component["ess_vgrad"]);
-}
-
-void BCManager::updateBCData(mfem::Array<int> & ess_bdr, mfem::Array2D<double> & scale, mfem::Array2D<bool> & component)
-{
- m_bcInstances.clear();
- ess_bdr = 0;
- scale = 0.0;
-
- // The size here is set explicitly
- component.SetSize(ess_bdr.Size(), 3);
- Array<bool> cmp_row;
- cmp_row.SetSize(3);
-
- component = false;
- cmp_row = false;
-
- if (map_ess_vel.find(step) == map_ess_vel.end())
- {
- return;
- }
-
- auto ess_vel = map_ess_vel.find(step)->second;
- auto ess_comp = map_ess_comp["ess_vel"].find(step)->second;
- auto ess_id = map_ess_id["ess_vel"].find(step)->second;
-
- for (std::uint32_t i = 0; i < ess_id.size(); ++i) {
- // set the active boundary attributes
- if (ess_comp[i] != 0) {
- // set the boundary condition id based on the attribute id
- int bcID = ess_id[i];
-
- // instantiate a boundary condition manager instance and
- // create a BCData object
- BCData & bc = this->CreateBCs(bcID);
-
- // set the velocity component values
- bc.essVel[0] = ess_vel[3 * i];
- bc.essVel[1] = ess_vel[3 * i + 1];
- bc.essVel[2] = ess_vel[3 * i + 2];
- bc.compID = ess_comp[i];
-
- // set the boundary condition scales
- bc.setScales();
-
- scale(bcID - 1, 0) = bc.scale[0];
- scale(bcID - 1, 1) = bc.scale[1];
- scale(bcID - 1, 2) = bc.scale[2];
- ess_bdr[bcID - 1] = 1;
- }
- }
-
- for (std::uint32_t i = 0; i < ess_id.size(); ++i) {
- // set the active boundary attributes
- if (ess_comp[i] != 0) {
- const int bcID = ess_id[i] - 1;
- ess_bdr[bcID] = 1;
- BCData::getComponents(ess_comp[i], cmp_row);
- component(bcID, 0) = cmp_row[0];
- component(bcID, 1) = cmp_row[1];
- component(bcID, 2) = cmp_row[2];
- }
- }
-}
-
-void BCManager::updateBCData(mfem::Array<int> & ess_bdr, mfem::Vector & vgrad, mfem::Array2D<bool> & component)
-{
- ess_bdr = 0;
- vgrad.HostReadWrite();
- vgrad = 0.0;
-
- // The size here is set explicitly
- component.SetSize(ess_bdr.Size(), 3);
- Array<bool> cmp_row;
- cmp_row.SetSize(3);
-
- component = false;
- cmp_row = false;
-
- if (map_ess_vgrad.find(step) == map_ess_vgrad.end())
- {
- return;
- }
-
- auto ess_vgrad = map_ess_vgrad.find(step)->second;
- auto ess_comp = map_ess_comp["ess_vgrad"].find(step)->second;
- auto ess_id = map_ess_id["ess_vgrad"].find(step)->second;
-
- for (std::uint32_t i = 0; i < ess_vgrad.size(); ++i) {
- vgrad(i) = ess_vgrad.at(i);
- }
-
- for (std::uint32_t i = 0; i < ess_id.size(); ++i) {
- // set the active boundary attributes
- if (ess_comp[i] != 0) {
- const int bcID = ess_id[i] - 1;
- ess_bdr[bcID] = 1;
- BCData::getComponents(ess_comp[i], cmp_row);
- component(bcID, 0) = cmp_row[0];
- component(bcID, 1) = cmp_row[1];
- component(bcID, 2) = cmp_row[2];
- }
- }
-}
-
diff --git a/src/BCManager.hpp b/src/BCManager.hpp
deleted file mode 100644
index f54e575..0000000
--- a/src/BCManager.hpp
+++ /dev/null
@@ -1,93 +0,0 @@
-
-#ifndef BCMANAGER
-#define BCMANAGER
-
-#include "BCData.hpp"
-#include "option_parser.hpp"
-
-// C/C++ includes
-#include <unordered_map> // for std::unordered_map
-#include <algorithm>
-#include <mutex>
-#include <vector>
-
-
-class BCManager
-{
- public:
- static BCManager & getInstance()
- {
- static BCManager bcManager;
- return bcManager;
- }
-
- void init(const std::vector<int> &uStep,
- const std::unordered_map<int, std::vector<double>> &ess_vel,
- const std::unordered_map<int, std::vector<double>> &ess_vgrad,
- const map_of_imap &ess_comp,
- const map_of_imap &ess_id) {
- std::call_once(init_flag, [&](){
- updateStep = uStep;
- map_ess_vel = ess_vel;
- map_ess_vgrad = ess_vgrad;
- map_ess_comp = ess_comp;
- map_ess_id = ess_id;
- });
- }
-
- BCData & GetBCInstance(int bcID)
- {
- return m_bcInstances.find(bcID)->second;
- }
-
- const BCData & GetBCInstance(int bcID) const
- {
- return m_bcInstances.find(bcID)->second;
- }
-
- BCData & CreateBCs(int bcID)
- {
- return m_bcInstances[bcID];
- }
-
- std::unordered_map<int, BCData> &GetBCInstances()
- {
- return m_bcInstances;
- }
-
- void updateBCData(std::unordered_map<std::string, mfem::Array<int>> & ess_bdr,
- mfem::Array2D<double> & scale,
- mfem::Vector & vgrad,
- std::unordered_map<std::string, mfem::Array2D<bool>> & component);
-
- bool getUpdateStep(int step_)
- {
- if(std::find(updateStep.begin(), updateStep.end(), step_) != updateStep.end()) {
- step = step_;
- return true;
- }
- else {
- return false;
- }
- }
- private:
- BCManager() {}
- BCManager(const BCManager&) = delete;
- BCManager& operator=(const BCManager &) = delete;
- BCManager(BCManager &&) = delete;
- BCManager & operator=(BCManager &&) = delete;
-
- void updateBCData(mfem::Array<int> & ess_bdr, mfem::Vector & vgrad, mfem::Array2D<bool> & component);
- void updateBCData(mfem::Array<int> & ess_bdr, mfem::Array2D<double> & scale, mfem::Array2D<bool> & component);
-
- std::once_flag init_flag;
- int step = 0;
- std::unordered_map<int, BCData> m_bcInstances;
- std::vector<int> updateStep;
- std::unordered_map<int, std::vector<double>> map_ess_vel;
- std::unordered_map<int, std::vector<double>> map_ess_vgrad;
- map_of_imap map_ess_comp;
- map_of_imap map_ess_id;
-};
-
-#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3517b24..24e830a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,48 +3,79 @@
#------------------------------------------------------------------------------
set(EXACONSTIT_HEADERS
${HEADER_INCLUDE_DIR}/ExaConstit_Version.h
- BCData.hpp
- BCManager.hpp
- mechanics_model.hpp
- mechanics_integrators.hpp
- mechanics_ecmech.hpp
- mechanics_kernels.hpp
- mechanics_lightup.hpp
- mechanics_log.hpp
- mechanics_umat.hpp
- mechanics_operator_ext.hpp
- mechanics_operator.hpp
- mechanics_solver.hpp
system_driver.hpp
- option_types.hpp
- option_parser.hpp
- userumat.h
- ./TOML_Reader/toml.hpp
+ boundary_conditions/BCData.hpp
+ boundary_conditions/BCManager.hpp
+ fem_operators/mechanics_integrators.hpp
+ fem_operators/mechanics_operator_ext.hpp
+ fem_operators/mechanics_operator.hpp
+ mfem_expt/partial_qspace.hpp
+ mfem_expt/partial_qfunc.hpp
+ models/mechanics_model.hpp
+ models/mechanics_ecmech.hpp
+ models/mechanics_multi_model.hpp
+ models/mechanics_umat.hpp
+ options/option_parser_v2.hpp
+ postprocessing/projection_class.hpp
+ postprocessing/postprocessing_driver.hpp
+ postprocessing/mechanics_lightup.hpp
+ sim_state/simulation_state.hpp
+ solvers/mechanics_solver.hpp
+ utilities/dynamic_function_loader.hpp
+ utilities/mechanics_kernels.hpp
+ utilities/mechanics_log.hpp
+ utilities/assembly_ops.hpp
+ utilities/rotations.hpp
+ utilities/strain_measures.hpp
+ utilities/unified_logger.hpp
+ umats/userumat.h
+ umats/unified_umat_loader.hpp
+ TOML_Reader/toml.hpp
)
set(EXACONSTIT_SOURCES
- BCData.cpp
- BCManager.cpp
- mechanics_model.cpp
- mechanics_integrators.cpp
- mechanics_ecmech.cpp
- mechanics_kernels.cpp
- mechanics_umat.cpp
- mechanics_operator_ext.cpp
- mechanics_operator.cpp
- mechanics_solver.cpp
system_driver.cpp
- option_parser.cpp
- ./umat_tests/userumat.cxx
+ boundary_conditions/BCData.cpp
+ boundary_conditions/BCManager.cpp
+ fem_operators/mechanics_integrators.cpp
+ fem_operators/mechanics_operator_ext.cpp
+ fem_operators/mechanics_operator.cpp
+ mfem_expt/partial_qspace.cpp
+ mfem_expt/partial_qfunc.cpp
+ models/mechanics_model.cpp
+ models/mechanics_ecmech.cpp
+ models/mechanics_umat.cpp
+ models/mechanics_multi_model.cpp
+ options/option_parser_v2.cpp
+ options/option_boundary_conditions.cpp
+ options/option_enum.cpp
+ options/option_material.cpp
+ options/option_mesh.cpp
+ options/option_post_processing.cpp
+ options/option_solvers.cpp
+ options/option_time.cpp
+ postprocessing/postprocessing_driver.cpp
+ postprocessing/projection_class.cpp
+ postprocessing/mechanics_lightup.cpp
+ sim_state/simulation_state.cpp
+ solvers/mechanics_solver.cpp
+ utilities/mechanics_kernels.cpp
+ utilities/unified_logger.cpp
)
if (ENABLE_FORTRAN)
- list(APPEND EXACONSTIT_SOURCES ./umat_tests/umat.f)
+ list(APPEND EXACONSTIT_SOURCES ./umats/umat.f)
else()
- list(APPEND EXACONSTIT_SOURCES ./umat_tests/umat.cxx)
+ list(APPEND EXACONSTIT_SOURCES ./umats/umat.cxx)
endif()
-
+
+set(DYNAMIC_LOADING_LIBS)
+
+# Windows uses kernel32 automatically
+if(UNIX)
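+ # CMAKE_DL_LIBS is CMake's name for the library providing dlopen/dlsym
+ # (e.g. libdl); it expands to empty where libc already supplies them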
+ list(APPEND DYNAMIC_LOADING_LIBS ${CMAKE_DL_LIBS})
+endif()
#------------------------------------------------------------------------------
# Dependencies
#------------------------------------------------------------------------------
@@ -81,6 +112,15 @@ if(ENABLE_CALIPER)
list(APPEND EXACONSTIT_DEPENDS caliper)
endif()
+if(UNIX AND NOT APPLE AND TARGET Threads::Threads)
+ # Check if we determined explicit linking is needed
+ if(EXACONSTIT_THREADS_EXPLICIT_LINK)
+ list(APPEND EXACONSTIT_DEPENDS Threads::Threads)
+ endif()
+endif()
+
+list(APPEND EXACONSTIT_DEPENDS ${DYNAMIC_LOADING_LIBS})
+
message("-- EXACONSTIT_DEPENDS: ${EXACONSTIT_DEPENDS}")
#------------------------------------------------------------------------------
diff --git a/src/boundary_conditions/BCData.cpp b/src/boundary_conditions/BCData.cpp
new file mode 100644
index 0000000..3714bc1
--- /dev/null
+++ b/src/boundary_conditions/BCData.cpp
@@ -0,0 +1,111 @@
+#include "boundary_conditions/BCData.hpp"
+
+#include "mfem.hpp"
+
+BCData::BCData() {
+ // TODO constructor stub
+}
+
+BCData::~BCData() {
+ // TODO destructor stub
+}
+
+void BCData::SetDirBCs(mfem::Vector& y) {
+ // When doing the velocity based methods we only
+ // need to do the below.
+ y = 0.0;
+ y[0] = ess_vel[0] * scale[0];
+ y[1] = ess_vel[1] * scale[1];
+ y[2] = ess_vel[2] * scale[2];
+}
+
+void BCData::SetScales() {
+ switch (comp_id) {
+ case 7:
+ scale[0] = 1.0;
+ scale[1] = 1.0;
+ scale[2] = 1.0;
+ break;
+ case 1:
+ scale[0] = 1.0;
+ scale[1] = 0.0;
+ scale[2] = 0.0;
+ break;
+ case 2:
+ scale[0] = 0.0;
+ scale[1] = 1.0;
+ scale[2] = 0.0;
+ break;
+ case 3:
+ scale[0] = 0.0;
+ scale[1] = 0.0;
+ scale[2] = 1.0;
+ break;
+ case 4:
+ scale[0] = 1.0;
+ scale[1] = 1.0;
+ scale[2] = 0.0;
+ break;
+ case 5:
+ scale[0] = 0.0;
+ scale[1] = 1.0;
+ scale[2] = 1.0;
+ break;
+ case 6:
+ scale[0] = 1.0;
+ scale[1] = 0.0;
+ scale[2] = 1.0;
+ break;
+ case 0:
+ scale[0] = 0.0;
+ scale[1] = 0.0;
+ scale[2] = 0.0;
+ break;
+ }
+}
+
+void BCData::GetComponents(int id, mfem::Array<bool>& component) {
+ switch (id) {
+ case 0:
+ component[0] = false;
+ component[1] = false;
+ component[2] = false;
+ break;
+
+ case 1:
+ component[0] = true;
+ component[1] = false;
+ component[2] = false;
+ break;
+ case 2:
+ component[0] = false;
+ component[1] = true;
+ component[2] = false;
+ break;
+ case 3:
+ component[0] = false;
+ component[1] = false;
+ component[2] = true;
+ break;
+ case 4:
+ component[0] = true;
+ component[1] = true;
+ component[2] = false;
+ break;
+ case 5:
+ component[0] = false;
+ component[1] = true;
+ component[2] = true;
+ break;
+ case 6:
+ component[0] = true;
+ component[1] = false;
+ component[2] = true;
+ break;
+ case 7:
+ component[0] = true;
+ component[1] = true;
+ component[2] = true;
+ break;
+ }
+}
diff --git a/src/boundary_conditions/BCData.hpp b/src/boundary_conditions/BCData.hpp
new file mode 100644
index 0000000..075e46b
--- /dev/null
+++ b/src/boundary_conditions/BCData.hpp
@@ -0,0 +1,106 @@
+
+#ifndef BCDATA
+#define BCDATA
+
+#include "mfem.hpp"
+#include "mfem/linalg/vector.hpp"
+
+#include
+
+/**
+ * @brief Individual boundary condition data container and processor
+ *
+ * @details This class stores and processes data for a single boundary condition instance.
+ * It handles the application of Dirichlet boundary conditions for velocity-based formulations
+ * and manages component-wise scaling for different constraint types.
+ *
+ * The class supports component-wise boundary conditions where different velocity components
+ * can be constrained independently using a component ID system:
+ * - 0: No constraints
+ * - 1: X-component only
+ * - 2: Y-component only
+ * - 3: Z-component only
+ * - 4: X and Y components
+ * - 5: Y and Z components
+ * - 6: X and Z components
+ * - 7: All components (X, Y, Z)
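+ *
+ * A rough usage sketch (values illustrative only, not taken from the solver):
+ * @code
+ * BCData bc;
+ * bc.ess_vel[0] = 1.0e-3; bc.ess_vel[1] = 0.0; bc.ess_vel[2] = 0.0;
+ * bc.comp_id = 4;  // constrain the X and Y components
+ * bc.SetScales();  // scale becomes {1, 1, 0}
+ * mfem::Vector y(3);
+ * bc.SetDirBCs(y); // y = {1.0e-3, 0.0, 0.0}
+ * @endcode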
+ */
+class BCData {
+public:
+ /**
+ * @brief Default constructor
+ *
+ * @details Initializes a BCData object with default values. Currently a stub
+ * implementation that should be expanded based on initialization requirements.
+ */
+ BCData();
+
+ /**
+ * @brief Destructor
+ *
+ * @details Cleans up BCData resources. Currently a stub implementation.
+ */
+ ~BCData();
+
+ /** @brief Essential velocity values for each component [x, y, z] */
+ double ess_vel[3];
+
+ /** @brief Scaling factors for each velocity component [x, y, z] */
+ double scale[3];
+
+ /** @brief Component ID indicating which velocity components are constrained */
+ int comp_id;
+
+ /**
+ * @brief Apply Dirichlet boundary conditions to a velocity vector
+ *
+ * @param y Output velocity vector where boundary conditions will be applied
+ *
+ * @details Sets the velocity vector components based on the essential velocity values
+ * and their corresponding scaling factors. For velocity-based methods, this function:
+ * - Initializes the output vector to zero
+ * - Applies scaled essential velocities: y[i] = ess_vel[i] * scale[i]
+ *
+ * This is used during the assembly process to enforce velocity boundary conditions.
+ */
+ void SetDirBCs(mfem::Vector& y);
+
+ /**
+ * @brief Set scaling factors based on component ID
+ *
+ * @details Configures the scale array based on the comp_id value to determine which
+ * velocity components should be constrained. The scaling pattern is:
+ * - comp_id = 0: No scaling (all zeros)
+ * - comp_id = 1: X-component only (1,0,0)
+ * - comp_id = 2: Y-component only (0,1,0)
+ * - comp_id = 3: Z-component only (0,0,1)
+ * - comp_id = 4: X,Y components (1,1,0)
+ * - comp_id = 5: Y,Z components (0,1,1)
+ * - comp_id = 6: X,Z components (1,0,1)
+ * - comp_id = 7: All components (1,1,1)
+ */
+ */
+ void SetScales();
+
+ /**
+ * @brief Static utility to decode component ID into boolean flags
+ *
+ * @param id Component ID to decode
+ * @param component Output array of boolean flags for each component [x, y, z]
+ *
+ * @details Converts a component ID integer into a boolean array indicating which
+ * velocity components are active. This is used throughout the boundary condition
+ * system to determine which degrees of freedom should be constrained.
+ *
+ * The mapping follows the same pattern as SetScales():
+ * - id = 0: (false, false, false)
+ * - id = 1: (true, false, false)
+ * - id = 2: (false, true, false)
+ * - id = 3: (false, false, true)
+ * - id = 4: (true, true, false)
+ * - id = 5: (false, true, true)
+ * - id = 6: (true, false, true)
+ * - id = 7: (true, true, true)
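+ *
+ * A small sketch (the caller is assumed to size the array to 3):
+ * @code
+ * mfem::Array<bool> cmp(3);
+ * BCData::GetComponents(6, cmp); // cmp = {true, false, true}
+ * @endcode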
+ */
+ static void GetComponents(int id, mfem::Array<bool>& component);
+};
+#endif
diff --git a/src/boundary_conditions/BCManager.cpp b/src/boundary_conditions/BCManager.cpp
new file mode 100644
index 0000000..5f0e7db
--- /dev/null
+++ b/src/boundary_conditions/BCManager.cpp
@@ -0,0 +1,143 @@
+
+
+#include "boundary_conditions/BCManager.hpp"
+
+#include "mfem.hpp"
+
+#include <cmath>
+
+void BCManager::UpdateBCData(std::unordered_map<std::string, mfem::Array<int>>& ess_bdr,
+ mfem::Array2D<double>& scale,
+ mfem::Vector& vgrad,
+ std::unordered_map<std::string, mfem::Array2D<bool>>& component) {
+ ess_bdr["total"] = 0;
+ scale = 0.0;
+
+ auto ess_comp = map_ess_comp["total"].find(step)->second;
+ auto ess_id = map_ess_id["total"].find(step)->second;
+
+ mfem::Array<bool> cmp_row;
+ cmp_row.SetSize(3);
+
+ component["total"] = false;
+ cmp_row = false;
+
+ for (size_t i = 0; i < ess_id.size(); ++i) {
+ // set the active boundary attributes
+ if (ess_comp[i] != 0) {
+ const int bcID = ess_id[i] - 1;
+ ess_bdr["total"][bcID] = 1;
+ BCData::GetComponents(std::abs(ess_comp[i]), cmp_row);
+
+ component["total"](bcID, 0) = cmp_row[0];
+ component["total"](bcID, 1) = cmp_row[1];
+ component["total"](bcID, 2) = cmp_row[2];
+ }
+ }
+
+ UpdateBCData(ess_bdr["ess_vel"], scale, component["ess_vel"]);
+ UpdateBCData(ess_bdr["ess_vgrad"], vgrad, component["ess_vgrad"]);
+}
+
+void BCManager::UpdateBCData(mfem::Array<int>& ess_bdr,
+ mfem::Array2D<double>& scale,
+ mfem::Array2D<bool>& component) {
+ m_bc_instances.clear();
+ ess_bdr = 0;
+ scale = 0.0;
+
+ // The size here is set explicitly
+ component.SetSize(ess_bdr.Size(), 3);
+ mfem::Array<bool> cmp_row;
+ cmp_row.SetSize(3);
+
+ component = false;
+ cmp_row = false;
+
+ if (map_ess_vel.find(step) == map_ess_vel.end()) {
+ return;
+ }
+
+ auto ess_vel = map_ess_vel.find(step)->second;
+ auto ess_comp = map_ess_comp["ess_vel"].find(step)->second;
+ auto ess_id = map_ess_id["ess_vel"].find(step)->second;
+
+ for (size_t i = 0; i < ess_id.size(); ++i) {
+ // set the active boundary attributes
+ if (ess_comp[i] != 0) {
+ // set the boundary condition id based on the attribute id
+ int bcID = ess_id[i];
+
+ // instantiate a boundary condition manager instance and
+ // create a BCData object
+ BCData& bc = this->CreateBCs(bcID);
+
+ // set the velocity component values
+ bc.ess_vel[0] = ess_vel[3 * i];
+ bc.ess_vel[1] = ess_vel[3 * i + 1];
+ bc.ess_vel[2] = ess_vel[3 * i + 2];
+ bc.comp_id = ess_comp[i];
+
+ // set the boundary condition scales
+ bc.SetScales();
+
+ scale(bcID - 1, 0) = bc.scale[0];
+ scale(bcID - 1, 1) = bc.scale[1];
+ scale(bcID - 1, 2) = bc.scale[2];
+ ess_bdr[bcID - 1] = 1;
+ }
+ }
+
+ for (size_t i = 0; i < ess_id.size(); ++i) {
+ // set the active boundary attributes
+ if (ess_comp[i] != 0) {
+ const int bcID = ess_id[i] - 1;
+ ess_bdr[bcID] = 1;
+ BCData::GetComponents(ess_comp[i], cmp_row);
+ component(bcID, 0) = cmp_row[0];
+ component(bcID, 1) = cmp_row[1];
+ component(bcID, 2) = cmp_row[2];
+ }
+ }
+}
+
+void BCManager::UpdateBCData(mfem::Array<int>& ess_bdr,
+ mfem::Vector& vgrad,
+ mfem::Array2D<bool>& component) {
+ ess_bdr = 0;
+ vgrad.HostReadWrite();
+ vgrad = 0.0;
+ auto data = vgrad.HostReadWrite();
+
+ // The size here is set explicitly
+ component.SetSize(ess_bdr.Size(), 3);
+ mfem::Array<bool> cmp_row;
+ cmp_row.SetSize(3);
+
+ component = false;
+ cmp_row = false;
+
+ if (map_ess_vgrad.find(step) == map_ess_vgrad.end()) {
+ return;
+ }
+
+ auto ess_vgrad = map_ess_vgrad.find(step)->second;
+ auto ess_comp = map_ess_comp["ess_vgrad"].find(step)->second;
+ auto ess_id = map_ess_id["ess_vgrad"].find(step)->second;
+
+ for (size_t i = 0; i < ess_vgrad.size(); ++i) {
+ data[i] = ess_vgrad.at(i);
+ }
+
+ for (size_t i = 0; i < ess_id.size(); ++i) {
+ // set the active boundary attributes
+ if (ess_comp[i] != 0) {
+ const int bcID = ess_id[i] - 1;
+ ess_bdr[bcID] = 1;
+ BCData::GetComponents(ess_comp[i], cmp_row);
+ component(bcID, 0) = cmp_row[0];
+ component(bcID, 1) = cmp_row[1];
+ component(bcID, 2) = cmp_row[2];
+ }
+ }
+}
diff --git a/src/boundary_conditions/BCManager.hpp b/src/boundary_conditions/BCManager.hpp
new file mode 100644
index 0000000..8252523
--- /dev/null
+++ b/src/boundary_conditions/BCManager.hpp
@@ -0,0 +1,261 @@
+
+#ifndef BCMANAGER
+#define BCMANAGER
+
+#include "boundary_conditions/BCData.hpp"
+#include "options/option_parser_v2.hpp"
+
+// C/C++ includes
+#include <algorithm>
+#include <mutex>
+#include <unordered_map> // for std::unordered_map
+#include <vector>
+
+/**
+ * @brief Singleton manager for all boundary conditions in the simulation
+ *
+ * @details This class implements the Singleton pattern to provide centralized management
+ * of boundary conditions throughout the simulation. It coordinates time-dependent boundary
+ * conditions, manages multiple BCData instances, and provides the interface between
+ * the options system and the finite element assembly process.
+ *
+ * Key responsibilities:
+ * - Manage time-dependent boundary condition changes
+ * - Store and organize essential velocity and velocity gradient data
+ * - Create and maintain BCData instances for each boundary
+ * - Coordinate between different boundary condition types (velocity vs velocity gradient)
+ * - Provide thread-safe initialization and access
+ *
+ * The class supports complex boundary condition scenarios including:
+ * - Multi-step boundary condition evolution
+ * - Mixed velocity and velocity gradient constraints
+ * - Component-wise boundary condition application
+ * - Time-dependent boundary condition updates
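+ *
+ * A minimal per-step usage sketch (the argument variables here are hypothetical):
+ * @code
+ * BCManager& bcm = BCManager::GetInstance();
+ * if (bcm.GetUpdateStep(step)) {
+ *     bcm.UpdateBCData(ess_bdr, scale, vgrad, component);
+ * }
+ * @endcode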
+ */
+class BCManager {
+public:
+ /**
+ * @brief Get the singleton instance of BCManager
+ *
+ * @return Reference to the singleton BCManager instance
+ *
+ * @details Implements the Meyer's singleton pattern for thread-safe initialization.
+ * The instance is created on first call and persists for the lifetime of the program.
+ */
+ static BCManager& GetInstance() {
+ static BCManager bc_manager;
+ return bc_manager;
+ }
+
+ /**
+ * @brief Initialize the BCManager with time-dependent boundary condition data
+ *
+ * @param u_step Vector of time steps when boundary conditions should be updated
+ * @param ess_vel Map from time step to essential velocity values
+ * @param ess_vgrad Map from time step to essential velocity gradient values
+ * @param ess_comp Map from BC type and time step to component IDs
+ * @param ess_id Map from BC type and time step to boundary IDs
+ *
+ * @details Thread-safe initialization using std::call_once. This method should be called
+ * once during simulation setup to configure all time-dependent boundary condition data.
+ * The data structures support complex time-dependent scenarios where different boundaries
+ * can have different constraint patterns that change over time.
+ *
+ * The map_of_imap type represents nested maps: map<string, map<int, vector<int>>>
+ * where the outer key is the BC type ("ess_vel", "ess_vgrad", "total") and the inner
+ * key is the time step number.
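+ *
+ * A hypothetical initialization sketch (all values and the *_map variables are
+ * illustrative only):
+ * @code
+ * std::vector<int> steps = {1, 10};
+ * std::unordered_map<int, std::vector<double>> vel = {{1, {0.0, 0.0, 1.0e-3}}};
+ * BCManager::GetInstance().Init(steps, vel, vgrad_map, comp_map, id_map);
+ * @endcode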
+ */
+ void Init(const std::vector<int>& u_step,
+ const std::unordered_map<int, std::vector<double>>& ess_vel,
+ const std::unordered_map<int, std::vector<double>>& ess_vgrad,
+ const map_of_imap& ess_comp,
+ const map_of_imap& ess_id) {
+ std::call_once(init_flag, [&]() {
+ update_step = u_step;
+ map_ess_vel = ess_vel;
+ map_ess_vgrad = ess_vgrad;
+ map_ess_comp = ess_comp;
+ map_ess_id = ess_id;
+ });
+ }
+
+ /**
+ * @brief Get a boundary condition instance by ID
+ *
+ * @param bcID Boundary condition identifier
+ * @return Reference to the BCData instance for the specified boundary
+ *
+ * @details Provides access to a specific boundary condition instance. The bcID
+ * corresponds to mesh boundary attributes. Used during assembly to access
+ * boundary condition data for specific mesh boundaries.
+ */
+ BCData& GetBCInstance(int bcID) {
+ return m_bc_instances.find(bcID)->second;
+ }
+
+ /**
+ * @brief Get a boundary condition instance by ID (const version)
+ *
+ * @param bcID Boundary condition identifier
+ * @return Const reference to the BCData instance for the specified boundary
+ *
+ * @details Const version of GetBCInstance for read-only access to boundary condition data.
+ */
+ const BCData& GetBCInstance(int bcID) const {
+ return m_bc_instances.find(bcID)->second;
+ }
+
+ /**
+ * @brief Create or access a boundary condition instance
+ *
+ * @param bcID Boundary condition identifier
+ * @return Reference to the BCData instance (created if it doesn't exist)
+ *
+ * @details Creates a new BCData instance if one doesn't exist for the given bcID,
+ * or returns a reference to the existing instance. This is used during boundary
+ * condition setup to ensure all required BCData objects are available.
+ */
+ BCData& CreateBCs(int bcID) {
+ return m_bc_instances[bcID];
+ }
+
+ /**
+ * @brief Get all boundary condition instances
+ *
+ * @return Reference to the map containing all BCData instances
+ *
+ * @details Provides access to the complete collection of boundary condition instances.
+ * Useful for iteration or bulk operations on all boundary conditions.
+ */
+ std::unordered_map<int, BCData>& GetBCInstances() {
+ return m_bc_instances;
+ }
+
+ /**
+ * @brief Update boundary condition data for the current time step
+ *
+ * @param ess_bdr Map of essential boundary arrays by BC type
+ * @param scale 2D array of scaling factors for boundary conditions
+ * @param vgrad Vector of velocity gradient values
+ * @param component Map of component activation arrays by BC type
+ *
+ * @details Main coordination method that updates all boundary condition data structures
+ * for the current simulation time step. This method:
+ * 1. Clears previous boundary condition data
+ * 2. Sets up combined boundary condition information
+ * 3. Calls specialized update methods for velocity and velocity gradient BCs
+ * 4. Coordinates between different boundary condition types
+ *
+ * This is called at the beginning of each time step where boundary conditions change.
+ */
+ void UpdateBCData(std::unordered_map<std::string, mfem::Array<int>>& ess_bdr,
+ mfem::Array2D<double>& scale,
+ mfem::Vector& vgrad,
+ std::unordered_map<std::string, mfem::Array2D<bool>>& component);
+
+ /**
+ * @brief Check if the current step requires boundary condition updates
+ *
+ * @param step_ Time step number to check
+ * @return True if boundary conditions should be updated at this step
+ *
+ * @details Determines whether boundary conditions need to be updated at the specified
+ * time step by checking against the list of update steps provided during initialization.
+ * If an update is needed, the internal step counter is also updated.
+ */
+ bool GetUpdateStep(int step_) {
+ if (std::find(update_step.begin(), update_step.end(), step_) != update_step.end()) {
+ step = step_;
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+private:
+ /**
+ * @brief Private constructor for singleton pattern
+ *
+ * @details Default constructor is private to enforce singleton pattern.
+ */
+ BCManager() {}
+
+ /**
+ * @brief Deleted copy constructor for singleton pattern
+ */
+ BCManager(const BCManager&) = delete;
+
+ /**
+ * @brief Deleted copy assignment operator for singleton pattern
+ */
+ BCManager& operator=(const BCManager&) = delete;
+
+ /**
+ * @brief Deleted move constructor for singleton pattern
+ */
+ BCManager(BCManager&&) = delete;
+
+ /**
+ * @brief Deleted move assignment operator for singleton pattern
+ */
+ BCManager& operator=(BCManager&&) = delete;
+
+ /**
+ * @brief Update velocity gradient boundary condition data
+ *
+ * @param ess_bdr Essential boundary array for velocity gradient BCs
+ * @param vgrad Velocity gradient vector to populate
+ * @param component Component activation array for velocity gradient BCs
+ *
+ * @details Specialized update method for velocity gradient boundary conditions.
+ * Processes the velocity gradient data for the current time step and sets up
+ * the appropriate data structures for finite element assembly.
+ */
+ void
+ UpdateBCData(mfem::Array<int>& ess_bdr, mfem::Vector& vgrad, mfem::Array2D<bool>& component);
+
+ /**
+ * @brief Update velocity boundary condition data
+ *
+ * @param ess_bdr Essential boundary array for velocity BCs
+ * @param scale Scaling factors for velocity BCs
+ * @param component Component activation array for velocity BCs
+ *
+ * @details Specialized update method for velocity boundary conditions. Creates BCData
+ * instances for each active boundary, sets up scaling factors, and prepares data
+ * structures for finite element assembly. This method:
+ * 1. Clears existing BCData instances
+ * 2. Processes essential velocity data for the current time step
+ * 3. Creates BCData objects with appropriate velocity and component settings
+ * 4. Sets up scaling and boundary activation arrays
+ */
+ void UpdateBCData(mfem::Array<int>& ess_bdr,
+ mfem::Array2D<double>& scale,
+ mfem::Array2D<bool>& component);
+
+ /** @brief Thread-safe initialization flag */
+ std::once_flag init_flag;
+
+ /** @brief Current simulation time step */
+ int step = 0;
+
+ /** @brief Collection of boundary condition data instances */
+ std::unordered_map<int, BCData> m_bc_instances;
+
+ /** @brief Time steps when boundary conditions should be updated */
+ std::vector<int> update_step;
+
+ /** @brief Essential velocity values by time step */
+ std::unordered_map<int, std::vector<double>> map_ess_vel;
+
+ /** @brief Essential velocity gradient values by time step */
+ std::unordered_map<int, std::vector<double>> map_ess_vgrad;
+
+ /** @brief Component IDs by BC type and time step */
+ map_of_imap map_ess_comp;
+
+ /** @brief Boundary IDs by BC type and time step */
+ map_of_imap map_ess_id;
+};
+
+#endif
diff --git a/src/fem_operators/mechanics_integrators.cpp b/src/fem_operators/mechanics_integrators.cpp
new file mode 100644
index 0000000..9ade98d
--- /dev/null
+++ b/src/fem_operators/mechanics_integrators.cpp
@@ -0,0 +1,2152 @@
+
+
+#include "fem_operators/mechanics_integrators.hpp"
+
+#include "utilities/assembly_ops.hpp"
+#include "utilities/mechanics_log.hpp"
+
+#include "RAJA/RAJA.hpp"
+#include "mfem.hpp"
+#include "mfem/general/forall.hpp"
+
+#include <algorithm>
+#include <iostream> // cerr
+#include <math.h> // log
+
+// Outside of the UMAT function calls this should be the function called
+// to assemble our residual vectors.
+void ExaNLFIntegrator::AssembleElementVector(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& elfun,
+ mfem::Vector& elvect) {
+ CALI_CXX_MARK_SCOPE("enlfi_assembleElemVec");
+ int dof = el.GetDof(), dim = el.GetDim();
+
+ mfem::DenseMatrix DSh, DS;
+ mfem::DenseMatrix Jpt;
+ mfem::DenseMatrix PMatI, PMatO;
+ // This is our stress tensor
+ mfem::DenseMatrix P(3);
+
+ DSh.SetSize(dof, dim);
+ DS.SetSize(dof, dim);
+ Jpt.SetSize(dim);
+
+ // PMatI would be our velocity in this case
+ PMatI.UseExternalData(elfun.GetData(), dof, dim);
+ elvect.SetSize(dof * dim);
+
+ // PMatO would be our residual vector
+ elvect = 0.0;
+ PMatO.UseExternalData(elvect.HostReadWrite(), dof, dim);
+
+ const mfem::IntegrationRule* ir = IntRule;
+ if (!ir) {
+ ir = &(mfem::IntRules.Get(el.GetGeomType(),
+ 2 * el.GetOrder() + 1)); // must match quadrature space
+ }
+
+ for (int i = 0; i < ir->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+
+ // compute Jacobian of the transformation
+ Jpt = Ttr.InverseJacobian(); // Jrt = dxi / dX
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jpt, DS); // dN_a(xi) / dX = dN_a(xi)/dxi * dxi/dX
+
+ double stress[6];
+ GetQFData(
+ Ttr.ElementNo, i, stress, m_sim_state->GetQuadratureFunction("cauchy_stress_end"));
+ // Could probably later have this only set once...
+ // Would reduce the number of mallocs that we're doing and
+ // should potentially provide a small speed boost.
+ /**
+ * @brief Map Voigt notation stress components to full 3x3 symmetric stress tensor.
+ *
+ * Converts stress data from Voigt notation [σ_xx, σ_yy, σ_zz, σ_yz, σ_xz, σ_xy]
+ * to full symmetric 3x3 stress tensor for use in matrix operations.
+ * The symmetry is enforced by setting P(i,j) = P(j,i) for off-diagonal terms.
+ */
+ P(0, 0) = stress[0];
+ P(1, 1) = stress[1];
+ P(2, 2) = stress[2];
+ P(1, 2) = stress[3];
+ P(0, 2) = stress[4];
+ P(0, 1) = stress[5];
+
+ P(2, 1) = P(1, 2);
+ P(2, 0) = P(0, 2);
+ P(1, 0) = P(0, 1);
+
+ DS *= (Ttr.Weight() * ip.weight);
+ AddMult(DS, P, PMatO);
+ }
+
+ return;
+}
+
+void ExaNLFIntegrator::AssembleElementGrad(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& /*elfun*/,
+ mfem::DenseMatrix& elmat) {
+ CALI_CXX_MARK_SCOPE("enlfi_assembleElemGrad");
+ int dof = el.GetDof(), dim = el.GetDim();
+
+ mfem::DenseMatrix DSh, DS, Jrt;
+
+ // Now time to start assembling stuff
+ mfem::DenseMatrix grad_trans, temp;
+ mfem::DenseMatrix tan_stiff;
+
+ constexpr int ngrad_dim2 = 36;
+ double matGrad[ngrad_dim2];
+
+ // grad_trans holds the transpose of the B matrix as seen in
+ // [B^t][tan_stiff][B]
+ grad_trans.SetSize(dof * dim, 6);
+ // We need a temp matrix to store the first matrix product of [B^t][tan_stiff][B]
+ temp.SetSize(6, dof * dim);
+
+ tan_stiff.UseExternalData(&matGrad[0], 6, 6);
+
+ DSh.SetSize(dof, dim);
+ DS.SetSize(dof, dim);
+ Jrt.SetSize(dim);
+ elmat.SetSize(dof * dim);
+
+ const mfem::IntegrationRule* ir = IntRule;
+ if (!ir) {
+ ir = &(mfem::IntRules.Get(el.GetGeomType(),
+ 2 * el.GetOrder() + 1)); // <--- must match quadrature space
+ }
+
+ elmat = 0.0;
+
+ for (int i = 0; i < ir->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+ CalcInverse(Ttr.Jacobian(), Jrt);
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jrt, DS);
+
+ GetQFData(
+ Ttr.ElementNo, i, matGrad, m_sim_state->GetQuadratureFunction("tangent_stiffness"));
+ // grad_trans is B^t
+ GenerateGradMatrix(DS, grad_trans);
+ // We multiply our quadrature weights into our tan_stiff matrix here
+ tan_stiff *= ip.weight * Ttr.Weight();
+ // We use temp as a temporary matrix:
+ // temp = [Cstiff][B]
+ MultABt(tan_stiff, grad_trans, temp);
+ // We now add our [B^t][kgeom] product to our tangent stiffness matrix that
+ // we want to output to our material tangent stiffness matrix
+ AddMult(grad_trans, temp, elmat);
+ }
+
+ return;
+}
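+
+/* Reference form of the accumulation above (illustrative): at each quadrature
+ * point the element stiffness picks up
+ *
+ *     elmat += w_qpt * det(J) * B^T * C * B
+ *
+ * where B is the (6 x dof*dim) strain-displacement matrix built by
+ * GenerateGradMatrix and C is the 6x6 material tangent in Voigt form; the
+ * MultABt/AddMult pair above is exactly this product with the weights folded
+ * into C.
+ */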
+
+// This performs the assembly step of the RHS of our system:
+// f_{ik} = \nabla_{ij}\phi^T D_{jk}, where D_{jk} = w_{qpt} (adj(J) \sigma)_{jk}
+// is assembled here and the action itself is applied later in AddMultPA.
+void ExaNLFIntegrator::AssemblePA(const mfem::FiniteElementSpace& fes) {
+ CALI_CXX_MARK_SCOPE("enlfi_assemblePA");
+ mfem::Mesh* mesh = fes.GetMesh();
+ const mfem::FiniteElement& el = *fes.GetFE(0);
+ space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ nqpts = ir->GetNPoints();
+ nnodes = el.GetDof();
+ nelems = fes.GetNE();
+
+ auto W = ir->GetWeights().Read();
+ geom = mesh->GetGeometricFactors(*ir, mfem::GeometricFactors::JACOBIANS);
+
+ // Grab the end-of-step stress quadrature function; it's also used for output visualization
+ auto stress_end = m_sim_state->GetQuadratureFunction("cauchy_stress_end");
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ if (grad.Size() != (nqpts * dim * nnodes)) {
+ grad.SetSize(nqpts * dim * nnodes, mfem::Device::GetMemoryType());
+ {
+ mfem::DenseMatrix DSh;
+ const int offset = nnodes * dim;
+ double* qpts_dshape_data = grad.HostReadWrite();
+ for (int i = 0; i < nqpts; i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ DSh.UseExternalData(&qpts_dshape_data[offset * i], nnodes, dim);
+ el.CalcDShape(ip, DSh);
+ }
+ }
+ grad.UseDevice(true);
+ }
+
+ // geom->J's layout isn't going to work for us as of right now, so we just reorder it
+ // into the version that we want instead...
+ if (jacobian.Size() != (dim * dim * nqpts * nelems)) {
+ jacobian.SetSize(dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ jacobian.UseDevice(true);
+ }
+
+ if (dmat.Size() != (dim * dim * nqpts * nelems)) {
+ dmat.SetSize(dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ dmat.UseDevice(true);
+ }
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> J(jacobian.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM3> layout_stress = RAJA::make_permuted_layout({{2 * dim, nqpts, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> S(stress_end->ReadWrite(),
+ layout_stress);
+
+ RAJA::View<double, RAJA::Layout<DIM4>> D(dmat.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM4> layout_geom = RAJA::make_permuted_layout({{nqpts, dim, dim, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> geom_j_view(
+ geom->J.Read(), layout_geom);
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i) {
+ for (int j = 0; j < nqpts_; j++) {
+ for (int k = 0; k < dim_; k++) {
+ for (int l = 0; l < dim_; l++) {
+ J(l, k, j, i) = geom_j_view(j, l, k, i);
+ }
+ }
+ }
+ });
+
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ // We declare this view as const, but we mutate the underlying values in the
+ // scoped section for each quadrature point below. adj is actually in row-major
+ // memory order, but if we set this view to column-major then it will act as
+ // the transpose of adj, which is what we want.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0], dim_, dim_);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+
+ D(0, 0, j_qpts, i_elems) = S(0, j_qpts, i_elems) * A(0, 0) +
+ S(5, j_qpts, i_elems) * A(0, 1) +
+ S(4, j_qpts, i_elems) * A(0, 2);
+ D(1, 0, j_qpts, i_elems) = S(0, j_qpts, i_elems) * A(1, 0) +
+ S(5, j_qpts, i_elems) * A(1, 1) +
+ S(4, j_qpts, i_elems) * A(1, 2);
+ D(2, 0, j_qpts, i_elems) = S(0, j_qpts, i_elems) * A(2, 0) +
+ S(5, j_qpts, i_elems) * A(2, 1) +
+ S(4, j_qpts, i_elems) * A(2, 2);
+
+ D(0, 1, j_qpts, i_elems) = S(5, j_qpts, i_elems) * A(0, 0) +
+ S(1, j_qpts, i_elems) * A(0, 1) +
+ S(3, j_qpts, i_elems) * A(0, 2);
+ D(1, 1, j_qpts, i_elems) = S(5, j_qpts, i_elems) * A(1, 0) +
+ S(1, j_qpts, i_elems) * A(1, 1) +
+ S(3, j_qpts, i_elems) * A(1, 2);
+ D(2, 1, j_qpts, i_elems) = S(5, j_qpts, i_elems) * A(2, 0) +
+ S(1, j_qpts, i_elems) * A(2, 1) +
+ S(3, j_qpts, i_elems) * A(2, 2);
+
+ D(0, 2, j_qpts, i_elems) = S(4, j_qpts, i_elems) * A(0, 0) +
+ S(3, j_qpts, i_elems) * A(0, 1) +
+ S(2, j_qpts, i_elems) * A(0, 2);
+ D(1, 2, j_qpts, i_elems) = S(4, j_qpts, i_elems) * A(1, 0) +
+ S(3, j_qpts, i_elems) * A(1, 1) +
+ S(2, j_qpts, i_elems) * A(1, 2);
+ D(2, 2, j_qpts, i_elems) = S(4, j_qpts, i_elems) * A(2, 0) +
+ S(3, j_qpts, i_elems) * A(2, 1) +
+ S(2, j_qpts, i_elems) * A(2, 2);
+ } // End of doing J_{ij}\sigma_{jk} / nqpts loop
+ }); // End of elements
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ for (int i = 0; i < dim_; i++) {
+ for (int j = 0; j < dim_; j++) {
+ D(j, i, j_qpts, i_elems) *= W[j_qpts];
+ }
+ }
+ }
+ });
+ } // End of if statement
+}
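+
+/* Sanity check for the hand-unrolled adjugate blocks used throughout this file
+ * (an illustrative host-side sketch using mfem's own helper): the adjugate
+ * satisfies J * adj(J) = det(J) * I, so
+ *
+ *     mfem::DenseMatrix J(3), adjJ(3), P(3);
+ *     // ... fill J with any invertible 3x3 matrix ...
+ *     CalcAdjugate(J, adjJ);
+ *     Mult(J, adjJ, P); // P should now equal det(J) times the identity
+ */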
+
+// In the below function we'll be applying the below action on our material
+// tangent matrix C^{tan} at each quadrature point as:
+// D_{ijkm} = 1 / det(J) * w_{qpt} * adj(J)^T_{ij} C^{tan}_{ijkl} adj(J)_{lm}
+// where D is our new 4th order tensor, J is our jacobian calculated from the
+// mesh geometric factors, and adj(J) is the adjugate of J.
+void ExaNLFIntegrator::AssembleGradPA(const mfem::Vector& /* x */,
+ const mfem::FiniteElementSpace& fes) {
+ this->AssembleGradPA(fes);
+}
+
+// In the below function we'll be applying the below action on our material
+// tangent matrix C^{tan} at each quadrature point as:
+// D_{ijkm} = 1 / det(J) * w_{qpt} * adj(J)^T_{ij} C^{tan}_{ijkl} adj(J)_{lm}
+// where D is our new 4th order tensor, J is our jacobian calculated from the
+// mesh geometric factors, and adj(J) is the adjugate of J.
+void ExaNLFIntegrator::AssembleGradPA(const mfem::FiniteElementSpace& fes) {
+ CALI_CXX_MARK_SCOPE("enlfi_assemblePAG");
+ mfem::Mesh* mesh = fes.GetMesh();
+ const mfem::FiniteElement& el = *fes.GetFE(0);
+ space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ nqpts = ir->GetNPoints();
+ nnodes = el.GetDof();
+ nelems = fes.GetNE();
+ auto W = ir->GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ if (grad.Size() != (nqpts * dim * nnodes)) {
+ grad.SetSize(nqpts * dim * nnodes, mfem::Device::GetMemoryType());
+ {
+ mfem::DenseMatrix DSh;
+ const int offset = nnodes * dim;
+ double* qpts_dshape_data = grad.HostReadWrite();
+ for (int i = 0; i < nqpts; i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ DSh.UseExternalData(&qpts_dshape_data[offset * i], nnodes, dim);
+ el.CalcDShape(ip, DSh);
+ }
+ }
+ grad.UseDevice(true);
+ }
+
+ // geom->J's layout isn't going to work for us as of right now, so we just reorder it
+ // into the version that we want instead...
+ if (jacobian.Size() != (dim * dim * nqpts * nelems)) {
+ jacobian.SetSize(dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ jacobian.UseDevice(true);
+
+ geom = mesh->GetGeometricFactors(*ir, mfem::GeometricFactors::JACOBIANS);
+
+ const int DIM4 = 4;
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout(
+ {{dim, dim, nqpts, nelems}}, perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> J(jacobian.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM4> layout_geom = RAJA::make_permuted_layout({{nqpts, dim, dim, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> geom_j_view(
+ geom->J.Read(), layout_geom);
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i) {
+ for (int j = 0; j < nqpts_; j++) {
+ for (int k = 0; k < dim_; k++) {
+ for (int l = 0; l < dim_; l++) {
+ J(l, k, j, i) = geom_j_view(j, l, k, i);
+ }
+ }
+ }
+ });
+ }
+
+ if (pa_dmat.Size() != (dim * dim * dim * dim * nqpts * nelems)) {
+ pa_dmat.SetSize(dim * dim * dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ pa_dmat.UseDevice(true);
+ }
+
+ if (pa_mat.Size() != (dim * dim * dim * dim * nqpts * nelems)) {
+ pa_mat.SetSize(dim * dim * dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ pa_mat.UseDevice(true);
+ }
+
+ TransformMatGradTo4D(m_sim_state->GetQuadratureFunction("tangent_stiffness"), pa_mat);
+
+ pa_dmat = 0.0;
+
+ const int DIM2 = 2;
+ const int DIM4 = 4;
+ const int DIM6 = 6;
+ std::array<RAJA::idx_t, DIM6> perm6{{5, 4, 3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ // A bunch of helper RAJA views to make dealing with data easier down below in our kernel.
+
+ RAJA::Layout<DIM6> layout_4Dtensor = RAJA::make_permuted_layout(
+ {{dim, dim, dim, dim, nqpts, nelems}}, perm6);
+ RAJA::View<const double, RAJA::Layout<DIM6>> C(pa_mat.Read(),
+ layout_4Dtensor);
+ // Swapped over to row-major order since it makes sense for later applications...
+ // We should make C row-major as well for the PA operations
+ RAJA::View<double, RAJA::Layout<DIM6>> D(
+ pa_dmat.ReadWrite(), nelems, nqpts, dim, dim, dim, dim);
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> J(jacobian.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ // This is the loop we'll want to parallelize; the rest are all serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ // We declare this view as const, but we mutate the underlying values in the
+ // scoped section for each quadrature point below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ c_detJ = 1.0 / detJ * W[j_qpts];
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ // Unrolled part of the loops just so we wouldn't have so many nested ones.
+ // If we were to get really ambitious, we could also eliminate the m-indexed
+ // loop...
+ for (int n = 0; n < dim_; n++) {
+ for (int m = 0; m < dim_; m++) {
+ for (int l = 0; l < dim_; l++) {
+ D(i_elems, j_qpts, 0, 0, l, n) +=
+ (A(0, 0) * C(0, 0, l, m, j_qpts, i_elems) +
+ A(1, 0) * C(1, 0, l, m, j_qpts, i_elems) +
+ A(2, 0) * C(2, 0, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 0, 1, l, n) +=
+ (A(0, 0) * C(0, 1, l, m, j_qpts, i_elems) +
+ A(1, 0) * C(1, 1, l, m, j_qpts, i_elems) +
+ A(2, 0) * C(2, 1, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 0, 2, l, n) +=
+ (A(0, 0) * C(0, 2, l, m, j_qpts, i_elems) +
+ A(1, 0) * C(1, 2, l, m, j_qpts, i_elems) +
+ A(2, 0) * C(2, 2, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 1, 0, l, n) +=
+ (A(0, 1) * C(0, 0, l, m, j_qpts, i_elems) +
+ A(1, 1) * C(1, 0, l, m, j_qpts, i_elems) +
+ A(2, 1) * C(2, 0, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 1, 1, l, n) +=
+ (A(0, 1) * C(0, 1, l, m, j_qpts, i_elems) +
+ A(1, 1) * C(1, 1, l, m, j_qpts, i_elems) +
+ A(2, 1) * C(2, 1, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 1, 2, l, n) +=
+ (A(0, 1) * C(0, 2, l, m, j_qpts, i_elems) +
+ A(1, 1) * C(1, 2, l, m, j_qpts, i_elems) +
+ A(2, 1) * C(2, 2, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 2, 0, l, n) +=
+ (A(0, 2) * C(0, 0, l, m, j_qpts, i_elems) +
+ A(1, 2) * C(1, 0, l, m, j_qpts, i_elems) +
+ A(2, 2) * C(2, 0, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 2, 1, l, n) +=
+ (A(0, 2) * C(0, 1, l, m, j_qpts, i_elems) +
+ A(1, 2) * C(1, 1, l, m, j_qpts, i_elems) +
+ A(2, 2) * C(2, 1, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ D(i_elems, j_qpts, 2, 2, l, n) +=
+ (A(0, 2) * C(0, 2, l, m, j_qpts, i_elems) +
+ A(1, 2) * C(1, 2, l, m, j_qpts, i_elems) +
+ A(2, 2) * C(2, 2, l, m, j_qpts, i_elems)) *
+ A(m, n);
+ }
+ }
+ } // End of Dikln = adj(J)_{ji} C_{jklm} adj(J)_{mn} loop
+
+ // Unrolled part of the loops just so we wouldn't have so many nested ones.
+ for (int n = 0; n < dim_; n++) {
+ for (int l = 0; l < dim_; l++) {
+ D(i_elems, j_qpts, l, n, 0, 0) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 0, 1) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 0, 2) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 1, 0) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 1, 1) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 1, 2) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 2, 0) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 2, 1) *= c_detJ;
+ D(i_elems, j_qpts, l, n, 2, 2) *= c_detJ;
+ }
+ } // End of D_{ijkl} *= 1/det(J) * w_{qpt} loop
+ } // End of quadrature loop
+ }); // End of Elements loop
+ } // End of else statement
+}
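+
+/* Reference (naive) version of the unrolled transform above for a single
+ * quadrature point, kept here for readability -- the kernel body is this with
+ * the first two indices expanded by hand:
+ *
+ *     for (int a = 0; a < 3; a++)
+ *         for (int b = 0; b < 3; b++)
+ *             for (int l = 0; l < 3; l++)
+ *                 for (int n = 0; n < 3; n++)
+ *                     for (int j = 0; j < 3; j++)
+ *                         for (int m = 0; m < 3; m++)
+ *                             D(e, q, a, b, l, n) += A(j, a) * C(j, b, l, m, q, e) * A(m, n);
+ *
+ * followed by scaling every entry of D by c_detJ = w_qpt / det(J).
+ */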
+
+// Here we're applying the following action using the assembled "D" 2nd order
+// tensor found above:
+// y_{ik} = \nabla_{ij}\phi^T_{\epsilon} D_{jk}
+void ExaNLFIntegrator::AddMultPA(const mfem::Vector& /*x*/, mfem::Vector& y) const {
+ CALI_CXX_MARK_SCOPE("enlfi_amPAV");
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ // Swapped over to row-major order since it makes sense for later applications...
+ // We should make C row-major as well for the PA operations
+ RAJA::Layout<DIM4> layout_tensor = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> D(dmat.Read(),
+ layout_tensor);
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> Y(y.ReadWrite(), layout_field);
+ // Transpose of the local gradient variable
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ for (int k = 0; k < dim_; k++) {
+ for (int j = 0; j < dim_; j++) {
+ for (int i = 0; i < nnodes_; i++) {
+ Y(i, k, i_elems) += Gt(i, j, j_qpts) * D(j, k, j_qpts, i_elems);
+ }
+ }
+ } // End of the final action of Y_{ik} += Gt_{ij} T_{jk}
+ } // End of nQpts
+ }); // End of nelems
+ } // End of if statement
+}
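+
+/* Usage sketch (hedged -- the exact wiring lives in the driver code): these PA
+ * entry points are not called directly. mfem dispatches to them once the owning
+ * nonlinear form is put in partial-assembly mode, roughly:
+ *
+ *     mfem::ParNonlinearForm form(&fes);
+ *     form.AddDomainIntegrator(new ExaNLFIntegrator(...)); // ctor args elided
+ *     form.SetAssemblyLevel(mfem::AssemblyLevel::PARTIAL);
+ *     form.Mult(x, y); // internally runs AssemblePA, then AddMultPA
+ */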
+
+// Here we're applying the following action using the assembled "D" 4th order
+// tensor found above:
+// y_{ik} = \nabla_{ij}\phi^T_{\epsilon} D_{jklm} \nabla_{mn}\phi_{\epsilon} x_{nl}
+void ExaNLFIntegrator::AddMultGradPA(const mfem::Vector& x, mfem::Vector& y) const {
+ CALI_CXX_MARK_SCOPE("enlfi_amPAG");
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM6 = 6;
+
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+ // Swapped over to row-major order since it makes sense for later applications...
+ // We should make C row-major as well for the PA operations
+ RAJA::View<const double, RAJA::Layout<DIM6>> D(
+ pa_dmat.Read(), nelems, nqpts, dim, dim, dim, dim);
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> X(x.Read(), layout_field);
+ RAJA::View<double, RAJA::Layout<DIM3>> Y(y.ReadWrite(), layout_field);
+ // Transpose of the local gradient variable
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ // View for our temporary 2d array
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ double T[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+ for (int i = 0; i < dim_; i++) {
+ for (int j = 0; j < dim_; j++) {
+ for (int k = 0; k < nnodes_; k++) {
+ T[0] += D(i_elems, j_qpts, 0, 0, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[1] += D(i_elems, j_qpts, 1, 0, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[2] += D(i_elems, j_qpts, 2, 0, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[3] += D(i_elems, j_qpts, 0, 1, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[4] += D(i_elems, j_qpts, 1, 1, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[5] += D(i_elems, j_qpts, 2, 1, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[6] += D(i_elems, j_qpts, 0, 2, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[7] += D(i_elems, j_qpts, 1, 2, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ T[8] += D(i_elems, j_qpts, 2, 2, i, j) * Gt(k, j, j_qpts) *
+ X(k, i, i_elems);
+ }
+ }
+ } // End of doing tensor contraction of D_{jkmo}G_{op}X_{pm}
+
+ RAJA::View<const double, RAJA::Layout<DIM2>> Tview(&T[0],
+ layout_adj);
+ for (int k = 0; k < dim_; k++) {
+ for (int j = 0; j < dim_; j++) {
+ for (int i = 0; i < nnodes_; i++) {
+ Y(i, k, i_elems) += Gt(i, j, j_qpts) * Tview(j, k);
+ }
+ }
+ } // End of the final action of Y_{ik} += Gt_{ij} T_{jk}
+ } // End of nQpts
+ }); // End of nelems
+ } // End of if statement
+}
+
+// This assembles the diagonal of our LHS which can be used as a preconditioner
+void ExaNLFIntegrator::AssembleGradDiagonalPA(mfem::Vector& diag) const {
+ CALI_CXX_MARK_SCOPE("enlfi_AssembleGradDiagonalPA");
+
+ const mfem::IntegrationRule& ir =
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->GetSpaceShared()->GetIntRule(0);
+ auto W = ir.GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ // A bunch of helper RAJA views to make dealing with data easier down below in our kernel.
+
+ RAJA::Layout<DIM4> layout_tensor = RAJA::make_permuted_layout(
+ {{2 * dim, 2 * dim, nqpts, nelems}}, perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> K(
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->Read(), layout_tensor);
+
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> Y(diag.ReadWrite(),
+ layout_field);
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> J(jacobian.Read(),
+ layout_jacob);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ // This is the loop we'll want to parallelize; the rest are all serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ // We declare this view as const, but we mutate the underlying values in the
+ // scoped section for each quadrature point below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ c_detJ = 1.0 / detJ * W[j_qpts];
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knodes = 0; knodes < nnodes_; knodes++) {
+ const double bx = Gt(knodes, 0, j_qpts) * A(0, 0) +
+ Gt(knodes, 1, j_qpts) * A(0, 1) +
+ Gt(knodes, 2, j_qpts) * A(0, 2);
+
+ const double by = Gt(knodes, 0, j_qpts) * A(1, 0) +
+ Gt(knodes, 1, j_qpts) * A(1, 1) +
+ Gt(knodes, 2, j_qpts) * A(1, 2);
+
+ const double bz = Gt(knodes, 0, j_qpts) * A(2, 0) +
+ Gt(knodes, 1, j_qpts) * A(2, 1) +
+ Gt(knodes, 2, j_qpts) * A(2, 2);
+
+ Y(knodes, 0, i_elems) +=
+ c_detJ *
+ (bx * (bx * K(0, 0, j_qpts, i_elems) + by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems)) +
+ by * (bx * K(5, 0, j_qpts, i_elems) + by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems)) +
+ bz * (bx * K(4, 0, j_qpts, i_elems) + by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems)));
+
+ Y(knodes, 1, i_elems) +=
+ c_detJ *
+ (bx * (bx * K(5, 5, j_qpts, i_elems) + by * K(5, 1, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems)) +
+ by * (bx * K(1, 5, j_qpts, i_elems) + by * K(1, 1, j_qpts, i_elems) +
+ bz * K(1, 3, j_qpts, i_elems)) +
+ bz * (bx * K(3, 5, j_qpts, i_elems) + by * K(3, 1, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems)));
+
+ Y(knodes, 2, i_elems) +=
+ c_detJ *
+ (bx * (bx * K(4, 4, j_qpts, i_elems) + by * K(4, 3, j_qpts, i_elems) +
+ bz * K(4, 2, j_qpts, i_elems)) +
+ by * (bx * K(3, 4, j_qpts, i_elems) + by * K(3, 3, j_qpts, i_elems) +
+ bz * K(3, 2, j_qpts, i_elems)) +
+ bz * (bx * K(2, 4, j_qpts, i_elems) + by * K(2, 3, j_qpts, i_elems) +
+ bz * K(2, 2, j_qpts, i_elems)));
+ }
+ }
+ });
+ }
+}
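+
+/* Usage sketch (hedged): the element-local diagonal assembled here is what a
+ * Jacobi-type smoother consumes once mfem has gathered it to true dofs,
+ * roughly:
+ *
+ *     mfem::Vector diag(fes.GetTrueVSize());
+ *     // ... the PA operator assembles its diagonal into diag ...
+ *     mfem::OperatorJacobiSmoother jacobi(diag, ess_tdof_list);
+ *     krylov.SetPreconditioner(jacobi);
+ */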
+
+/// Method defining element assembly.
+/** The result of the element assembly is added and stored in the @a emat
+ Vector. */
+void ExaNLFIntegrator::AssembleGradEA(const mfem::Vector& /*x*/,
+ const mfem::FiniteElementSpace& fes,
+ mfem::Vector& emat) {
+ AssembleEA(fes, emat);
+}
+void ExaNLFIntegrator::AssembleEA(const mfem::FiniteElementSpace& fes, mfem::Vector& emat) {
+ CALI_CXX_MARK_SCOPE("enlfi_assembleEA");
+ mfem::Mesh* mesh = fes.GetMesh();
+ const mfem::FiniteElement& el = *fes.GetFE(0);
+ space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ nqpts = ir->GetNPoints();
+ nnodes = el.GetDof();
+ nelems = fes.GetNE();
+ auto W = ir->GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ if (grad.Size() != (nqpts * dim * nnodes)) {
+ grad.SetSize(nqpts * dim * nnodes, mfem::Device::GetMemoryType());
+ {
+ mfem::DenseMatrix DSh;
+ const int offset = nnodes * dim;
+ double* qpts_dshape_data = grad.HostReadWrite();
+ for (int i = 0; i < nqpts; i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ DSh.UseExternalData(&qpts_dshape_data[offset * i], nnodes, dim);
+ el.CalcDShape(ip, DSh);
+ }
+ }
+ grad.UseDevice(true);
+ }
+
+ // geom->J's layout isn't going to work for us as of right now, so we just reorder it
+ // into the version that we want instead...
+ if (jacobian.Size() != (dim * dim * nqpts * nelems)) {
+ jacobian.SetSize(dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ jacobian.UseDevice(true);
+
+ geom = mesh->GetGeometricFactors(*ir, mfem::GeometricFactors::JACOBIANS);
+
+ const int DIM4 = 4;
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout(
+ {{dim, dim, nqpts, nelems}}, perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> J(jacobian.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM4> layout_geom = RAJA::make_permuted_layout({{nqpts, dim, dim, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> geom_j_view(
+ geom->J.Read(), layout_geom);
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i) {
+ for (int j = 0; j < nqpts_; j++) {
+ for (int k = 0; k < dim_; k++) {
+ for (int l = 0; l < dim_; l++) {
+ J(l, k, j, i) = geom_j_view(j, l, k, i);
+ }
+ }
+ }
+ });
+ }
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ // A bunch of helper RAJA views to make dealing with data easier down below in our kernel.
+
+ RAJA::Layout<DIM4> layout_tensor = RAJA::make_permuted_layout(
+ {{2 * dim, 2 * dim, nqpts, nelems}}, perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> K(
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->Read(), layout_tensor);
+
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout(
+ {{nnodes * dim, nnodes * dim, nelems}}, perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> E(emat.ReadWrite(),
+ layout_field);
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> J(jacobian.Read(),
+ layout_jacob);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ // This is the loop we'll want to parallelize; the rest are all serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ // We declare this view as const, but we mutate the underlying values in the
+ // scoped section for each quadrature point below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ c_detJ = 1.0 / detJ * W[j_qpts];
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knds = 0; knds < nnodes_; knds++) {
+ const double bx = Gt(knds, 0, j_qpts) * A(0, 0) +
+ Gt(knds, 1, j_qpts) * A(0, 1) + Gt(knds, 2, j_qpts) * A(0, 2);
+
+ const double by = Gt(knds, 0, j_qpts) * A(1, 0) +
+ Gt(knds, 1, j_qpts) * A(1, 1) + Gt(knds, 2, j_qpts) * A(1, 2);
+
+ const double bz = Gt(knds, 0, j_qpts) * A(2, 0) +
+ Gt(knds, 1, j_qpts) * A(2, 1) + Gt(knds, 2, j_qpts) * A(2, 2);
+
+ const double k11x = c_detJ * (bx * K(0, 0, j_qpts, i_elems) +
+ by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems));
+ const double k11y = c_detJ * (bx * K(5, 0, j_qpts, i_elems) +
+ by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems));
+ const double k11z = c_detJ * (bx * K(4, 0, j_qpts, i_elems) +
+ by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems));
+
+ const double k12x = c_detJ * (bx * K(0, 5, j_qpts, i_elems) +
+ by * K(0, 1, j_qpts, i_elems) +
+ bz * K(0, 3, j_qpts, i_elems));
+ const double k12y = c_detJ * (bx * K(5, 5, j_qpts, i_elems) +
+ by * K(5, 1, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems));
+ const double k12z = c_detJ * (bx * K(4, 5, j_qpts, i_elems) +
+ by * K(4, 1, j_qpts, i_elems) +
+ bz * K(4, 3, j_qpts, i_elems));
+
+ const double k13x = c_detJ * (bx * K(0, 4, j_qpts, i_elems) +
+ by * K(0, 3, j_qpts, i_elems) +
+ bz * K(0, 2, j_qpts, i_elems));
+ const double k13y = c_detJ * (bx * K(5, 4, j_qpts, i_elems) +
+ by * K(5, 3, j_qpts, i_elems) +
+ bz * K(5, 2, j_qpts, i_elems));
+ const double k13z = c_detJ * (bx * K(4, 4, j_qpts, i_elems) +
+ by * K(4, 3, j_qpts, i_elems) +
+ bz * K(4, 2, j_qpts, i_elems));
+
+ const double k21x = c_detJ * (bx * K(5, 0, j_qpts, i_elems) +
+ by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems));
+ const double k21y = c_detJ * (bx * K(1, 0, j_qpts, i_elems) +
+ by * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 4, j_qpts, i_elems));
+ const double k21z = c_detJ * (bx * K(3, 0, j_qpts, i_elems) +
+ by * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 4, j_qpts, i_elems));
+
+ const double k22x = c_detJ * (bx * K(5, 5, j_qpts, i_elems) +
+ by * K(5, 1, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems));
+ const double k22y = c_detJ * (bx * K(1, 5, j_qpts, i_elems) +
+ by * K(1, 1, j_qpts, i_elems) +
+ bz * K(1, 3, j_qpts, i_elems));
+ const double k22z = c_detJ * (bx * K(3, 5, j_qpts, i_elems) +
+ by * K(3, 1, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems));
+
+ const double k23x = c_detJ * (bx * K(5, 4, j_qpts, i_elems) +
+ by * K(5, 3, j_qpts, i_elems) +
+ bz * K(5, 2, j_qpts, i_elems));
+ const double k23y = c_detJ * (bx * K(1, 4, j_qpts, i_elems) +
+ by * K(1, 3, j_qpts, i_elems) +
+ bz * K(1, 2, j_qpts, i_elems));
+ const double k23z = c_detJ * (bx * K(3, 4, j_qpts, i_elems) +
+ by * K(3, 3, j_qpts, i_elems) +
+ bz * K(3, 2, j_qpts, i_elems));
+
+ const double k31x = c_detJ * (bx * K(4, 0, j_qpts, i_elems) +
+ by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems));
+ const double k31y = c_detJ * (bx * K(3, 0, j_qpts, i_elems) +
+ by * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 4, j_qpts, i_elems));
+ const double k31z = c_detJ * (bx * K(2, 0, j_qpts, i_elems) +
+ by * K(2, 5, j_qpts, i_elems) +
+ bz * K(2, 4, j_qpts, i_elems));
+
+ const double k32x = c_detJ * (bx * K(4, 5, j_qpts, i_elems) +
+ by * K(4, 1, j_qpts, i_elems) +
+ bz * K(4, 3, j_qpts, i_elems));
+ const double k32y = c_detJ * (bx * K(3, 5, j_qpts, i_elems) +
+ by * K(3, 1, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems));
+ const double k32z = c_detJ * (bx * K(2, 5, j_qpts, i_elems) +
+ by * K(2, 1, j_qpts, i_elems) +
+ bz * K(2, 3, j_qpts, i_elems));
+
+ const double k33x = c_detJ * (bx * K(4, 4, j_qpts, i_elems) +
+ by * K(4, 3, j_qpts, i_elems) +
+ bz * K(4, 2, j_qpts, i_elems));
+ const double k33y = c_detJ * (bx * K(3, 4, j_qpts, i_elems) +
+ by * K(3, 3, j_qpts, i_elems) +
+ bz * K(3, 2, j_qpts, i_elems));
+ const double k33z = c_detJ * (bx * K(2, 4, j_qpts, i_elems) +
+ by * K(2, 3, j_qpts, i_elems) +
+ bz * K(2, 2, j_qpts, i_elems));
+
+ for (int lnds = 0; lnds < nnodes_; lnds++) {
+ const double gx = Gt(lnds, 0, j_qpts) * A(0, 0) +
+ Gt(lnds, 1, j_qpts) * A(0, 1) +
+ Gt(lnds, 2, j_qpts) * A(0, 2);
+
+ const double gy = Gt(lnds, 0, j_qpts) * A(1, 0) +
+ Gt(lnds, 1, j_qpts) * A(1, 1) +
+ Gt(lnds, 2, j_qpts) * A(1, 2);
+
+ const double gz = Gt(lnds, 0, j_qpts) * A(2, 0) +
+ Gt(lnds, 1, j_qpts) * A(2, 1) +
+ Gt(lnds, 2, j_qpts) * A(2, 2);
+
+ E(lnds, knds, i_elems) += gx * k11x + gy * k11y + gz * k11z;
+ E(lnds, knds + nnodes_, i_elems) += gx * k12x + gy * k12y + gz * k12z;
+ E(lnds, knds + 2 * nnodes_, i_elems) += gx * k13x + gy * k13y + gz * k13z;
+
+ E(lnds + nnodes_, knds, i_elems) += gx * k21x + gy * k21y + gz * k21z;
+ E(lnds + nnodes_, knds + nnodes_, i_elems) += gx * k22x + gy * k22y +
+ gz * k22z;
+ E(lnds + nnodes_, knds + 2 * nnodes_, i_elems) += gx * k23x + gy * k23y +
+ gz * k23z;
+
+ E(lnds + 2 * nnodes_, knds, i_elems) += gx * k31x + gy * k31y + gz * k31z;
+ E(lnds + 2 * nnodes_, knds + nnodes_, i_elems) += gx * k32x + gy * k32y +
+ gz * k32z;
+ E(lnds + 2 * nnodes_, knds + 2 * nnodes_, i_elems) += gx * k33x +
+ gy * k33y + gz * k33z;
+ }
+ }
+ }
+ });
+ }
+}
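+
+/* Indexing note for the E view above: row/column index (node + c * nnodes)
+ * corresponds to vector component c of that node, so the nine k**-terms fill
+ * the 3x3 component blocks of each element matrix. For example, with nnodes = 8
+ * (a Hex8 element), E(2 + 8, 5 + 16, e) is the (y-component of node 2,
+ * z-component of node 5) entry of element e.
+ */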
+
+// Outside of the UMAT code paths, this is the function called
+// to assemble our residual vectors.
+void ICExaNLFIntegrator::AssembleElementVector(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& elfun,
+ mfem::Vector& elvect) {
+ CALI_CXX_MARK_SCOPE("icenlfi_assembleElemVec");
+ int dof = el.GetDof(), dim = el.GetDim();
+
+ mfem::DenseMatrix DSh, DS, elem_deriv_shapes_loc;
+ mfem::DenseMatrix Jpt;
+ mfem::DenseMatrix PMatI, PMatO;
+ // This is our stress tensor
+ mfem::DenseMatrix P;
+ mfem::DenseMatrix grad_trans;
+ // grad_trans holds the transpose of the B matrix as seen in
+ // [B^t][tan_stiff][B]
+ grad_trans.SetSize(dof * dim, 6);
+
+ DSh.SetSize(dof, dim);
+ DS.SetSize(dof, dim);
+ elem_deriv_shapes_loc.SetSize(dof, dim);
+ elem_deriv_shapes_loc = 0.0;
+ Jpt.SetSize(dim);
+
+ // PMatI would be our velocity in this case
+ PMatI.UseExternalData(elfun.GetData(), dof, dim);
+ elvect.SetSize(dof * dim);
+
+ // PMatO would be our residual vector
+ elvect = 0.0;
+ PMatO.UseExternalData(elvect.HostReadWrite(), dof * dim, 1);
+
+ const mfem::IntegrationRule* ir = IntRule;
+ if (!ir) {
+ ir = &(mfem::IntRules.Get(el.GetGeomType(),
+ 2 * el.GetOrder() + 1)); // must match quadrature space
+ }
+
+ const mfem::IntegrationRule* irc = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+ double eVol = 0.0;
+ /**
+ * @brief Compute element-averaged shape function derivatives for B-bar method.
+ *
+ * This loop integrates shape function derivatives over the entire element volume
+ * to compute volume-averaged quantities needed for the B-bar method. The averaged
+ * derivatives prevent volumetric locking in incompressible material problems.
+ *
+ * Process:
+ * 1. Integrate ∂N/∂x derivatives weighted by Jacobian and quadrature weights
+ * 2. Accumulate total element volume (eVol)
+ * 3. Normalize by total volume to obtain element averages
+ */
+ for (int i = 0; i < irc->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = irc->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+
+ // compute Jacobian of the transformation
+ Jpt = Ttr.InverseJacobian(); // Jpt = dxi / dX
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jpt, DS); // dN_a(xi) / dX = dN_a(xi)/dxi * dxi/dX
+ DS *= (Ttr.Weight() * ip.weight);
+ elem_deriv_shapes_loc += DS;
+
+ eVol += (Ttr.Weight() * ip.weight);
+ }
+
+ elem_deriv_shapes_loc *= (1.0 / eVol);
+
+ double stress[6];
+
+ P.UseExternalData(&stress[0], 6, 1);
+
+ for (int i = 0; i < ir->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+
+ // compute Jacobian of the transformation
+ Jpt = Ttr.InverseJacobian(); // Jpt = dxi / dX
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jpt, DS); // dN_a(xi) / dX = dN_a(xi)/dxi * dxi/dX
+
+ GetQFData(
+ Ttr.ElementNo, i, stress, m_sim_state->GetQuadratureFunction("cauchy_stress_end"));
+ GenerateGradBarMatrix(DS, elem_deriv_shapes_loc, grad_trans);
+
+ grad_trans *= (ip.weight * Ttr.Weight());
+ AddMult(grad_trans, P, PMatO);
+ }
+
+ return;
+}
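+
+/* The B-bar modification in one line: with per-point gradients b and their
+ * element-volume averages bbar computed above, the modified normal components
+ * of the strain-displacement matrix are
+ *
+ *     b_mod = b + (bbar - b) / 3
+ *
+ * so the volumetric part of B is replaced by its element average while the
+ * deviatoric part is left untouched; this matches the b4/b5-style terms built
+ * explicitly in the EA kernel further below.
+ */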
+
+void ICExaNLFIntegrator::AssembleElementGrad(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& /*elfun*/,
+ mfem::DenseMatrix& elmat) {
+ CALI_CXX_MARK_SCOPE("icenlfi_assembleElemGrad");
+ int dof = el.GetDof(), dim = el.GetDim();
+
+ mfem::DenseMatrix DSh, DS, elem_deriv_shapes_loc, Jrt;
+
+ // Now time to start assembling stuff
+ mfem::DenseMatrix grad_trans, temp;
+ mfem::DenseMatrix tan_stiff;
+
+ constexpr int ngrad_dim2 = 36;
+ double matGrad[ngrad_dim2];
+
+ // grad_trans holds the transpose of the B matrix as seen in
+ // [B^t][tan_stiff][B]
+ grad_trans.SetSize(dof * dim, 6);
+ // We need a temp matrix to store the first matrix product of [B^t][tan_stiff][B]
+ temp.SetSize(6, dof * dim);
+
+ tan_stiff.UseExternalData(&matGrad[0], 6, 6);
+
+ DSh.SetSize(dof, dim);
+ DS.SetSize(dof, dim);
+ elem_deriv_shapes_loc.SetSize(dof, dim);
+ elem_deriv_shapes_loc = 0.0;
+ Jrt.SetSize(dim);
+ elmat.SetSize(dof * dim);
+
+ const mfem::IntegrationRule* ir = IntRule;
+ if (!ir) {
+ ir = &(mfem::IntRules.Get(el.GetGeomType(),
+ 2 * el.GetOrder() + 1)); // <--- must match quadrature space
+ }
+
+ elmat = 0.0;
+
+ const mfem::IntegrationRule* irc = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+ double eVol = 0.0;
+
+ for (int i = 0; i < irc->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = irc->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+
+ // compute Jacobian of the transformation
+ Jrt = Ttr.InverseJacobian(); // Jrt = dxi / dX
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jrt, DS); // dN_a(xi) / dX = dN_a(xi)/dxi * dxi/dX
+ DS *= (Ttr.Weight() * ip.weight);
+ elem_deriv_shapes_loc += DS;
+
+ eVol += (Ttr.Weight() * ip.weight);
+ }
+
+ elem_deriv_shapes_loc *= (1.0 / eVol);
+
+ for (int i = 0; i < ir->GetNPoints(); i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ Ttr.SetIntPoint(&ip);
+ CalcInverse(Ttr.Jacobian(), Jrt);
+
+ el.CalcDShape(ip, DSh);
+ Mult(DSh, Jrt, DS);
+
+ GetQFData(
+ Ttr.ElementNo, i, matGrad, m_sim_state->GetQuadratureFunction("tangent_stiffness"));
+ // grad_trans is B^t
+ GenerateGradBarMatrix(DS, elem_deriv_shapes_loc, grad_trans);
+ // We multiply our quadrature weights into our tan_stiff matrix here
+ tan_stiff *= ip.weight * Ttr.Weight();
+ // We use temp as a temporary matrix:
+ // temp = [Cstiff][B]
+ MultABt(tan_stiff, grad_trans, temp);
+ // We now add our [B^t][kgeom] product to our tangent stiffness matrix that
+ // we want to output to our material tangent stiffness matrix
+ AddMult(grad_trans, temp, elmat);
+ }
+
+ return;
+}
+
+/// Method defining element assembly.
+/** The result of the element assembly is added and stored in the @a emat
+ Vector. */
+void ICExaNLFIntegrator::AssembleGradEA(const mfem::Vector& /*x*/,
+ const mfem::FiniteElementSpace& fes,
+ mfem::Vector& emat) {
+ AssembleEA(fes, emat);
+}
+void ICExaNLFIntegrator::AssembleEA(const mfem::FiniteElementSpace& fes, mfem::Vector& emat) {
+ CALI_CXX_MARK_SCOPE("icenlfi_assembleEA");
+ const mfem::FiniteElement& el = *fes.GetFE(0);
+ space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ nqpts = ir->GetNPoints();
+ nnodes = el.GetDof();
+ nelems = fes.GetNE();
+ auto W = ir->GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ // A bunch of helper RAJA views to make dealing with data easier down below in our kernel.
+
+ // Element-averaged shape function derivatives used by the B-bar method
+ RAJA::Layout<DIM3> layout_egrads = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> elem_deriv_shapes_view(
+ elem_deriv_shapes.Read(), layout_egrads);
+
+ RAJA::Layout<DIM4> layout_tensor = RAJA::make_permuted_layout(
+ {{2 * dim, 2 * dim, nqpts, nelems}}, perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> K(
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->Read(), layout_tensor);
+
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout(
+ {{nnodes * dim, nnodes * dim, nelems}}, perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> E(emat.ReadWrite(),
+ layout_field);
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> J(jacobian.Read(),
+ layout_jacob);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ const double i3 = 1.0 / 3.0;
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ // This is the loop we'll want to parallelize; the rest are all serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ double idetJ;
+ // We declare this view as const, but we mutate the underlying values in the
+ // scoped section for each quadrature point below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ idetJ = 1.0 / detJ;
+ c_detJ = detJ * W[j_qpts];
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knds = 0; knds < nnodes_; knds++) {
+ const double bx = idetJ * (Gt(knds, 0, j_qpts) * A(0, 0) +
+ Gt(knds, 1, j_qpts) * A(0, 1) +
+ Gt(knds, 2, j_qpts) * A(0, 2));
+
+ const double by = idetJ * (Gt(knds, 0, j_qpts) * A(1, 0) +
+ Gt(knds, 1, j_qpts) * A(1, 1) +
+ Gt(knds, 2, j_qpts) * A(1, 2));
+
+ const double bz = idetJ * (Gt(knds, 0, j_qpts) * A(2, 0) +
+ Gt(knds, 1, j_qpts) * A(2, 1) +
+ Gt(knds, 2, j_qpts) * A(2, 2));
+ const double b4 = i3 * (elem_deriv_shapes_view(knds, 0, i_elems) - bx);
+ const double b5 = b4 + bx;
+ const double b6 = i3 * (elem_deriv_shapes_view(knds, 1, i_elems) - by);
+ const double b7 = b6 + by;
+ const double b8 = i3 * (elem_deriv_shapes_view(knds, 2, i_elems) - bz);
+ const double b9 = b8 + bz;
+
+ const double k11w =
+ c_detJ * (b4 * K(1, 1, j_qpts, i_elems) + b4 * K(1, 2, j_qpts, i_elems) +
+ b5 * K(1, 0, j_qpts, i_elems) + by * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 4, j_qpts, i_elems) + b4 * K(2, 1, j_qpts, i_elems) +
+ b4 * K(2, 2, j_qpts, i_elems) + b5 * K(2, 0, j_qpts, i_elems) +
+ by * K(2, 5, j_qpts, i_elems) + bz * K(2, 4, j_qpts, i_elems));
+
+ const double k11x = c_detJ * (b4 * K(0, 1, j_qpts, i_elems) +
+ b4 * K(0, 2, j_qpts, i_elems) +
+ b5 * K(0, 0, j_qpts, i_elems) +
+ by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems));
+
+ const double k11y = c_detJ * (b4 * K(5, 1, j_qpts, i_elems) +
+ b4 * K(5, 2, j_qpts, i_elems) +
+ b5 * K(5, 0, j_qpts, i_elems) +
+ by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems));
+
+ const double k11z = c_detJ * (b4 * K(4, 1, j_qpts, i_elems) +
+ b4 * K(4, 2, j_qpts, i_elems) +
+ b5 * K(4, 0, j_qpts, i_elems) +
+ by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems));
+
+ const double k12w =
+ c_detJ * (b6 * K(1, 0, j_qpts, i_elems) + b6 * K(1, 2, j_qpts, i_elems) +
+ b7 * K(1, 1, j_qpts, i_elems) + bx * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 3, j_qpts, i_elems) + b6 * K(2, 0, j_qpts, i_elems) +
+ b6 * K(2, 2, j_qpts, i_elems) + b7 * K(2, 1, j_qpts, i_elems) +
+ bx * K(2, 5, j_qpts, i_elems) + bz * K(2, 3, j_qpts, i_elems));
+
+ const double k12x = c_detJ * (b6 * K(0, 0, j_qpts, i_elems) +
+ b6 * K(0, 2, j_qpts, i_elems) +
+ b7 * K(0, 1, j_qpts, i_elems) +
+ bx * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 3, j_qpts, i_elems));
+
+ const double k12y = c_detJ * (b6 * K(5, 0, j_qpts, i_elems) +
+ b6 * K(5, 2, j_qpts, i_elems) +
+ b7 * K(5, 1, j_qpts, i_elems) +
+ bx * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems));
+
+ const double k12z = c_detJ * (b6 * K(4, 0, j_qpts, i_elems) +
+ b6 * K(4, 2, j_qpts, i_elems) +
+ b7 * K(4, 1, j_qpts, i_elems) +
+ bx * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 3, j_qpts, i_elems));
+
+ const double k13w =
+ c_detJ * (b8 * K(1, 0, j_qpts, i_elems) + b8 * K(1, 1, j_qpts, i_elems) +
+ b9 * K(1, 2, j_qpts, i_elems) + bx * K(1, 4, j_qpts, i_elems) +
+ by * K(1, 3, j_qpts, i_elems) + b8 * K(2, 0, j_qpts, i_elems) +
+ b8 * K(2, 1, j_qpts, i_elems) + b9 * K(2, 2, j_qpts, i_elems) +
+ bx * K(2, 4, j_qpts, i_elems) + by * K(2, 3, j_qpts, i_elems));
+
+ const double k13x = c_detJ * (b8 * K(0, 0, j_qpts, i_elems) +
+ b8 * K(0, 1, j_qpts, i_elems) +
+ b9 * K(0, 2, j_qpts, i_elems) +
+ bx * K(0, 4, j_qpts, i_elems) +
+ by * K(0, 3, j_qpts, i_elems));
+
+ const double k13y = c_detJ * (b8 * K(5, 0, j_qpts, i_elems) +
+ b8 * K(5, 1, j_qpts, i_elems) +
+ b9 * K(5, 2, j_qpts, i_elems) +
+ bx * K(5, 4, j_qpts, i_elems) +
+ by * K(5, 3, j_qpts, i_elems));
+
+ const double k13z = c_detJ * (b8 * K(4, 0, j_qpts, i_elems) +
+ b8 * K(4, 1, j_qpts, i_elems) +
+ b9 * K(4, 2, j_qpts, i_elems) +
+ bx * K(4, 4, j_qpts, i_elems) +
+ by * K(4, 3, j_qpts, i_elems));
+
+ const double k21w =
+ c_detJ * (b4 * K(0, 1, j_qpts, i_elems) + b4 * K(0, 2, j_qpts, i_elems) +
+ b5 * K(0, 0, j_qpts, i_elems) + by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems) + b4 * K(2, 1, j_qpts, i_elems) +
+ b4 * K(2, 2, j_qpts, i_elems) + b5 * K(2, 0, j_qpts, i_elems) +
+ by * K(2, 5, j_qpts, i_elems) + bz * K(2, 4, j_qpts, i_elems));
+
+ const double k21x = c_detJ * (b4 * K(1, 1, j_qpts, i_elems) +
+ b4 * K(1, 2, j_qpts, i_elems) +
+ b5 * K(1, 0, j_qpts, i_elems) +
+ by * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 4, j_qpts, i_elems));
+
+ const double k21y = c_detJ * (b4 * K(5, 1, j_qpts, i_elems) +
+ b4 * K(5, 2, j_qpts, i_elems) +
+ b5 * K(5, 0, j_qpts, i_elems) +
+ by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems));
+
+ const double k21z = c_detJ * (b4 * K(3, 1, j_qpts, i_elems) +
+ b4 * K(3, 2, j_qpts, i_elems) +
+ b5 * K(3, 0, j_qpts, i_elems) +
+ by * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 4, j_qpts, i_elems));
+
+ const double k22w =
+ c_detJ * (b6 * K(0, 0, j_qpts, i_elems) + b6 * K(0, 2, j_qpts, i_elems) +
+ b7 * K(0, 1, j_qpts, i_elems) + bx * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 3, j_qpts, i_elems) + b6 * K(2, 0, j_qpts, i_elems) +
+ b6 * K(2, 2, j_qpts, i_elems) + b7 * K(2, 1, j_qpts, i_elems) +
+ bx * K(2, 5, j_qpts, i_elems) + bz * K(2, 3, j_qpts, i_elems));
+
+ const double k22x = c_detJ * (b6 * K(1, 0, j_qpts, i_elems) +
+ b6 * K(1, 2, j_qpts, i_elems) +
+ b7 * K(1, 1, j_qpts, i_elems) +
+ bx * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 3, j_qpts, i_elems));
+
+ const double k22y = c_detJ * (b6 * K(5, 0, j_qpts, i_elems) +
+ b6 * K(5, 2, j_qpts, i_elems) +
+ b7 * K(5, 1, j_qpts, i_elems) +
+ bx * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems));
+
+ const double k22z = c_detJ * (b6 * K(3, 0, j_qpts, i_elems) +
+ b6 * K(3, 2, j_qpts, i_elems) +
+ b7 * K(3, 1, j_qpts, i_elems) +
+ bx * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems));
+
+ const double k23w =
+ c_detJ * (b8 * K(0, 0, j_qpts, i_elems) + b8 * K(0, 1, j_qpts, i_elems) +
+ b9 * K(0, 2, j_qpts, i_elems) + bx * K(0, 4, j_qpts, i_elems) +
+ by * K(0, 3, j_qpts, i_elems) + b8 * K(2, 0, j_qpts, i_elems) +
+ b8 * K(2, 1, j_qpts, i_elems) + b9 * K(2, 2, j_qpts, i_elems) +
+ bx * K(2, 4, j_qpts, i_elems) + by * K(2, 3, j_qpts, i_elems));
+
+ const double k23x = c_detJ * (b8 * K(1, 0, j_qpts, i_elems) +
+ b8 * K(1, 1, j_qpts, i_elems) +
+ b9 * K(1, 2, j_qpts, i_elems) +
+ bx * K(1, 4, j_qpts, i_elems) +
+ by * K(1, 3, j_qpts, i_elems));
+
+ const double k23y = c_detJ * (b8 * K(5, 0, j_qpts, i_elems) +
+ b8 * K(5, 1, j_qpts, i_elems) +
+ b9 * K(5, 2, j_qpts, i_elems) +
+ bx * K(5, 4, j_qpts, i_elems) +
+ by * K(5, 3, j_qpts, i_elems));
+
+ const double k23z = c_detJ * (b8 * K(3, 0, j_qpts, i_elems) +
+ b8 * K(3, 1, j_qpts, i_elems) +
+ b9 * K(3, 2, j_qpts, i_elems) +
+ bx * K(3, 4, j_qpts, i_elems) +
+ by * K(3, 3, j_qpts, i_elems));
+
+ const double k31w =
+ c_detJ * (b4 * K(0, 1, j_qpts, i_elems) + b4 * K(0, 2, j_qpts, i_elems) +
+ b5 * K(0, 0, j_qpts, i_elems) + by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems) + b4 * K(1, 1, j_qpts, i_elems) +
+ b4 * K(1, 2, j_qpts, i_elems) + b5 * K(1, 0, j_qpts, i_elems) +
+ by * K(1, 5, j_qpts, i_elems) + bz * K(1, 4, j_qpts, i_elems));
+
+ const double k31x = c_detJ * (b4 * K(2, 1, j_qpts, i_elems) +
+ b4 * K(2, 2, j_qpts, i_elems) +
+ b5 * K(2, 0, j_qpts, i_elems) +
+ by * K(2, 5, j_qpts, i_elems) +
+ bz * K(2, 4, j_qpts, i_elems));
+
+ const double k31y = c_detJ * (b4 * K(4, 1, j_qpts, i_elems) +
+ b4 * K(4, 2, j_qpts, i_elems) +
+ b5 * K(4, 0, j_qpts, i_elems) +
+ by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems));
+
+ const double k31z = c_detJ * (b4 * K(3, 1, j_qpts, i_elems) +
+ b4 * K(3, 2, j_qpts, i_elems) +
+ b5 * K(3, 0, j_qpts, i_elems) +
+ by * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 4, j_qpts, i_elems));
+
+ const double k32w =
+ c_detJ * (b6 * K(0, 0, j_qpts, i_elems) + b6 * K(0, 2, j_qpts, i_elems) +
+ b7 * K(0, 1, j_qpts, i_elems) + bx * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 3, j_qpts, i_elems) + b6 * K(1, 0, j_qpts, i_elems) +
+ b6 * K(1, 2, j_qpts, i_elems) + b7 * K(1, 1, j_qpts, i_elems) +
+ bx * K(1, 5, j_qpts, i_elems) + bz * K(1, 3, j_qpts, i_elems));
+
+ const double k32x = c_detJ * (b6 * K(2, 0, j_qpts, i_elems) +
+ b6 * K(2, 2, j_qpts, i_elems) +
+ b7 * K(2, 1, j_qpts, i_elems) +
+ bx * K(2, 5, j_qpts, i_elems) +
+ bz * K(2, 3, j_qpts, i_elems));
+
+ const double k32y = c_detJ * (b6 * K(4, 0, j_qpts, i_elems) +
+ b6 * K(4, 2, j_qpts, i_elems) +
+ b7 * K(4, 1, j_qpts, i_elems) +
+ bx * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 3, j_qpts, i_elems));
+
+ const double k32z = c_detJ * (b6 * K(3, 0, j_qpts, i_elems) +
+ b6 * K(3, 2, j_qpts, i_elems) +
+ b7 * K(3, 1, j_qpts, i_elems) +
+ bx * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems));
+
+ const double k33w =
+ c_detJ * (b8 * K(0, 0, j_qpts, i_elems) + b8 * K(0, 1, j_qpts, i_elems) +
+ b9 * K(0, 2, j_qpts, i_elems) + bx * K(0, 4, j_qpts, i_elems) +
+ by * K(0, 3, j_qpts, i_elems) + b8 * K(1, 0, j_qpts, i_elems) +
+ b8 * K(1, 1, j_qpts, i_elems) + b9 * K(1, 2, j_qpts, i_elems) +
+ bx * K(1, 4, j_qpts, i_elems) + by * K(1, 3, j_qpts, i_elems));
+
+ const double k33x = c_detJ * (b8 * K(2, 0, j_qpts, i_elems) +
+ b8 * K(2, 1, j_qpts, i_elems) +
+ b9 * K(2, 2, j_qpts, i_elems) +
+ bx * K(2, 4, j_qpts, i_elems) +
+ by * K(2, 3, j_qpts, i_elems));
+
+ const double k33y = c_detJ * (b8 * K(4, 0, j_qpts, i_elems) +
+ b8 * K(4, 1, j_qpts, i_elems) +
+ b9 * K(4, 2, j_qpts, i_elems) +
+ bx * K(4, 4, j_qpts, i_elems) +
+ by * K(4, 3, j_qpts, i_elems));
+
+ const double k33z = c_detJ * (b8 * K(3, 0, j_qpts, i_elems) +
+ b8 * K(3, 1, j_qpts, i_elems) +
+ b9 * K(3, 2, j_qpts, i_elems) +
+ bx * K(3, 4, j_qpts, i_elems) +
+ by * K(3, 3, j_qpts, i_elems));
+
+ for (int lnds = 0; lnds < nnodes_; lnds++) {
+ const double gx = idetJ * (Gt(lnds, 0, j_qpts) * A(0, 0) +
+ Gt(lnds, 1, j_qpts) * A(0, 1) +
+ Gt(lnds, 2, j_qpts) * A(0, 2));
+
+ const double gy = idetJ * (Gt(lnds, 0, j_qpts) * A(1, 0) +
+ Gt(lnds, 1, j_qpts) * A(1, 1) +
+ Gt(lnds, 2, j_qpts) * A(1, 2));
+
+ const double gz = idetJ * (Gt(lnds, 0, j_qpts) * A(2, 0) +
+ Gt(lnds, 1, j_qpts) * A(2, 1) +
+ Gt(lnds, 2, j_qpts) * A(2, 2));
+
+ const double g4 = i3 * (elem_deriv_shapes_view(lnds, 0, i_elems) - gx);
+ const double g5 = g4 + gx;
+ const double g6 = i3 * (elem_deriv_shapes_view(lnds, 1, i_elems) - gy);
+ const double g7 = g6 + gy;
+ const double g8 = i3 * (elem_deriv_shapes_view(lnds, 2, i_elems) - gz);
+ const double g9 = g8 + gz;
+
+ E(lnds, knds, i_elems) += g4 * k11w + g5 * k11x + gy * k11y + gz * k11z;
+ E(lnds, knds + nnodes_, i_elems) += g4 * k12w + g5 * k12x + gy * k12y +
+ gz * k12z;
+ E(lnds, knds + 2 * nnodes_, i_elems) += g4 * k13w + g5 * k13x + gy * k13y +
+ gz * k13z;
+
+ E(lnds + nnodes_, knds, i_elems) += g6 * k21w + g7 * k21x + gx * k21y +
+ gz * k21z;
+ E(lnds + nnodes_, knds + nnodes_, i_elems) += g6 * k22w + g7 * k22x +
+ gx * k22y + gz * k22z;
+ E(lnds + nnodes_, knds + 2 * nnodes_, i_elems) += g6 * k23w + g7 * k23x +
+ gx * k23y + gz * k23z;
+
+ E(lnds + 2 * nnodes_, knds, i_elems) += g8 * k31w + g9 * k31x + gx * k31y +
+ gy * k31z;
+ E(lnds + 2 * nnodes_, knds + nnodes_, i_elems) += g8 * k32w + g9 * k32x +
+ gx * k32y + gy * k32z;
+ E(lnds + 2 * nnodes_, knds + 2 * nnodes_, i_elems) += g8 * k33w +
+ g9 * k33x +
+ gx * k33y + gy * k33z;
+ }
+ }
+ }
+ });
+ }
+}
+
+// This assembles the diagonal of our LHS which can be used as a preconditioner
+void ICExaNLFIntegrator::AssembleGradDiagonalPA(mfem::Vector& diag) const {
+ CALI_CXX_MARK_SCOPE("icenlfi_AssembleGradDiagonalPA");
+
+ const mfem::IntegrationRule& ir =
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->GetSpaceShared()->GetIntRule(0);
+ auto W = ir.GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ // A handful of helper RAJA views to make the raw data easier to work with in the kernel below.
+
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ RAJA::Layout<DIM4> layout_tensor = RAJA::make_permuted_layout(
+ {{2 * dim, 2 * dim, nqpts, nelems}}, perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> K(
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->Read(), layout_tensor);
+
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> Y(diag.ReadWrite(),
+ layout_field);
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> J(jacobian.Read(),
+ layout_jacob);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ RAJA::Layout<DIM3> layout_egrads = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> elem_deriv_shapes_view(
+ elem_deriv_shapes.Read(), layout_egrads);
+
+ const double i3 = 1.0 / 3.0;
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+ // We parallelize this loop over elements; the inner loops remain serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ double idetJ;
+ // We declare this view as const; the underlying adj values are only mutated
+ // in the scoped quadrature-point section below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ idetJ = 1.0 / detJ;
+ c_detJ = detJ * W[j_qpts];
+ // adj(J)
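+ // J^{-1} = adj(J) / det(J); we store adj(J) and fold 1/det(J) into idetJ,
+ // so the b-vectors computed below are true physical-space gradients.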
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knds = 0; knds < nnodes_; knds++) {
+ const double bx = idetJ * (Gt(knds, 0, j_qpts) * A(0, 0) +
+ Gt(knds, 1, j_qpts) * A(0, 1) +
+ Gt(knds, 2, j_qpts) * A(0, 2));
+
+ const double by = idetJ * (Gt(knds, 0, j_qpts) * A(1, 0) +
+ Gt(knds, 1, j_qpts) * A(1, 1) +
+ Gt(knds, 2, j_qpts) * A(1, 2));
+
+ const double bz = idetJ * (Gt(knds, 0, j_qpts) * A(2, 0) +
+ Gt(knds, 1, j_qpts) * A(2, 1) +
+ Gt(knds, 2, j_qpts) * A(2, 2));
+ const double b4 = i3 * (elem_deriv_shapes_view(knds, 0, i_elems) - bx);
+ const double b5 = b4 + bx;
+ const double b6 = i3 * (elem_deriv_shapes_view(knds, 1, i_elems) - by);
+ const double b7 = b6 + by;
+ const double b8 = i3 * (elem_deriv_shapes_view(knds, 2, i_elems) - bz);
+ const double b9 = b8 + bz;
+
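+ // The 6x6 tangent K is assumed to use Voigt ordering
+ // (0=xx, 1=yy, 2=zz, 3=yz, 4=xz, 5=xy); the k-terms below contract the
+ // B-bar-modified gradients against the matching rows and columns of K.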
+ const double k11w =
+ c_detJ * (b4 * K(1, 1, j_qpts, i_elems) + b4 * K(1, 2, j_qpts, i_elems) +
+ b5 * K(1, 0, j_qpts, i_elems) + by * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 4, j_qpts, i_elems) + b4 * K(2, 1, j_qpts, i_elems) +
+ b4 * K(2, 2, j_qpts, i_elems) + b5 * K(2, 0, j_qpts, i_elems) +
+ by * K(2, 5, j_qpts, i_elems) + bz * K(2, 4, j_qpts, i_elems));
+
+ const double k11x = c_detJ * (b4 * K(0, 1, j_qpts, i_elems) +
+ b4 * K(0, 2, j_qpts, i_elems) +
+ b5 * K(0, 0, j_qpts, i_elems) +
+ by * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 4, j_qpts, i_elems));
+
+ const double k11y = c_detJ * (b4 * K(5, 1, j_qpts, i_elems) +
+ b4 * K(5, 2, j_qpts, i_elems) +
+ b5 * K(5, 0, j_qpts, i_elems) +
+ by * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 4, j_qpts, i_elems));
+
+ const double k11z = c_detJ * (b4 * K(4, 1, j_qpts, i_elems) +
+ b4 * K(4, 2, j_qpts, i_elems) +
+ b5 * K(4, 0, j_qpts, i_elems) +
+ by * K(4, 5, j_qpts, i_elems) +
+ bz * K(4, 4, j_qpts, i_elems));
+
+ const double k22w =
+ c_detJ * (b6 * K(0, 0, j_qpts, i_elems) + b6 * K(0, 2, j_qpts, i_elems) +
+ b7 * K(0, 1, j_qpts, i_elems) + bx * K(0, 5, j_qpts, i_elems) +
+ bz * K(0, 3, j_qpts, i_elems) + b6 * K(2, 0, j_qpts, i_elems) +
+ b6 * K(2, 2, j_qpts, i_elems) + b7 * K(2, 1, j_qpts, i_elems) +
+ bx * K(2, 5, j_qpts, i_elems) + bz * K(2, 3, j_qpts, i_elems));
+
+ const double k22x = c_detJ * (b6 * K(1, 0, j_qpts, i_elems) +
+ b6 * K(1, 2, j_qpts, i_elems) +
+ b7 * K(1, 1, j_qpts, i_elems) +
+ bx * K(1, 5, j_qpts, i_elems) +
+ bz * K(1, 3, j_qpts, i_elems));
+
+ const double k22y = c_detJ * (b6 * K(5, 0, j_qpts, i_elems) +
+ b6 * K(5, 2, j_qpts, i_elems) +
+ b7 * K(5, 1, j_qpts, i_elems) +
+ bx * K(5, 5, j_qpts, i_elems) +
+ bz * K(5, 3, j_qpts, i_elems));
+
+ const double k22z = c_detJ * (b6 * K(3, 0, j_qpts, i_elems) +
+ b6 * K(3, 2, j_qpts, i_elems) +
+ b7 * K(3, 1, j_qpts, i_elems) +
+ bx * K(3, 5, j_qpts, i_elems) +
+ bz * K(3, 3, j_qpts, i_elems));
+
+ const double k33w =
+ c_detJ * (b8 * K(0, 0, j_qpts, i_elems) + b8 * K(0, 1, j_qpts, i_elems) +
+ b9 * K(0, 2, j_qpts, i_elems) + bx * K(0, 4, j_qpts, i_elems) +
+ by * K(0, 3, j_qpts, i_elems) + b8 * K(1, 0, j_qpts, i_elems) +
+ b8 * K(1, 1, j_qpts, i_elems) + b9 * K(1, 2, j_qpts, i_elems) +
+ bx * K(1, 4, j_qpts, i_elems) + by * K(1, 3, j_qpts, i_elems));
+
+ const double k33x = c_detJ * (b8 * K(2, 0, j_qpts, i_elems) +
+ b8 * K(2, 1, j_qpts, i_elems) +
+ b9 * K(2, 2, j_qpts, i_elems) +
+ bx * K(2, 4, j_qpts, i_elems) +
+ by * K(2, 3, j_qpts, i_elems));
+
+ const double k33y = c_detJ * (b8 * K(4, 0, j_qpts, i_elems) +
+ b8 * K(4, 1, j_qpts, i_elems) +
+ b9 * K(4, 2, j_qpts, i_elems) +
+ bx * K(4, 4, j_qpts, i_elems) +
+ by * K(4, 3, j_qpts, i_elems));
+
+ const double k33z = c_detJ * (b8 * K(3, 0, j_qpts, i_elems) +
+ b8 * K(3, 1, j_qpts, i_elems) +
+ b9 * K(3, 2, j_qpts, i_elems) +
+ bx * K(3, 4, j_qpts, i_elems) +
+ by * K(3, 3, j_qpts, i_elems));
+
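+ // Only the diagonal blocks survive here: each line below is the
+ // (knds, d)-(knds, d) entry of B-bar^T * C * B-bar at this quadrature
+ // point, scaled by w_q * det(J) (folded into c_detJ).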
+ Y(knds, 0, i_elems) += b4 * k11w + b5 * k11x + by * k11y + bz * k11z;
+ Y(knds, 1, i_elems) += b6 * k22w + b7 * k22x + bx * k22y + bz * k22z;
+ Y(knds, 2, i_elems) += b8 * k33w + b9 * k33x + bx * k33y + by * k33z;
+ }
+ }
+ });
+ }
+}
+
+// This performs the setup step for the RHS (residual) assembly: it caches the
+// shape-function gradients at each quadrature point, reorders the Jacobians into
+// the layout our kernels expect, and computes the element-volume-averaged
+// gradients used by the B-bar formulation.
+void ICExaNLFIntegrator::AssemblePA(const mfem::FiniteElementSpace& fes) {
+ CALI_CXX_MARK_SCOPE("icenlfi_assemblePA");
+ mfem::Mesh* mesh = fes.GetMesh();
+ const mfem::FiniteElement& el = *fes.GetFE(0);
+ space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ nqpts = ir->GetNPoints();
+ nnodes = el.GetDof();
+ nelems = fes.GetNE();
+
+ auto W = ir->GetWeights().Read();
+ geom = mesh->GetGeometricFactors(*ir, mfem::GeometricFactors::JACOBIANS);
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+
+ if (grad.Size() != (nqpts * dim * nnodes)) {
+ grad.SetSize(nqpts * dim * nnodes, mfem::Device::GetMemoryType());
+ {
+ mfem::DenseMatrix DSh;
+ const int offset = nnodes * dim;
+ double* qpts_dshape_data = grad.HostReadWrite();
+ for (int i = 0; i < nqpts; i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ DSh.UseExternalData(&qpts_dshape_data[offset * i], nnodes, dim);
+ el.CalcDShape(ip, DSh);
+ }
+ }
+ grad.UseDevice(true);
+ }
+
+ if (elem_deriv_shapes.Size() != (nnodes * dim * nelems)) {
+ elem_deriv_shapes.SetSize(nnodes * dim * nelems, mfem::Device::GetMemoryType());
+ elem_deriv_shapes.UseDevice(true);
+ }
+
+ elem_deriv_shapes = 0.0;
+
+ // geom->J is stored in a layout that doesn't suit our kernels, so we reorder
+ // it below into the layout we actually want.
+ if (jacobian.Size() != (dim * dim * nqpts * nelems)) {
+ jacobian.SetSize(dim * dim * nqpts * nelems, mfem::Device::GetMemoryType());
+ jacobian.UseDevice(true);
+ }
+
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> J(jacobian.ReadWrite(),
+ layout_jacob);
+
+ RAJA::Layout<DIM4> layout_geom = RAJA::make_permuted_layout({{nqpts, dim, dim, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> geom_j_view(
+ geom->J.Read(), layout_geom);
+
+ RAJA::Layout<DIM3> layout_egrads = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> elem_deriv_shapes_view(
+ elem_deriv_shapes.ReadWrite(), layout_egrads);
+
+ // Transpose of the local gradient variable
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i) {
+ for (int j = 0; j < nqpts_; j++) {
+ for (int k = 0; k < dim_; k++) {
+ for (int l = 0; l < dim_; l++) {
+ J(l, k, j, i) = geom_j_view(j, l, k, i);
+ }
+ }
+ }
+ });
+
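+ // Volume-average the shape-function gradients over each element:
+ //   bar(dN_k/dx_i) = (1/V_e) * sum_q w_q * det(J_q) * dN_k/dx_i(q)
+ // These averages feed the B-bar construction in the Mult/Grad kernels.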
+ // We parallelize this loop over elements; the inner loops remain serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ double volume = 0.0;
+ // We declare this view as const; the underlying adj values are only mutated
+ // in the scoped quadrature-point section below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ c_detJ = W[j_qpts];
+ volume += c_detJ * detJ;
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knds = 0; knds < nnodes_; knds++) {
+ elem_deriv_shapes_view(knds, 0, i_elems) += c_detJ *
+ (Gt(knds, 0, j_qpts) * A(0, 0) +
+ Gt(knds, 1, j_qpts) * A(0, 1) +
+ Gt(knds, 2, j_qpts) * A(0, 2));
+
+ elem_deriv_shapes_view(knds, 1, i_elems) += c_detJ *
+ (Gt(knds, 0, j_qpts) * A(1, 0) +
+ Gt(knds, 1, j_qpts) * A(1, 1) +
+ Gt(knds, 2, j_qpts) * A(1, 2));
+
+ elem_deriv_shapes_view(knds, 2, i_elems) += c_detJ *
+ (Gt(knds, 0, j_qpts) * A(2, 0) +
+ Gt(knds, 1, j_qpts) * A(2, 1) +
+ Gt(knds, 2, j_qpts) * A(2, 2));
+ } // End of nnodes
+ } // End of nqpts
+
+ double ivol = 1.0 / volume;
+
+ for (int knds = 0; knds < nnodes_; knds++) {
+ elem_deriv_shapes_view(knds, 0, i_elems) *= ivol;
+ elem_deriv_shapes_view(knds, 1, i_elems) *= ivol;
+ elem_deriv_shapes_view(knds, 2, i_elems) *= ivol;
+ }
+ }); // End of mfem::MFEM_FORALL
+
+ } // End of space dims if else
+}
+
+// Here we apply the following action operation, contracting the B-bar-modified
+// shape-function gradients against the end-of-step Cauchy stress:
+// y_{ik} = \bar{B}^T_{ij} \sigma_{jk}
+void ICExaNLFIntegrator::AddMultPA(const mfem::Vector& /*x*/, mfem::Vector& y) const {
+ CALI_CXX_MARK_SCOPE("icenlfi_amPAV");
+
+ // Grab the end-of-step Cauchy stress; this quadrature function is also used
+ // for output visualization.
+ auto stress_end = m_sim_state->GetQuadratureFunction("cauchy_stress_end");
+
+ const mfem::IntegrationRule& ir =
+ m_sim_state->GetQuadratureFunction("tangent_stiffness")->GetSpaceShared()->GetIntRule(0);
+ auto W = ir.GetWeights().Read();
+
+ if ((space_dims == 1) || (space_dims == 2)) {
+ MFEM_ABORT("Dimensions of 1 or 2 not supported.");
+ } else {
+ const int dim = 3;
+ const int DIM2 = 2;
+ const int DIM3 = 3;
+ const int DIM4 = 4;
+
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ std::array<RAJA::idx_t, DIM3> perm3{{2, 1, 0}};
+ std::array<RAJA::idx_t, DIM2> perm2{{1, 0}};
+
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout({{dim, dim, nqpts, nelems}},
+ perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> J(jacobian.Read(),
+ layout_jacob);
+
+ RAJA::Layout<DIM3> layout_stress = RAJA::make_permuted_layout({{2 * dim, nqpts, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> S(stress_end->ReadWrite(),
+ layout_stress);
+
+ // Our field variables that are inputs and outputs
+ RAJA::Layout<DIM3> layout_field = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<double, RAJA::Layout<DIM3>> Y(y.ReadWrite(), layout_field);
+ // Transpose of the local gradient variable
+ RAJA::Layout<DIM3> layout_grads = RAJA::make_permuted_layout({{nnodes, dim, nqpts}}, perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> Gt(grad.Read(),
+ layout_grads);
+
+ RAJA::Layout<DIM3> layout_egrads = RAJA::make_permuted_layout({{nnodes, dim, nelems}},
+ perm3);
+ RAJA::View<const double, RAJA::Layout<DIM3>> elem_deriv_shapes_view(
+ elem_deriv_shapes.Read(), layout_egrads);
+
+ RAJA::Layout<DIM2> layout_adj = RAJA::make_permuted_layout({{dim, dim}}, perm2);
+
+ const double i3 = 1.0 / 3.0;
+ const int nqpts_ = nqpts;
+ const int dim_ = dim;
+ const int nnodes_ = nnodes;
+
+ // We parallelize this loop over elements; the inner loops remain serial for now.
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i_elems) {
+ double adj[dim_ * dim_];
+ double c_detJ;
+ double idetJ;
+ // We declare this view as const; the underlying adj values are only mutated
+ // in the scoped quadrature-point section below.
+ RAJA::View<const double, RAJA::Layout<DIM2>> A(&adj[0],
+ layout_adj);
+ for (int j_qpts = 0; j_qpts < nqpts_; j_qpts++) {
+ // If we scope this then we only need to carry half the number of variables around
+ // with us for the adjugate term.
+ {
+ const double J11 = J(0, 0, j_qpts, i_elems); // 0,0
+ const double J21 = J(1, 0, j_qpts, i_elems); // 1,0
+ const double J31 = J(2, 0, j_qpts, i_elems); // 2,0
+ const double J12 = J(0, 1, j_qpts, i_elems); // 0,1
+ const double J22 = J(1, 1, j_qpts, i_elems); // 1,1
+ const double J32 = J(2, 1, j_qpts, i_elems); // 2,1
+ const double J13 = J(0, 2, j_qpts, i_elems); // 0,2
+ const double J23 = J(1, 2, j_qpts, i_elems); // 1,2
+ const double J33 = J(2, 2, j_qpts, i_elems); // 2,2
+ const double detJ = J11 * (J22 * J33 - J32 * J23) -
+ /* */ J21 * (J12 * J33 - J32 * J13) +
+ /* */ J31 * (J12 * J23 - J22 * J13);
+ idetJ = 1.0 / detJ;
+ c_detJ = detJ * W[j_qpts];
+ // adj(J)
+ adj[0] = (J22 * J33) - (J23 * J32); // 0,0
+ adj[1] = (J32 * J13) - (J12 * J33); // 0,1
+ adj[2] = (J12 * J23) - (J22 * J13); // 0,2
+ adj[3] = (J31 * J23) - (J21 * J33); // 1,0
+ adj[4] = (J11 * J33) - (J13 * J31); // 1,1
+ adj[5] = (J21 * J13) - (J11 * J23); // 1,2
+ adj[6] = (J21 * J32) - (J31 * J22); // 2,0
+ adj[7] = (J31 * J12) - (J11 * J32); // 2,1
+ adj[8] = (J11 * J22) - (J12 * J21); // 2,2
+ }
+ for (int knds = 0; knds < nnodes_; knds++) {
+ const double bx = idetJ * (Gt(knds, 0, j_qpts) * A(0, 0) +
+ Gt(knds, 1, j_qpts) * A(0, 1) +
+ Gt(knds, 2, j_qpts) * A(0, 2));
+
+ const double by = idetJ * (Gt(knds, 0, j_qpts) * A(1, 0) +
+ Gt(knds, 1, j_qpts) * A(1, 1) +
+ Gt(knds, 2, j_qpts) * A(1, 2));
+
+ const double bz = idetJ * (Gt(knds, 0, j_qpts) * A(2, 0) +
+ Gt(knds, 1, j_qpts) * A(2, 1) +
+ Gt(knds, 2, j_qpts) * A(2, 2));
+
+ const double b4 = i3 * (elem_deriv_shapes_view(knds, 0, i_elems) - bx);
+ const double b5 = b4 + bx;
+ const double b6 = i3 * (elem_deriv_shapes_view(knds, 1, i_elems) - by);
+ const double b7 = b6 + by;
+ const double b8 = i3 * (elem_deriv_shapes_view(knds, 2, i_elems) - bz);
+ const double b9 = b8 + bz;
+
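+ // Residual contraction: Y += c_detJ * B-bar^T * sigma, where sigma is
+ // assumed to be stored in Voigt ordering (0=xx, 1=yy, 2=zz, 3=yz, 4=xz, 5=xy).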
+ Y(knds, 0, i_elems) += c_detJ * (b4 * S(1, j_qpts, i_elems) +
+ b4 * S(2, j_qpts, i_elems) +
+ b5 * S(0, j_qpts, i_elems) +
+ by * S(5, j_qpts, i_elems) +
+ bz * S(4, j_qpts, i_elems));
+
+ Y(knds, 1, i_elems) += c_detJ * (b6 * S(0, j_qpts, i_elems) +
+ b6 * S(2, j_qpts, i_elems) +
+ b7 * S(1, j_qpts, i_elems) +
+ bx * S(5, j_qpts, i_elems) +
+ bz * S(3, j_qpts, i_elems));
+
+ Y(knds, 2, i_elems) += c_detJ * (b8 * S(0, j_qpts, i_elems) +
+ b8 * S(1, j_qpts, i_elems) +
+ b9 * S(2, j_qpts, i_elems) +
+ bx * S(4, j_qpts, i_elems) +
+ by * S(3, j_qpts, i_elems));
+ } // End of nnodes
+ } // End of nQpts
+ }); // End of nelems
+ } // End of if statement
+}
diff --git a/src/fem_operators/mechanics_integrators.hpp b/src/fem_operators/mechanics_integrators.hpp
new file mode 100644
index 0000000..fb7d4f7
--- /dev/null
+++ b/src/fem_operators/mechanics_integrators.hpp
@@ -0,0 +1,938 @@
+#ifndef MECHANICS_INTEG
+#define MECHANICS_INTEG
+
+#include "sim_state/simulation_state.hpp"
+
+#include "mfem.hpp"
+
+#include <memory>
+
+/**
+ * @brief Nonlinear form integrator for general solid mechanics problems with material model
+ * integration.
+ *
+ * ExaNLFIntegrator implements a comprehensive finite element integrator specifically designed
+ * for ExaConstit's solid mechanics applications, including crystal plasticity, large deformation
+ * mechanics, and general material model integration. This integrator serves as the foundation
+ * for nonlinear finite element assembly operations in updated Lagrangian formulations.
+ *
+ * The integrator provides:
+ * - Element vector assembly for residual computation (internal forces)
+ * - Element matrix assembly for Jacobian computation (tangent stiffness)
+ * - Partial assembly (PA) operations for memory-efficient matrix-free methods
+ * - Element assembly (EA) operations for minimal memory usage
+ * - Device-compatible implementations for CPU and GPU execution
+ *
+ * Key features for crystal plasticity and micromechanics:
+ * - Integration with ExaConstit's material model framework
+ * - Support for heterogeneous material regions through SimulationState
+ * - Quadrature function data access for stress and tangent stiffness
+ * - Optimized assembly operations for large-scale simulations
+ * - Compatibility with MFEM's assembly level abstractions
+ *
+ * Assembly strategy support:
+ * - Traditional element-wise assembly for small problems
+ * - Partial assembly for memory-efficient large-scale problems
+ * - Element assembly for memory-constrained environments
+ * - Mixed assembly strategies for heterogeneous hardware
+ *
+ * The integrator coordinates with SimulationState to access:
+ * - Current stress tensors from material model evaluations
+ * - Material tangent stiffness matrices for linearization
+ * - Geometric data for coordinate transformations
+ * - Quadrature point data for integration operations
+ *
+ * @ingroup ExaConstit_fem_operators
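+ * A minimal usage sketch (construction of `sim_state` and `fes` is assumed):
+ * @code{.cpp}
+ * auto* integ = new ExaNLFIntegrator(sim_state);
+ * mfem::NonlinearForm form(&fes);
+ * form.AddDomainIntegrator(integ); // form takes ownership of the integrator
+ * form.SetAssemblyLevel(mfem::AssemblyLevel::PARTIAL);
+ * mfem::Vector res(fes.GetTrueVSize());
+ * form.Mult(vel, res); // residual (internal forces) via the PA path
+ * @endcode
+ *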
+ */
+class ExaNLFIntegrator : public mfem::NonlinearFormIntegrator {
+protected:
+ /** @brief Reference to simulation state for accessing mesh, fields, and material data */
+ std::shared_ptr<SimulationState> m_sim_state;
+
+ /** @brief Working vector for material data storage during assembly operations */
+ mfem::Vector dmat;
+
+ /** @brief Gradient data vector for partial assembly operations */
+ mfem::Vector grad;
+
+ /** @brief Partial assembly material data vector */
+ mfem::Vector pa_mat;
+
+ /** @brief Partial assembly diagonal material data vector */
+ mfem::Vector pa_dmat;
+
+ /** @brief Jacobian transformation data vector for geometric operations */
+ mfem::Vector jacobian;
+
+ /** @brief Geometric factors for mesh transformation operations (not owned) */
+ const mfem::GeometricFactors* geom; // Not owned
+
+ /** @brief Spatial dimension of the finite element problem */
+ int space_dims;
+
+ /** @brief Number of finite elements in the mesh */
+ int nelems;
+
+ /** @brief Number of quadrature points per element */
+ int nqpts;
+
+ /** @brief Number of nodes (degrees of freedom) per element */
+ int nnodes;
+
+public:
+ /**
+ * @brief Construct integrator with simulation state reference.
+ *
+ * @param sim_state Reference to simulation state containing mesh, fields, and material data
+ *
+ * Initializes the nonlinear form integrator with access to the simulation state,
+ * enabling integration with ExaConstit's material model framework and data management.
+ * The integrator is ready for element assembly operations upon construction.
+ *
+ * The constructor establishes:
+ * - Reference to simulation state for data access
+ * - Foundation for subsequent assembly strategy configuration
+ * - Integration with MFEM's NonlinearFormIntegrator interface
+ *
+ * @note Simulation state reference must remain valid for integrator lifetime
+ * @note Working vectors are allocated lazily during first assembly operations
+ */
+ ExaNLFIntegrator(std::shared_ptr<SimulationState> sim_state) : m_sim_state(sim_state) {}
+
+ /**
+ * @brief Virtual destructor for proper cleanup of derived classes.
+ *
+ * Ensures proper cleanup of integrator resources and derived class data.
+ * The destructor handles cleanup of working vectors and any allocated
+ * data structures used during assembly operations.
+ *
+ * @note Base class destructor handles MFEM NonlinearFormIntegrator cleanup
+ * @note Working vectors are automatically cleaned up by MFEM Vector destructors
+ */
+ virtual ~ExaNLFIntegrator() {}
+
+ /// This doesn't do anything at this point. We can add the functionality
+ /// later on if a use case arises.
+ using mfem::NonlinearFormIntegrator::GetElementEnergy;
+ /**
+ * @brief Compute element energy contribution (placeholder implementation).
+ *
+ * @param el Finite element for energy computation
+ * @param Ttr Element transformation for coordinate mapping
+ * @param elfun Element solution vector
+ * @return Element energy contribution (currently always returns 0.0)
+ *
+ * This method provides the interface for element energy computation but
+ * currently returns zero. The functionality can be added later if energy
+ * calculations become required for the application.
+ *
+ * Potential future uses:
+ * - Total strain energy computation for post-processing
+ * - Energy-based error estimation for adaptive refinement
+ * - Thermodynamic consistency checks in material models
+ * - Variational constitutive updates
+ *
+ * @note Current implementation is placeholder returning 0.0
+ * @note Can be extended for specific energy computation requirements
+ */
+ virtual double GetElementEnergy([[maybe_unused]] const mfem::FiniteElement& el,
+ [[maybe_unused]] mfem::ElementTransformation& Ttr,
+ [[maybe_unused]] const mfem::Vector& elfun) override {
+ return 0.0;
+ };
+
+ using mfem::NonlinearFormIntegrator::AssembleElementVector;
+ /**
+ * @brief Assemble element residual vector for internal force computation.
+ *
+ * @param el Finite element providing shape functions and geometric information
+ * @param Ttr Element transformation for coordinate mapping
+ * @param elfun Element solution vector (typically nodal velocities or displacements)
+ * @param elvect Output element residual vector representing internal forces
+ *
+ * Computes the element contribution to the nonlinear residual vector, representing
+ * the internal forces arising from stress divergence in the current configuration.
+ * This is the core element-level computation in Newton-Raphson iterations.
+ *
+ * The assembly process:
+ * 1. Computes shape function derivatives in physical coordinates
+ * 2. Retrieves current stress state from quadrature function data
+ * 3. Integrates B^T * σ over element volume using Gauss quadrature
+ * 4. Accumulates contributions from all quadrature points
+ *
+ * Stress tensor handling:
+ * - Accesses Cauchy stress from simulation state quadrature functions
+ * - Uses full 3x3 stress tensor with proper symmetry treatment
+ * - Integrates stress divergence contribution to residual vector
+ *
+ * The residual represents the out-of-balance internal forces:
+ * f_internal = ∫_Ω B^T(x) σ(x) dΩ
+ *
+ * where B is the strain-displacement matrix and σ is the Cauchy stress tensor.
+ *
+ * Performance optimizations:
+ * - Reuses matrices across quadrature points for memory efficiency
+ * - Direct external data access for input/output vectors
+ * - Optimized matrix-vector operations using MFEM routines
+ *
+ * @note Assumes 3D problems with symmetric stress tensors
+ * @note Integration rule must match quadrature space for stress data
+ * @note Caliper profiling enabled for performance monitoring
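+ *
+ * A schematic of the per-quadrature-point step (a simplified sketch, not the
+ * exact implementation; `ir`, `DS`, `sigma`, and `PMatO` are local working
+ * objects):
+ * @code{.cpp}
+ * const mfem::IntegrationPoint& ip = ir->IntPoint(q);
+ * Ttr.SetIntPoint(&ip);
+ * el.CalcPhysDShape(Ttr, DS);                // dN/dx (nnodes x dim)
+ * mfem::AddMult_a(ip.weight * Ttr.Weight(),  // += w * det(J) * B^T * sigma
+ *                 DS, sigma, PMatO);
+ * @endcode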
+ */
+ virtual void AssembleElementVector(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& elfun,
+ mfem::Vector& elvect) override;
+
+ /**
+ * @brief Assemble element tangent stiffness matrix for Newton-Raphson linearization.
+ *
+ * @param el Finite element providing shape functions and geometric information
+ * @param Ttr Element transformation for coordinate mapping
+ * @param elfun Element solution vector (unused in current implementation)
+ * @param elmat Output element stiffness matrix
+ *
+ * Computes the element tangent stiffness matrix used in Newton-Raphson linearization,
+ * representing the derivative of internal forces with respect to nodal displacements.
+ * This matrix is essential for convergence of nonlinear iterations.
+ *
+ * The assembly process:
+ * 1. Computes shape function derivatives in physical coordinates
+ * 2. Retrieves material tangent stiffness from quadrature function data
+ * 3. Constructs strain-displacement B-matrix for current configuration
+ * 4. Integrates B^T * C * B over element volume using Gauss quadrature
+ *
+ * Tangent stiffness computation:
+ * K_element = ∫_Ω B^T(x) C(x) B(x) dΩ
+ *
+ * where:
+ * - B is the strain-displacement matrix (6×3n for 3D elements)
+ * - C is the material tangent stiffness matrix (6×6 for 3D)
+ * - Integration performed over current (deformed) element volume
+ *
+ * Material tangent matrix:
+ * - Accesses 6×6 tangent stiffness from material model evaluations
+ * - Uses Voigt notation for symmetric tensor operations
+ * - Includes both material and geometric stiffness contributions
+ *
+ * The algorithm performs the matrix triple product efficiently:
+ * 1. Computes temp = C * B (intermediate result)
+ * 2. Computes K += B^T * temp (final contribution)
+ * 3. Accumulates contributions from all quadrature points
+ *
+ * Performance considerations:
+ * - Optimized matrix operations using MFEM dense matrix routines
+ * - Memory reuse for intermediate matrices across quadrature points
+ * - Integration weights incorporated efficiently
+ *
+ * @note Material tangent matrix assumed to be 6×6 in Voigt notation
+ * @note B-matrix construction handles 3D elements with proper DOF ordering
+ * @note Caliper profiling enabled for performance analysis
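+ *
+ * The per-point triple product, schematically (`CB` is a scratch matrix):
+ * @code{.cpp}
+ * mfem::Mult(C, B, CB);                                        // CB = C * B
+ * mfem::AddMult_a_AtB(ip.weight * Ttr.Weight(), B, CB, elmat); // elmat += w * det(J) * B^T * CB
+ * @endcode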
+ */
+ virtual void AssembleElementGrad(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& /*elfun*/,
+ mfem::DenseMatrix& elmat) override;
+
+ /**
+ * @brief Initialize partial assembly data structures for gradient (Jacobian) operations.
+ *
+ * @param x Solution vector for state-dependent assembly (unused in current implementation)
+ * @param fes Finite element space providing mesh and element information
+ *
+ * Prepares geometric and material data structures needed for efficient partial
+ * assembly Jacobian operations. This method precomputes transformation data
+ * and material property layouts optimized for matrix-free operations.
+ *
+ * The gradient assembly setup includes:
+ * 1. Computing and storing shape function derivatives at quadrature points
+ * 2. Preparing 4D tensor layouts for material tangent operations
+ * 3. Setting up geometric factors for coordinate transformations
+ * 4. Organizing data for vectorized element-wise operations
+ *
+ * 4D tensor transformation:
+ * Applies the transformation:
+ * D_ijkm = (1/det(J)) * w_qpt * adj(J)^T_in * C^tan_njkl * adj(J)_lm, where:
+ * - D is the transformed 4th order tensor for partial assembly
+ * - J is the Jacobian matrix from geometric factors
+ * - C^tan is the material tangent stiffness tensor
+ * - adj(J) is the adjugate of the Jacobian matrix
+ *
+ * Performance optimizations:
+ * - Precomputes shape function derivatives for all quadrature points
+ * - Uses RAJA views with optimized memory layouts for target architecture
+ * - Enables vectorization across elements and quadrature points
+ * - Supports both CPU and GPU execution
+ *
+ * @note Current implementation delegates to single-argument version
+ * @note Shape function derivatives cached for reuse in gradient operations
+ * @note 4D tensor layout optimized for specific hardware architectures
+ */
+ virtual void AssembleGradPA(const mfem::Vector& /* x */,
+ const mfem::FiniteElementSpace& fes) override;
+ /**
+ * @brief Initialize partial assembly data structures for gradient operations.
+ *
+ * @param fes Finite element space providing mesh and element information
+ *
+ * Performs the core setup for partial assembly gradient operations by precomputing
+ * geometric factors and material data layouts. This method transforms material
+ * tangent data into optimized formats for efficient matrix-vector operations.
+ *
+ * The setup process includes:
+ * 1. Computing spatial dimensions and element characteristics
+ * 2. Precomputing shape function derivatives at all quadrature points
+ * 3. Transforming material tangent tensors for partial assembly operations
+ * 4. Setting up memory layouts optimized for target hardware
+ *
+ * Shape function derivative computation:
+ * - Calculates ∂N/∂ξ derivatives for all quadrature points
+ * - Stores in device-compatible format for GPU execution
+ * - Organizes data for efficient vectorized operations
+ * - Reuses derivatives across multiple gradient assembly calls
+ *
+ * Material tensor transformation:
+ * - Applies geometric transformations to material tangent matrices
+ * - Incorporates quadrature weights and Jacobian determinants
+ * - Uses 4D tensor layouts optimized for partial assembly operations
+ * - Enables efficient matrix-vector products in AddMultGradPA()
+ *
+ * The method prepares data structures for:
+ * - Fast Jacobian-vector products via AddMultGradPA()
+ * - Diagonal assembly for preconditioning via AssembleGradDiagonalPA()
+ * - Memory-efficient operations without explicit matrix storage
+ *
+ * @note Must be called before AddMultGradPA() and diagonal assembly operations
+ * @note Material tangent data accessed from simulation state quadrature functions
+ * @note Supports only 3D problems (1D and 2D abort with error message)
+ */
+ virtual void AssembleGradPA(const mfem::FiniteElementSpace& fes) override;
+
+ /**
+ * @brief Apply partial assembly gradient (Jacobian) operation.
+ *
+ * @param x Input vector for Jacobian-vector product
+ * @param y Output vector for accumulated result
+ *
+ * Performs the partial assembly Jacobian-vector product operation using
+ * precomputed geometric factors and transformed material tangent data.
+ * This operation computes the action of the tangent stiffness matrix
+ * without explicit matrix assembly, providing memory-efficient Newton-Raphson iterations.
+ *
+ * The operation computes: y += K * x, where K is the tangent stiffness matrix
+ * represented implicitly through partial assembly data structures.
+ *
+ * Algorithm overview:
+ * 1. Uses precomputed shape function derivatives and material data
+ * 2. Performs element-wise matrix-vector operations
+ * 3. Applies geometric transformations on-the-fly
+ * 4. Accumulates contributions to global vector
+ *
+ * Memory efficiency features:
+ * - No explicit stiffness matrix storage required
+ * - Vectorized operations over elements and quadrature points
+ * - Device-compatible implementation for GPU acceleration
+ * - Minimal working memory requirements
+ *
+ * Performance characteristics:
+ * - Computational complexity: O(nelems × nqpts × ndof²)
+ * - Memory complexity: O(nelems × nqpts) for material data
+ * - Excellent parallel scaling for large problems
+ * - Cache-friendly memory access patterns
+ *
+ * The method is called repeatedly during Krylov solver iterations
+ * within Newton-Raphson steps, making performance optimization critical.
+ *
+ * @note Requires prior AssembleGradPA() call for data structure setup
+ * @note Input and output vectors must match finite element space dimensions
+ * @note Essential boundary conditions handled by calling operator
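+ *
+ * Schematic call pattern inside a Krylov iteration on K * dx = -r (sketch):
+ * @code{.cpp}
+ * y = 0.0;                    // zero the output before accumulation
+ * integ->AddMultGradPA(x, y); // y += K * x, matrix-free
+ * @endcode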
+ */
+ virtual void AddMultGradPA(const mfem::Vector& x, mfem::Vector& y) const override;
+
+ using mfem::NonlinearFormIntegrator::AssemblePA;
+ /**
+ * @brief Initialize partial assembly data structures for residual operations.
+ *
+ * @param fes Finite element space providing mesh and element information
+ *
+ * Performs the initial setup for partial assembly operations by precomputing
+ * and storing geometric factors needed for efficient element-wise operations.
+ * This method amortizes setup costs across multiple residual evaluations.
+ *
+ * The setup process includes:
+ * 1. Extracting mesh and finite element information
+ * 2. Computing integration rule and weights
+ * 3. Storing geometric factors for coordinate transformations
+ * 4. Precomputing element-invariant quantities
+ *
+ * Geometric factor computation:
+ * - Retrieves Jacobian matrices for all elements and quadrature points
+ * - Stores transformation data in device-compatible format
+ * - Enables efficient coordinate mapping during assembly
+ *
+ * Memory allocation strategy:
+ * - Allocates working vectors with appropriate device memory types
+ * - Sizes vectors based on problem dimensions and mesh size
+ * - Prepares data structures for GPU execution when available
+ *
+ * The method prepares for:
+ * - Fast element vector assembly via AddMultPA()
+ * - Reuse of geometric data across multiple assembly calls
+ * - Device-compatible data layouts for GPU execution
+ *
+ * @note Must be called before AddMultPA() operations
+ * @note Geometric factors cached for reuse across assembly calls
+ * @note Caliper profiling scope for performance monitoring
+ */
+ virtual void AssemblePA(const mfem::FiniteElementSpace& fes) override;
+ /**
+ * @brief Apply partial assembly element vector operation.
+ *
+ * @param x Input vector (unused in current implementation for residual assembly)
+ * @param y Output vector for accumulated element contributions
+ *
+ * Performs the partial assembly element vector operation, computing element
+ * residual contributions using precomputed geometric factors and current
+ * stress data. This operation is optimized for memory efficiency and
+ * computational performance in large-scale simulations.
+ *
+ * The partial assembly approach:
+ * - Uses precomputed geometric factors from AssemblePA()
+ * - Accesses stress data directly from quadrature functions
+ * - Performs element-wise operations without global matrix assembly
+ * - Accumulates results directly into global vector
+ *
+ * Operation sequence:
+ * 1. Initializes output vector appropriately
+ * 2. Loops over all elements in parallel-friendly manner
+ * 3. Applies element-wise stress integration
+ * 4. Accumulates results into global degrees of freedom
+ *
+ * Memory efficiency features:
+ * - Minimal working memory requirements
+ * - Direct access to stress quadrature function data
+ * - Vectorized operations over elements and quadrature points
+ * - Device-compatible implementation for GPU execution
+ *
+ * This method is called repeatedly during nonlinear iterations,
+ * so performance optimization is critical for overall solver efficiency.
+ *
+ * @note Input vector x currently unused for stress-based residual assembly
+ * @note Output vector y must be properly sized for true DOF space
+ * @note Requires prior AssemblePA() call for geometric factor setup
+ */
+ virtual void AddMultPA(const mfem::Vector& /*x*/, mfem::Vector& y) const override;
+
+ /**
+ * @brief Assemble diagonal entries for partial assembly preconditioning.
+ *
+ * @param diag Output vector for diagonal entries of the tangent stiffness matrix
+ *
+ * Computes diagonal entries of the tangent stiffness matrix using partial
+ * assembly techniques, providing diagonal approximations essential for
+ * Jacobi preconditioning in iterative linear solvers.
+ *
+ * The diagonal computation extracts entries: diag[i] = K[i,i] where K is
+ * the tangent stiffness matrix represented through partial assembly data.
+ *
+ * Algorithm approach:
+ * 1. Uses precomputed material tangent data from AssembleGradPA()
+ * 2. Extracts diagonal contributions element-by-element
+ * 3. Applies geometric transformations for diagonal terms
+ * 4. Assembles global diagonal through element restriction operations
+ *
+ * Diagonal extraction strategy:
+ * - Computes element-wise diagonal contributions
+ * - Uses vectorized operations for efficiency
+ * - Handles geometric transformations appropriately
+ * - Accumulates to global diagonal vector
+ *
+ * The diagonal approximation quality affects:
+ * - Jacobi preconditioner effectiveness
+ * - Krylov solver convergence rates
+ * - Overall Newton-Raphson performance
+ * - Numerical stability of iterative methods
+ *
+ * Memory and performance characteristics:
+ * - Linear scaling with problem size
+ * - Device-compatible implementation
+ * - Efficient vectorized operations
+ * - Minimal additional memory requirements
+ *
+ * @note Requires prior AssembleGradPA() call for material data setup
+ * @note Output vector must be properly sized for finite element space
+ * @note Diagonal quality depends on material tangent matrix conditioning
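+ *
+ * Typical use for Jacobi preconditioning (a sketch; the wrapper that maps the
+ * assembled local diagonal onto true dofs and `ess_tdof_list` are assumed):
+ * @code{.cpp}
+ * mfem::Vector diag(fes.GetVSize());
+ * diag = 0.0;
+ * integ->AssembleGradDiagonalPA(diag);
+ * mfem::OperatorJacobiSmoother jacobi(diag, ess_tdof_list);
+ * krylov.SetPreconditioner(jacobi);
+ * @endcode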
+ */
+ virtual void AssembleGradDiagonalPA(mfem::Vector& diag) const override;
+ /**
+ * @brief Perform element assembly for gradient operations with solution vector.
+ *
+ * @param x Solution vector for state-dependent assembly (unused in current implementation)
+ * @param fes Finite element space providing mesh and element information
+ * @param ea_data Output vector for assembled element matrix data
+ *
+ * Performs element assembly for gradient operations, computing and storing
+ * complete element matrices in a format suitable for element assembly (EA)
+ * operations. This method delegates to the base element assembly routine.
+ *
+ * Element assembly characteristics:
+ * - Computes full element stiffness matrices
+ * - Stores matrices in contiguous device-compatible format
+ * - Enables exact matrix-vector products through explicit element matrices
+ * - Provides maximum memory efficiency for large problems
+ *
+ * The method serves as an interface for state-dependent element assembly
+ * while currently delegating to the stateless version for implementation.
+ *
+ * @note Current implementation delegates to AssembleEA(fes, ea_data)
+ * @note Solution vector x currently unused but available for future extensions
+ * @note Element matrices stored in ea_data with specific layout requirements
+ */
+ virtual void AssembleGradEA(const mfem::Vector& /* x */,
+ const mfem::FiniteElementSpace& fes,
+ mfem::Vector& ea_data) override;
+ /**
+ * @brief Perform element assembly for gradient operations.
+ *
+ * @param fes Finite element space providing mesh and element information
+ * @param emat Output vector for assembled element matrix data
+ *
+ * Computes and stores complete element stiffness matrices for all elements
+ * in the mesh, providing an element assembly (EA) representation of the
+ * tangent stiffness operator for memory-constrained applications.
+ *
+ * Element assembly process:
+ * 1. Iterates over all elements in the mesh
+ * 2. Computes full element stiffness matrices
+ * 3. Stores matrices in contiguous device memory format
+ * 4. Organizes data for efficient element-wise matrix-vector products
+ *
+ * Memory layout:
+ * - Matrices stored element-by-element in contiguous memory
+ * - Dense matrices with row-major ordering within each element
+ * - Device-compatible allocation for GPU execution
+ * - Total size: nelems × (ndof×ncomps)² entries
+ *
+ * Performance characteristics:
+ * - Higher assembly cost compared to partial assembly
+ * - Minimal memory usage compared to global matrix assembly
+ * - Exact operator representation without approximation
+ * - Excellent performance for high DOF-per-element problems
+ *
+ * The element matrices enable:
+ * - Exact matrix-vector products in element assembly operators
+ * - Minimal memory footprint for large-scale problems
+ * - Natural parallelization over elements
+ * - Cache-friendly memory access patterns
+ *
+ * @note Supports only 3D problems (1D and 2D problems abort with error)
+ * @note Uses RAJA views for optimized memory layouts and vectorization
+ * @note Caliper profiling enabled for performance monitoring
+ */
+ virtual void AssembleEA(const mfem::FiniteElementSpace& fes, mfem::Vector& emat) override;
+};
+
+/**
+ * @brief B-bar method integrator for incompressible and nearly incompressible solid mechanics.
+ *
+ * ICExaNLFIntegrator extends ExaNLFIntegrator to implement the B-bar method for handling
+ * incompressible and nearly incompressible materials. This integrator is essential for
+ * crystal plasticity simulations where volume preservation constraints arise from
+ * incompressible plastic deformation or nearly incompressible elastic behavior.
+ *
+ * The B-bar method (Hughes, 1980):
+ * - Modifies the strain-displacement B-matrix to avoid volumetric locking
+ * - Uses volume-averaged dilatational strains to improve element performance
+ * - Maintains accuracy for incompressible and nearly incompressible materials
+ * - Enables stable finite element solutions for high bulk modulus problems
+ *
+ * Mathematical foundation:
+ * The B-bar method splits the strain into volumetric and deviatoric parts:
+ * ε = ε_vol + ε_dev, where ε_vol is volume-averaged over the element
+ *
+ * This approach prevents spurious pressure oscillations and volumetric locking
+ * that can occur with standard displacement-based finite elements when dealing
+ * with incompressible or nearly incompressible material behavior.
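+ *
+ * In the assembly kernels this split appears as the following algebra (a
+ * minimal sketch; `bbar_x` stands in for the cached element-averaged
+ * x-derivative, and `bx` for the local x-derivative, of a shape function):
+ * @code{.cpp}
+ * const double b4 = (1.0 / 3.0) * (bbar_x - bx); // volumetric correction
+ * const double b5 = b4 + bx;                     // B-bar-modified entry
+ * @endcode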
+ *
+ * Applications in crystal plasticity:
+ * - Incompressible plastic deformation in crystal slip
+ * - Nearly incompressible elastic response in metals
+ * - Volume-preserving deformation in single crystal simulations
+ * - Polycrystalline materials with incompressible phases
+ *
+ * Key features:
+ * - Inherits all standard solid mechanics capabilities from ExaNLFIntegrator
+ * - Modifies B-matrix construction for volumetric strain averaging
+ * - Maintains compatibility with all assembly strategies (PA, EA, standard)
+ * - Provides stable solutions for high bulk modulus materials
+ * - Supports large deformation kinematics with volume preservation
+ *
+ * Implementation details:
+ * - Computes element-averaged volumetric strain gradients
+ * - Modifies standard B-matrix with B-bar corrections
+ * - Uses Hughes' formulation from "The Finite Element Method" Section 4.5.2
+ * - Maintains computational efficiency comparable to standard elements
+ *
+ * @ingroup ExaConstit_fem_operators
+ */
+class ICExaNLFIntegrator : public ExaNLFIntegrator {
+private:
+ /** @brief Element-averaged shape function derivatives for B-bar computation */
+ mfem::Vector elem_deriv_shapes;
+
+public:
+ /**
+ * @brief Construct B-bar integrator with simulation state reference.
+ *
+ * @param sim_state Reference to simulation state containing mesh, fields, and material data
+ *
+ * Initializes the B-bar method integrator by calling the base ExaNLFIntegrator
+ * constructor and preparing data structures for B-bar method computations.
+ * The integrator is ready for element assembly operations with incompressible
+ * material handling upon construction.
+ *
+ * The constructor establishes:
+ * - Base class initialization for standard solid mechanics operations
+ * - Foundation for B-bar method implementation
+ * - Integration with ExaConstit's material model framework
+ *
+ * @note Simulation state reference must remain valid for integrator lifetime
+ * @note B-bar specific working vectors allocated during first assembly operation
+ */
+ ICExaNLFIntegrator(std::shared_ptr<SimulationState> sim_state) : ExaNLFIntegrator(sim_state) {}
+ /**
+ * @brief Virtual destructor for proper cleanup of derived class resources.
+ *
+ * Ensures proper cleanup of B-bar integrator resources including any
+ * working vectors allocated for element-averaged calculations. The
+ * destructor handles cleanup of both base class and derived class data.
+ *
+ * @note Base class destructor handles ExaNLFIntegrator cleanup
+ * @note B-bar specific vectors automatically cleaned up by MFEM Vector destructors
+ */
+ virtual ~ICExaNLFIntegrator() {}
+
+ /// This doesn't do anything at this point. We can add the functionality
+ /// later on if a use case arises.
+ using ExaNLFIntegrator::GetElementEnergy;
+
+ using mfem::NonlinearFormIntegrator::AssembleElementVector;
+ /**
+ * @brief Assemble element residual vector using B-bar method for incompressible materials.
+ *
+ * @param el Finite element providing shape functions and geometric information
+ * @param Ttr Element transformation for coordinate mapping
+ * @param elfun Element solution vector (typically nodal velocities or displacements)
+ * @param elvect Output element residual vector representing internal forces
+ *
+ * Computes the element residual vector using the B-bar method to handle
+ * incompressible and nearly incompressible material behavior. This method
+ * modifies the standard residual computation to include volume-averaged
+ * strain measures that prevent volumetric locking.
+ *
+ * B-bar residual computation:
+ * 1. Computes element-averaged volumetric strain gradients over element volume
+ * 2. Constructs modified B-bar matrix with volumetric strain averaging
+ * 3. Retrieves current stress state from quadrature function data
+ * 4. Integrates B-bar^T * σ over element volume using Gauss quadrature
+ *
+ * Volume averaging process:
+ * - Integrates shape function derivatives over entire element
+ * - Normalizes by total element volume to obtain averages
+ * - Uses averaged derivatives to modify B-matrix construction
+ * - Maintains consistency with incompressible deformation constraints
+ *
+ * The B-bar matrix modification:
+ * B-bar = B_standard + B_volumetric_correction
+ * where B_volumetric_correction ensures proper volume averaging
+ *
+ * This approach prevents:
+ * - Volumetric locking in nearly incompressible materials
+ * - Spurious pressure oscillations in incompressible flow
+ * - Poor conditioning in high bulk modulus problems
+ * - Artificial stiffening due to volumetric constraints
+ *
+ * Performance considerations:
+ * - Requires additional integration loop for volume averaging
+ * - Slightly higher computational cost than standard elements
+ * - Significantly improved convergence for incompressible problems
+ * - Maintains stability for high bulk modulus materials
+ *
+ * @note Implements Hughes' B-bar method ("The Finite Element Method", Section 4.5.2)
+ * @note Requires compatible stress tensor data in simulation state
+ * @note Caliper profiling enabled for performance monitoring
+ */
+ virtual void AssembleElementVector(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& elfun,
+ mfem::Vector& elvect) override;
+
+ /**
+ * @brief Assemble element tangent stiffness matrix using B-bar method.
+ *
+ * @param el Finite element providing shape functions and geometric information
+ * @param Ttr Element transformation for coordinate mapping
+ * @param elfun Element solution vector (unused in current implementation)
+ * @param elmat Output element stiffness matrix
+ *
+ * Computes the element tangent stiffness matrix using the B-bar method for
+ * proper handling of incompressible and nearly incompressible materials.
+ * This method ensures consistent linearization of the B-bar residual formulation.
+ *
+ * B-bar tangent stiffness computation:
+ * K_element = ∫_Ω B-bar^T(x) C(x) B-bar(x) dΩ
+ *
+ * The algorithm includes:
+ * 1. Computing element-averaged volumetric strain gradients
+ * 2. Constructing B-bar matrix with volume averaging corrections
+ * 3. Retrieving material tangent stiffness from quadrature function data
+ * 4. Integrating B-bar^T * C * B-bar over element volume
+ *
+ * Volume averaging for stiffness:
+ * - Uses same element-averaged derivatives as in residual computation
+ * - Ensures consistency between residual and tangent matrix
+ * - Maintains proper Newton-Raphson convergence properties
+ * - Preserves quadratic convergence near solution
+ *
+ * B-bar matrix construction:
+ * - Modifies volumetric strain components with element averages
+ * - Preserves deviatoric strain components from standard B-matrix
+ * - Ensures proper rank and stability for incompressible problems
+ * - Maintains compatibility with material tangent matrix structure
+ *
+ * Material tangent integration:
+ * - Uses full 6×6 material tangent matrix in Voigt notation
+ * - Applies B-bar transformation consistently with residual
+ * - Incorporates geometric transformations and quadrature weights
+ * - Ensures symmetric tangent matrix for proper solver behavior
+ *
+ * The resulting stiffness matrix provides:
+ * - Stable tangent stiffness for incompressible materials
+ * - Proper conditioning for nearly incompressible problems
+ * - Consistent linearization of B-bar residual formulation
+ * - Quadratic Newton-Raphson convergence properties
+ *
+ * @note Consistent with B-bar residual formulation in AssembleElementVector
+ * @note Material tangent matrix assumed to be 6×6 in Voigt notation
+ * @note Caliper profiling enabled for performance analysis
+ */
+ virtual void AssembleElementGrad(const mfem::FiniteElement& el,
+ mfem::ElementTransformation& Ttr,
+ const mfem::Vector& /*elfun*/,
+ mfem::DenseMatrix& elmat) override;
+
+ // This method doesn't easily extend to a PA formulation, so we're punting on
+ // it for now.
+ using ExaNLFIntegrator::AddMultGradPA;
+ using ExaNLFIntegrator::AssembleGradPA;
+
+ /**
+ * @brief Initialize partial assembly data structures for B-bar residual operations.
+ *
+ * @param fes Finite element space providing mesh and element information
+ *
+ * Performs setup for B-bar method partial assembly operations by precomputing
+ * geometric factors and element-averaged quantities needed for efficient
+ * incompressible material handling in matrix-free operations.
+ *
+ * B-bar partial assembly setup:
+ * 1. Calls base class AssemblePA() for standard geometric factors
+ * 2. Computes element-averaged shape function derivatives
+ * 3. Stores volume-averaged data for B-bar matrix construction
+ * 4. Prepares data structures for efficient B-bar operations
+ *
+ * Element averaging computation:
+ * - Integrates shape function derivatives over each element
+ * - Normalizes by element volume to obtain averaged quantities
+ * - Stores averaged derivatives for use in AddMultPA operations
+ * - Enables consistent B-bar method in partial assembly framework
+ *
+ * The setup enables:
+ * - Memory-efficient B-bar residual assembly via AddMultPA()
+ * - Reuse of element-averaged data across multiple assembly calls
+ * - Device-compatible data layouts for GPU execution
+ * - Efficient handling of incompressible material constraints
+ *
+ * Performance characteristics:
+ * - Slightly higher setup cost due to volume averaging
+ * - Amortized over multiple assembly operations
+ * - Maintains memory efficiency of partial assembly approach
+ * - Enables stable solutions for incompressible problems
+ *
+ * @note Must be called before AddMultPA() operations for B-bar method
+ * @note Element averaging data cached for reuse across assembly calls
+ * @note Compatible with base class partial assembly infrastructure
+ */
+ virtual void AssemblePA(const mfem::FiniteElementSpace& fes) override;
+ /**
+ * @brief Apply partial assembly B-bar element vector operation.
+ *
+ * @param x Input vector (unused in current implementation for residual assembly)
+ * @param y Output vector for accumulated element contributions
+ *
+ * Performs the partial assembly B-bar element vector operation, computing
+ * element residual contributions using precomputed geometric factors and
+ * element-averaged quantities. This provides memory-efficient B-bar method
+ * implementation for large-scale incompressible material simulations.
+ *
+ * B-bar partial assembly operation:
+ * - Uses precomputed element-averaged shape function derivatives
+ * - Constructs B-bar matrices on-the-fly during assembly
+ * - Accesses stress data directly from quadrature functions
+ * - Accumulates B-bar contributions directly into global vector
+ *
+ * The operation sequence:
+ * 1. Loops over all elements using precomputed geometric data
+ * 2. Constructs B-bar matrix using element-averaged derivatives
+ * 3. Applies stress integration with B-bar formulation
+ * 4. Accumulates results into global degrees of freedom
+ *
+ * Volume averaging integration:
+ * - Uses cached element-averaged derivatives from AssemblePA()
+ * - Applies B-bar corrections to volumetric strain components
+ * - Maintains computational efficiency of partial assembly
+ * - Prevents volumetric locking in incompressible materials
+ *
+ * Memory efficiency features:
+ * - Minimal additional memory for element averaging data
+ * - Direct access to stress quadrature function data
+ * - Vectorized operations over elements and quadrature points
+ * - Device-compatible implementation for GPU execution
+ *
+ * This method provides the core B-bar computation in Newton-Raphson
+ * iterations while maintaining the memory efficiency advantages of
+ * partial assembly for large-scale simulations.
+ *
+ * @note Requires prior AssemblePA() call for B-bar geometric factor setup
+ * @note Input vector x currently unused for stress-based residual assembly
+ * @note Output vector y must be properly sized for true DOF space
+ */
+ virtual void AddMultPA(const mfem::Vector& /*x*/, mfem::Vector& y) const override;
+ /**
+ * @brief Assemble diagonal entries for B-bar partial assembly preconditioning.
+ *
+ * @param diag Output vector for diagonal entries of the B-bar tangent stiffness matrix
+ *
+ * Computes diagonal entries of the B-bar tangent stiffness matrix using
+ * partial assembly techniques, providing diagonal approximations essential
+ * for Jacobi preconditioning in iterative linear solvers for incompressible
+ * material problems.
+ *
+ * B-bar diagonal computation:
+ * 1. Uses precomputed element-averaged derivatives from AssembleGradPA()
+ * 2. Constructs B-bar matrix modifications for diagonal extraction
+ * 3. Applies material tangent data with B-bar transformations
+ * 4. Assembles global diagonal through element restriction operations
+ *
+ * The diagonal extraction process:
+ * - Accounts for B-bar modifications in volumetric strain components
+ * - Maintains consistency with B-bar tangent stiffness formulation
+ * - Uses vectorized operations for computational efficiency
+ * - Handles geometric transformations appropriately
+ *
+ * Diagonal quality considerations:
+ * - B-bar method affects diagonal structure and conditioning
+ * - Improved conditioning for incompressible material problems
+ * - Better preconditioner effectiveness for nearly incompressible materials
+ * - Enhanced Krylov solver convergence for high bulk modulus problems
+ *
+ * Performance characteristics:
+ * - Linear scaling with problem size
+ * - Device-compatible implementation for GPU execution
+ * - Efficient vectorized operations over elements
+ * - Minimal additional memory requirements beyond standard diagonal assembly
+ *
+ * The resulting diagonal provides:
+ * - Effective preconditioning for B-bar systems
+ * - Stable iterative solver behavior for incompressible problems
+ * - Consistent approximation quality across material parameter ranges
+ * - Robust performance for nearly incompressible materials
+ *
+ * @note Requires prior AssembleGradPA() call for B-bar material data setup
+ * @note Diagonal entries reflect B-bar modifications for incompressible behavior
+ * @note Caliper profiling enabled for performance monitoring
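+ *
+ * A minimal preconditioner-setup sketch (illustrative names):
+ * @code{.cpp}
+ * mfem::Vector diag(fes.GetTrueVSize());
+ * integ.AssembleGradDiagonalPA(diag);  // requires a prior AssembleGradPA() call
+ * // A Jacobi sweep then scales each residual entry: z[i] = r[i] / diag[i]
+ * @endcode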
+ */
+ virtual void AssembleGradDiagonalPA(mfem::Vector& diag) const override;
+
+ /**
+ * @brief Perform B-bar element assembly for gradient operations with solution vector.
+ *
+ * @param x Solution vector for state-dependent assembly (unused in current implementation)
+ * @param fes Finite element space providing mesh and element information
+ * @param ea_data Output vector for assembled element matrix data
+ *
+ * Performs B-bar element assembly for gradient operations, computing and storing
+ * complete B-bar element stiffness matrices in a format suitable for element
+ * assembly (EA) operations. This method delegates to the base element assembly
+ * routine while maintaining B-bar method consistency.
+ *
+ * B-bar element assembly characteristics:
+ * - Computes full B-bar element stiffness matrices
+ * - Stores matrices in contiguous device-compatible format
+ * - Enables exact B-bar matrix-vector products through explicit element matrices
+ * - Uses far less memory than a fully assembled global matrix for large incompressible problems
+ *
+ * The method serves as an interface for state-dependent B-bar element assembly
+ * while currently delegating to the stateless version for implementation.
+ * Future extensions could include solution-dependent B-bar modifications.
+ *
+ * @note Current implementation delegates to AssembleEA(fes, ea_data)
+ * @note Solution vector x currently unused but available for future B-bar extensions
+ * @note Element matrices include B-bar modifications for incompressible behavior
+ */
+ virtual void AssembleGradEA(const mfem::Vector& /* x */,
+ const mfem::FiniteElementSpace& fes,
+ mfem::Vector& ea_data) override;
+
+ /**
+ * @brief Perform B-bar element assembly for gradient operations.
+ *
+ * @param fes Finite element space providing mesh and element information
+ * @param emat Output vector for assembled B-bar element matrix data
+ *
+ * Computes and stores complete B-bar element stiffness matrices for all elements
+ * in the mesh, providing an element assembly (EA) representation of the B-bar
+ * tangent stiffness operator for memory-constrained incompressible material applications.
+ *
+ * B-bar element assembly process:
+ * 1. Iterates over all elements in the mesh
+ * 2. Computes element-averaged volumetric derivatives for each element
+ * 3. Constructs B-bar element stiffness matrices with volume averaging
+ * 4. Stores matrices in contiguous device memory format
+ *
+ * B-bar matrix computation:
+ * - Computes element volume through integration of Jacobian determinants
+ * - Calculates element-averaged shape function derivatives
+ * - Constructs B-bar matrices with volumetric strain averaging
+ * - Integrates B-bar^T * C * B-bar over element volume
+ *
+ * Memory layout:
+ * - B-bar matrices stored element-by-element in contiguous memory
+ * - Dense matrices with row-major ordering within each element
+ * - Device-compatible allocation for GPU execution
+ * - Total size: nelems × (ndof×ncomps)² entries
+ *
+ * Performance characteristics:
+ * - Higher assembly cost due to B-bar volume averaging computations
+ * - Lower memory usage than assembling a global sparse B-bar matrix
+ * - Exact B-bar operator representation without approximation
+ * - Excellent stability for incompressible material problems
+ *
+ * The B-bar element matrices enable:
+ * - Exact B-bar matrix-vector products in element assembly operators
+ * - Stable solutions for incompressible and nearly incompressible materials
+ * - Memory-efficient representation for large-scale problems
+ * - Natural parallelization over elements with B-bar consistency
+ *
+ * @note Supports only 3D problems (1D and 2D problems abort with error)
+ * @note Uses RAJA views for optimized B-bar memory layouts and vectorization
+ * @note Caliper profiling enabled for performance monitoring
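+ *
+ * A sizing sketch for the element-matrix buffer (illustrative names):
+ * @code{.cpp}
+ * const int edofs = ndof * ncomps;            // vector-valued DOFs per element
+ * mfem::Vector emat(nelems * edofs * edofs);  // one dense matrix per element
+ * integ.AssembleEA(fes, emat);
+ * @endcode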
+ */
+ virtual void AssembleEA(const mfem::FiniteElementSpace& fes, mfem::Vector& emat) override;
+};
+
+// }
+
+#endif
diff --git a/src/fem_operators/mechanics_operator.cpp b/src/fem_operators/mechanics_operator.cpp
new file mode 100644
index 0000000..b95cd74
--- /dev/null
+++ b/src/fem_operators/mechanics_operator.cpp
@@ -0,0 +1,331 @@
+
+#include "fem_operators/mechanics_operator.hpp"
+
+#include "models/mechanics_multi_model.hpp"
+#include "utilities/mechanics_kernels.hpp"
+#include "utilities/mechanics_log.hpp"
+#include "utilities/unified_logger.hpp"
+
+#include "RAJA/RAJA.hpp"
+#include "mfem/general/forall.hpp"
+
+#include <array>
+#include <exception>
+#include <stdexcept>
+#include <string>
+
+NonlinearMechOperator::NonlinearMechOperator(mfem::Array<int>& ess_bdr,
+ mfem::Array2D<int>& ess_bdr_comp,
+ std::shared_ptr<SimulationState> sim_state)
+ : mfem::NonlinearForm(sim_state->GetMeshParFiniteElementSpace().get()),
+ ess_bdr_comps(ess_bdr_comp), m_sim_state(sim_state) {
+ CALI_CXX_MARK_SCOPE("mechop_class_setup");
+ mfem::Vector* rhs;
+ rhs = nullptr;
+
+ const auto& options = m_sim_state->GetOptions();
+ auto loc_fe_space = m_sim_state->GetMeshParFiniteElementSpace();
+
+ // Define the parallel nonlinear form
+ h_form = std::make_unique<mfem::ParNonlinearForm>(
+ m_sim_state->GetMeshParFiniteElementSpace().get());
+
+ // Set the essential boundary conditions
+ h_form->SetEssentialBC(ess_bdr, ess_bdr_comps, rhs);
+
+ // Set the essential boundary conditions that we can store on our class
+ SetEssentialBC(ess_bdr, ess_bdr_comps, rhs);
+
+ assembly = options.solvers.assembly;
+
+ model = std::make_shared(m_sim_state, options);
+ // Add the user defined integrator
+ if (options.solvers.integ_model == IntegrationModel::DEFAULT) {
+ h_form->AddDomainIntegrator(new ExaNLFIntegrator(m_sim_state));
+ } else if (options.solvers.integ_model == IntegrationModel::BBAR) {
+ h_form->AddDomainIntegrator(new ICExaNLFIntegrator(m_sim_state));
+ }
+
+ if (assembly == AssemblyType::PA) {
+ h_form->SetAssemblyLevel(mfem::AssemblyLevel::PARTIAL, mfem::ElementDofOrdering::NATIVE);
+ diag.SetSize(loc_fe_space->GetTrueVSize(), mfem::Device::GetMemoryType());
+ diag.UseDevice(true);
+ diag = 1.0;
+ prec_oper = std::make_shared<MechOperatorJacobiSmoother>(diag,
+ this->GetEssentialTrueDofs());
+ } else if (assembly == AssemblyType::EA) {
+ h_form->SetAssemblyLevel(mfem::AssemblyLevel::ELEMENT, mfem::ElementDofOrdering::NATIVE);
+ diag.SetSize(loc_fe_space->GetTrueVSize(), mfem::Device::GetMemoryType());
+ diag.UseDevice(true);
+ diag = 1.0;
+ prec_oper = std::make_shared<MechOperatorJacobiSmoother>(diag,
+ this->GetEssentialTrueDofs());
+ }
+
+ // For now, we only support the NATIVE ordering (non tensor-product type elements).
+ const mfem::ElementDofOrdering ordering = mfem::ElementDofOrdering::NATIVE;
+ // const ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC;
+ elem_restrict_lex = loc_fe_space->GetElementRestriction(ordering);
+
+ el_x.SetSize(elem_restrict_lex->Height(), mfem::Device::GetMemoryType());
+ el_x.UseDevice(true);
+ px.SetSize(P->Height(), mfem::Device::GetMemoryType());
+ px.UseDevice(true);
+
+ {
+ const mfem::FiniteElement& el = *loc_fe_space->GetFE(0);
+ const int space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ const int nqpts = ir->GetNPoints();
+ const int ndofs = el.GetDof();
+ const int nelems = loc_fe_space->GetNE();
+
+ el_jac.SetSize(space_dims * space_dims * nqpts * nelems, mfem::Device::GetMemoryType());
+ el_jac.UseDevice(true);
+
+ qpts_dshape.SetSize(nqpts * space_dims * ndofs, mfem::Device::GetMemoryType());
+ qpts_dshape.UseDevice(true);
+ {
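+ // Tabulate the reference-element shape-function derivatives once:
+ // qpts_dshape packs one (ndofs x space_dims) DSh matrix per quadrature point.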
+ mfem::DenseMatrix DSh;
+ const int offset = ndofs * space_dims;
+ double* qpts_dshape_data = qpts_dshape.HostReadWrite();
+ for (int i = 0; i < nqpts; i++) {
+ const mfem::IntegrationPoint& ip = ir->IntPoint(i);
+ DSh.UseExternalData(&qpts_dshape_data[offset * i], ndofs, space_dims);
+ el.CalcDShape(ip, DSh);
+ }
+ }
+ }
+}
+
+const mfem::Array<int>& NonlinearMechOperator::GetEssTDofList() {
+ return h_form->GetEssentialTrueDofs();
+}
+
+void NonlinearMechOperator::UpdateEssTDofs(const mfem::Array<int>& ess_bdr, bool mono_def_flag) {
+ if (mono_def_flag) {
+ h_form->SetEssentialTrueDofs(ess_bdr);
+ ess_tdof_list = ess_bdr;
+ } else {
+ // Set the essential boundary conditions
+ h_form->SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr);
+ auto tmp = h_form->GetEssentialTrueDofs();
+ // Set the essential boundary conditions that we can store on our class
+ SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr);
+ }
+}
+
+// compute: y = H(x,p)
+void NonlinearMechOperator::Mult(const mfem::Vector& k, mfem::Vector& y) const {
+ CALI_CXX_MARK_SCOPE("mechop_Mult");
+ // We first run a setup step before actually doing anything.
+ // We'll want to move this outside of Mult() at some point
+ // and have it live in the NR solver itself, or whatever solver
+ // we end up using.
+ Setup<true>(k);
+ // We now perform our element vector operation.
+ CALI_MARK_BEGIN("mechop_mult_setup");
+ // Assemble our operator
+ h_form->Setup();
+ CALI_MARK_END("mechop_mult_setup");
+ CALI_MARK_BEGIN("mechop_mult_Mult");
+ h_form->Mult(k, y);
+ CALI_MARK_END("mechop_mult_Mult");
+}
+
+template <bool upd_crds>
+void NonlinearMechOperator::Setup(const mfem::Vector& k) const {
+ CALI_CXX_MARK_SCOPE("mechop_setup");
+ // Wanted to put this in the mechanics_solver.cpp file, but I would have needed to update
+ // Solver class to use the NonlinearMechOperator instead of Operator class.
+ // We now update our end coordinates based on the solved for velocity.
+ if (upd_crds) {
+ UpdateEndCoords(k);
+ }
+
+ // This performs the computation of the velocity gradient if needed,
+ // det(J), material tangent stiffness matrix, state variable update,
+ // stress update, and other stuff that might be needed in the integrators.
+ auto loc_fe_space = m_sim_state->GetMeshParFiniteElementSpace();
+
+ const mfem::FiniteElement& el = *loc_fe_space->GetFE(0);
+ const int space_dims = el.GetDim();
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ const int nqpts = ir->GetNPoints();
+ const int ndofs = el.GetDof();
+ const int nelems = loc_fe_space->GetNE();
+
+ SetupJacobianTerms();
+
+ // We can now make the call to our material model set-up stage...
+ // Everything else that we need should live on the class.
+ // Within this function the model just needs to produce the Cauchy stress
+ // and the material tangent matrix (d \sigma / d Vgrad_{sym})
+ // bool succeed_t = false;
+ bool succeed = false;
+ try {
+ // Takes the k vector and transforms it into our E-vector array
+ P->Mult(k, px);
+ elem_restrict_lex->Mult(px, el_x);
+ model->ModelSetup(nqpts, nelems, space_dims, ndofs, el_jac, qpts_dshape, el_x);
+ succeed = true;
+ } catch (const std::exception& exc) {
+ // catch anything thrown within try block that derives from std::exception
+ MFEM_WARNING_0(exc.what());
+ succeed = false;
+ } catch (...) {
+ succeed = false;
+ }
+ // MPI_Allreduce(&succeed_t, &succeed, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
+ if (!succeed) {
+ throw std::runtime_error(std::string(
+ "Material model setup portion of code failed for at least one integration point."));
+ }
+} // End of model setup
+
+void NonlinearMechOperator::SetupJacobianTerms() const {
+ auto mesh = m_sim_state->GetMesh();
+ auto fe_space = m_sim_state->GetMeshParFiniteElementSpace();
+ const mfem::FiniteElement& el = *fe_space->GetFE(0);
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ const int space_dims = el.GetDim();
+ const int nqpts = ir->GetNPoints();
+ const int nelems = fe_space->GetNE();
+
+ // We need to make sure these are deleted at the start of each iteration
+ // since we have meshes that are constantly changing.
+ mesh->DeleteGeometricFactors();
+ const mfem::GeometricFactors* geom = mesh->GetGeometricFactors(
+ *ir, mfem::GeometricFactors::JACOBIANS);
+ // geom->J really isn't going to work for us as of right now. We could just reorder it
+ // to the version that we want it to be in instead...
+
+ const int DIM4 = 4;
+ std::array<RAJA::idx_t, DIM4> perm4{{3, 2, 1, 0}};
+ // bunch of helper RAJA views to make dealing with data easier down below in our kernel.
+ RAJA::Layout<DIM4> layout_jacob = RAJA::make_permuted_layout(
+ {{space_dims, space_dims, nqpts, nelems}}, perm4);
+ RAJA::View<double, RAJA::Layout<DIM4>> jac_view(el_jac.ReadWrite(), layout_jacob);
+
+ RAJA::Layout<DIM4> layout_geom = RAJA::make_permuted_layout(
+ {{nqpts, space_dims, space_dims, nelems}}, perm4);
+ RAJA::View<const double, RAJA::Layout<DIM4>> geom_j_view(geom->J.Read(), layout_geom);
+
+ const int nqpts1 = nqpts;
+ const int space_dims1 = space_dims;
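+ // geom->J is stored as (qpt, dim, dim, elem); the kernel below transposes it
+ // into the (dim, dim, qpt, elem) layout that the material kernels expect.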
+ mfem::forall(nelems, [=] MFEM_HOST_DEVICE(int i) {
+ const int nqpts_ = nqpts1;
+ const int space_dims_ = space_dims1;
+ for (int j = 0; j < nqpts_; j++) {
+ for (int k = 0; k < space_dims_; k++) {
+ for (int l = 0; l < space_dims_; l++) {
+ jac_view(l, k, j, i) = geom_j_view(j, l, k, i);
+ }
+ }
+ }
+ });
+}
+
+void NonlinearMechOperator::CalculateDeformationGradient(mfem::QuadratureFunction& def_grad) const {
+ auto mesh = m_sim_state->GetMesh();
+ auto fe_space = m_sim_state->GetMeshParFiniteElementSpace();
+ const mfem::FiniteElement& el = *fe_space->GetFE(0);
+ const mfem::IntegrationRule* ir = &(
+ mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));
+
+ const int nqpts = ir->GetNPoints();
+ const int nelems = fe_space->GetNE();
+ const int ndofs = fe_space->GetFE(0)->GetDof();
+
+ auto x_ref = m_sim_state->GetRefCoords();
+ auto x_cur = m_sim_state->GetCurrentCoords();
+ // Since we never modify the mesh nodes during this operation, this is okay.
+ mfem::GridFunction* nodes =
+ x_ref.get(); // point the nodes grid function at the reference configuration
+ int owns_nodes = 0;
+ mesh->SwapNodes(nodes, owns_nodes); // mesh now carries reference-configuration nodes
+ SetupJacobianTerms();
+
+ mfem::Vector x_true(fe_space->TrueVSize(), mfem::Device::GetMemoryType());
+
+ x_cur->GetTrueDofs(x_true);
+ // Takes the true-DOF vector and transforms it into our E-vector array
+ P->Mult(x_true, px);
+ elem_restrict_lex->Mult(px, el_x);
+
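+ // GradCalc evaluates the deformation gradient F = dx/dX at each quadrature
+ // point from the reference-frame Jacobians and the current nodal coordinates.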
+ def_grad = 0.0;
+ exaconstit::kernel::GradCalc(
+ nqpts, nelems, ndofs, el_jac.Read(), qpts_dshape.Read(), el_x.Read(), def_grad.ReadWrite());
+
+ // We're returning our mesh nodes to the original object they were pointing to.
+ // We just don't want functions outside this one changing things.
+ nodes = x_cur.get();
+ mesh->SwapNodes(nodes, owns_nodes);
+ // Delete the old geometric factors since they dealt with the original reference frame.
+ mesh->DeleteGeometricFactors();
+}
+
+// Update the end coords used in our model
+void NonlinearMechOperator::UpdateEndCoords(const mfem::Vector& vel) const {
+ *m_sim_state->GetPrimalField() = vel;
+ m_sim_state->UpdateNodalEndCoords();
+}
+
+// Compute the Jacobian from the nonlinear form
+mfem::Operator& NonlinearMechOperator::GetGradient(const mfem::Vector& x) const {
+ CALI_CXX_MARK_SCOPE("mechop_getgrad");
+ jacobian = &h_form->GetGradient(x);
+ // Reset our preconditioner operator aka recompute the diagonal for our jacobi.
+ jacobian->AssembleDiagonal(diag);
+ return *jacobian;
+}
+
+// Compute the Jacobian from the nonlinear form
+mfem::Operator& NonlinearMechOperator::GetUpdateBCsAction(const mfem::Vector& k,
+ const mfem::Vector& x,
+ mfem::Vector& y) const {
+ CALI_CXX_MARK_SCOPE("mechop_GetUpdateBCsAction");
+ // We first run a setup step before actually doing anything.
+ // We'll want to move this outside of this method at some point
+ // and have it live in the NR solver itself, or whatever solver
+ // we end up using.
+ Setup<true>(k);
+ // We now perform our element vector operation.
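+ // Compose the action in two pieces: the unconstrained gradient applied to the
+ // BC update vector x, plus the residual at k; essential rows are zeroed first.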
+ mfem::Vector resid(y);
+ resid.UseDevice(true);
+ mfem::Array zero_tdofs;
+ CALI_MARK_BEGIN("mechop_h_form_LocalGrad");
+ h_form->Setup();
+ h_form->SetEssentialTrueDofs(zero_tdofs);
+ auto& loc_jacobian = h_form->GetGradient(x);
+ loc_jacobian.Mult(x, y);
+ h_form->SetEssentialTrueDofs(ess_tdof_list);
+ h_form->Mult(k, resid);
+ jacobian = &h_form->GetGradient(x);
+ CALI_MARK_END("mechop_h_form_LocalGrad");
+
+ {
+ auto I = ess_tdof_list.Read();
+ auto size = ess_tdof_list.Size();
+ auto Y = y.Write();
+ // Need to get rid of all the constrained values here
+ mfem::forall(size, [=] MFEM_HOST_DEVICE(int i) {
+ Y[I[i]] = 0.0;
+ });
+ }
+
+ y += resid;
+ return *jacobian;
+}
\ No newline at end of file
diff --git a/src/fem_operators/mechanics_operator.hpp b/src/fem_operators/mechanics_operator.hpp
new file mode 100644
index 0000000..3a83b76
--- /dev/null
+++ b/src/fem_operators/mechanics_operator.hpp
@@ -0,0 +1,466 @@
+
+#ifndef mechanics_operator_hpp
+#define mechanics_operator_hpp
+
+#include "fem_operators/mechanics_integrators.hpp"
+#include "fem_operators/mechanics_operator_ext.hpp"
+#include "models/mechanics_model.hpp"
+#include "options/option_parser_v2.hpp"
+#include "sim_state/simulation_state.hpp"
+
+#include "mfem.hpp"
+
+#include <memory>
+/**
+ * @brief Central nonlinear mechanics operator for updated Lagrangian finite element formulations.
+ *
+ * NonlinearMechOperator drives the entire ExaConstit nonlinear mechanics system, implementing
+ * an updated Lagrangian finite element formulation for large deformation solid mechanics.
+ * It manages the Newton-Raphson solver, Krylov iterative solvers, material models, and
+ * coordinates the interaction between finite element operations and constitutive models.
+ *
+ * The class extends MFEM's NonlinearForm to provide specialized mechanics operations including:
+ * - Updated Lagrangian formulation with current configuration updates
+ * - Material model integration (crystal plasticity, UMAT, multi-model support)
+ * - Partial and element assembly support for high-performance computing
+ * - Jacobian computation and preconditioning for Newton-Raphson convergence
+ * - Deformation gradient calculation and coordinate updates
+ * - Essential boundary condition management
+ *
+ * Key features for large-scale simulations:
+ * - GPU/CPU device compatibility through MFEM's device abstraction
+ * - Memory-efficient partial assembly operations
+ * - Support for heterogeneous material regions
+ * - Automatic coordinate updating for finite deformation problems
+ * - Integration with ExaConstit's simulation state management
+ *
+ * The operator works in conjunction with SimulationState to manage:
+ * - Current and reference configurations
+ * - Material state variables across time steps
+ * - Boundary condition updates
+ * - Multi-material region handling
+ *
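+ * A driver-side construction sketch (illustrative; assumes the essential BC
+ * arrays and a SimulationState instance already exist):
+ * @code{.cpp}
+ * auto oper = std::make_shared<NonlinearMechOperator>(ess_bdr, ess_bdr_comp, sim_state);
+ * newton_solver.SetOperator(*oper);  // Newton-Raphson drives Mult()/GetGradient()
+ * @endcode
+ *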
+ * @ingroup ExaConstit_fem_operators
+ */
+class NonlinearMechOperator : public mfem::NonlinearForm {
+protected:
+ /** @brief MFEM parallel nonlinear form for distributed memory computations */
+ std::unique_ptr<mfem::ParNonlinearForm> h_form;
+
+ /** @brief Diagonal vector for Jacobian preconditioning operations */
+ mutable mfem::Vector diag;
+
+ /** @brief Shape function derivatives at quadrature points for element operations */
+ mutable mfem::Vector qpts_dshape;
+
+ /** @brief Element-wise solution vector in local element ordering */
+ mutable mfem::Vector el_x;
+
+ /** @brief Prolongation operation intermediate vector for assembly operations */
+ mutable mfem::Vector px;
+
+ /** @brief Element Jacobian matrices for geometric transformation computations */
+ mutable mfem::Vector el_jac;
+
+ /** @brief Pointer to current Jacobian operator for Newton-Raphson iterations */
+ mutable mfem::Operator* jacobian;
+
+ /** @brief Jacobi preconditioner for iterative linear solvers */
+ mutable std::shared_ptr<MechOperatorJacobiSmoother> prec_oper;
+
+ /** @brief Element restriction operator for local-to-global degree of freedom mapping */
+ const mfem::Operator* elem_restrict_lex;
+
+ /** @brief Assembly strategy (FULL, PA, EA) controlling the computational approach */
+ AssemblyType assembly;
+
+ /** @brief Material model manager handling constitutive relationships */
+ std::shared_ptr