forked from E3SM-Project/E3SM
-
Notifications
You must be signed in to change notification settings - Fork 6
Add state validation checks for NaNs and OOBounds #383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
amametjanov
wants to merge
1
commit into
E3SM-Project:develop
Choose a base branch
from
amametjanov:omega/add-state-validation
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,219 @@ | ||
| //===-- ocn/StateValidation.cpp - ocean state validation --------*- C++ -*-===// | ||
| // | ||
| // Validates ocean state fields by checking for NaN values and | ||
| // out-of-bounds conditions. Any failure triggers a critical error log with | ||
| // backtrace and MPI_Abort on the local communicator. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "StateValidation.h" | ||
|
|
||
| #include "AuxiliaryState.h" | ||
| #include "DataTypes.h" | ||
| #include "Error.h" | ||
| #include "Logging.h" | ||
| #include "MachEnv.h" | ||
| #include "OceanState.h" | ||
| #include "OmegaKokkos.h" | ||
| #include "Tracers.h" | ||
| #include "mpi.h" | ||
|
|
||
| #include <cmath> | ||
| #include <cpptrace/cpptrace.hpp> | ||
| #include <string> | ||
| #include <utility> | ||
|
|
||
| namespace OMEGA { | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Helper: abort on the local Omega communicator with a message and backtrace | ||
| static void abortWithMessage(const std::string &Msg) { | ||
| LOG_CRITICAL("{}", Msg); | ||
| cpptrace::generate_trace().print(); | ||
| MPI_Comm Comm = MachEnv::getDefault()->getComm(); | ||
| MPI_Abort(Comm, static_cast<int>(ErrorCode::Critical)); | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Helper: count NaN entries and out-of-range entries in a 2-D Real device | ||
| // array over the first NCells/NEdges rows and NVert columns. | ||
| // Returns {NaNCount, OutOfRangeCount}. | ||
| static std::pair<I4, I4> checkArray2D(const Array2DReal &Arr, I4 NRows, | ||
| I4 NCols, Real MinVal, Real MaxVal, | ||
| bool CheckMin) { | ||
| I4 NaNCount = 0; | ||
| I4 OutOfRangeCount = 0; | ||
|
|
||
| parallelReduce( | ||
| "CheckNaN", {NRows, NCols}, | ||
| KOKKOS_LAMBDA(int Row, int Col, int &Accum) { | ||
| Real Val = Arr(Row, Col); | ||
| if (Kokkos::isnan(Val)) { | ||
| ++Accum; | ||
| } | ||
| }, | ||
| NaNCount); | ||
|
|
||
| parallelReduce( | ||
| "CheckBounds", {NRows, NCols}, | ||
| KOKKOS_LAMBDA(int Row, int Col, int &Accum) { | ||
| Real Val = Arr(Row, Col); | ||
| if (!Kokkos::isnan(Val)) { | ||
| if (Val > MaxVal) { | ||
| ++Accum; | ||
| } else if (CheckMin && Val < MinVal) { | ||
| ++Accum; | ||
| } | ||
| } | ||
| }, | ||
| OutOfRangeCount); | ||
|
|
||
| return {NaNCount, OutOfRangeCount}; | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Helper: count NaN and out-of-range entries for a single tracer (row = cell, | ||
| // col = vert) extracted from the 3-D tracer array at the given tracer index. | ||
| static std::pair<I4, I4> checkTracerArray(const Array3DReal &Tracers3D, | ||
| I4 TracerIdx, I4 NCells, I4 NVert, | ||
| Real MinVal, Real MaxVal) { | ||
| I4 NaNCount = 0; | ||
| I4 OutOfRangeCount = 0; | ||
|
|
||
| parallelReduce( | ||
| "CheckTracerNaN", {NCells, NVert}, | ||
| KOKKOS_LAMBDA(int Cell, int K, int &Accum) { | ||
| Real Val = Tracers3D(TracerIdx, Cell, K); | ||
| if (Kokkos::isnan(Val)) { | ||
| ++Accum; | ||
| } | ||
| }, | ||
| NaNCount); | ||
|
|
||
| parallelReduce( | ||
| "CheckTracerBounds", {NCells, NVert}, | ||
| KOKKOS_LAMBDA(int Cell, int K, int &Accum) { | ||
| Real Val = Tracers3D(TracerIdx, Cell, K); | ||
| if (!Kokkos::isnan(Val)) { | ||
| if (Val < MinVal || Val > MaxVal) { | ||
| ++Accum; | ||
| } | ||
| } | ||
| }, | ||
| OutOfRangeCount); | ||
|
|
||
| return {NaNCount, OutOfRangeCount}; | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| /// Validate ocean state fields for NaN and out-of-bounds conditions. | ||
| /// Aborts via MPI_Abort on failure. | ||
| void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, | ||
| I4 TimeLevel) { | ||
|
|
||
| bool AnyFailure = false; | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // LayerThickness: valid range [1e-10, 1000] | ||
| // ------------------------------------------------------------------------- | ||
| { | ||
| Array2DReal LayerThick = State->getLayerThickness(TimeLevel); | ||
| auto [NaNs, OOB] = | ||
| checkArray2D(LayerThick, State->NCellsOwned, State->NVertLayers, | ||
| static_cast<Real>(1e-10), static_cast<Real>(1000.0), | ||
| /*CheckMin=*/true); | ||
|
|
||
| if (NaNs > 0) { | ||
| LOG_CRITICAL( | ||
| "StateValidation: LayerThickness contains {} NaN value(s)", NaNs); | ||
| AnyFailure = true; | ||
| } | ||
| if (OOB > 0) { | ||
| LOG_CRITICAL("StateValidation: LayerThickness has {} value(s) outside " | ||
| "valid range [1e-10, 1000]", | ||
| OOB); | ||
| AnyFailure = true; | ||
| } | ||
| } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // KineticEnergyCell: valid range [0, 10] | ||
| // ------------------------------------------------------------------------- | ||
| { | ||
| const Array2DReal &KE = AuxState->KineticAux.KineticEnergyCell; | ||
| auto [NaNs, OOB] = | ||
| checkArray2D(KE, State->NCellsOwned, State->NVertLayers, | ||
| static_cast<Real>(0.0), static_cast<Real>(10.0), | ||
| /*CheckMin=*/true); | ||
|
|
||
| if (NaNs > 0) { | ||
| LOG_CRITICAL( | ||
| "StateValidation: KineticEnergyCell contains {} NaN value(s)", | ||
| NaNs); | ||
| AnyFailure = true; | ||
| } | ||
| if (OOB > 0) { | ||
| LOG_CRITICAL( | ||
| "StateValidation: KineticEnergyCell has {} value(s) outside " | ||
| "valid range [0, 10]", | ||
| OOB); | ||
| AnyFailure = true; | ||
| } | ||
| } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Temperature tracer: valid range [-10, 50] | ||
| // ------------------------------------------------------------------------- | ||
| if (Tracers::IndxTemp != Tracers::IndxInvalid) { | ||
| Array3DReal AllTracers = Tracers::getAll(TimeLevel); | ||
| auto [NaNs, OOB] = checkTracerArray( | ||
| AllTracers, Tracers::IndxTemp, State->NCellsOwned, State->NVertLayers, | ||
| static_cast<Real>(-10.0), static_cast<Real>(50.0)); | ||
|
|
||
| if (NaNs > 0) { | ||
| LOG_CRITICAL("StateValidation: Temperature contains {} NaN value(s)", | ||
| NaNs); | ||
| AnyFailure = true; | ||
| } | ||
| if (OOB > 0) { | ||
| LOG_CRITICAL("StateValidation: Temperature has {} value(s) outside " | ||
| "valid range [-10, 50]", | ||
| OOB); | ||
| AnyFailure = true; | ||
| } | ||
| } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Salinity tracer: valid range [-2, 60] | ||
| // ------------------------------------------------------------------------- | ||
| if (Tracers::IndxSalt != Tracers::IndxInvalid) { | ||
| Array3DReal AllTracers = Tracers::getAll(TimeLevel); | ||
| auto [NaNs, OOB] = checkTracerArray( | ||
| AllTracers, Tracers::IndxSalt, State->NCellsOwned, State->NVertLayers, | ||
| static_cast<Real>(-2.0), static_cast<Real>(60.0)); | ||
|
|
||
| if (NaNs > 0) { | ||
| LOG_CRITICAL("StateValidation: Salinity contains {} NaN value(s)", | ||
| NaNs); | ||
| AnyFailure = true; | ||
| } | ||
| if (OOB > 0) { | ||
| LOG_CRITICAL("StateValidation: Salinity has {} value(s) outside " | ||
| "valid range [-2, 60]", | ||
| OOB); | ||
| AnyFailure = true; | ||
| } | ||
| } | ||
|
|
||
| // ------------------------------------------------------------------------- | ||
| // Abort if any check failed | ||
| // ------------------------------------------------------------------------- | ||
| if (AnyFailure) { | ||
| abortWithMessage("StateValidation: Ocean state validation failed. " | ||
| "See critical messages above for details."); | ||
| } | ||
| } | ||
|
|
||
| } // namespace OMEGA | ||
|
|
||
| //===----------------------------------------------------------------------===// | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| #ifndef OMEGA_STATEVALIDATION_H | ||
| #define OMEGA_STATEVALIDATION_H | ||
| //===-- ocn/StateValidation.h - ocean state validation ----------*- C++ -*-===// | ||
| // | ||
| /// \file | ||
| /// \brief Declares the validateOceanState function for ocean state validation | ||
| /// | ||
| /// Provides a function that validates the ocean prognostic state and selected | ||
| /// auxiliary/tracer fields by checking for NaN values and out-of-bounds | ||
| /// conditions. If any check fails the function logs a critical error with a | ||
| /// backtrace and aborts via MPI_Abort on the local MPI communicator. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "AuxiliaryState.h" | ||
| #include "OceanState.h" | ||
| #include "Tracers.h" | ||
|
|
||
| namespace OMEGA { | ||
|
|
||
| /// Check ocean state fields for NaN values and out-of-bounds conditions. | ||
| /// | ||
| /// The following fields are validated over owned cells and all vertical layers: | |
| /// - LayerThickness : [1e-10, 1000] (from OceanState) | ||
| /// - KineticEnergyCell : [0, 10] | ||
| /// (from AuxiliaryState::KineticAux) | ||
| /// - Temperature tracer : [-10, 50] (from Tracers) | ||
| /// - Salinity tracer : [-2, 60] (from Tracers) | ||
| /// | ||
| /// If any check fails a critical error is logged with an informative message | ||
| /// and a stack backtrace, and the run is aborted via MPI_Abort on the | ||
| /// communicator obtained from the default MachEnv. | ||
| /// | ||
| /// \param[in] State Ocean state to validate | ||
| /// \param[in] AuxState Auxiliary state containing KineticEnergyCell | ||
| /// \param[in] TimeLevel Time level index to validate (typically 0 = current) | ||
| void validateOceanState(const OceanState *State, const AuxiliaryState *AuxState, | ||
| I4 TimeLevel); | ||
|
|
||
| } // namespace OMEGA | ||
|
|
||
| //===----------------------------------------------------------------------===// | ||
| #endif // defined OMEGA_STATEVALIDATION_H |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.