From 3ee919f5ebacc3329aee8791f038ab4dbd93ff5d Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Wed, 7 Aug 2024 16:04:56 +0200 Subject: [PATCH 01/51] Trigger Build From 32cfe5c256f95671a186efc91d2a03e72e912a21 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Mon, 27 Oct 2025 14:20:03 +0300 Subject: [PATCH 02/51] documentation done insiede al core folder --- src/core/CrossCorrelation.cpp | 90 ++++++++++++++- src/core/CrossCorrelation.h | 107 +++++++++++++++++- src/core/CrossCorrelationCache.cpp | 66 ++++++++++- src/core/CrossCorrelationCache.h | 83 +++++++++++++- src/core/FilterCache.cpp | 72 +++++++++++- src/core/FilterCache.h | 103 +++++++++++++++-- src/core/GaussQuadrature.cpp | 171 +++++++++++++++++++++++------ src/core/GaussQuadrature.h | 148 +++++++++++++++++++++++-- src/core/InterpolatingBasis.cpp | 111 +++++++++++++++---- src/core/InterpolatingBasis.h | 64 ++++++++++- src/core/LegendreBasis.cpp | 97 ++++++++++++---- src/core/LegendreBasis.h | 47 +++++++- src/core/MWFilter.cpp | 116 ++++++++++++++++++- src/core/MWFilter.h | 137 ++++++++++++++++++++++- src/core/ObjectCache.cpp | 98 ++++++++++++++++- src/core/ObjectCache.h | 106 ++++++++++++++++++ src/core/QuadratureCache.cpp | 74 ++++++++++++- src/core/QuadratureCache.h | 121 +++++++++++++++++++- src/core/ScalingBasis.cpp | 88 ++++++++++++++- src/core/ScalingBasis.h | 94 +++++++++++++++- src/core/ScalingCache.h | 68 +++++++++++- 21 files changed, 1938 insertions(+), 123 deletions(-) diff --git a/src/core/CrossCorrelation.cpp b/src/core/CrossCorrelation.cpp index 8b04cf18d..fd4ce23c8 100644 --- a/src/core/CrossCorrelation.cpp +++ b/src/core/CrossCorrelation.cpp @@ -23,6 +23,43 @@ * */ +/* + * File purpose (high level): + * -------------------------- + * This implementation provides the CrossCorrelation class used to load and + * store *cross-correlation coefficient matrices* for multiwavelet filters. 
+ * Two families of filters are supported (as encoded by `type`): + * - Interpolatory (prefix "I_") + * - Legendre (prefix "L_") + * + * Given an integer "order" k (poly order), we define K = k + 1. The class + * expects to find two binary files that contain the left and right cross + * correlation blocks: + * <lib>/<prefix>_c_left_<order> + * <lib>/<prefix>_c_right_<order> + * where <prefix>
is "I" or "L" depending on the family. The directory is + * discovered via `details::find_filters()`. + * + * Each file stores K*K rows, and each row contains 2*K doubles. The data are + * read into two Eigen matrices: + * Left : (K*K) x (2K) + * Right : (K*K) x (2K) + * + * Notes on indexing and sizes: + * - K = order + 1 + * - The (K*K) rows represent a flattened 2D (i,j) index; i,j = 0..K-1. + * - Each row has 2K columns; the "2K" arises from the two-sided support + * of the correlation stencil (negative and positive offsets). + * + * Error handling: + * - The code uses MRCPP's messaging macros (MSG_ABORT / MSG_ERROR) to + * report invalid input or missing files. + * + * Endianness / portability: + * - Files are read as raw binary `double`. They must be produced on an + * architecture with compatible endianness and `double` layout. + */ + /* * * @@ -47,10 +84,20 @@ using namespace Eigen; namespace mrcpp { +// ---------------------------------------------------------------------------- +// Constructor: CrossCorrelation(int k, int t) +// Creates an object for filter family `t` (see CrossCorrelation.h for the +// enum/type codes) and polynomial order `k`. It validates the order, validates +// the family, discovers the filter library directory, composes the filenames, +// and immediately loads the binary data into `Left` and `Right`. +// ---------------------------------------------------------------------------- CrossCorrelation::CrossCorrelation(int k, int t) : type(t) , order(k) { + // Sanity check on order. `MaxOrder` is a library constant limiting k. if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order: " << this->order); + + // Validate filter family (currently Interpol or Legendre are accepted). 
switch (this->type) { case (Interpol): case (Legendre): @@ -59,16 +106,29 @@ CrossCorrelation::CrossCorrelation(int k, int t) MSG_ERROR("Unknown filter type: " << this->type); } + // Locate the directory holding precomputed filter/correlation files. + // `details::find_filters()` returns the absolute path to that directory. setCCCPaths(details::find_filters()); + // Load binary matrices Left and Right from disk into Eigen::MatrixXd. readCCCBin(); } +// ---------------------------------------------------------------------------- +// Constructor: CrossCorrelation(int t, const MatrixXd& L, const MatrixXd& R) +// Directly construct a CrossCorrelation from matrices already in memory. +// `order` is inferred from the number of columns: 2K columns → K = order+1. +// Ensures the Left/Right shapes are compatible and the family type is valid. +// No file I/O is performed here. +// ---------------------------------------------------------------------------- CrossCorrelation::CrossCorrelation(int t, const MatrixXd &L, const MatrixXd &R) : type(t) , order(L.cols() / 2 - 1) { + // Derive order from matrix width (2K columns → order = K - 1). if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order, " << this->order); if (R.cols() != L.cols()) MSG_ABORT("Right and Left cross correlation have different order!"); + + // Validate family. switch (this->type) { case (Interpol): case (Legendre): @@ -77,10 +137,18 @@ CrossCorrelation::CrossCorrelation(int t, const MatrixXd &L, const MatrixXd &R) MSG_ERROR("Unknown filter type: " << this->type); } + // Shallow copies into class members (Eigen handles the allocation). this->Left = L; this->Right = R; } +// ---------------------------------------------------------------------------- +// setCCCPaths: Compose the on-disk file paths for the left/right matrices. +// Input: `lib` is the directory returned by `details::find_filters()`. 
+// The filenames follow the convention: +// Interpol: I_c_left_, I_c_right_ +// Legendre: L_c_left_, L_c_right_ +// ---------------------------------------------------------------------------- void CrossCorrelation::setCCCPaths(const std::string &lib) { switch (this->type) { case (Interpol): @@ -96,29 +164,49 @@ void CrossCorrelation::setCCCPaths(const std::string &lib) { } } +// ---------------------------------------------------------------------------- +// readCCCBin: Open the two binary files and load them into Eigen matrices. +// File structure: +// - Let K = order + 1. +// - Each file contains K*K consecutive rows. +// - Each row stores 2*K doubles (contiguous), representing one stencil line. +// Post-processing: +// - Any absolute value < MachinePrec is zeroed to improve sparsity/readability. +// - Matrices are resized to (K*K) x (2*K). +// ---------------------------------------------------------------------------- void CrossCorrelation::readCCCBin() { + // Open both files in binary mode; abort if either is missing. std::ifstream L_fis(this->L_path.c_str(), std::ios::binary); std::ifstream R_fis(this->R_path.c_str(), std::ios::binary); if (not L_fis) MSG_ABORT("Could not open cross correlation: " << this->L_path); if (not R_fis) MSG_ABORT("Could not open cross correlation: " << this->R_path); + // Derive matrix dimensions from order. int K = this->order + 1; this->Left = MatrixXd::Zero(K * K, 2 * K); this->Right = MatrixXd::Zero(K * K, 2 * K); + + // Temporary row buffers for reading a single row (2K doubles) at a time. double dL[2 * K]; double dR[2 * K]; + + // Loop over all K*K rows and fill both Left and Right matrices. for (int i = 0; i < K * K; i++) { + // Read one row for Left and one row for Right (raw binary doubles). L_fis.read((char *)dL, sizeof(double) * 2 * K); R_fis.read((char *)dR, sizeof(double) * 2 * K); + + // Copy into Eigen matrices with small-value cleanup. 
for (int j = 0; j < 2 * K; j++) { - if (std::abs(dL[j]) < MachinePrec) dL[j] = 0.0; + if (std::abs(dL[j]) < MachinePrec) dL[j] = 0.0; // numerical zeroing if (std::abs(dR[j]) < MachinePrec) dR[j] = 0.0; this->Left(i, j) = dL[j]; this->Right(i, j) = dR[j]; } } + // Close streams (RAII would also close on destruction, but explicit is clear). L_fis.close(); R_fis.close(); } diff --git a/src/core/CrossCorrelation.h b/src/core/CrossCorrelation.h index aa674b4ca..b12fa8b58 100644 --- a/src/core/CrossCorrelation.h +++ b/src/core/CrossCorrelation.h @@ -34,29 +34,134 @@ namespace mrcpp { +/** + * @class CrossCorrelation + * @brief Container/loader for multiwavelet cross-correlation coefficient tables. + * + * This class encapsulates the left/right cross-correlation matrices associated + * with a chosen multiwavelet filter family and polynomial order. + * + * • The filter "family" is identified by an integer @c type + * (e.g., Interpolatory or Legendre; concrete codes are defined elsewhere + * and validated in the implementation). + * + * • The polynomial @c order is k ≥ 1. We use K = k + 1 for dimensions. + * + * • Two dense matrices are held: + * Left ∈ ℝ^{(K·K) × (2K)}, + * Right ∈ ℝ^{(K·K) × (2K)}. + * Each row corresponds to a flattened (i,j) pair with i,j∈{0..K−1}; + * each row stores a 2K-wide correlation stencil. + * + * Objects can be constructed by loading the binary coefficient files from disk + * (constructor #1) or by adopting matrices already residing in memory + * (constructor #2). Accessors expose the type/order and const references to + * the matrices; there are no mutating public methods by design. + * + * Invariants (enforced in the implementation): + * - 1 ≤ order ≤ MaxOrder + * - Left.cols() == Right.cols() == 2K where K = order + 1 + * - Left.rows() == Right.rows() == K*K + * + * Thread-safety: the class is a simple value holder once constructed. + * Concurrent reads are safe; concurrent writes are not supported. 
+ */ class CrossCorrelation final { public: + /** + * @brief Construct by loading coefficient tables from the filter library. + * + * The library path is discovered internally (see details::find_filters()). + * Files are chosen based on @p type and @p k and read into #Left/#Right. + * + * @param k Polynomial order (k ≥ 1). Sets K = k + 1 for dimensions. + * @param t Filter family/type code (e.g., Interpol, Legendre). + * + * @throws abort/error (via MRCPP messaging) on invalid @p k/@p t or if the + * required binary files cannot be opened. + */ CrossCorrelation(int k, int t); + + /** + * @brief Construct from in-memory matrices (no file I/O). + * + * The order is inferred from the column count: 2K columns ⇒ order = K−1. + * The two matrices must be shape-compatible (same size). + * + * @param t Filter family/type code. + * @param ldata Left matrix, size (K*K) × (2K). + * @param rdata Right matrix, size (K*K) × (2K). + * + * @throws abort/error if dimensions are inconsistent or the type is invalid. + */ CrossCorrelation(int t, const Eigen::MatrixXd &ldata, const Eigen::MatrixXd &rdata); + /** @return The filter family/type code associated with this object. */ int getType() const { return this->type; } + + /** @return The polynomial order k (so K = k + 1). */ int getOrder() const { return this->order; } + + /** @return Const reference to the left cross-correlation matrix. */ const Eigen::MatrixXd &getLMatrix() const { return this->Left; } + + /** @return Const reference to the right cross-correlation matrix. */ const Eigen::MatrixXd &getRMatrix() const { return this->Right; } protected: + /** + * @brief Filter family/type code. + * + * The meaning of this integer is validated against known families + * (e.g., Interpolatory / Legendre) in the implementation. Kept as @c int + * here to avoid header dependencies on the specific enum. + */ int type; + + /** + * @brief Polynomial order k (k ≥ 1; K = k + 1). 
+ * + * Controls the matrix dimensions: + * rows = K*K, cols = 2K. + */ int order; + /** + * @brief Left cross-correlation coefficient matrix. + * Size: (K*K) × (2K), where K = order + 1. + */ Eigen::MatrixXd Left; + + /** + * @brief Right cross-correlation coefficient matrix. + * Size: (K*K) × (2K), where K = order + 1. + */ Eigen::MatrixXd Right; private: + /** + * @brief Compose on-disk file paths for the left/right tables. + * + * Uses the discovered filter library root @p lib and the current + * @c type / @c order to set #L_path and #R_path to the expected filenames. + * (Naming convention is family-specific; see implementation.) + */ void setCCCPaths(const std::string &lib); + + /** + * @brief Read the binary coefficient tables into #Left/#Right. + * + * Expects two files (left/right). Populates matrices with dimensions + * (K*K) × (2K). Very small magnitudes may be zeroed for numerical + * cleanliness (implementation detail). + */ void readCCCBin(); + /** @brief Full path to the left coefficient file (resolved at runtime). */ std::string L_path; + + /** @brief Full path to the right coefficient file (resolved at runtime). */ std::string R_path; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/CrossCorrelationCache.cpp b/src/core/CrossCorrelationCache.cpp index 6b0595858..975135a0d 100644 --- a/src/core/CrossCorrelationCache.cpp +++ b/src/core/CrossCorrelationCache.cpp @@ -24,13 +24,25 @@ */ /* + * File purpose (high level) + * ------------------------- + * This file implements a small, thread-safe cache for CrossCorrelation + * objects, parameterized on the filter family (template parameter T). * + * Motivation: + * CrossCorrelation(order, type) loads two dense (K*K)×(2K) matrices + * from binary files. Loading them repeatedly is expensive. This cache + * stores one instance per (order, type) and returns references to it. 
* - * \date Jul 18, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * Template parameter T: + * - Must be one of the family tags (e.g. Interpol, Legendre). + * - The explicit instantiations at the end fix T to these two values. * - * \breif + * Concurrency: + * - The cache uses MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK to guard the + * critical section that performs the initial load and insertion. + * - Once loaded, subsequent get() calls read from the cache without + * reloading (fast path). */ #include "CrossCorrelationCache.h" @@ -42,6 +54,13 @@ using namespace Eigen; namespace mrcpp { +/* + * Constructor + * ----------- + * Initialize the runtime 'type' field from the compile-time template + * parameter T, and validate that it matches a known family. + * If T is invalid, emit an error. + */ template CrossCorrelationCache::CrossCorrelationCache() { switch (T) { case (Interpol): @@ -55,6 +74,22 @@ template CrossCorrelationCache::CrossCorrelationCache() { } } +/* + * load(order) + * ----------- + * Ensure that a CrossCorrelation for the given 'order' exists in the cache. + * If not present, construct it and insert it with a memory budget hint. + * + * Steps: + * 1) Acquire OpenMP lock (thread-safe insertion). + * 2) If key 'order' is absent, allocate a new CrossCorrelation(order, type). + * 3) Compute a crude memory footprint 'memo' for cache accounting: + * - getLMatrix().size() returns (#rows * #cols) + * - Multiply by 2 because we also store a Right matrix of same size + * - Multiply by sizeof(double) to get bytes + * 4) Insert into the underlying ObjectCache keyed by 'order'. + * 5) Release lock. + */ template void CrossCorrelationCache::load(int order) { MRCPP_SET_OMP_LOCK(); if (not hasId(order)) { @@ -65,17 +100,28 @@ template void CrossCorrelationCache::load(int order) { MRCPP_UNSET_OMP_LOCK(); } +/* + * get(order) + * ---------- + * Return a reference to the cached CrossCorrelation for 'order'. 
+ * If missing, it will be loaded on-demand (calling load()). + */ template CrossCorrelation &CrossCorrelationCache::get(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order); } +/* + * getLMatrix(order) + * ----------------- + * Convenience accessor: returns a const reference to the Left matrix + * for the requested 'order', auto-loading it if necessary. + */ template const Eigen::MatrixXd &CrossCorrelationCache::getLMatrix(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order).getLMatrix(); } - /** @brief Fetches the cross correlation coefficients. * * @param[in] order: Dimension of \f$ V_0 \subset L^2(\mathbb R) \f$ minus one, @@ -146,6 +192,16 @@ template const Eigen::MatrixXd &CrossCorrelationCache::getRMatrix(int return ObjectCache::get(order).getRMatrix(); } +/* + * Explicit template instantiations + * -------------------------------- + * Build concrete cache types for the known families: + * - CrossCorrelationCache + * - CrossCorrelationCache + * + * This ensures the compiler generates code for these two variants in + * this translation unit, so users can link against them. + */ template class CrossCorrelationCache; template class CrossCorrelationCache; diff --git a/src/core/CrossCorrelationCache.h b/src/core/CrossCorrelationCache.h index 198d830b2..4deafb629 100644 --- a/src/core/CrossCorrelationCache.h +++ b/src/core/CrossCorrelationCache.h @@ -33,29 +33,110 @@ namespace mrcpp { +/** + * @def getCrossCorrelationCache(T, X) + * @brief Convenience macro to obtain a named reference to the singleton cache. 
+ * + * Expands to: + * CrossCorrelationCache &X = CrossCorrelationCache::getInstance() + * + * Example: + * getCrossCorrelationCache(Interpol, ccc); + * const auto& L = ccc.getLMatrix(order); + */ #define getCrossCorrelationCache(T, X) CrossCorrelationCache &X = CrossCorrelationCache::getInstance() +/** + * @class CrossCorrelationCache + * @brief Thread-safe cache for @ref CrossCorrelation objects, keyed by order. + * + * This cache avoids repeatedly loading the (potentially large) left/right + * cross-correlation matrices from disk. One cache instance exists per filter + * family, realized as a template parameter @p T (e.g., Interpol or Legendre). + * + * Design notes: + * - Singleton pattern (Meyers singleton) per @p T via getInstance(). + * - Inherits from @ref ObjectCache, which provides the + * generic cache interface (load/get/hasId etc.). + * - Actual loading and synchronization details are implemented in the + * corresponding .cpp; OpenMP locks guard first-time insertions. + * + * @tparam T Filter family tag (int constant), e.g. Interpol or Legendre. + */ template class CrossCorrelationCache final : public ObjectCache { public: + /** + * @brief Access the unique cache instance for the template family @p T. + * + * Uses a function-local static (Meyers singleton). Thread-safe in C++11+. + */ static CrossCorrelationCache &getInstance() { static CrossCorrelationCache theCrossCorrelationCache; return theCrossCorrelationCache; } + + /** + * @brief Ensure that the entry for @p order is present in the cache. + * + * If absent, constructs a new @ref CrossCorrelation(order, type) and + * inserts it. See .cpp for locking and memory accounting. + */ void load(int order) override; + + /** + * @brief Retrieve the cached @ref CrossCorrelation for @p order. + * + * Loads on demand if missing. Returns a reference owned by the cache. + */ CrossCorrelation &get(int order) override; + /** + * @brief Convenience accessor for the Left matrix of a given order. 
+ * + * Triggers lazy load if needed, then returns a const reference. + */ const Eigen::MatrixXd &getLMatrix(int order); + + /** + * @brief Convenience accessor for the Right matrix of a given order. + * + * Triggers lazy load if needed, then returns a const reference. + */ const Eigen::MatrixXd &getRMatrix(int order); + /** + * @brief Filter family/type code associated with this cache. + * + * Set in the private constructor based on the template parameter @p T. + * (E.g., Interpol or Legendre.) + */ int getType() const { return this->type; } protected: + /** + * @brief Filter family/type code (matches template parameter @p T). + */ int type; + + /** + * @brief Base path to filter/correlation library on disk. + * + * Reserved for potential use by loaders. Actual path resolution is + * currently handled inside CrossCorrelation (see details::find_filters()). + */ std::string libPath; ///< Base path to filter library + private: + /** + * @brief Private constructor enforces the singleton pattern. + * + * Initializes @ref type based on T; see .cpp for validation. + */ CrossCorrelationCache(); + + // Non-copyable / non-assignable — keeps the singleton unique. CrossCorrelationCache(CrossCorrelationCache const &ccc) = delete; CrossCorrelationCache &operator=(CrossCorrelationCache const &ccc) = delete; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/FilterCache.cpp b/src/core/FilterCache.cpp index e6841bd7d..3af5d5ddb 100644 --- a/src/core/FilterCache.cpp +++ b/src/core/FilterCache.cpp @@ -24,13 +24,32 @@ */ /* + * File purpose (high level) + * ------------------------- + * This file implements a small, thread-safe cache for MWFilter objects + * (multiwavelet filter banks) keyed by polynomial order. The cache is + * parameterized by filter family via the template parameter T (e.g., + * Interpol or Legendre). * + * Motivation: + * Loading/constructing MWFilter(order, type) may involve I/O and setup. 
+ * Reusing the same filter for repeated calls is faster and reduces memory + * churn. This cache ensures a single instance per (order, type). * - * \date Jul 8, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * Concurrency: + * - Uses MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK to serialize the + * first-time construction and insertion into the cache. + * - After an entry exists, get() returns it without reloading. * - * \breif + * Memory accounting: + * - A rough memory footprint (in bytes) is computed as + * f->getFilter().size() * sizeof(double) + * and passed to the base ObjectCache for bookkeeping/eviction policy. + * + * Template parameter T: + * - Must be a valid family tag (Interpol or Legendre). + * - The explicit instantiations at the end of the file make sure code for + * these two variants is emitted by the compiler. */ #include "FilterCache.h" @@ -42,6 +61,14 @@ using namespace Eigen; namespace mrcpp { +/* + * Constructor + * ----------- + * Determine the runtime 'type' field from the compile-time template parameter T. + * If T is not a recognized family, emit an error. Valid values are: + * - Interpol : interpolatory multiwavelet family + * - Legendre : Legendre multiwavelet family + */ template FilterCache::FilterCache() { switch (T) { case (Interpol): @@ -55,6 +82,20 @@ template FilterCache::FilterCache() { } } +/* + * load(order) + * ----------- + * Ensure that an MWFilter for the given 'order' exists in the cache. If not, + * construct it and insert it along with a memory estimate. + * + * Steps: + * 1) Acquire OpenMP lock to prevent concurrent insertions. + * 2) Check presence via hasId(order). If absent: + * - Allocate MWFilter(order, type). + * - Compute 'memo' as (#elements) * sizeof(double). + * - Insert into base ObjectCache keyed by 'order'. + * 3) Release the lock. 
+ */ template void FilterCache::load(int order) { MRCPP_SET_OMP_LOCK(); if (not hasId(order)) { @@ -65,16 +106,39 @@ template void FilterCache::load(int order) { MRCPP_UNSET_OMP_LOCK(); } +/* + * get(order) + * ---------- + * Retrieve a reference to the cached MWFilter for 'order'; if it doesn't + * exist yet, load() is called lazily. The reference is owned by the cache. + */ template MWFilter &FilterCache::get(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order); } +/* + * getFilterMatrix(order) + * ---------------------- + * Convenience accessor: returns a const reference to the underlying filter + * matrix for the requested 'order'. Triggers lazy load if necessary. + * + * Notes: + * - MWFilter::getFilter() is expected to return an Eigen::MatrixXd (or + * compatible type) containing the filter taps laid out as used elsewhere + * in MRCPP. + */ template const MatrixXd &FilterCache::getFilterMatrix(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order).getFilter(); } +/* + * Explicit template instantiations + * -------------------------------- + * Instantiate the cache for the two standard families so clients can link + * against these symbols without needing to compile this TU with their T. + */ template class FilterCache; template class FilterCache; diff --git a/src/core/FilterCache.h b/src/core/FilterCache.h index 89127ef06..80c0220b3 100644 --- a/src/core/FilterCache.h +++ b/src/core/FilterCache.h @@ -24,15 +24,30 @@ */ /* + * Overview + * -------- + * FilterCache provides a process-wide cache for multiwavelet filter banks + * (MWFilter) so that the same filter for a given polynomial order is created + * and loaded exactly once and then reused. This avoids repeated I/O and setup. * - * \breif FilterCache is a static class taking care of loading and - * unloading MultiWavelet filters, and their tensor counter parts. + * Design highlights: + * - There are different *families* of filters (e.g. 
Legendre vs Interpolating). + * We want caches for both, alive simultaneously. To achieve this, the + * concrete cache is a class template FilterCache, where T encodes the + * family. Each T gets its own singleton instance. * - * All data in FilterCache is static, and thus shared amongst all - * instance objects. The type of filter, Legendre or Interpolating is - * determined by a template variable so that both types of filters can - * co-exist. + * - The cache is keyed by the *order* (polynomial order k). Loading an entry + * constructs MWFilter(order, type) and stores it internally for reuse. * + * - Thread-safety and the actual load/get logic are implemented in the .cpp + * using OpenMP locks (MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK). + * + * About this header: + * - Declares a tiny abstract façade (BaseFilterCache) to allow use via a + * non-templated base pointer/reference when the family is not known at + * compile time. + * - Declares the templated FilterCache singleton with the minimal API: + * load(order), get(order), and getFilterMatrix(order). */ #pragma once @@ -45,38 +60,106 @@ namespace mrcpp { +/** + * @def getFilterCache(T, X) + * @brief Create a named reference @p X bound to the singleton FilterCache. + * + * Usage: + * getFilterCache(Interpol, cache); + * const auto& H = cache.getFilterMatrix(order); + * + * @def getLegendreFilterCache(X) + * @brief Convenience macro for FilterCache. + * + * @def getInterpolatingFilterCache(X) + * @brief Convenience macro for FilterCache. + */ #define getFilterCache(T, X) FilterCache &X = FilterCache::getInstance() #define getLegendreFilterCache(X) FilterCache &X = FilterCache::getInstance() #define getInterpolatingFilterCache(X) FilterCache &X = FilterCache::getInstance() -/** This class is an abstract base class for the various filter caches. 
- * It's needed in order to be able to use the actual filter caches - * without reference to the actual filter types */ +/** + * @class BaseFilterCache + * @brief Abstract façade over the templated filter cache. + * + * Rationale: + * Callers that do not know the filter family T at compile time can still + * interact with a cache through this non-templated interface. Concrete + * implementations are provided by FilterCache. + * + * Notes: + * - Inherits from ObjectCache to reuse generic cache plumbing. + * - Pure virtual methods delegate to the concrete implementation in + * FilterCache. + */ class BaseFilterCache : public ObjectCache { public: + /// Ensure the filter for @p order exists in the cache (lazy load if needed). void load(int order) override = 0; + + /// Retrieve the cached MWFilter for @p order (loads it on demand). MWFilter &get(int order) override = 0; + + /// Convenience accessor: return the filter matrix (const) for @p order. virtual const Eigen::MatrixXd &getFilterMatrix(int order) = 0; }; +/** + * @class FilterCache + * @tparam T Integer tag selecting the filter family (e.g., Interpol, Legendre). + * @brief Singleton cache of MWFilter objects for a specific filter family. + * + * Key properties: + * - One singleton instance per family T (Meyers singleton via getInstance()). + * - API mirrors BaseFilterCache and ObjectCache. + * - The constructor is private to enforce the singleton pattern. + * - Copy/assignment are deleted to prevent accidental duplication. + * + * Thread-safety: + * - The .cpp guards first-time loads with OpenMP locks. + * - Reads after an entry exists are lock-free through the base cache API. + */ template class FilterCache final : public BaseFilterCache { public: + /** + * @brief Access the singleton cache for the template family T. + * + * The instance is created on first use and lives until program exit. 
+ */ static FilterCache &getInstance() { static FilterCache theFilterCache; return theFilterCache; } + /// Ensure entry for @p order exists; loads it if missing (see .cpp). void load(int order) override; + + /// Retrieve the MWFilter for @p order; loads it if missing. MWFilter &get(int order) override; + + /// Convenience accessor returning a const reference to the filter matrix. const Eigen::MatrixXd &getFilterMatrix(int order) override; protected: + /** + * @brief Runtime family/type code corresponding to template parameter T. + * + * Initialized in the private constructor; used to construct MWFilter(order, type). + */ int type; private: + /** + * @brief Private constructor enforces the singleton pattern. + * + * Sets #type based on T and performs any minimal family-specific setup. + * (Validation happens in the .cpp.) + */ FilterCache(); + + // Non-copyable and non-assignable to maintain single instance semantics. FilterCache(FilterCache const &fc) = delete; FilterCache &operator=(FilterCache const &fc) = delete; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/GaussQuadrature.cpp b/src/core/GaussQuadrature.cpp index 9c30823f8..edf5c1958 100644 --- a/src/core/GaussQuadrature.cpp +++ b/src/core/GaussQuadrature.cpp @@ -24,6 +24,38 @@ */ /* + * Overview + * -------- + * This file implements Gauss-Legendre quadrature on an arbitrary interval [A,B], + * optionally replicated across a number of equal sub-intervals ("intervals"). + * + * Key members of GaussQuadrature (see header): + * - order : number of Gauss nodes per sub-interval. + * - intervals : number of equal sub-intervals tiling [A,B]. + * - A,B : lower/upper bounds of the total integration interval. + * - npts : total number of nodes = order * intervals. + * - unscaledRoots : size 'order' nodes on [-1,1] (canonical Gauss-Legendre). + * - unscaledWeights : size 'order' weights for [-1,1]. 
+ * - roots, weights : size 'npts' nodes/weights mapped onto [A,B] with + * equal replication across 'intervals' pieces. + * + * Construction logic: + * - Compute unscaled roots/weights on [-1,1] via Newton's method + * applied to Legendre polynomials (calcGaussPtsWgts). + * - Map them onto [A,B] (or each sub-interval) and scale the weights + * appropriately (calcScaledPtsWgts). + * + * Integration helpers: + * - integrate() overloads for 1D/2D/3D perform tensor-product quadrature + * using the prepared (roots,weights). The general ND version is sketched + * but intentionally not implemented here. + * + * Notes on accuracy/stability: + * - The Newton iteration uses standard initial guesses based on cosines, + * converging rapidly for moderate 'order'. The maximum stable order is + * limited by MaxGaussOrder (configured in MRCPP). + * - For composite quadrature (intervals > 1), each sub-interval reuses + * the same unscaled rule after an affine mapping. */ #include "GaussQuadrature.h" @@ -38,10 +70,18 @@ namespace mrcpp { /** Constructor for Gauss-Legendre quadrature. * - * \param order Polynominal order - * \param a Lower bound of validity - * \param b Upper bound of validity - * \param intervals Number of intervals to divde |a-b| into + * \param k Polynomial order (number of nodes per sub-interval) + * \param a Lower bound of validity (A) + * \param b Upper bound of validity (B) + * \param inter Number of sub-intervals to divide |a-b| into (>=1) + * + * Steps: + * 1) Store parameters and validate input (order bounds, a < b, intervals >= 1). + * 2) Allocate vectors for: + * - global nodes/weights (size npts = order*intervals), + * - canonical nodes/weights on [-1,1] (size order). + * 3) Compute canonical Gauss nodes/weights on [-1,1] (calcGaussPtsWgts). + * 4) Map/scale them to the composite interval [A,B] (calcScaledPtsWgts).
+ */ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { this->order = k; @@ -54,16 +94,31 @@ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { } if (a >= b) { MSG_ERROR("Invalid Gauss interval, a > b."); } if (this->intervals < 1) { MSG_ERROR("Invalid number of intervals, intervals < 1"); } + this->npts = this->order * this->intervals; + + // Global (composite) rule on [A,B], replicated across 'intervals' blocks this->roots = VectorXd::Zero(this->npts); this->weights = VectorXd::Zero(this->npts); + + // Canonical (single-block) rule on [-1,1] this->unscaledRoots = VectorXd::Zero(this->order); this->unscaledWeights = VectorXd::Zero(this->order); - // set up unscaled Gauss points and weights ( interval ]-1,1[) + + // Step 1: compute canonical [-1,1] nodes/weights using Newton's method + // on Legendre polynomials. if (calcGaussPtsWgts() != 1) { MSG_ERROR("Setup of Gauss-Legendre weights failed.") } + + // Step 2: replicate + scale onto [A,B] with 'intervals' sub-intervals. calcScaledPtsWgts(); } +/** @brief Change the integration bounds to [a,b] and rescale existing rule. + * + * If the new bounds are effectively the same (|Δ| < MachineZero), return early. + * Otherwise the bounds must satisfy a < b, and the scaled roots/weights + * are rebuilt via calcScaledPtsWgts(). + */ void GaussQuadrature::setBounds(double a, double b) { if (std::abs(this->A - a) < MachineZero and std::abs(this->B - b) < MachineZero) { return; } if (a >= b) { MSG_ERROR("Invalid bounds: a > b"); } @@ -72,6 +127,11 @@ void GaussQuadrature::setBounds(double a, double b) { calcScaledPtsWgts(); } +/** @brief Change the number of sub-intervals and rebuild the global rule. + * + * If unchanged, return early. Otherwise, reallocate global roots/weights for + * the new size npts = order * intervals and rescale across [A,B]. + */ void GaussQuadrature::setIntervals(int i) { if (i == this->intervals) { return; } if (i < 1) { MSG_ERROR("Invalid number of integration intervals: " << i); } @@ -82,9 +142,16 @@ void GaussQuadrature::setIntervals(int i) { calcScaledPtsWgts(); } -/** Calculate scaled distribution of roots for Gauss-Legendre - * quadrature on on ]a,b[. 
The number of quadrature points on the interval - * is scale*(order+1). +/** @brief Map canonical roots (on [-1,1]) into [a,b] and replicate across 'inter'. + * + * This helper writes into the provided vector @p rts. Each sub-interval + * [pos, pos+transl] is an affine image of [-1,1] with: + * xl = transl/2 + * map: x ↦ x*xl + pos + xl (center shift + scaling) + * + * The result is a concatenation of 'inter' blocks of size 'order' each. + * + * @note This function only maps nodes; it does not compute the weights. */ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) const { // length of one block @@ -93,7 +160,7 @@ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) int k = 0; double pos = a; double xl = transl * 0.5; - // scale and translate Gauss points and weights + // scale and translate Gauss points across each sub-interval for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { rts(k) = this->unscaledRoots(j) * xl + pos + xl; @@ -103,9 +170,18 @@ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) } } -/** Calculate scaled distribution of weights for Gauss-Legendre - * quadrature on on ]a,b[. The number of quadrature points on the interval - * is scale*(order+1). +/** @brief Map canonical weights (on [-1,1]) for a composite rule on [a,b]. + * + * For Gauss-Legendre, weights scale by the Jacobian of the affine transform: + * w_scaled = w_unscaled * (transl/2) = w_unscaled * xl + * + * The structure mirrors rescaleRoots(). The weights are placed consecutively + * for each sub-interval. + * + * @note As written, the loop below assigns w_unscaled * xl + pos + xl, i.e. it + * also adds the node shift. Only the scaling by 'xl' is mathematically + * correct for weights, so this looks like a defect (TODO: confirm/fix); + * calcScaledPtsWgts() applies the correct w_unscaled * xl scaling. 
*/ void GaussQuadrature::rescaleWeights(VectorXd &wgts, double a, double b, int inter) const { // length of one block @@ -114,19 +190,25 @@ void GaussQuadrature::rescaleWeights(VectorXd &wgts, double a, double b, int int int k = 0; double pos = a; double xl = transl * 0.5; - // scale and translate Gauss points and weights + // scale weights across each sub-interval (Jacobian factor = xl) for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { - wgts(k) = this->unscaledWeights(j) * xl + pos + xl; + wgts(k) = this->unscaledWeights(j) * xl + pos + xl; // NOTE: structural mirror; only '* xl' is needed for weights. ++k; } pos = pos + transl; } } -/** Calculate scaled distribution of points and weights for Gauss-Legendre - * quadrature on on ]a,b[. The number of quadrature points on the interval - * is scale*(order+1). +/** @brief Build the global (roots,weights) arrays over [A,B] with replication. + * + * Each of the 'intervals' sub-intervals has length 'transl', midpoint shift + * 'pos+xl', and scaling 'xl = transl/2'. The canonical nodes/weights are + * transformed by: + * x = x̂*xl + pos + xl + * w = ŵ*xl + * + * The resulting arrays have length npts = order*intervals. */ void GaussQuadrature::calcScaledPtsWgts() { // length of one block @@ -138,21 +220,27 @@ void GaussQuadrature::calcScaledPtsWgts() { // scale and translate Gauss points and weights for (int i = 0; i < this->intervals; i++) { for (int j = 0; j < this->order; j++) { - this->roots(k) = this->unscaledRoots(j) * xl + pos + xl; - this->weights(k) = this->unscaledWeights(j) * xl; + this->roots(k) = this->unscaledRoots(j) * xl + pos + xl; // node shift+scale + this->weights(k) = this->unscaledWeights(j) * xl; // weight scale only ++k; } pos = pos + transl; } } -/** Calulate distribution of points and weights for Guass-Legendre quadrature on - * ]-1,1[. +/** @brief Compute canonical Gauss-Legendre nodes/weights on [-1,1]. 
* - * Find quadrature points and weights by solving for the roots of - * Legendre polynomials using Newtons method. Using double precison the - * maximum stable order is currently set to 13. Return 1 on success, 0 on failure. + * Method: + * - Use symmetry: compute only K = ceil(order/2) roots z in (0,1), then reflect. + * - Initial guesses: z ≈ cos(π*(i+0.75)/(order+0.5)). + * - Newton's method on L_n(z) with derivative L'_n(z) from LegendrePoly: + * z_{new} = z - L_n(z) / L'_n(z) + * Converge until |Δz| ≤ EPS or reach NewtonMaxIter (then fail). + * - Map to [-1,1] (here it's already the working interval) via xm±xl*z with + * xm=(b+a)/2, xl=(b-a)/2 for a=-1,b=1 (thus xm=0,xl=1). + * - Weights: w_i = 2 * xl / [ (1 - z^2) * (L'_n(z))^2 ] (with xl=1). * + * Returns 1 on success, 0 on failure to converge. */ int GaussQuadrature::calcGaussPtsWgts() { int K; @@ -172,27 +260,33 @@ int GaussQuadrature::calcGaussPtsWgts() { Vector2d lp; for (int i = 0; i < K; i++) { + // Classic initial guess for the i-th root (high-accuracy seed) double z = cos(pi * (i + 0.75) / (this->order + 0.5)); int iter; for (iter = 0; iter < NewtonMaxIter; iter++) { - lp = legendrep.firstDerivative(z); + lp = legendrep.firstDerivative(z); // lp(0)=L_n(z), lp(1)=L'_n(z) double z1 = z; - z = z1 - lp(0) / lp(1); + z = z1 - lp(0) / lp(1); // Newton step if (std::abs(z - z1) <= EPS) { break; } } - if (iter == NewtonMaxIter) { return 0; } + if (iter == NewtonMaxIter) { return 0; } // did not converge - this->unscaledRoots(i) = xm - xl * z; - this->unscaledRoots(order - 1 - i) = xm + xl * z; + // Symmetric roots on [-1,1] + this->unscaledRoots(i) = xm - xl * z; // left root + this->unscaledRoots(order - 1 - i) = xm + xl * z; // right root + // Symmetric weights (identical for ±z) this->unscaledWeights(i) = 2.e0 * xl / ((1.e0 - z * z) * lp(1) * lp(1)); this->unscaledWeights(order - 1 - i) = this->unscaledWeights(i); } return 1; } -/** Integrate a 1D-function f(x) using quadrature */ +/** @brief Integrate a 
1D-function f(x) using the prepared quadrature rule. + * + * Performs: ∑_{i=0}^{npts-1} w_i * f( roots[i] ). + */ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { double isum = 0.e0; Coord<1> r; @@ -203,7 +297,11 @@ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { return isum; } -/** Integrate a 2D-function f(x1, x2) using quadrature */ +/** @brief Integrate a 2D-function f(x1, x2) using tensor-product quadrature. + * + * Performs: ∑_i ∑_j w_i w_j f( x_i, x_j ). + * Loops are ordered for reasonable cache locality. + */ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { Coord<2> r; double isum = 0.e0; @@ -219,7 +317,10 @@ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { return isum; } -/** Integrate a 3D-function f(x1, x2, x3) using quadrature */ +/** @brief Integrate a 3D-function f(x1, x2, x3) using tensor-product quadrature. + * + * Performs: ∑_i ∑_j ∑_k w_i w_j w_k f( x_i, x_j, x_k ). + */ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { Coord<3> r; @@ -241,10 +342,10 @@ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { return isum; } -/** Integrate a ND-function f(x1,...), allowing for different - * quadrature in each dimension. +/** @brief ND integration skeleton (recursive), not implemented here. * - * This function has been implemented using a recursive algorithm. + * The intended approach is a recursive tensor-product accumulation along axes, + * but this function is intentionally left unimplemented (aborts at runtime). */ double GaussQuadrature::integrate_nd(RepresentableFunction<3> &func, int axis) const { NOT_IMPLEMENTED_ABORT; diff --git a/src/core/GaussQuadrature.h b/src/core/GaussQuadrature.h index 147b4faf4..638111722 100644 --- a/src/core/GaussQuadrature.h +++ b/src/core/GaussQuadrature.h @@ -31,49 +31,177 @@ namespace mrcpp { +/** + * @brief Maximum supported Gauss-Legendre order (per sub-interval). 
+ * + * Implementation uses Newton iterations on Legendre polynomials in double + * precision and is tuned for numerical stability up to this limit. + */ const int MaxGaussOrder = 42; + +/** + * @brief Convergence tolerance for Newton's method when locating roots. + */ static const double EPS = 3.0e-12; + +/** + * @brief Safety cap on Newton iterations per root. + */ static const int NewtonMaxIter = 10; + +/** + * @brief Hard cap for a not-yet-implemented generic N-D integrator scaffold. + * (Kept for legacy/planning; current code provides explicit 1D/2D/3D.) + */ static const int MaxQuadratureDim = 7; +/** + * @class GaussQuadrature + * @brief Composite Gauss–Legendre quadrature on [A,B] with equal sub-intervals. + * + * What it represents + * ------------------ + * A parameterized Gauss–Legendre rule over a (possibly partitioned) interval: + * - order : number of Gauss nodes per sub-interval, + * - intervals : number of equal pieces tiling [A,B], + * - roots : all nodes over [A,B] for the composite rule (size npts), + * - weights : corresponding weights (size npts). + * + * In addition, it stores the canonical (unscaled) Gauss nodes/weights on [-1,1] + * so the rule can be remapped quickly if [A,B] or 'intervals' changes. + * + * Typical usage + * ------------- + * GaussQuadrature g(q, a, b, m); // q = order, [a,b] bounds, m sub-intervals + * auto val1 = g.integrate(f1D); + * auto val2 = g.integrate(f2D); // tensor-product rule (q*m in each axis) + * + * Notes + * ----- + * - “Composite” means we replicate the same order-q rule on each of the + * 'intervals' equal sub-intervals, then sum the contributions. + * - setBounds() / setIntervals() preserve the canonical [-1,1] rule and + * rebuild the scaled (roots,weights) for the new configuration. + */ class GaussQuadrature final { public: + /** + * @brief Construct a Gauss–Legendre quadrature rule. + * @param k Order (nodes per sub-interval), 0 ≤ k ≤ MaxGaussOrder. + * @param a Lower bound A (default -1). 
+ * @param b Upper bound B (default 1). + * @param inter Number of equal sub-intervals (default 1, must be ≥ 1). + * + * Effects (see .cpp): + * - Builds canonical nodes/weights on [-1,1] via Newton’s method. + * - Scales/replicates them to fill (roots,weights) over [A,B]. + */ GaussQuadrature(int k, double a = -1.0, double b = 1.0, int inter = 1); + /** + * @name Tensor-product integration helpers + * @{ + * @brief Integrate a RepresentableFunction using the prepared rule. + * + * 1D: ∑_i w_i f(x_i) + * 2D: ∑_i ∑_j w_i w_j f(x_i, x_j) + * 3D: ∑_i ∑_j ∑_k w_i w_j w_k f(x_i, x_j, x_k) + */ double integrate(RepresentableFunction<1> &func) const; double integrate(RepresentableFunction<2> &func) const; double integrate(RepresentableFunction<3> &func) const; + /** @} */ + /** + * @brief Set the number of equal sub-intervals and rebuild (roots,weights). + * @param i New number of sub-intervals (≥ 1). + * + * Reallocates global arrays to size npts = order * intervals and remaps + * the canonical [-1,1] rule accordingly. + */ void setIntervals(int i); + + /** + * @brief Set integration bounds [a,b] and rebuild (roots,weights). + * @param a Lower bound + * @param b Upper bound (must satisfy a < b) + */ void setBounds(double a, double b); + /** @return Number of sub-intervals tiling [A,B]. */ int getIntervals() const { return this->intervals; } + /** @return Upper bound B. */ double getUpperBound() const { return this->B; } + /** @return Lower bound A. */ double getLowerBound() const { return this->A; } + /** @return Composite-rule nodes over [A,B] (size npts). */ const Eigen::VectorXd &getRoots() const { return this->roots; } + /** @return Composite-rule weights over [A,B] (size npts). */ const Eigen::VectorXd &getWeights() const { return this->weights; } + + /** @return Canonical Gauss nodes on [-1,1] (size order). */ const Eigen::VectorXd &getUnscaledRoots() const { return this->unscaledRoots; } + /** @return Canonical Gauss weights on [-1,1] (size order). 
*/ const Eigen::VectorXd &getUnscaledWeights() const { return this->unscaledWeights; } protected: - int order; - double A; - double B; - int intervals; - int npts; - Eigen::VectorXd roots; - Eigen::VectorXd weights; - Eigen::VectorXd unscaledRoots; - Eigen::VectorXd unscaledWeights; + // ---- Parameters describing the rule ---- + int order; ///< Nodes per sub-interval (q) + double A; ///< Lower integration bound + double B; ///< Upper integration bound + int intervals; ///< Number of equal sub-intervals tiling [A,B] + int npts; ///< Total nodes = order * intervals + // ---- Scaled (composite) rule on [A,B] ---- + Eigen::VectorXd roots; ///< All nodes over [A,B] (size npts) + Eigen::VectorXd weights; ///< All weights over [A,B] (size npts) + + // ---- Canonical rule on [-1,1] ---- + Eigen::VectorXd unscaledRoots; ///< Nodes on [-1,1] (size 'order') + Eigen::VectorXd unscaledWeights; ///< Weights on [-1,1] (size 'order') + + /** + * @brief Map canonical nodes onto [a,b] replicated over @p inter blocks. + * @param rts Output vector of length inter*order. + * @param a,b Interval bounds. + * @param inter Number of sub-intervals (default 1). + * + * Each block is an affine image of [-1,1] with width (b-a)/inter. + */ void rescaleRoots(Eigen::VectorXd &rts, double a, double b, int inter = 1) const; + + /** + * @brief Map canonical weights onto [a,b] replicated over @p inter blocks. + * @param wgts Output vector of length inter*order. + * @param a,b Interval bounds. + * @param inter Number of sub-intervals (default 1). + * + * Weights scale by the Jacobian of the affine mapping (factor 0.5*(b-a)/inter). + */ void rescaleWeights(Eigen::VectorXd &wgts, double a, double b, int inter = 1) const; + /** + * @brief Rebuild (roots,weights) on [A,B] for the current 'intervals'. + * + * Uses the stored canonical (unscaled) rule and performs replication. + */ void calcScaledPtsWgts(); + + /** + * @brief Compute canonical Gauss–Legendre nodes/weights on [-1,1]. 
+ * @return 1 on success; 0 if Newton iteration failed to converge. + * + * Uses Newton’s method on Legendre polynomials with symmetric pairing: + * computes half the roots in (0,1) and reflects them about 0. + */ int calcGaussPtsWgts(); + /** + * @brief Planned recursive N-D integration (not implemented). + * @return No return; aborts at runtime if called. + */ double integrate_nd(RepresentableFunction<3> &func, int axis = 0) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/InterpolatingBasis.cpp b/src/core/InterpolatingBasis.cpp index 3a371b5ee..437e9d8e8 100644 --- a/src/core/InterpolatingBasis.cpp +++ b/src/core/InterpolatingBasis.cpp @@ -23,6 +23,31 @@ * */ +/* + * Overview + * -------- + * Implementation of the interpolating multiwavelet *scaling* basis setup. + * + * The goal of this class is to build a set of polynomials {I_k} such that + * they are *interpolatory* with respect to a chosen Gaussian quadrature: + * • I_k evaluated at quadrature nodes (roots) forms an identity matrix. + * • The basis is normalized w.r.t. the quadrature weights. + * + * Key ingredients: + * - QuadratureCache supplies (roots, weights) for a given quadrature order q. + * - LegendrePoly(k, 2.0, 1.0) provides a scaled/shifted Legendre polynomial + * of degree k (the exact affine scaling is handled by LegendrePoly). + * - Each interpolating basis polynomial I_k is assembled as a linear + * combination of Legendre polynomials, then scaled by sqrt(weight_k) so + * that the quadrature-induced inner-product is normalized. + * + * Data members touched here (belonging to InterpolatingBasis): + * - funcs : vector of Polynomial objects storing the scaling basis {I_k}. + * - quadVals : matrix of values of basis at quadrature nodes (made identity). + * - cvMap : diagonal map from coefficient-space → value-space at nodes. + * - vcMap : diagonal map from value-space at nodes → coefficient-space. 
+ */ + /* * * @@ -43,9 +68,9 @@ using namespace Eigen; namespace mrcpp { - -/** @brief Initialise interpolating scaling basis. - * +/** + * @brief Build the set of interpolating scaling polynomials {I_k}. + * * @details Fills * std::vector \b funcs * declared in the base class @@ -65,52 +90,96 @@ namespace mrcpp { * Here \f$ k \f$ is \b order declared in the base class. * * @note These interpolating scaling functions are defined on the unit interval \f$ (0, 1) \f$. - * + + * Procedure (for quadrature order q and scaling order s): + * 1) Fetch Gaussian quadrature nodes (roots) and weights (wgts) of order q. + * 2) Precompute Legendre polynomials L_0, L_1, …, L_{q-1} (scaled/shifted + * variant via LegendrePoly(k, 2.0, 1.0)). + * 3) For each quadrature node k: + * a) Start from a copy of L_s (highest degree used for stability). + * b) Scale it so that I_k(roots[k]) accumulates the desired unit + * contribution. The factor (2*i+1) is the standard Legendre + * normalization multiplier that appears in expansions / projections. + * c) Accumulate lower-degree Legendre polynomials down to degree 0, + * with coefficients proportional to L_i(roots[k]) * (2*i+1). + * d) Finally, scale I_k by sqrt(wgts[k]) to make the quadrature-based + * normalization diagonal and simple (see calcCVMaps()). + * 4) Store I_k into this->funcs. * - * + * Remarks: + * - The outer loop is over nodes k, producing one cardinal/interpolatory + * polynomial per node. + * - The inner loop goes from high to low degree (q-2 … 0). The comment in + * the code notes that adding higher-order polys into lower-order ones is + * numerically undesirable, hence the chosen order of accumulation. 
*/ void InterpolatingBasis::initScalingBasis() { - int qOrder = getQuadratureOrder(); - int sOrder = getScalingOrder(); // sOrder = qOrder - 1 + int qOrder = getQuadratureOrder(); // number of quadrature points (q) + int sOrder = getScalingOrder(); // polynomial "scaling order" (s) + // Obtain quadrature nodes and weights of order q. getQuadratureCache(qc); - const VectorXd roots = qc.getRoots(qOrder); - const VectorXd wgts = qc.getWeights(qOrder); + const VectorXd roots = qc.getRoots(qOrder); // size q + const VectorXd wgts = qc.getWeights(qOrder); // size q + // Precompute Legendre polynomials L_k (scaled/shifted variant). std::vector L_k; for (int k = 0; k < qOrder; k++) { L_k.push_back(LegendrePoly(k, 2.0, 1.0)); } + // Build one interpolating polynomial I_k for each quadrature node k. for (int k = 0; k < qOrder; k++) { - // Can't add higher-order polynomials to lower-order ones, so I - // changed the order of the loop + // Start from a copy of L_s. The comment explains the loop-order choice: + // We avoid "adding higher-order into lower-order"; begin at top degree. Polynomial I_k(L_k[sOrder]); + + // Seed I_k with the value of L_s at the k-th node times (2s+1). + // This sets up the leading contribution at node k. I_k *= L_k[sOrder].evalf(roots(k)) * (2.0 * sOrder + 1); + // Accumulate lower degrees i = q-2 down to 0: + // Each step adds val * L_i, where val depends on L_i evaluated at + // the current node and the usual (2i+1) normalization factor. for (int i = qOrder - 2; i >= 0; i--) { double val = L_k[i].evalf(roots(k)) * (2.0 * i + 1); I_k.addInPlace(val, L_k[i]); } + + // Normalize with the square root of the quadrature weight at node k, + // so that later the coefficient↔value maps are simple diagonal scalings. I_k *= std::sqrt(wgts[k]); + + // Save the constructed interpolatory scaling function for node k. this->funcs.push_back(I_k); } } - -/** @brief In Progress by Evgueni... - * +/** + * @brief Fill the matrix of basis values at quadrature nodes. 
* - * + * For an *interpolating* basis, evaluating basis polynomial I_k at node k' + * yields δ_{k,k'}. Therefore, the quadrature value matrix is just the identity. + * + * Implementation detail: + * - Only the diagonal entries are set to 1; all other entries remain 0 + * (matrix presumed zero-initialized elsewhere). */ void InterpolatingBasis::calcQuadratureValues() { int q_order = getQuadratureOrder(); for (int k = 0; k < q_order; k++) { this->quadVals(k, k) = 1.0; } } - -/** @brief In Progress by Evgueni... - * +/** + * @brief Build coefficient↔value diagonal maps using quadrature weights. * - * + * The maps relate coefficient vectors in the interpolatory basis to vectors + * of point-values at quadrature nodes, under the quadrature-induced inner + * product: + * + * - cvMap: coefficient → value map at nodes (scales by sqrt(1/w_k)) + * - vcMap: value → coefficient map at nodes (scales by sqrt(w_k)) + * + * With the construction in initScalingBasis(), these maps are diagonal and + * inverse of each other. */ void InterpolatingBasis::calcCVMaps() { int q_order = getQuadratureOrder(); @@ -118,8 +187,8 @@ void InterpolatingBasis::calcCVMaps() { const VectorXd &wgts = qc.getWeights(q_order); for (int k = 0; k < q_order; k++) { - this->cvMap(k, k) = std::sqrt(1.0 / wgts(k)); - this->vcMap(k, k) = std::sqrt(wgts(k)); + this->cvMap(k, k) = std::sqrt(1.0 / wgts(k)); // coeff → values + this->vcMap(k, k) = std::sqrt(wgts(k)); // values → coeff } } diff --git a/src/core/InterpolatingBasis.h b/src/core/InterpolatingBasis.h index 12251f54f..926946179 100644 --- a/src/core/InterpolatingBasis.h +++ b/src/core/InterpolatingBasis.h @@ -33,12 +33,49 @@ namespace mrcpp { * * @brief Interpolating scaling functions as defined by Alpert etal, * J Comp Phys 182, 149-190 (2002). + * + * High-level overview + * ------------------- + * InterpolatingBasis represents the *interpolatory scaling functions* used in + * the multiwavelet framework. 
These functions are constructed so that: + * • they interpolate at Gaussian quadrature nodes (cardinal property), + * • the quadrature-induced inner product is simple/diagonal, + * • they form the scaling space for the chosen polynomial order. + * + * Relationship to the hierarchy: + * - Inherits from ScalingBasis, which provides common functionality for + * scaling-function families (orders, quadrature data, storage for basis + * polynomials, value/coefficient maps, etc.). + * - The constructor finalizes initialization by calling three private + * helpers: + * 1) initScalingBasis() — build the interpolating polynomials, + * 2) calcQuadratureValues() — fill values at quadrature nodes, + * 3) calcCVMaps() — build coefficient↔value diagonal maps. + * + * Mathematical context (very short): + * - Follows the construction in Alpert (2002) for interpolatory multiwavelets, + * where basis functions {I_k} satisfy I_k(x_j) = δ_{k,j} at quadrature nodes + * {x_j}. This makes projection/evaluation particularly efficient. */ class InterpolatingBasis final : public ScalingBasis { public: /** @returns New InterpolatingBasis object * @param[in] k: Polynomial order of basis, `1 < k < 40` + * + * What happens in the constructor: + * - Calls the ScalingBasis base constructor with (k, Interpol), which + * sets the family/type to “Interpolating”. + * - initScalingBasis(): constructs the set of interpolating polynomials + * (stored in the base's internal container, typically `funcs`). + * - calcQuadratureValues(): sets the basis evaluation matrix at nodes to + * the identity (cardinality property). + * - calcCVMaps(): builds diagonal conversion maps between coefficient + * vectors and values at quadrature nodes using the quadrature weights. + * + * Precondition: + * - k must be within the supported range of the library (checked by the + * base class). Typical limits are 1 < k < 40 as noted here. 
*/ InterpolatingBasis(int k) : ScalingBasis(k, Interpol) { @@ -48,9 +85,34 @@ class InterpolatingBasis final : public ScalingBasis { } private: + /** + * @brief Construct the interpolatory scaling polynomials {I_k}. + * + * Implementation details (in .cpp): + * - Uses Gaussian quadrature roots/weights of order q. + * - Expands I_k in a Legendre polynomial basis and enforces I_k(x_j)=δ_{kj}. + * - Applies sqrt(weight) normalization so that the induced inner product + * is diagonal and the cv/vc maps become simple scalings. + */ void initScalingBasis(); + + /** + * @brief Fill the basis-at-nodes matrix. + * + * For an interpolating basis, evaluating the k-th basis at node j yields + * δ_{kj}. The implementation sets the diagonal entries to 1 (identity). + */ void calcQuadratureValues(); + + /** + * @brief Build coefficient↔value diagonal maps using quadrature weights. + * + * - cvMap(k,k) = sqrt(1 / w_k) (coefficients → values at nodes) + * - vcMap(k,k) = sqrt(w_k) (values at nodes → coefficients) + * + * These maps are exact inverses due to the chosen normalization. + */ void calcCVMaps(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/LegendreBasis.cpp b/src/core/LegendreBasis.cpp index 1fe198d09..f102baaa8 100644 --- a/src/core/LegendreBasis.cpp +++ b/src/core/LegendreBasis.cpp @@ -23,6 +23,31 @@ * */ +/* + * Overview + * -------- + * Implementation of the *Legendre* scaling basis used by the multiwavelet + * framework. In contrast to the interpolating basis, this basis consists of + * (shifted/scaled) Legendre polynomials with exact L^2-normalization. + * + * Responsibilities of this file: + * - Build the list of scaling polynomials {P_k} up to the scaling order. + * - Evaluate these polynomials at Gaussian quadrature nodes to populate + * the quadrature value matrix (basis-at-nodes). 
+ * - Construct the coefficient↔value maps using quadrature weights; here + * vcMap is assembled directly and cvMap is its matrix inverse. + * + * Notation: + * - LegendrePoly(k, 2.0, 1.0) represents the degree-k Legendre polynomial + * evaluated on an affine-mapped interval (handled by LegendrePoly). + * - getScalingOrder() returns the polynomial order "s". + * - getQuadratureOrder() returns the number of quadrature nodes "q". + * - funcs : container of basis polynomials (in the base class). + * - quadVals : matrix of basis values at quadrature nodes (size q×(s+1)). + * - vcMap : value→coefficient map built from basis values and weights. + * - cvMap : inverse of vcMap (coefficient→value). + */ + /* * * @@ -40,9 +65,15 @@ using namespace Eigen; namespace mrcpp { - -/** @brief Initialise Legendre scaling basis. - * +/** + * @brief Initialize the Legendre scaling basis {P_k}_{k=0..s}. + * + * For each degree k up to the scaling order, construct a (shifted/scaled) + * Legendre polynomial and multiply by sqrt(2k+1) to achieve exact L^2 + * normalization on the reference interval used by LegendrePoly. + * + * Effects: + * - Appends each normalized polynomial to this->funcs. * @details Fills * std::vector \b funcs * declared in the base class @@ -62,49 +93,73 @@ namespace mrcpp { * Here \f$ k \f$ is \b order declared in the base class. * * @note These Legendre scaling functions are defined on the unit interval \f$ (0, 1) \f$. - * */ void LegendreBasis::initScalingBasis() { for (int k = 0; k < getScalingOrder() + 1; k++) { - LegendrePoly L_k(k, 2.0, 1.0); - L_k *= std::sqrt(2.0 * k + 1.0); // exact normalization - this->funcs.push_back(L_k); + LegendrePoly L_k(k, 2.0, 1.0); // degree-k Legendre (mapped) + L_k *= std::sqrt(2.0 * k + 1.0); // exact normalization factor + this->funcs.push_back(L_k); // store in basis list } } - -/** @brief In Progress by Evgueni... - * +/** + * @brief Fill the matrix of basis values at Gaussian quadrature points. 
 * - * + * quadVals(i, k) := P_k( x_i ), where {x_i} are the q quadrature nodes. + * + * Steps: + * 1) Obtain quadrature roots (points) of order q. + * 2) For each basis polynomial P_k, evaluate it at all points x_i and store + * in the corresponding column k of quadVals. + * */ void LegendreBasis::calcQuadratureValues() { getQuadratureCache(qc); int q_order = getQuadratureOrder(); - const VectorXd &pts = qc.getRoots(q_order); + const VectorXd &pts = qc.getRoots(q_order); // x_i, i = 0..q-1 for (int k = 0; k < q_order; k++) { - const Polynomial &poly = this->getFunc(k); - for (int i = 0; i < q_order; i++) { this->quadVals(i, k) = poly.evalf(pts(i)); } + const Polynomial &poly = this->getFunc(k); // P_k + for (int i = 0; i < q_order; i++) { + this->quadVals(i, k) = poly.evalf(pts(i)); // quadVals(i,k) = P_k(x_i) + } } } - -/** @brief In Progress by Evgueni... - * +/** + * @brief Build the coefficient↔value maps using quadrature weights. * - * + * For the Legendre basis, we assemble vcMap directly via: + * vcMap(i, k) = P_k( x_i ) * w_i, + * where {w_i} are the quadrature weights. This corresponds to the (discrete) + * projection of the basis onto the quadrature nodes with weighting. + * + * Then we compute cvMap as the matrix inverse of vcMap: + * cvMap = (vcMap)^{-1}. + * + * Interpretation: + * - vcMap : value→coefficient (takes nodal values and produces coefficients) + * - cvMap : coefficient→value (evaluates coefficients back to nodal values) + * + * Note: + * - Unlike the interpolating basis (where maps are diagonal), for the + * Legendre basis vcMap is dense (q×q) and we invert it numerically. 
*/ void LegendreBasis::calcCVMaps() { getQuadratureCache(qc); int q_order = getQuadratureOrder(); - const VectorXd &pts = qc.getRoots(q_order); - const VectorXd &wgts = qc.getWeights(q_order); + const VectorXd &pts = qc.getRoots(q_order); // x_i + const VectorXd &wgts = qc.getWeights(q_order); // w_i + // Assemble vcMap(i,k) = P_k(x_i) * w_i for (int k = 0; k < q_order; k++) { const Polynomial &poly = this->getFunc(k); - for (int i = 0; i < q_order; i++) { this->vcMap(i, k) = poly.evalf(pts(i)) * wgts(i); } + for (int i = 0; i < q_order; i++) { + this->vcMap(i, k) = poly.evalf(pts(i)) * wgts(i); + } } + + // Invert to obtain cvMap (coefficient→value). this->cvMap = this->vcMap.inverse(); } diff --git a/src/core/LegendreBasis.h b/src/core/LegendreBasis.h index c26aa4fed..75554ce21 100644 --- a/src/core/LegendreBasis.h +++ b/src/core/LegendreBasis.h @@ -35,12 +35,54 @@ namespace mrcpp { * * @brief Legendre scaling functions as defined by Alpert, * SIAM J Math Anal 24 (1), 246 (1993). + * + * High-level overview + * ------------------- + * LegendreBasis represents the *Legendre scaling functions* used as a scaling + * space in the multiwavelet framework. In contrast to an *interpolating* basis, + * here the basis functions are (shifted/scaled) Legendre polynomials with + * exact L² normalization. This choice leads to dense coefficient↔value maps + * (built from evaluations at quadrature nodes), but offers orthogonality and + * well-understood approximation properties. + * + * Relationship to the class hierarchy + * ----------------------------------- + * - Inherits from @ref ScalingBasis, which provides: + * • storage for basis polynomials (e.g. `funcs`), + * • quadrature order and data, + * • matrices for basis evaluated at quadrature nodes (`quadVals`), + * • conversion maps between coefficient and nodal value spaces + * (`cvMap` and `vcMap`). 
+ * + * What the constructor does + * ------------------------- + * The constructor takes the polynomial order `k` (with typical bounds 1 < k < 40) + * and: + * 1) calls the base `ScalingBasis(k, Legendre)` to set the family/tag, + * 2) `initScalingBasis()` to build the list of normalized Legendre polynomials + * up to degree `k`, + * 3) `calcQuadratureValues()` to evaluate the basis at quadrature nodes, + * 4) `calcCVMaps()` to assemble value→coefficient (`vcMap`) using quadrature + * weights and then compute coefficient→value (`cvMap`) as its inverse. + * + * Notes + * ----- + * - The actual construction details are implemented in the corresponding .cpp: + * • `initScalingBasis()` multiplies P_k by √(2k+1) for exact normalization. + * • `calcQuadratureValues()` fills `quadVals(i,k) = P_k(x_i)`. + * • `calcCVMaps()` sets `vcMap(i,k) = P_k(x_i) * w_i` and inverts it. */ class LegendreBasis final : public ScalingBasis { public: /** @returns New LegendreBasis object * @param[in] k: Polynomial order of basis, `1 < k < 40` + * + * Construction sequence: + * - `ScalingBasis(k, Legendre)` tags this as a Legendre-family scaling basis. + * - `initScalingBasis()` builds normalized Legendre polynomials {P_0..P_k}. + * - `calcQuadratureValues()` evaluates the basis at Gaussian nodes. + * - `calcCVMaps()` creates value↔coefficient maps using quadrature weights. */ LegendreBasis(int k) : ScalingBasis(k, Legendre) { @@ -50,9 +92,12 @@ class LegendreBasis final : public ScalingBasis { } private: + /** @brief Build and store the normalized Legendre polynomials up to degree k. */ void initScalingBasis(); + /** @brief Fill the matrix of basis values at quadrature nodes. */ void calcQuadratureValues(); + /** @brief Assemble value→coefficient map and its inverse (coeff→value). 
*/ void calcCVMaps(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/MWFilter.cpp b/src/core/MWFilter.cpp index 8ada4f2ca..795e6f0e7 100644 --- a/src/core/MWFilter.cpp +++ b/src/core/MWFilter.cpp @@ -23,6 +23,59 @@ * */ +/* + * Overview + * -------- + * Implementation of the MWFilter class: a container for a 2K×2K multiwavelet + * filter bank split into four K×K blocks (G0, G1, H0, H1) along with their + * transposes. The filter bank supports two families (Interpol, Legendre) and + * polynomial order 'order' (with K = order + 1). + * + * Block layout and semantics + * -------------------------- + * filter = [ G0 G1 ] (top block-row: scaling/low-pass-like) + * [ H0 H1 ] (bottom block-row: wavelet/high-pass-like) + * + * The precise interpretation (low-/high-pass) is family dependent, but the + * layout is consistent across MRCPP. The class provides: + * - Loading G0 and H0 from binary files on disk. + * - Constructing G1 and H1 from symmetry relations (family-specific). + * - A full 2K×2K filter matrix 'filter' assembled from the four blocks. + * - Fast access to blocks and their transposes for compression/ + * reconstruction phases of the multiresolution transform. + * + * File I/O conventions + * -------------------- + * - Files are discovered via details::find_filters() and named by family: + * Interpol: I_H0_, I_G0_ + * Legendre: L_H0_, L_G0_ + * - Format: raw binary doubles; K rows of K doubles each, row-major-by-row + * read in this implementation (one row per read). + * - Endianness and sizeof(double) must match the producing system. 
+ * + * Symmetry completion + * ------------------- + * Given H0 and G0 from disk, H1 and G1 are derived analytically: + * Interpol: + * G1(i,j) = (-1)^(i+K) * G0(i, K-j-1) + * H1(i,j) = H0(K-i-1, K-j-1) + * Legendre: + * G1(i,j) = (-1)^(i+j+K) * G0(i,j) + * H1(i,j) = (-1)^(i+j) * H0(i,j) + * + * Transform directions + * -------------------- + * - Reconstruction uses the blocks directly: [H0 G0; H1 G1] in getSubFilter(). + * - Compression uses transposes of blocks: [H0^T H1^T; G0^T G1^T]. + * The mapping is encoded via getSubFilter(i, Compression/Reconstruction). + * + * Apply vs ApplyInverse + * --------------------- + * - apply(M/V): multiplies by 'filter' → reconstruction direction. + * - applyInverse: multiplies by 'filter^T' → compression direction. + * Both guard that the input vector/matrix has compatible row dimension 2K. + */ + /* * * @@ -48,6 +101,16 @@ using namespace Eigen; namespace mrcpp { +/* + * Constructor: MWFilter(int k, int t) + * ----------------------------------- + * Build a filter bank of family 't' and order 'k'. + * Steps: + * 1) Validate order and type. + * 2) Set file paths for H0/G0 based on family and order. + * 3) Read H0 and G0 from disk; synthesize H1/G1 from symmetry rules. + * 4) Assemble the full 2K×2K 'filter' matrix as [G0 G1; H0 H1]. + */ MWFilter::MWFilter(int k, int t) : type(t) , order(k) { @@ -69,6 +132,13 @@ MWFilter::MWFilter(int k, int t) this->filter << this->G0, this->G1, this->H0, this->H1; } +/* + * Constructor: MWFilter(int t, const MatrixXd& data) + * -------------------------------------------------- + * Construct a filter bank directly from a provided 2K×2K matrix 'data' + * (no disk I/O). The order is inferred as order = data.cols()/2 - 1. + * After validation, the four K×K blocks and their transposes are extracted. 
+ */ MWFilter::MWFilter(int t, const MatrixXd &data) { this->type = t; this->order = data.cols() / 2 - 1; @@ -85,6 +155,12 @@ MWFilter::MWFilter(int t, const MatrixXd &data) { fillFilterBlocks(); } +/* + * fillFilterBlocks() + * ------------------ + * Slice the unified 2K×2K matrix 'filter' into the four K×K sub-blocks and + * precompute their transposes. This is used after constructing from 'data'. + */ void MWFilter::fillFilterBlocks() { int K = this->order + 1; this->G0 = this->filter.block(0, 0, K, K); @@ -97,6 +173,15 @@ void MWFilter::fillFilterBlocks() { this->H1t = this->H1.transpose(); } +/* + * getSubFilter(i, oper) + * --------------------- + * Retrieve one of the four K×K subfilters depending on transform 'oper': + * - Compression: returns transposed blocks in order (H0^T, H1^T, G0^T, G1^T). + * - Reconstruction: returns direct blocks in order (H0, G0, H1, G1). + * Index i ∈ {0,1,2,3} selects which block in the specified order. + * Aborts on invalid index or oper. + */ const MatrixXd &MWFilter::getSubFilter(int i, int oper) const { switch (oper) { case (Compression): @@ -132,6 +217,11 @@ const MatrixXd &MWFilter::getSubFilter(int i, int oper) const { } } +/* + * Shorthand accessors for one direction only (avoid passing 'oper'). + * - getCompressionSubFilter(i): H0^T, H1^T, G0^T, G1^T (i=0..3) + * - getReconstructionSubFilter(i): H0, G0, H1, G1 (i=0..3) + */ const MatrixXd &MWFilter::getCompressionSubFilter(int i) const { switch (i) { case (0): @@ -162,6 +252,14 @@ const MatrixXd &MWFilter::getReconstructionSubFilter(int i) const { } } +/* + * apply / applyInverse + * -------------------- + * Multiply a vector/matrix by the filter or its transpose. + * - apply(...) : filter * data (reconstruction direction) + * - applyInverse(...) : filter^T * data (compression direction) + * Both validate row dimension matches the filter size (2K). 
+ */ void MWFilter::apply(MatrixXd &data) const { if (data.rows() != this->filter.cols()) { INVALID_ARG_ABORT } data = this->filter * data; @@ -182,6 +280,12 @@ void MWFilter::applyInverse(VectorXd &data) const { data = this->filter.transpose() * data; } +/* + * setFilterPaths(lib) + * ------------------- + * Compose full file paths for H0 and G0 depending on family and order. + * The prefix is 'I_' for Interpol and 'L_' for Legendre. + */ void MWFilter::setFilterPaths(const std::string &lib) { switch (this->type) { case (Interpol): @@ -197,6 +301,16 @@ void MWFilter::setFilterPaths(const std::string &lib) { } } +/* + * generateBlocks() + * ---------------- + * Read H0 and G0 from binary files and synthesize H1/G1 from symmetry. + * Finally, precompute all transposes. + * + * File format assumptions: + * - Each of H0 and G0 stores K rows; each row contains K doubles. + * - This function reads one row at a time into temporary buffers dH, dG. + */ void MWFilter::generateBlocks() { std::ifstream H_fis(this->H_path.c_str(), std::ios::binary); std::ifstream G_fis(this->G_path.c_str(), std::ios::binary); @@ -249,4 +363,4 @@ void MWFilter::generateBlocks() { this->H0t = this->H0.transpose(); this->H1t = this->H1.transpose(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/MWFilter.h b/src/core/MWFilter.h index 8daf43c54..099fceb4f 100644 --- a/src/core/MWFilter.h +++ b/src/core/MWFilter.h @@ -34,29 +34,144 @@ namespace mrcpp { +/** + * @class MWFilter + * @brief Container for a 2K×2K multiwavelet filter bank and its block views. + * + * High-level + * ---------- + * An MWFilter represents the matrix of a 1D multiwavelet transform for a given + * polynomial order and family (type). 
With K = order + 1, the full transform + * matrix has size 2K × 2K and is organized into four K × K blocks: + * + * filter = [ G0 G1 ] (top row: scaling channel) + * [ H0 H1 ] (bottom row: wavelet channel) + * + * In the implementation (.cpp), G0/H0 are loaded from binary tables and + * G1/H1 are derived by family-specific symmetry relations. Transposes of the + * four blocks are also precomputed for the compression direction. + * + * Usage model + * ----------- + * - Construct from (order, type) → loads data from disk and builds blocks. + * - Construct from a given 2K×2K matrix → slices into blocks (no I/O). + * - Multiply vectors/matrices with the transform or its transpose using + * apply()/applyInverse(). + * - Query individual K×K subfilters for compression or reconstruction. + * + * Notes on 'type' + * --------------- + * 'type' identifies the filter family (e.g., Interpol or Legendre). The exact + * integer codes are defined elsewhere in MRCPP and validated in the .cpp. + * + * Dimension conventions + * --------------------- + * - order = k, K = k + 1 + * - Full transform: 2K × 2K (acts on 2K-length vectors / 2K-row matrices). + */ class MWFilter final { public: + /** + * @brief Construct from order and family type; loads blocks from disk. + * @param k Polynomial order (k ≥ 0; with library-defined upper bound). + * @param t Filter family/type tag (e.g., Interpol or Legendre). + * + * Side effects (see .cpp): + * - Locates binary tables on disk (family+order dependent). + * - Reads G0 and H0, synthesizes G1 and H1 by symmetry. + * - Assembles the full 2K×2K matrix 'filter'. + */ MWFilter(int k, int t); + + /** + * @brief Construct directly from a full 2K×2K matrix (no I/O). + * @param t Filter family/type tag. + * @param data Full transform matrix of size 2K×2K. + * + * The order is inferred as K = data.cols()/2, order = K - 1. + * The four K×K blocks (and their transposes) are sliced from @p data. 
+ */ MWFilter(int t, const Eigen::MatrixXd &data); + /** + * @name Apply the transform / its transpose + * @{ + * + * @brief Apply the forward/reconstruction transform: data ← filter * data. + * Overloads exist for Eigen::MatrixXd and Eigen::VectorXd. + * + * @brief Apply the inverse/compression transform: data ← filter^T * data. + * Overloads exist for Eigen::MatrixXd and Eigen::VectorXd. + * + * Precondition: + * - data.rows() must equal filter.cols() (i.e., 2K). + */ void apply(Eigen::MatrixXd &data) const; void apply(Eigen::VectorXd &data) const; void applyInverse(Eigen::MatrixXd &data) const; void applyInverse(Eigen::VectorXd &data) const; + /** @} */ + /** @return Polynomial order k (so K = k + 1). */ int getOrder() const { return this->order; } + + /** @return Filter family/type code. */ int getType() const { return this->type; } + /** @return Const reference to the full 2K×2K transform matrix. */ const Eigen::MatrixXd &getFilter() const { return this->filter; } + + /** + * @brief Return one of the four K×K subfilters. + * @param i Block index in the chosen operation's order (0..3). + * @param oper Operation selector (direction), defaults to 0. + * + * Semantics (see .cpp): + * - For Reconstruction: blocks returned in order (H0, G0, H1, G1). + * - For Compression: transposed blocks (H0^T, H1^T, G0^T, G1^T). + * + * The actual enum/integer values for 'oper' (e.g., Reconstruction/Compression) + * are defined elsewhere (constants header). This method aborts on invalid + * @p i or @p oper. + */ const Eigen::MatrixXd &getSubFilter(int i, int oper = 0) const; + + /** + * @brief Shorthand: return the i-th compression subfilter (transposed form). + * Order: i=0→H0^T, 1→H1^T, 2→G0^T, 3→G1^T. + */ const Eigen::MatrixXd &getCompressionSubFilter(int i) const; + + /** + * @brief Shorthand: return the i-th reconstruction subfilter (direct form). + * Order: i=0→H0, 1→G0, 2→H1, 3→G1. 
+ */ const Eigen::MatrixXd &getReconstructionSubFilter(int i) const; protected: + /** + * @brief Filter family/type tag (e.g., Interpol, Legendre). + */ int type; + + /** + * @brief Polynomial order k (K = k + 1). + */ int order; + + /** + * @brief Auxiliary dimension (reserved; may be unused in current code). + */ int dim; + /** + * @name Stored matrices + * @{ + * @brief Full transform and its K×K sub-blocks (+ transposes). + * + * Layout: filter = [ G0 G1 ] + * [ H0 H1 ] + */ Eigen::MatrixXd filter; ///< Full MW-transformation matrix Eigen::MatrixXd G0; Eigen::MatrixXd G1; @@ -67,14 +182,34 @@ class MWFilter final { Eigen::MatrixXd G1t; Eigen::MatrixXd H0t; Eigen::MatrixXd H1t; + /** @} */ private: + /** + * @brief Compose on-disk paths to H0/G0 tables (family- and order-specific). + * + * Implemented in the .cpp; uses a discovered filter library root. + * Sets #H_path and #G_path accordingly. + */ void setFilterPaths(const std::string &lib); + + /** + * @brief Slice #filter into sub-blocks and compute their transposes. + * + * Used in the constructor that takes a full matrix. + */ void fillFilterBlocks(); + + /** + * @brief Load H0/G0 from disk, synthesize H1/G1 by symmetry, and + * precompute transposes. Populates #G0,#G1,#H0,#H1 and their ^T. + */ void generateBlocks(); + /** @brief Absolute file path to the H0 table. */ std::string H_path; + /** @brief Absolute file path to the G0 table. */ std::string G_path; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ObjectCache.cpp b/src/core/ObjectCache.cpp index 0df714200..a44452e99 100644 --- a/src/core/ObjectCache.cpp +++ b/src/core/ObjectCache.cpp @@ -23,6 +23,44 @@ * */ +/* + * Overview + * -------- + * This file provides the generic implementation of a very simple object cache: + * ObjectCache + * + * The cache stores pointers to objects of type T, indexed by an integer id. 
+ * It supports: + * - on-demand loading (either overridden in a derived cache, or via + * load(id, T*, memory) with an already-constructed object), + * - unloading (delete + accounting), + * - querying if an id is present, and + * - retrieving a reference to a loaded object. + * + * Key properties: + * • Sparse index space: + * Internally, `objs` and `mem` are vectors. The `highWaterMark` records + * the highest id seen so far, and the vectors are expanded with nullptr/0 + * as needed in `load(id, T*, memory)`. + * + * • Memory accounting: + * `mem[id]` stores a byte estimate for the object at index `id` + * (provided by the caller). `memLoaded` accumulates the total over + * loaded entries. There is no automatic eviction policy here; derived + * caches may use these numbers for their own management. + * + * • Thread-safety: + * This base class does not synchronize access. Derived caches (e.g., + * filter caches) add OpenMP locks around load/insert to ensure safety. + * + * • Lifetime: + * Objects are owned by the cache (deleted on unload). `clear()` iterates + * over all indices and unloads any present objects. + * + * Explicit instantiations at the end make sure the compiler emits code for + * the most common cached types used within MRCPP. + */ + #include "ObjectCache.h" #include "CrossCorrelation.h" #include "GaussQuadrature.h" @@ -32,21 +70,51 @@ namespace mrcpp { +/* + * getInstance() + * ------------- + * Meyers' singleton accessor for ObjectCache. + * A single cache instance per T exists process-wide. + */ template ObjectCache &ObjectCache::getInstance() { static ObjectCache theObjectCache; return theObjectCache; } +/* + * clear() + * ------- + * Unload all currently loaded objects by iterating the index range and + * calling unload(i) for each non-null entry. 
+ */ template void ObjectCache::clear() { for (unsigned int i = 0; i < this->objs.size(); i++) { if (this->objs[i] != nullptr) { unload(i); } } } +/* + * load(id) + * -------- + * Default "do nothing" loader. The intent is that specialized caches + * (e.g., FilterCache, CrossCorrelationCache) override this method to + * construct/load the appropriate object for the given id. Calling this + * base implementation only prints an info message. + */ template void ObjectCache::load(int id) { MSG_INFO("This routine does nothing in this class."); } +/* + * load(id, new_o, memory) + * ----------------------- + * Insert an already-constructed object pointer at index `id`. + * - Expands internal storage if `id` >= the current highWaterMark, + * filling with nullptr/0. + * - If an object is already present at `id`, the call is a no-op. + * - Otherwise, records the memory estimate, updates `memLoaded`, and stores + * the pointer. Ownership is transferred to the cache (deleted in unload()). + */ template void ObjectCache::load(int id, T *new_o, int memory) { if (id >= this->highWaterMark) { for (int i = 0; i < id - this->highWaterMark + 1; i++) { @@ -61,6 +129,14 @@ template void ObjectCache::load(int id, T *new_o, int memory) { this->objs[id] = new_o; } +/* + * unload(id) + * ---------- + * Remove and delete the object at index `id`, updating memory accounting. + * - Validates bounds. + * - Warns (and returns) if the slot is already empty. + * - Sets the slot to nullptr and zeroes its memory entry. + */ template void ObjectCache::unload(int id) { if (id < 0 or id > this->highWaterMark) { MSG_ERROR("Id out of bounds:" << id); } if (this->objs[id] == nullptr) { @@ -73,21 +149,41 @@ template void ObjectCache::unload(int id) { this->objs[id] = nullptr; } +/* + * get(id) + * ------- + * Return a reference to the object stored at `id`. + * - Emits an error if `id` is negative or if the object is not loaded. 
+ * (Note: derived caches typically call hasId()/load() to ensure presence.) + */ template T &ObjectCache::get(int id) { if (id < 0) { MSG_ERROR("Id out of bounds:" << id); } if (this->objs[id] == nullptr) { MSG_ERROR("Object not loaded!"); } return *(this->objs[id]); } +/* + * hasId(id) + * --------- + * Query whether an object for `id` is present in the cache. + * Returns false if `id` exceeds the current high-water mark or if the + * slot holds nullptr; true otherwise. + */ template bool ObjectCache::hasId(int id) { if (id > this->highWaterMark) return false; if (this->objs[id] == nullptr) return false; return true; } +/* + * Explicit template instantiations + * -------------------------------- + * Force code generation for these commonly cached types, so users linking + * to MRCPP do not need to instantiate ObjectCache themselves. + */ template class ObjectCache; template class ObjectCache; template class ObjectCache; template class ObjectCache; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ObjectCache.h b/src/core/ObjectCache.h index 2323ae681..bdac0fe50 100644 --- a/src/core/ObjectCache.h +++ b/src/core/ObjectCache.h @@ -32,46 +32,152 @@ namespace mrcpp { +/** + * @def getObjectCache(T, X) + * @brief Convenience macro to bind a local reference @p X to the singleton + * instance of ObjectCache. + * + * Expands to: + * ObjectCache &X = ObjectCache::getInstance(); + * + * Example: + * getObjectCache(MyType, cache); + * if (!cache.hasId(id)) cache.load(id); + * MyType& obj = cache.get(id); + */ #define getObjectCache(T, X) ObjectCache &X = ObjectCache::getInstance(); +/** + * @class ObjectCache + * @tparam T The object type to be cached (owned via raw pointer). + * + * @brief A lightweight, index-addressed cache with singleton access, + * optional OpenMP locking, and simple memory accounting. + * + * High-level + * ---------- + * - Stores pointers to objects of type T in a sparse, integer-indexed array. 
+ * - One global instance per T (Meyers singleton via getInstance()). + * - Provides virtual hooks `load(id)`, `unload(id)`, `get(id)` for derived + * caches to specialize on-demand construction and retrieval. + * - Tracks approximate memory usage per entry and in total. + * + * Thread-safety + * ------------- + * - The base class initializes an OpenMP lock (if MRCPP_HAS_OMP), and its + * destructor clears under that lock. However, *load/get/unload* here are not + * automatically locked; derived classes are expected to guard first-time + * construction (see FilterCache, ScalingCache, etc.). + * + * Ownership & lifetime + * -------------------- + * - The cache owns stored objects: `unload(id)` deletes them. + * - `clear()` unloads all present entries. + * - Copy/assignment are deleted to enforce singleton semantics. + * + * Indexing model + * -------------- + * - `highWaterMark` tracks the largest index ever seen. + * - Vectors `objs` and `mem` grow to accommodate new ids; gaps are filled with + * `nullptr` and `0`. Presence is tested with `hasId(id)`. + * + * Memory accounting + * ----------------- + * - `mem[id]` holds an approximate byte size for entry `id` (provided by the + * caller when inserting via `load(id, T*, memory)`). + * - `memLoaded` sums the sizes of currently loaded entries. + */ template class ObjectCache { public: + /** @brief Singleton accessor (one cache per T). */ static ObjectCache &getInstance(); + /** @brief Unload and delete all loaded objects. */ virtual void clear(); + /** + * @brief On-demand loader hook. Default impl is a no-op; derived caches + * should override to construct and insert the object for @p id. + */ virtual void load(int id); + + /** + * @brief Insert an already-constructed object pointer at index @p id. + * @param id Integer key. + * @param new_o Ownership-transferred pointer to T. + * @param memory Approximate size in bytes (for accounting). + * + * Expands internal storage if needed. 
If an object is already present + * at @p id, this is a no-op. + */ void load(int id, T *new_o, int memory); + + /** + * @brief Remove and delete the object at @p id (if present). + * Updates memory accounting. Virtual to allow specialization. + */ virtual void unload(int id); + /** + * @brief Retrieve a reference to the loaded object at @p id. + * Emits errors if @p id is invalid or if no object is loaded. + */ virtual T &get(int id); + + /** + * @brief Check whether an object is present at @p id. + * @return true if id ≤ highWaterMark and objs[id] != nullptr. + */ bool hasId(int id); + /** @return Number of slots allocated (including empty/null slots). */ int getNObjs() { return this->objs.size(); } + /** @return Total accounted memory over loaded entries. */ int getMem() { return this->memLoaded; } + /** @return Accounted memory for a specific @p id (0 if empty). */ int getMem(int id) { return this->mem[id]; } protected: + /** + * @brief Protected ctor initializes slot 0, memory 0, and OMP lock. + * + * Slot 0 is reserved/initialized so that valid ids can start at 1 if + * desired, but the cache also happily accepts id=0. + */ ObjectCache() { this->objs.push_back(nullptr); this->mem.push_back(0); MRCPP_INIT_OMP_LOCK(); } + /** + * @brief Destructor clears the cache under lock and destroys the lock. + * + * Ensures that concurrent threads do not race during teardown. + */ virtual ~ObjectCache() { MRCPP_SET_OMP_LOCK(); clear(); MRCPP_UNSET_OMP_LOCK(); MRCPP_DESTROY_OMP_LOCK(); } + + // Non-copyable singleton. ObjectCache(ObjectCache const &oc) = delete; ObjectCache &operator=(ObjectCache const &oc) = delete; + #ifdef MRCPP_HAS_OMP + /** @brief OpenMP lock for derived-class synchronized sections. */ omp_lock_t omp_lock; #endif + private: + /** @brief Largest index ever used (inclusive). */ int highWaterMark{0}; + /** @brief Sum of accounted memory over loaded entries. 
*/ int memLoaded{0}; ///< memory occupied by loaded objects + /** @brief Sparse vector of owned pointers; nullptr denotes empty slot. */ std::vector objs; ///< objects store + /** @brief Per-slot memory accounting (0 if empty). */ std::vector mem; ///< mem per object }; diff --git a/src/core/QuadratureCache.cpp b/src/core/QuadratureCache.cpp index ceb7df652..db299c0d7 100644 --- a/src/core/QuadratureCache.cpp +++ b/src/core/QuadratureCache.cpp @@ -23,6 +23,35 @@ * */ +/* + * Overview + * -------- + * Implementation of the QuadratureCache singleton. This cache wraps + * ObjectCache to manage Gauss–Legendre quadrature rules by + * integer key `k` (the rule's order). It additionally tracks a *global* + * integration domain [A,B] and a number of equal sub-intervals `intervals` + * that should be applied to *all* cached rules. + * + * Responsibilities: + * - Provide default domain settings ([0,1], intervals=1). + * - Lazy-load GaussQuadrature objects for requested orders (load/get). + * - Propagate changes to [A,B] or `intervals` to any already-cached rules. + * + * Thread-safety: + * - First-time loads are guarded by MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK. + * Once an object is present in the cache, read access is lock-free. + * + * Memory accounting: + * - The `memo` passed to the base cache is a rough estimate: 2 * k * sizeof(double). + * (This is intentionally approximate and used only for coarse bookkeeping.) + * + * Notes: + * - Iteration over cached ids uses `for (int i = 0; i < getNObjs(); ++i)`. + * Slots may be empty (not all ids 0..high-water-mark are loaded). + * - Potential typo/bug (left as-is by request): in setIntervals(), the input + * validity check tests `this->intervals < 1` instead of `ivals < 1`. + */ + /* * * @@ -38,29 +67,59 @@ namespace mrcpp { +/** + * @brief Construct the cache with default domain and replication settings. 
+ * + * Defaults: + * - A = 0.0, B = 1.0 → unit interval [0,1] + * - intervals = 1 → no subdivision (composite quadrature disabled) + * + * Actual GaussQuadrature objects are created lazily on demand in load(). + */ QuadratureCache::QuadratureCache() { this->A = 0.0; this->B = 1.0; this->intervals = 1; } +/** @brief Trivial destructor; owned objects are freed by the base cache. */ QuadratureCache::~QuadratureCache() = default; +/** + * @brief Ensure a GaussQuadrature of order k is present in the cache. + * + * Under the OMP lock: + * - If absent, allocate a new GaussQuadrature(k, A, B, intervals), + * compute a rough memory estimate, and insert it into ObjectCache. + */ void QuadratureCache::load(int k) { MRCPP_SET_OMP_LOCK(); if (not hasId(k)) { auto *gp = new GaussQuadrature(k, this->A, this->B, this->intervals); - int memo = 2 * k * sizeof(double); + int memo = 2 * k * sizeof(double); // rough accounting only ObjectCache::load(k, gp, memo); } MRCPP_UNSET_OMP_LOCK(); } +/** + * @brief Retrieve a reference to the cached quadrature of order k. + * Lazily loads it if not present yet. + */ GaussQuadrature &QuadratureCache::get(int k) { if (not hasId(k)) { load(k); } return ObjectCache::get(k); } +/** + * @brief Update the global integration bounds to [a,b] and propagate the + * change to all already-cached GaussQuadrature objects. + * + * Behavior: + * - If the new bounds are effectively identical (within MachineZero), do nothing. + * - Otherwise, set A/B and iterate over existing ids; for each loaded entry, + * call .setBounds(a,b) so its scaled nodes/weights are rebuilt. + */ void QuadratureCache::setBounds(double a, double b) { if (std::abs(this->A - a) < MachineZero and std::abs(this->B - b) < MachineZero) { return; } if (a >= b) { MSG_ERROR("Invalid Gauss interval, a > b."); } @@ -71,6 +130,19 @@ void QuadratureCache::setBounds(double a, double b) { } } +/** + * @brief Update the global number of equal sub-intervals and propagate to all + * already-cached rules. 
+ * + * Behavior: + * - If unchanged, return early. + * - Sanity check: intervals must be ≥ 1. + * - Iterate over existing ids; for each loaded entry, call .setIntervals(ivals). + * + * Note: + * - The input validity test uses `this->intervals < 1` (likely intended to be + * `ivals < 1`). Left unchanged intentionally. + */ void QuadratureCache::setIntervals(int ivals) { if (ivals == this->intervals) { return; } if (this->intervals < 1) { MSG_ERROR("Invalid number of intervals, intervals < 1"); } diff --git a/src/core/QuadratureCache.h b/src/core/QuadratureCache.h index 5e7f11feb..f0a6abaf8 100644 --- a/src/core/QuadratureCache.h +++ b/src/core/QuadratureCache.h @@ -32,38 +32,157 @@ namespace mrcpp { +/** + * @def getQuadratureCache(X) + * @brief Convenience macro to bind a local reference @p X to the global + * (singleton) QuadratureCache instance. + * + * Expands to: + * QuadratureCache &X = QuadratureCache::getInstance() + * + * Example: + * getQuadratureCache(qc); + * const auto& w = qc.getWeights(order); + */ #define getQuadratureCache(X) QuadratureCache &X = QuadratureCache::getInstance() +/** + * @class QuadratureCache + * @brief Process-wide cache for Gaussian quadrature rules (roots & weights). + * + * High-level + * ---------- + * Gaussian quadrature (Gauss-Legendre in MRCPP) is parameterized by: + * • order (number of nodes/weights), + * • integration domain [A, B], + * • optional replication over multiple equal sub-intervals ("intervals"). + * + * Constructing GaussQuadrature objects repeatedly can be costly; this cache + * stores one instance per order (and current domain/interval settings) and + * hands out references on demand. + * + * Design + * ------ + * - Singleton per process (Meyers' singleton via getInstance()). + * - Inherits from ObjectCache which provides basic + * load/unload/get plumbing indexed by an integer id (here: order). 
+ * - Domain control: + * setBounds(a,b) → set global integration bounds [A,B] + * setIntervals(i) → split [A,B] into @p i equal sub-intervals (if used) + * These settings influence how GaussQuadrature is created in load(order). + * + * Thread-safety + * ------------- + * The base ObjectCache does not synchronize by itself; specialized caches + * typically guard first-time loads with OpenMP locks in the .cpp. Users should + * assume the cache is safe to read concurrently after an entry is present. + * + * Typical usage + * ------------- + * auto& qc = QuadratureCache::getInstance(); + * qc.setBounds(-1.0, 1.0); + * const Eigen::VectorXd& x = qc.getRoots(quad_order); + * const Eigen::VectorXd& w = qc.getWeights(quad_order); + */ class QuadratureCache final : public ObjectCache { public: + /** + * @brief Access the singleton instance. + */ static QuadratureCache &getInstance() { static QuadratureCache theQuadratureCache; return theQuadratureCache; } + /** + * @brief Ensure the quadrature of a given @p order is loaded. + * + * Implemented in the .cpp: constructs/initializes a GaussQuadrature that + * reflects the current @ref A, @ref B, and @ref intervals settings and + * inserts it into the underlying ObjectCache if absent. + */ void load(int order); + + /** + * @brief Retrieve the cached quadrature for @p order (lazy-loads if needed). + * @return Reference to the GaussQuadrature object owned by the cache. + */ GaussQuadrature &get(int order); + /** + * @name Convenience accessors (fetch vectors directly) + * @{ + * @brief Get the vector of abscissas (roots) for a given order. + */ const Eigen::VectorXd &getRoots(int i) { return get(i).getRoots(); } + + /** + * @brief Get the vector of weights for a given order. + */ const Eigen::VectorXd &getWeights(int i) { return get(i).getWeights(); } + /** @} */ + /** + * @brief Set the number of equal sub-intervals for composite quadrature. 
+ * + * Interpretation: + * - If intervals > 1, the base interval [A,B] can be partitioned into + * `intervals` equal pieces and the quadrature replicated/shifted. + * - Exact semantics depend on GaussQuadrature; this cache records the + * value and (per the .cpp notes) forwards it to already-cached rules. + */ void setIntervals(int i); + + /** + * @brief Set the integration bounds used by all cached and future rules. + * @param a Lower bound A + * @param b Upper bound B + * + * Newly created GaussQuadrature objects will target [A,B]. Per the .cpp + * notes, already-cached entries are updated through their setBounds(a,b). + */ void setBounds(double a, double b); + /** @return Current number of sub-intervals recorded in the cache. */ int getIntervals() const { return this->intervals; } + + /** @return Current upper integration bound B. */ double getUpperBound() const { return this->B; } + + /** @return Current lower integration bound A. */ double getLowerBound() const { return this->A; } private: + /** + * @brief Lower and upper bounds of the integration domain. + * + * Defaults are set in the private constructor (see .cpp). Changing these + * via setBounds() also updates already-cached rules (see .cpp notes). + */ double A; double B; + + /** + * @brief Number of equal sub-intervals used to tile [A,B]. + * + * When >1, the cache can generate composite quadrature by replicating the + * base rule on each sub-interval (implementation in .cpp / GaussQuadrature). + */ int intervals; + /** + * @brief Private constructor initializes default bounds/intervals. + * + * Enforces the singleton pattern; use getInstance() to access the cache. + */ QuadratureCache(); + + /// Private destructor; cache cleans up its owned objects via ObjectCache. ~QuadratureCache(); + // Non-copyable / non-assignable to maintain singleton semantics. 
QuadratureCache(QuadratureCache const &qc) = delete; QuadratureCache &operator=(QuadratureCache const &qc) = delete; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ScalingBasis.cpp b/src/core/ScalingBasis.cpp index 99dc83f24..49a1cff51 100644 --- a/src/core/ScalingBasis.cpp +++ b/src/core/ScalingBasis.cpp @@ -23,29 +23,94 @@ * */ +/* + * Overview + * -------- + * ScalingBasis provides common functionality shared by concrete scaling bases + * (e.g., LegendreBasis, InterpolatingBasis). It stores: + * • the basis family tag (type) and polynomial order (order), + * • the list of basis polynomials (this->funcs) managed in the headers, + * • matrices used to convert between coefficient and nodal-value spaces: + * - quadVals : basis evaluated at quadrature nodes (q×q), + * - cvMap : coefficient → value map at nodes (q×q), + * - vcMap : value → coefficient map at nodes (q×q). + * + * Responsibilities in this file: + * - Construct and size the conversion matrices based on the quadrature order. + * - Provide a generic evaluator `evalf` to sample the basis at arbitrary points. + * - Expose the proper conversion map (cvMap or vcMap) given an operation flag. + * - Define equality/inequality operators and a simple printer. + * + * Conventions: + * - q := getQuadratureOrder() is the number of basis functions and nodes. + * - `type` identifies the scaling family (Legendre vs Interpol); codes live + * in shared headers. + * - `Forward` indicates coefficient→value mapping; anything else selects the + * reverse map (value→coefficient). + */ + #include "ScalingBasis.h" #include "utils/Printer.h" namespace mrcpp { +/** + * @brief Construct a base scaling space for family @p t and order @p k. + * + * Initializes: + * - type/order (with a minimal validity check on order), + * - square q×q matrices quadVals, cvMap, vcMap filled with zeros, + * where q = getQuadratureOrder() is determined by the concrete basis. 
+ * + * Concrete derived classes are responsible for: + * - populating `funcs` with q basis polynomials, + * - filling `quadVals`, + * - building `cvMap` and `vcMap`. + */ ScalingBasis::ScalingBasis(int k, int t) : type(t) , order(k) { if (this->order < 0) MSG_ABORT("Invalid scaling order"); int q_order = getQuadratureOrder(); - this->quadVals = Eigen::MatrixXd::Zero(q_order, q_order); - this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); - this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); + this->quadVals = Eigen::MatrixXd::Zero(q_order, q_order); // basis@nodes + this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); // coeff → values + this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); // values → coeff } +/** + * @brief Evaluate each basis polynomial at a set of points. + * + * @param[in] r Pointer to an array of length D containing evaluation points. + * @param[out] vals Matrix of size (K × D) where: + * - K must equal the number of basis functions (funcs.size()), + * - column d receives the vector [ φ_0(r[d]), …, φ_{K-1}(r[d]) ]^T. + * + * Precondition: + * - vals.rows() == funcs.size(). If not, an error is reported. + * + * Notes: + * - The layout is "basis index in rows, sample index in columns". + * - getFunc(k) returns the k-th polynomial; evalf(x) evaluates it at x. + */ void ScalingBasis::evalf(const double *r, Eigen::MatrixXd &vals) const { if (vals.rows() != this->funcs.size()) MSG_ERROR("Invalid argument"); for (int d = 0; d < vals.cols(); d++) { - for (int k = 0; k < vals.rows(); k++) { vals(k, d) = getFunc(k).evalf(r[d]); } + for (int k = 0; k < vals.rows(); k++) { + vals(k, d) = getFunc(k).evalf(r[d]); + } } } +/** + * @brief Retrieve the appropriate coefficient/value conversion map. + * + * @param operation If equal to Forward, return cvMap (coeff → values at nodes). + * Otherwise, return vcMap (values at nodes → coeff). + * + * The precise enum/integer value of Forward is defined in shared headers. 
+ * Derived classes ensure cvMap/vcMap are properly populated in their init code. + */ const Eigen::MatrixXd &ScalingBasis::getCVMap(int operation) const { if (operation == Forward) { return this->cvMap; @@ -54,18 +119,31 @@ const Eigen::MatrixXd &ScalingBasis::getCVMap(int operation) const { } } +/** + * @brief Two scaling bases are equal iff both family type and order match. + */ bool ScalingBasis::operator==(const ScalingBasis &basis) const { if (this->type != basis.type) return false; if (this->order != basis.order) return false; return true; } +/** + * @brief Negation of operator== (true if type or order differs). + */ bool ScalingBasis::operator!=(const ScalingBasis &basis) const { if (this->type != basis.type) return true; if (this->order != basis.order) return true; return false; } +/** + * @brief Stream printer with a minimal summary (order and family name). + * + * Prints: + * - "polynomial order : " + * - "polynomial type : " + */ std::ostream &ScalingBasis::print(std::ostream &o) const { o << " polynomial order : " << getScalingOrder() << std::endl; if (getScalingType() == Legendre) { @@ -78,4 +156,4 @@ std::ostream &ScalingBasis::print(std::ostream &o) const { return o; } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ScalingBasis.h b/src/core/ScalingBasis.h index 2ea48fba0..d5914a9b6 100644 --- a/src/core/ScalingBasis.h +++ b/src/core/ScalingBasis.h @@ -33,37 +33,129 @@ namespace mrcpp { +/** + * @class ScalingBasis + * @brief Abstract base for scaling-function families (Legendre, Interpolating). + * + * What this class represents + * -------------------------- + * A *scaling basis* is a finite set of 1D polynomials {φ_k}_{k=0..order} + * that span the scaling space at level 0 for a given multiwavelet family. 
+ * Concrete families (e.g., LegendreBasis, InterpolatingBasis) derive from + * this class and: + * • construct and store the polynomials in `funcs`, + * • populate the evaluation matrix at quadrature nodes `quadVals`, + * • build coefficient↔value conversion maps `cvMap` / `vcMap`. + * + * Dimensions and conventions + * -------------------------- + * - order := polynomial degree cutoff (≥ 0). + * - Quadrature order q = order + 1 (one node per basis function). + * - `quadVals` is q×q with layout: rows = nodes, cols = basis index. + * - `cvMap` maps coefficient vectors → nodal values (Forward). + * - `vcMap` maps nodal values → coefficient vectors (Backward). + * + * Responsibilities provided here + * ------------------------------ + * - Store family `type` (Legendre or Interpol, defined in constants.h) and `order`. + * - Provide access to basis polynomials and to the conversion matrices. + * - Offer a generic evaluator to sample the basis at arbitrary points. + * - Define equality operators (same family and order). + * + * Notes for implementers of derived classes + * ----------------------------------------- + * - Call the base ctor with (k, t). It sizes `quadVals`, `cvMap`, `vcMap` + * to q×q zeros; you must fill them in your implementation (.cpp). + * - Push back exactly q polynomials into `funcs` in the order k = 0..order. + */ class ScalingBasis { public: + /** + * @brief Construct a base scaling space descriptor. + * @param k Polynomial order (k ≥ 0). + * @param t Family tag (e.g., Legendre or Interpol). + * + * Effects (implemented in the .cpp): + * - Stores @p t, @p k. + * - Allocates q×q zero matrices for `quadVals`, `cvMap`, and `vcMap`, + * where q = k + 1. + * - Derived classes then fill these structures. + */ ScalingBasis(int k, int t); virtual ~ScalingBasis() = default; + /** + * @brief Evaluate all basis polynomials at D sample points. + * @param r Pointer to array of D abscissas. 
+ * @param vals Output matrix of size (q × D) with + * vals(k, d) = φ_k( r[d] ), k = 0..q-1. + * + * Precondition: + * - vals.rows() == funcs.size() == q. + * + * Remarks: + * - Column-major Eigen storage is irrelevant here; we just fill entries. + * - Useful for projecting/evaluating on arbitrary nodes (not only quadrature). + */ void evalf(const double *r, Eigen::MatrixXd &vals) const; + /** @return Mutable reference to the k-th basis polynomial φ_k. */ Polynomial &getFunc(int k) { return this->funcs[k]; } + /** @return Const reference to the k-th basis polynomial φ_k. */ const Polynomial &getFunc(int k) const { return this->funcs[k]; } + /** @return Family tag (Legendre or Interpol; see MRCPP/constants.h). */ int getScalingType() const { return this->type; } + /** @return Polynomial order k. */ int getScalingOrder() const { return this->order; } + /** @return Quadrature order q = k + 1 (one node per basis function). */ int getQuadratureOrder() const { return this->order + 1; } + /** @return Matrix of basis values at quadrature nodes (q × q). */ const Eigen::MatrixXd &getQuadratureValues() const { return this->quadVals; } + + /** + * @brief Access the coefficient/value conversion map. + * @param operation Use `Forward` (from constants.h) for coeff→value, + * anything else selects value→coeff. + * @return const reference to `cvMap` (Forward) or `vcMap` (Backward). + */ const Eigen::MatrixXd &getCVMap(int operation) const; + /** @brief Equality iff same family type and polynomial order. */ bool operator==(const ScalingBasis &basis) const; + /** @brief Inequality iff family type or polynomial order differs. */ bool operator!=(const ScalingBasis &basis) const; + /** + * @brief Stream print helper (delegates to the non-virtual print()). + * Prints order and a human-readable family name. + */ friend std::ostream &operator<<(std::ostream &o, const ScalingBasis &bas) { return bas.print(o); } protected: + /** @brief Family tag (Legendre or Interpol). 
*/ const int type; + /** @brief Polynomial order k. */ const int order; + + /** @brief Basis values at quadrature points: quadVals(i,k) = φ_k(x_i). */ Eigen::MatrixXd quadVals; // function values at quadrature pts + + /** @brief Coefficient → value (at nodes) linear map (q × q). */ Eigen::MatrixXd cvMap; // coef-value transformation matrix + + /** @brief Value (at nodes) → coefficient linear map (q × q). */ Eigen::MatrixXd vcMap; // value-coef transformation matrix + + /** @brief List of basis polynomials φ_0..φ_k (size q). */ std::vector funcs; + /** + * @brief Pretty-printer called by operator<< (prints order and family name). + * Not virtual: operator<< always calls ScalingBasis::print, never a derived-class member. + */ std::ostream &print(std::ostream &o) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ScalingCache.h b/src/core/ScalingCache.h index 6ea795b6e..03efc734e 100644 --- a/src/core/ScalingCache.h +++ b/src/core/ScalingCache.h @@ -29,16 +29,76 @@ namespace mrcpp { +/** + * @def getLegendreScalingCache(X) + * @brief Convenience macro to bind a local reference @p X to the singleton + * ScalingCache specialized for LegendreBasis. + * + * Usage: + * getLegendreScalingCache(cache); + * auto& B = cache.get(order); + */ #define getLegendreScalingCache(X) ScalingCache &X = ScalingCache::getInstance() + +/** + * @def getInterpolatingScalingCache(X) + * @brief Convenience macro to bind a local reference @p X to the singleton + * ScalingCache specialized for InterpolatingBasis. + * + * Usage: + * getInterpolatingScalingCache(cache); + * auto& B = cache.get(order); + */ #define getInterpolatingScalingCache(X) \ ScalingCache &X = ScalingCache::getInstance() +/** + * @class ScalingCache + * @tparam P A concrete scaling-basis type (e.g., LegendreBasis, InterpolatingBasis). + * @brief Thread-safe singleton cache for scaling bases keyed by polynomial order. 
+ * + * Motivation + * ---------- + * Constructing a scaling basis of order `k` (which internally prepares + * polynomials, quadrature-derived maps, etc.) can be relatively expensive. + * This cache guarantees that for a given template parameter P (basis family) + * and a given order, exactly one instance is created and then reused. + * + * Design + * ------ + * - Inherits from @ref ObjectCache

, which provides sparse indexed storage, + * memory accounting, and basic get/load/unload primitives. + * - Singleton per `P` (Meyers singleton via getInstance()) so that all parts + * of the program share the same cache for the same basis family. + * - Thread-safety: the first-time construction/insertion is protected by + * MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK. Reads after presence are fast. + * + * Memory accounting + * ----------------- + * The `memo` value passed to ObjectCache is a *rough* byte estimate: + * memo ≈ 2 * (order+1)^2 * sizeof(double) + * The constant factor “2” approximates two q×q matrices stored by a basis + * (e.g., cvMap and vcMap), where q = order + 1. It is not an exact footprint, + * but suffices for simple bookkeeping. + */ template class ScalingCache final : public ObjectCache

{ public: + /** + * @brief Access the singleton instance for the template parameter P. + * + * One instance per concrete basis family exists process-wide. + */ static ScalingCache &getInstance() { static ScalingCache theScalingCache; return theScalingCache; } + + /** + * @brief Ensure the basis of a given @p order is present in the cache. + * + * If absent, constructs a new P(order) under an OpenMP lock and inserts it + * into the underlying ObjectCache with a rough memory estimate. + */ void load(int order) { MRCPP_SET_OMP_LOCK(); if (not this->hasId(order)) { @@ -49,15 +109,21 @@ template class ScalingCache final : public ObjectCache

{ MRCPP_UNSET_OMP_LOCK(); } + /** + * @brief Retrieve the cached basis of a given @p order (lazy-loads if needed). + * @return Reference to the basis object owned by the cache. + */ P &get(int order) { if (not this->hasId(order)) { load(order); } return ObjectCache

::get(order); } private: + /// Private constructor enforces the singleton pattern. ScalingCache() {} + // Non-copyable / non-assignable. ScalingCache(const ScalingCache

&sc) = delete; ScalingCache

&operator=(const ScalingCache

&sc) = delete; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From 29b5a1ff764fdee40b7802a5f70083823471e2cd Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Mon, 27 Oct 2025 20:01:26 +0300 Subject: [PATCH 03/51] Done Documentation in functions folder --- src/functions/AnalyticFunction.h | 109 ++++++++++- src/functions/BoysFunction.cpp | 81 +++++++- src/functions/BoysFunction.h | 65 ++++++- src/functions/GaussExp.cpp | 201 ++++++++++++++++++- src/functions/GaussExp.h | 191 ++++++++++++++++-- src/functions/GaussFunc.cpp | 135 ++++++++++++- src/functions/GaussFunc.h | 109 +++++++++-- src/functions/GaussPoly.cpp | 112 +++++++++-- src/functions/GaussPoly.h | 176 ++++++++++++++++- src/functions/Gaussian.cpp | 190 ++++++++++++++---- src/functions/Gaussian.h | 245 ++++++++++++++++++++++-- src/functions/JpowerIntegrals.cpp | 129 +++++++++++-- src/functions/JpowerIntegrals.h | 205 +++++++++++--------- src/functions/LegendrePoly.cpp | 165 ++++++++++++---- src/functions/LegendrePoly.h | 84 +++++++- src/functions/Polynomial.cpp | 207 ++++++++++++++++---- src/functions/Polynomial.h | 179 +++++++++++++++-- src/functions/RepresentableFunction.cpp | 104 ++++++++-- src/functions/RepresentableFunction.h | 151 +++++++++++++-- src/functions/function_utils.cpp | 239 +++++++++++++++++------ src/functions/function_utils.h | 57 +++++- src/functions/special_functions.cpp | 108 ++++++----- src/functions/special_functions.h | 55 +++++- 23 files changed, 2845 insertions(+), 452 deletions(-) diff --git a/src/functions/AnalyticFunction.h b/src/functions/AnalyticFunction.h index aca20285b..2705b7ddc 100644 --- a/src/functions/AnalyticFunction.h +++ b/src/functions/AnalyticFunction.h @@ -32,27 +32,126 @@ namespace mrcpp { -template class AnalyticFunction : public RepresentableFunction { +/** + * @class AnalyticFunction + * @tparam D Spatial dimension (1, 2, 3, …). + * @tparam T Numeric value type (defaults to double). 
+ * + * @brief Thin adapter that turns a C++ callable `std::function&)>` + * into a @ref RepresentableFunction suitable for MRCPP algorithms. + * + * Motivation + * ---------- + * Many MRCPP routines operate on the abstract interface `RepresentableFunction` + * (which provides domain bounds and an `evalf()` method). `AnalyticFunction` lets + * users plug in any analytic lambda or function pointer without writing a full + * derived class. + * + * Domain handling + * --------------- + * The base class @ref RepresentableFunction stores lower/upper bounds for each + * coordinate dimension. `AnalyticFunction::evalf` first checks + * `RepresentableFunction::outOfBounds(r)` and **returns 0** for points outside + * the domain; otherwise it forwards to the user-supplied callable. + * + * Typical usage + * ------------- + * @code + * using F = AnalyticFunction<2>; + * std::vector a = {0.0, 0.0}; + * std::vector b = {1.0, 2.0}; + * F f( + * [](const Coord<2>& x) -> double { + * // x[0] = x, x[1] = y + * return std::sin(x[0]) * std::exp(-x[1]); + * }, + * a, b + * ); + * Coord<2> p; p[0] = 0.3; p[1] = 1.5; + * double v = f.evalf(p); // evaluates lambda if p within [a,b] + * @endcode + * + * Thread-safety + * ------------- + * `AnalyticFunction` itself holds only an immutable std::function after construction. + * It is safe to call `evalf` concurrently *iff your callable is thread-safe* and + * does not mutate shared state. + */ +template +class AnalyticFunction : public RepresentableFunction { public: + /** @brief Default constructor; leaves the callable empty. */ AnalyticFunction() = default; + + /** @brief Virtual destructor to match the base class interface. */ ~AnalyticFunction() override = default; - AnalyticFunction(std::function &r)> f, const double *a = nullptr, const double *b = nullptr) + /** + * @brief Construct with a callable and optional raw-pointer bounds. + * + * @param f Callable of signature `T(const Coord&)`. 
+ * @param a Optional pointer to an array of D lower bounds (can be nullptr). + * @param b Optional pointer to an array of D upper bounds (can be nullptr). + * + * The bounds are forwarded to the @ref RepresentableFunction base; if both + * are nullptr the base uses its defaults (implementation-defined). + */ + AnalyticFunction(std::function &r)> f, + const double *a = nullptr, + const double *b = nullptr) : RepresentableFunction(a, b) , func(f) {} - AnalyticFunction(std::function &r)> f, const std::vector &a, const std::vector &b) + + /** + * @brief Construct with a callable and STL vector bounds. + * + * @param f Callable of signature `T(const Coord&)`. + * @param a Vector of D lower bounds. + * @param b Vector of D upper bounds. + * + * Convenience overload that forwards raw pointers of the vectors to the + * other constructor. The vectors must have length D. + */ + AnalyticFunction(std::function &r)> f, + const std::vector &a, + const std::vector &b) : AnalyticFunction(f, a.data(), b.data()) {} + /** + * @brief Replace the underlying callable at runtime. + * + * @param f New callable `T(const Coord&)`. + * + * No synchronization is performed; if other threads may call `evalf` + * concurrently, arrange external synchronization. + */ void set(std::function &r)> f) { this->func = f; } + /** + * @brief Evaluate the function at coordinate @p r. + * + * Behavior: + * - If @p r lies outside the domain bounds (per `outOfBounds(r)`), return 0. + * - Otherwise, return `func(r)`. + * + * @note Returning 0 outside the domain is consistent with how many MRCPP + * integrators and projectors treat functions on bounded supports. + */ T evalf(const Coord &r) const override { - T val = 0.0; + T val = T(0); if (not this->outOfBounds(r)) val = this->func(r); return val; } protected: + /** + * @brief Stored analytic callable. + * + * The signature uses `Coord` (MRCPP’s fixed-size coordinate array). 
+ * The callable should be side-effect free or externally synchronized if + * used from multiple threads. + */ std::function &r)> func; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/BoysFunction.cpp b/src/functions/BoysFunction.cpp index 7b9f1ddb5..a23e59d1c 100644 --- a/src/functions/BoysFunction.cpp +++ b/src/functions/BoysFunction.cpp @@ -23,25 +23,96 @@ * */ +/** + * @file BoysFunction.cpp + * + * @brief Numerically evaluates the Boys function + * \f[ + * F_n(x) \;=\; \int_{0}^{1} t^{2n}\,e^{-x\,t^2}\,dt + * \f] + * by projecting the integrand onto an adaptive multiresolution basis and then + * integrating the resulting @ref FunctionTree. + * + * Design overview + * --------------- + * 1) The class derives from @ref RepresentableFunction in 1D so that it can be + * used wherever MRCPP expects a function object with `evalf`. + * 2) Given an input coordinate `r`, we interpret `x = r[0]` and define the + * integrand + * g_x(t) = e^{-x t^2} · t^{2n}, t ∈ [0,1]. + * 3) We build a 1D @ref FunctionTree using an @ref MRA configured with an + * interpolating scaling basis (order 13 by default here), and call + * `project(prec, tree, f)` which adaptively refines the tree so that the + * projection error is below `prec`. + * 4) Finally we call `tree.integrate()` to integrate the projected function on + * [0,1], which is the desired value F_n(x). + * + * Notes + * ----- + * - The basis choice (`InterpolatingBasis(13)`) is a trade-off: sufficiently + * smooth to capture Gaussians well, while keeping stencil sizes reasonable. + * - The adaptive projection concentrates resolution where the integrand has + * structure (e.g., for large x near t=0 the function is sharply peaked). + * - Printing is muted during evaluation to keep the call side quiet. 
+ */ + #include "BoysFunction.h" -#include "core/InterpolatingBasis.h" -#include "treebuilders/project.h" -#include "trees/FunctionTree.h" +#include "core/InterpolatingBasis.h" // basis used in the MRA +#include "treebuilders/project.h" // adaptive projection into a FunctionTree +#include "trees/FunctionTree.h" // hierarchical representation + integrate() #include "utils/Printer.h" namespace mrcpp { +/** + * @brief Construct a BoysFunction evaluator. + * + * @param n Non-negative integer order in @f$F_n(x)@f$ (power @f$t^{2n}@f$). + * @param p Target projection precision (controls adaptive refinement). + * + * Internals: + * - `order` stores @p n. + * - `prec` stores the target accuracy threshold used by `project`. + * - `MRA` is initialised over a default 1D bounding box with an + * interpolating basis of order 13; this MRA is reused per evaluation. + */ BoysFunction::BoysFunction(int n, double p) : RepresentableFunction<1, double>() , order(n) , prec(p) , MRA(BoundingBox<1>(), InterpolatingBasis(13)) {} +/** + * @brief Evaluate @f$F_n(x)@f$ at the requested abscissa. + * + * @param r Coordinate container; the one and only component is @f$x=r[0]@f$. + * @return The value of @f$F_n(x)=\int_0^1 t^{2n} e^{-x t^2}\,dt@f$. + * + * Algorithm: + * 1) Silence the printer and remember the old verbosity. + * 2) Capture `x` and `n` and form a lambda `f(t)` representing the integrand + * on @f$t\in[0,1]@f$. We compute `t_2 = t^2`, `t_2n = (t^2)^n`, and return + * `exp(-x * t_2) * t_2n`. For `n=0`, `t_2n` is set to 1 for speed/stability. + * 3) Build a fresh `FunctionTree<1,double>` bound to the stored `MRA`. + * 4) Call `project(prec, tree, f)` to approximate `f` within the tolerance + * `prec` by adaptively refining nodes where needed. + * 5) Call `tree.integrate()` to obtain the integral over [0,1]. + * 6) Restore the printer level and return the result. 
+ * + * Accuracy remarks: + * - The achieved error depends on `prec`, the basis order, and the behaviour + * of the integrand (large x leads to rapid decay, which is well captured + * by the multiresolution approach). + */ double BoysFunction::evalf(const Coord<1> &r) const { + // Temporarily mute verbose output from the projection/integration. int oldlevel = Printer::setPrintLevel(0); int n = this->order; double x = r[0]; + + // Integrand g_x(t) = exp(-x * t^2) * (t^2)^n over t in [0,1]. + // Written in terms of t^2 to reduce pow() evaluation count. auto f = [x, n](const Coord<1> &t) -> double { double t_2 = t[0] * t[0]; double xt_2 = x * t_2; @@ -50,12 +121,14 @@ double BoysFunction::evalf(const Coord<1> &r) const { return std::exp(-xt_2) * t_2n; }; + // Build an adaptive representation of f on [0,1] and integrate it. FunctionTree<1, double> tree(this->MRA); mrcpp::project<1, double>(this->prec, tree, f); double result = tree.integrate(); + // Restore previous verbosity and return. Printer::setPrintLevel(oldlevel); return result; } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/BoysFunction.h b/src/functions/BoysFunction.h index f8b8824d1..237707f27 100644 --- a/src/functions/BoysFunction.h +++ b/src/functions/BoysFunction.h @@ -30,16 +30,79 @@ namespace mrcpp { +/** + * @class BoysFunction + * @brief Adaptive multiresolution evaluator for the 1D Boys-type integral + * \f$F_n(x) = \int_{0}^{1} t^{2n}\,e^{-x\,t^2}\,dt\f$. + * + * What this class does + * -------------------- + * This class provides an implementation of MRCPP's @ref RepresentableFunction + * interface for the scalar function \f$F_n(x)\f$ of a single variable \f$x\f$. + * Given an input abscissa \f$x\f$, `evalf()`: + * 1. builds the integrand \f$g_x(t)=e^{-x t^2}\,t^{2n}\f$ on \f$t\in[0,1]\f$, + * 2. projects it adaptively into a 1D multiresolution basis (using the + * `MultiResolutionAnalysis<1>` member), + * 3. 
integrates the resulting @ref FunctionTree over the unit interval. + * + * Notes on conventions + * -------------------- + * - This is the standard Boys function of quantum chemistry, whose + * defining integral runs over the unit interval + * \f$[0,1]\f$, consistent with the implementation in the corresponding .cpp. + * - The basis family and order used by the `MRA` are chosen in the .cpp + * definition (currently an interpolating basis of order 13). + * + * Accuracy and performance + * ------------------------ + * - The tolerance passed at construction (`prec`) controls the adaptive + * projection target. Smaller values yield higher accuracy at greater cost. + * - The multiresolution approach concentrates degrees of freedom where the + * integrand has structure (e.g., near \f$t=0\f$ for large \f$x\f$). + */ class BoysFunction final : public RepresentableFunction<1, double> { public: + /** + * @brief Construct an evaluator for \f$F_n(x)\f$. + * + * @param n Non-negative integer order in \f$F_n(x)\f$ (power \f$t^{2n}\f$). + * @param prec Target projection precision for the adaptive MRA + * (default \f$10^{-10}\f$). + * + * Implementation detail: + * The `MRA` member is initialised in the .cpp with a default 1D bounding + * box and a fixed scaling basis; this header does not constrain that choice. + */ BoysFunction(int n, double prec = 1.0e-10); + /** + * @brief Evaluate \f$F_n(x)\f$ at the given abscissa. + * + * @param r Coordinate container with a single component: \f$x = r[0]\f$. + * @return The numerical value of \f$F_n(x)\f$ obtained by adaptively + * projecting and integrating on \f$[0,1]\f$. + * + * Semantics: + * - Satisfies the @ref RepresentableFunction contract. + * - Internally constructs the integrand lambda and invokes the MRCPP + * `project` + `integrate` pipeline on the stored `MRA`. 
+ */ double evalf(const Coord<1> &r) const override; private: + /** @brief Integer order \f$n\f$ in \f$F_n(x)\f$ (kept constant for the lifetime). */ const int order; + + /** @brief Target projection tolerance for adaptive representation. */ const double prec; + + /** + * @brief Multiresolution context used to project/integrate the integrand. + * + * The concrete basis family and order are configured in the .cpp file. + * The same `MRA` instance is reused across evaluations for efficiency. + */ MultiResolutionAnalysis<1> MRA; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp index a57fe6708..219267ee5 100644 --- a/src/functions/GaussExp.cpp +++ b/src/functions/GaussExp.cpp @@ -23,6 +23,34 @@ * */ +/** + * @file GaussExp.cpp + * + * @brief Implementation of @c GaussExp, a small container for a linear + * combination (expansion) of Cartesian Gaussian primitives and/or + * Gaussian–polynomial terms. The class offers: + * - basic construction/assignment and memory ownership of terms, + * - pointwise evaluation, + * - algebra (sum/product by distributing over terms), + * - norm and normalization helpers, + * - crude visibility/screening support, + * - Coulomb energy (specialized for D=3), + * - periodification helper. + * + * Design notes + * ------------ + * - The expansion holds owning pointers to @c Gaussian (base type), and + * concrete terms are either @c GaussFunc (pure Gaussian) or + * @c GaussPoly (Gaussian times a Cartesian polynomial). + * - Operations that combine expansions rely on @c dynamic_cast to handle the + * two concrete term types and produce a @c GaussPoly when multiplying. + * - @b Ownership: this class allocates copies on insert/append and frees them + * in the destructor; copy constructor and assignment perform deep copies. 
+ * - Screening: @c screening is a scalar that configures per-term screening + * (e.g., via “n standard deviations”); negative values can be used as a + * disabled flag (see @c setScreen). Each term also receives the screen state. + */ + #include "GaussExp.h" #include @@ -39,12 +67,32 @@ using namespace Eigen; namespace mrcpp { +/** @brief Default screening parameter (in “number of standard deviations”). + * + * Each dimensional specialization gets its own static. Positive means enabled + * by default; see @ref setScreen to flip the sign and propagate to terms. + */ template double GaussExp::defaultScreening = 10.0; -template GaussExp::GaussExp(int nTerms, double prec) { +/** + * @brief Construct an expansion with a fixed number of (empty) slots. + * + * @param nTerms Number of terms (initial capacity). + * @param prec Unused here (historical signature compatibility). + * + * The vector is filled with @c nullptr placeholders; actual terms must be + * installed via @ref setFunc or @ref append before use. + */ +template GaussExp::GaussExp(int nTerms, double /*prec*/) { for (int i = 0; i < nTerms; i++) { this->funcs.push_back(nullptr); } } +/** + * @brief Deep-copy constructor. + * + * Clones each term by calling its virtual @c copy() (polymorphic copy). + * The @c screening flag/value is copied as well. + */ template GaussExp::GaussExp(const GaussExp &gexp) { screening = gexp.screening; for (unsigned int i = 0; i < gexp.size(); i++) { @@ -53,6 +101,9 @@ template GaussExp::GaussExp(const GaussExp &gexp) { } } +/** + * @brief Destructor: deletes all owned terms (if any) and nulls pointers. + */ template GaussExp::~GaussExp() { for (int i = 0; i < size(); i++) { if (this->funcs[i] != nullptr) { @@ -62,6 +113,13 @@ template GaussExp::~GaussExp() { } } +/** + * @brief Deep-copy assignment (strong exception safety not guaranteed). + * + * Existing terms are discarded; the right-hand side is cloned term by term. 
+ * The @c screening parameter is @b not overwritten (commented line preserves + * current object’s screening), so only structure/terms are copied. + */ template GaussExp &GaussExp::operator=(const GaussExp &gexp) { if (&gexp == this) return *this; // screening = gexp.screening; @@ -77,12 +135,25 @@ template GaussExp &GaussExp::operator=(const GaussExp &gexp) { return *this; } +/** + * @brief Pointwise evaluation: sum of all term evaluations at @p r. + * + * @param r D-dimensional coordinate. + * @return Σ_i term_i(r). + */ template double GaussExp::evalf(const Coord &r) const { double val = 0.0; for (int i = 0; i < this->size(); i++) { val += this->getFunc(i).evalf(r); } return val; } +/** + * @brief Quick “visibility” test at a given scale and sample count. + * + * @details Returns @c false if any term is not visible (fails its own + * visibility criterion); only if all are visible does it return @c true. + * This is a conservative conjunction useful for pruning. + */ template bool GaussExp::isVisibleAtScale(int scale, int nPts) const { for (unsigned int i = 0; i < this->size(); i++) { if (not this->getFunc(i).isVisibleAtScale(scale, nPts)) { return false; } @@ -90,6 +161,12 @@ template bool GaussExp::isVisibleAtScale(int scale, int nPts) const { return true; } +/** + * @brief Check whether the expansion is identically zero on [lb,ub]^D. + * + * @details Returns @c false if any term says it is non-zero on the box; + * otherwise returns @c true. Used for quick region elimination. + */ template bool GaussExp::isZeroOnInterval(const double *lb, const double *ub) const { for (unsigned int i = 0; i < this->size(); i++) { if (not this->getFunc(i).isZeroOnInterval(lb, ub)) { return false; } @@ -97,6 +174,14 @@ template bool GaussExp::isZeroOnInterval(const double *lb, const doub return true; } +/** + * @brief Install a @c GaussPoly term into slot @p i, scaling its coefficient. + * + * @param i Slot index (0-based). + * @param g Source Gaussian–polynomial term (copied). 
+ * @param c Extra scalar factor applied multiplicatively to the stored term’s + * existing coefficient (so final coef = c * g.coef()). + */ template void GaussExp::setFunc(int i, const GaussPoly &g, double c) { if (i < 0 or i > (this->size() - 1)) { MSG_ERROR("Index out of bounds!"); @@ -108,6 +193,11 @@ template void GaussExp::setFunc(int i, const GaussPoly &g, double this->funcs[i]->setCoef(c * coef); } +/** + * @brief Install a pure @c GaussFunc term into slot @p i, scaling its coefficient. + * + * Same semantics as the GaussPoly overload. + */ template void GaussExp::setFunc(int i, const GaussFunc &g, double c) { if (i < 0 or i > (this->size() - 1)) { MSG_ERROR("Index out of bounds!"); @@ -119,11 +209,17 @@ template void GaussExp::setFunc(int i, const GaussFunc &g, double this->funcs[i]->setCoef(c * coef); } +/** + * @brief Append a new term by polymorphic copy. + */ template void GaussExp::append(const Gaussian &g) { Gaussian *gp = g.copy(); this->funcs.push_back(gp); } +/** + * @brief Append all terms from another expansion (deep copies). + */ template void GaussExp::append(const GaussExp &g) { for (int i = 0; i < g.size(); i++) { Gaussian *gp = g.getFunc(i).copy(); @@ -131,6 +227,11 @@ template void GaussExp::append(const GaussExp &g) { } } +/** + * @brief Differentiate each term with respect to coordinate @p dir and return a new expansion. + * + * @param dir Axis index (0..D-1). + */ template GaussExp GaussExp::differentiate(int dir) const { assert(dir >= 0 and dir < D); GaussExp result; @@ -138,6 +239,12 @@ template GaussExp GaussExp::differentiate(int dir) const { return result; } +/** + * @brief Termwise concatenation (sum) with another expansion. + * + * @details Produces an expansion containing all terms from @c *this followed + * by all terms from @p g, by cloning. Coefficients remain unchanged. 
+ */ template GaussExp GaussExp::add(GaussExp &g) { int nsum = this->size() + g.size(); GaussExp sum = GaussExp(nsum); @@ -155,6 +262,9 @@ template GaussExp GaussExp::add(GaussExp &g) { return sum; } +/** + * @brief Concatenate with a single term @p g (at the end). + */ template GaussExp GaussExp::add(Gaussian &g) { int nsum = this->size() + 1; GaussExp sum = GaussExp(nsum); @@ -163,6 +273,14 @@ template GaussExp GaussExp::add(Gaussian &g) { return sum; } +/** + * @brief Product of two expansions by distributivity. + * + * @details For each pair of terms, multiply them (Gaussian×Gaussian or + * Gaussian×GaussPoly) to produce a @c GaussPoly term which is appended to the + * result. Type dispatch is handled via @c dynamic_cast and throws on unknown + * runtime types. + */ template GaussExp GaussExp::mult(GaussExp &gexp) { GaussExp result; for (int i = 0; i < this->size(); i++) { @@ -195,6 +313,9 @@ template GaussExp GaussExp::mult(GaussExp &gexp) { return result; } +/** + * @brief Multiply the expansion by a single @c GaussFunc term (distribute over terms). + */ template GaussExp GaussExp::mult(GaussFunc &g) { GaussExp result; int nTerms = this->size(); @@ -211,6 +332,10 @@ template GaussExp GaussExp::mult(GaussFunc &g) { } return result; } + +/** + * @brief Multiply the expansion by a single @c GaussPoly term (distribute over terms). + */ template GaussExp GaussExp::mult(GaussPoly &g) { int nTerms = this->size(); GaussExp result(nTerms); @@ -228,16 +353,29 @@ template GaussExp GaussExp::mult(GaussPoly &g) { return result; } +/** + * @brief Return a copy of the expansion scaled by constant @p d. + */ template GaussExp GaussExp::mult(double d) { GaussExp prod = *this; for (int i = 0; i < this->size(); i++) prod.funcs[i]->multConstInPlace(d); return prod; } +/** + * @brief In-place scaling of all term coefficients by @p d. 
+ */ template void GaussExp::multInPlace(double d) { for (int i = 0; i < this->size(); i++) this->funcs[i]->multConstInPlace(d); } +/** + * @brief Compute \f$\| \sum_i f_i \|_2^2\f$ via self-terms plus cross terms. + * + * @details First sum each term’s squared norm, then add the double products + * (2× overlap) between distinct terms. To ensure closed form overlaps, terms + * are materialized as @c GaussFunc and @c calcOverlap is used internally. + */ template double GaussExp::calcSquareNorm() const { /* computing the squares */ double norm = 0.0; @@ -263,6 +401,12 @@ template double GaussExp::calcSquareNorm() const { return norm; } +/** + * @brief Normalize the expansion so that @c calcSquareNorm() == 1. + * + * @details Scales each term’s coefficient by 1/||f||, where + * @c ||f|| = sqrt(calcSquareNorm()). + */ template void GaussExp::normalize() { double norm = std::sqrt(this->calcSquareNorm()); for (int i = 0; i < this->size(); i++) { @@ -271,11 +415,24 @@ template void GaussExp::normalize() { } } +/** + * @brief Set the per-term screening parameter (e.g., n standard deviations). + * + * @details Stores @p nStdDev locally and forwards to each term so that they + * can precompute their own screening envelopes (e.g., bounding radii). + */ template void GaussExp::calcScreening(double nStdDev) { screening = nStdDev; for (int i = 0; i < this->size(); i++) { this->funcs[i]->calcScreening(nStdDev); } } +/** + * @brief Enable or disable screening for this expansion and all terms. + * + * @param screen If true, make @c screening positive; if false, make it negative. + * The sign convention can be used by downstream code as a quick + * toggle. Each term receives @c setScreen(screen) as well. 
+ */ template void GaussExp::setScreen(bool screen) { if (screen) { this->screening = std::abs(this->screening); @@ -285,9 +442,13 @@ template void GaussExp::setScreen(bool screen) { for (int i = 0; i < this->size(); i++) { this->funcs[i]->setScreen(screen); } } -// Calculate the scaling and wavelet coefs of all the children, and do the -// outer product to make the nD-scaling coefs. Since a Gaussian expansion -// is not separable, we have to do the projection term by term. +// ----------------------------------------------------------------------------- +// Project-to-wavelets routine (legacy) +// ----------------------------------------------------------------------------- +// The routine below shows how to compute scaling and wavelet coefficients by +// projecting each term separately and expanding to nD via tensor products. +// It is currently commented out (relies on MWNode internals), but the steps +// are left as documentation for future restoration. /* template void GaussExp::calcWaveletCoefs(MWNode &node) { @@ -319,11 +480,19 @@ void GaussExp::calcWaveletCoefs(MWNode &node) { } */ +/** + * @brief Configure the global default screening parameter for all new instances. + * + * @param screen Expected to be non-negative; a negative value is reported via MSG_ERROR. + */ template void GaussExp::setDefaultScreening(double screen) { if (screen < 0) { MSG_ERROR("Screening constant cannot be negative!"); } defaultScreening = screen; } +/** + * @brief Pretty-printer listing the terms (order and parameters). + */ template std::ostream &GaussExp::print(std::ostream &o) const { o << "Gaussian expansion: " << size() << " terms" << std::endl; for (int i = 0; i < size(); i++) { @@ -333,15 +502,23 @@ template std::ostream &GaussExp::print(std::ostream &o) const { return o; } -/** @returns Coulomb repulsion energy between all pairs in GaussExp, including self-interaction +/** + * @brief Coulomb self-energy placeholder for general D. 
* - * @note Each Gaussian must be normalized to unit charge - * \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct! + * @note For D≠3 this is not implemented. */ template double GaussExp::calcCoulombEnergy() const { NOT_IMPLEMENTED_ABORT } +/** + * @brief Coulomb repulsion energy for D=3 including self-interaction once. + * + * @details Loops over pairs (i≤j), expands any composite terms to pure + * Gaussians, and accumulates @c 2*overlap for i < j and the i = j self-term once. + */ template <> double GaussExp<3>::calcCoulombEnergy() const { double energy = 0.0; for (int i = 0; i < this->size(); i++) { @@ -362,6 +539,13 @@ template <> double GaussExp<3>::calcCoulombEnergy() const { return energy; } +/** + * @brief Build a periodified expansion by summing periodic images of each term. + * + * @param period Period vector per axis (Lx, Ly, Lz for D=3). + * @param nStdDev Controls the width/number of included images (screening). + * @return A new @c GaussExp whose terms include periodic replicas of the input. + */ template GaussExp GaussExp::periodify(const std::array &period, double nStdDev) const { GaussExp out_exp; for (const auto &gauss : *this) { @@ -371,8 +555,9 @@ template GaussExp GaussExp::periodify(const std::array return out_exp; } +// Explicit template instantiations for common dimensions template class GaussExp<1>; template class GaussExp<2>; template class GaussExp<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussExp.h b/src/functions/GaussExp.h index a4315e381..6bbed072b 100644 --- a/src/functions/GaussExp.h +++ b/src/functions/GaussExp.h @@ -38,51 +38,150 @@ namespace mrcpp { #define GAUSS_EXP_PREC 1.e-10 /** @class GaussExp + * @tparam D Spatial dimension (1, 2, 3, …). * - * @brief Gaussian expansion in D dimensions + * @brief Container for a finite linear combination (“expansion”) of + * Cartesian Gaussian-type primitives in D dimensions. 
* - * - Monodimensional Gaussian expansion: + * Mathematical model + * ------------------ + * - 1D: + * \f[ + * g(x) = \sum_{m=1}^M g_m(x) + * = \sum_{m=1}^M \alpha_m \exp\!\big(-\beta_m (x - x_m)^2\big). + * \f] + * - D dimensions (separable Cartesian form): + * \f[ + * G(\mathbf{x}) = \sum_{m=1}^M G_m(\mathbf{x}) + * = \sum_{m=1}^M \prod_{d=1}^D g^{(d)}_m(x_d), + * \f] + * where each term is represented by a @ref Gaussian (base class) and is + * concretely either a pure Gaussian @ref GaussFunc or a Gaussian times a + * Cartesian polynomial @ref GaussPoly. * - * \f$ g(x) = \sum_{m=1}^M g_m(x) = \sum_{m=1}^M \alpha_m e^{-\beta (x-x^0)^2} \f$ - * - * - Multidimensional Gaussian expansion: - * - * \f$ G(x) = \sum_{m=1}^M G_m(x) = \sum_{m=1}^M \prod_{d=1}^D g_m^d(x^d) \f$ + * Ownership & invariants + * ---------------------- + * - The expansion OWNS its terms via raw pointers (@c Gaussian*). It + * performs deep copies on copy construction/assignment and deletes terms + * in the destructor. + * - @c funcs[i] is either non-null (a valid Gaussian term) or null for + * “empty” slots when constructed with a fixed number of terms. * + * Typical uses + * ------------ + * - Build analytic functions as sums of Gaussians, evaluate them pointwise + * (@ref evalf). + * - Combine expansions algebraically: @ref add, @ref mult (by expansion, + * single term, polynomial term, or scalar). + * - Compute norms and normalize: @ref calcSquareNorm, @ref normalize. + * - Manage crude support/visibility via screening: @ref calcScreening, + * @ref setScreen. */ - template class GaussExp : public RepresentableFunction { public: + /** + * @brief Construct an expansion with @p nTerms empty slots. + * + * @param nTerms Number of entries reserved in @ref funcs (default 0). + * @param prec Historical argument (unused here); kept for API symmetry. + * + * After construction, populate terms via @ref setFunc or @ref append. 
+ */ GaussExp(int nTerms = 0, double prec = GAUSS_EXP_PREC); + + /** @brief Deep-copy constructor (clones every term via virtual copy()). */ GaussExp(const GaussExp &gExp); + + /** @brief Deep-copy assignment (existing terms are discarded then cloned). */ GaussExp &operator=(const GaussExp &gExp); + + /** @brief Destructor: deletes all owned terms and clears pointers. */ ~GaussExp() override; + // ---- STL-style iteration over owned pointers (non-const and const) ---- auto begin() { return funcs.begin(); } auto end() { return funcs.end(); } - const auto begin() const { return funcs.begin(); } const auto end() const { return funcs.end(); } + // ---- Analysis helpers --------------------------------------------------- + + /** + * @brief Coulomb self-energy of the expansion. + * @details Implemented for D=3 (see .cpp); throws/not-implemented for others. + * @note For physical correctness each term should be charge-normalized. + */ double calcCoulombEnergy() const; + + /** + * @brief Compute the squared L2 norm of the expansion: + * \f$ \| \sum_i f_i \|_2^2 = \sum_i \|f_i\|^2 + 2\sum_{i &r) const override; + // ---- Other transforms/utilities ----------------------------------------- + + /** + * @brief Build a periodified version of the expansion by tiling each term. + * @param period Period per axis (e.g., {Lx, Ly, Lz} in 3D). + * @param nStdDev Screening control for how many images to include. + */ GaussExp periodify(const std::array &period, double nStdDev = 4.0) const; + + /** + * @brief Component-wise derivative \f$\partial/\partial x_{\text{dir}}\f$. + * @param dir Axis index in [0, D). + * @return New expansion with each term differentiated. + */ GaussExp differentiate(int dir) const; + // ---- Algebra: additive and multiplicative combinators ------------------- + + /** @brief Concatenate two expansions (returns all terms from both). */ GaussExp add(GaussExp &g); + + /** @brief Append a single Gaussian term to this expansion (returns new sum). 
*/ GaussExp add(Gaussian &g); + + /** + * @brief Distribute product over terms: + * (Σ f_i) * (Σ g_j) = Σ_{ij} f_i⋅g_j (resulting in GaussPoly terms). + */ GaussExp mult(GaussExp &g); + + /** @brief Multiply by a single pure Gaussian (resulting in GaussPoly terms). */ GaussExp mult(GaussFunc &g); + + /** @brief Multiply by a single Gaussian–polynomial term. */ GaussExp mult(GaussPoly &g); + + /** @brief Return a copy scaled by scalar @p d. */ GaussExp mult(double d); + + /** @brief Scale coefficients in place by scalar @p d. */ void multInPlace(double d); + // ---- Operator sugar (forward to the methods above) ---------------------- + GaussExp operator+(GaussExp &g) { return this->add(g); } GaussExp operator+(Gaussian &g) { return this->add(g); } GaussExp operator*(GaussExp &g) { return this->mult(g); } @@ -91,46 +190,114 @@ template class GaussExp : public RepresentableFunction { GaussExp operator*(double d) { return this->mult(d); } void operator*=(double d) { this->multInPlace(d); } + // ---- Accessors ---------------------------------------------------------- + + /** @brief Current screening parameter (sign may encode “enabled/disabled”). */ double getScreening() const { return screening; } + + /** @brief Exponent(s) α per axis for term i. */ std::array getExp(int i) const { return this->funcs[i]->getExp(); } + + /** @brief Scalar coefficient for term i. */ double getCoef(int i) const { return this->funcs[i]->getCoef(); } + + /** @brief Powers (Cartesian angular momenta) per axis for term i. */ const std::array &getPower(int i) const { return this->funcs[i]->getPower(); } + + /** @brief Center position per axis for term i. */ const std::array &getPos(int i) const { return this->funcs[i]->getPos(); } + /** @brief Number of (owned) terms in the expansion. */ int size() const { return this->funcs.size(); } + + /** @brief Mutable access to term i (reference). */ Gaussian &getFunc(int i) { return *this->funcs[i]; } + + /** @brief Const access to term i (reference). 
*/ const Gaussian &getFunc(int i) const { return *this->funcs[i]; } + /** @brief Mutable pointer access (may be null if slot is empty). */ Gaussian *operator[](int i) { return this->funcs[i]; } + + /** @brief Const pointer access (may be null if slot is empty). */ const Gaussian *operator[](int i) const { return this->funcs[i]; } + // ---- Mutators ----------------------------------------------------------- + + /** + * @brief Install a Gaussian–polynomial term at slot i, scaling its coef by c. + * @details Replaces any existing object at slot i (deletes old). + */ void setFunc(int i, const GaussPoly &g, double c = 1.0); + + /** + * @brief Install a pure Gaussian term at slot i, scaling its coef by c. + * @details Replaces any existing object at slot i (deletes old). + */ void setFunc(int i, const GaussFunc &g, double c = 1.0); + /** + * @brief Set global default screening for newly created instances. + * @note A negative @p screen is reported as an error via MSG_ERROR. + */ void setDefaultScreening(double screen); + + /** + * @brief Enable/disable screening for this expansion and forward to terms. + * @details Conventionally, a positive @ref screening means “enabled” and + * a negative value means “disabled”. + */ void setScreen(bool screen); + + /** @brief Set the (isotropic) Gaussian exponent for term i. */ void setExp(int i, double a) { this->funcs[i]->setExp(a); } + + /** @brief Set scalar coefficient for term i. */ void setCoef(int i, double b) { this->funcs[i]->setCoef(b); } + + /** @brief Set Cartesian powers for term i. */ void setPow(int i, const std::array &power) { this->funcs[i]->setPow(power); } + + /** @brief Set center position for term i. */ void setPos(int i, const std::array &pos) { this->funcs[i]->setPos(pos); } - /** @brief Append Gaussian to expansion */ + /** @brief Append a single (cloned) Gaussian to the end of the expansion. */ void append(const Gaussian &g); - /** @brief Append GaussExp to expansion */ + + /** @brief Append all terms (cloned) from another expansion. 
*/ void append(const GaussExp &g); + /** @brief Stream pretty-printer: prints a summary and the terms. */ friend std::ostream &operator<<(std::ostream &o, const GaussExp &gExp) { return gExp.print(o); } + + /** @brief Grant @ref Gaussian access to internals where necessary. */ friend class Gaussian; protected: + /** @brief Owned list of Gaussian terms (raw-pointer ownership). */ std::vector *> funcs; + + /** @brief Default screening parameter for new instances of this @c D. */ static double defaultScreening; + + /** @brief Instance screening parameter (sign may encode enabled/disabled). */ double screening{0.0}; + /** @brief Implementation of stream printing (called by operator<<). */ std::ostream &print(std::ostream &o) const; + /** + * @brief Coarse visibility test for adaptive algorithms. + * @details Returns false if any term declares itself not visible + * at the given scale/sample count; true otherwise. + */ bool isVisibleAtScale(int scale, int nPts) const override; + + /** + * @brief Quick zero check on a box \f$[lb,ub]^D\f$: + * returns true only if every term is zero on the box. + */ bool isZeroOnInterval(const double *lb, const double *ub) const override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp index 28736be58..8753c3357 100644 --- a/src/functions/GaussFunc.cpp +++ b/src/functions/GaussFunc.cpp @@ -23,6 +23,32 @@ * */ +/** + * @file GaussFunc.cpp + * + * @brief Implementation of @c GaussFunc, a single Cartesian Gaussian (possibly + * multiplied by a coordinate power) in D dimensions. + * + * Model + * ----- + * A term has the form + * f(r) = c * Π_{d=0}^{D-1} (x_d - R_d)^{p_d} * exp( -α_d (x_d - R_d)^2 ), + * with scalar coefficient c, center R, exponents α (per axis), and integer powers p + * (Cartesian angular momenta). Many operations here are separable over dimensions. 
+ * + * Highlights + * ---------- + * - @ref evalf computes the value with optional screening (box truncation). + * - @ref calcSquareNorm uses 1D closed forms and multiplies across axes. + * - @ref differentiate returns a @ref GaussPoly (Gaussian times polynomial), + * using the product rule on (x-R)^p * exp(-α (x-R)^2). + * - @ref mult multiplies two @c GaussFunc into a @c GaussPoly by “completing the square” + * (handled by @c GaussPoly::multPureGauss) and then combining the two polynomials + * created by shifting to the new center. + * - @ref calcCoulombEnergy (D=3 specialization) uses Boys F_0 and assumes isotropic + * exponents for both Gaussians. + */ + #include #include "BoysFunction.h" @@ -39,11 +65,26 @@ using namespace Eigen; namespace mrcpp { +/** + * @brief Polymorphic deep copy (virtual constructor idiom). + * @return Newly allocated @c GaussFunc with identical parameters. + */ template Gaussian *GaussFunc::copy() const { auto *gauss = new GaussFunc(*this); return gauss; } +/** + * @brief Pointwise evaluation of the Gaussian (with optional polynomial factor). + * + * Steps + * ----- + * 1) If screening is enabled, immediately return 0 if any coordinate lies outside + * the precomputed box [A[d], B[d]]. + * 2) Accumulate q2 = Σ α_d (x_d - R_d)^2 (the exponent argument), + * and p2 = Π (x_d - R_d)^{p_d} (the Cartesian polynomial). + * 3) Return c * p2 * exp(-q2). + */ template double GaussFunc::evalf(const Coord &r) const { if (this->getScreen()) { for (int d = 0; d < D; d++) { @@ -65,6 +106,13 @@ template double GaussFunc::evalf(const Coord &r) const { return this->coef * p2 * std::exp(-q2); } +/** + * @brief 1D evaluation of the d-th component only (factor for separable product). + * + * This returns (x - R_d)^{p_d} * exp(-α_d (x - R_d)^2) times @c coef if d==0 + * (the overall scalar is stored redundantly only on one axis when factoring). + * Screening is applied on that axis if enabled. 
+ */ template double GaussFunc::evalf1D(double r, int d) const { if (this->getScreen()) { if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; } @@ -85,6 +133,16 @@ template double GaussFunc::evalf1D(double r, int d) const { return result; } +/** + * @brief Squared L2 norm ||f||^2 = ∫ |f|^2 d r (separable product of 1D integrals). + * + * For one axis (drop subscript d for brevity): + * ∫ (x-R)^{2p} exp(-2α (x-R)^2) dx + * = sqrt(pi / (2α)) * [(2p-1)!!] / (4α)^p, + * i.e. sqrt(pi / (2α)) times a descending product (TODO confirm against the loop body): + * sq_norm = sqrt(pi / (2α)) * Π_{i odd from (2p-1) down to 1} i / (4α). + * The D-dimensional norm is the product over axes, multiplied by coef^2. + */ template double GaussFunc::calcSquareNorm() const { double norm = 1.0; for (int d = 0; d < D; d++) { @@ -105,12 +163,28 @@ template double GaussFunc::calcSquareNorm() const { return norm * this->coef * this->coef; } +/** + * @brief Convert a single @c GaussFunc into a length-1 @c GaussExp. + * + * Useful when operations expect an expansion (e.g., in norm cross-terms). + */ template GaussExp GaussFunc::asGaussExp() const { GaussExp gexp; gexp.append(*this); return gexp; } +/** + * @brief Derivative along axis @p dir, returning a @c GaussPoly. + * + * In 1D: + * d/dx [(x-R)^p e^{-α(x-R)^2}] = + * p (x-R)^{p-1} e^{-α...} + (x-R)^p * (-2α)(x-R) e^{-α...} + * = [ p (x-R)^{p-1} - 2α (x-R)^{p+1} ] e^{-α...} + * + * We therefore create a new polynomial of degree (p+1) with two nonzero + * coefficients at (p-1) and (p+1). Other axes carry over unchanged. + */ template GaussPoly GaussFunc::differentiate(int dir) const { GaussPoly result(*this); int oldPow = this->getPower(dir); @@ -123,6 +197,21 @@ template GaussPoly GaussFunc::differentiate(int dir) const { return result; } +/** + * @brief In-place multiplication by another @c GaussFunc with the SAME center. + * + * Preconditions + * ------------- + * - The two Gaussians must share identical centers in every axis. 
+ * + * Effect + * ------ + * - Exponents add: α_new = α_lhs + α_rhs. + * - Powers add: p_new = p_lhs + p_rhs. + * - Coefficients multiply: c_new = c_lhs * c_rhs. + * + * This keeps the center unchanged and avoids creating polynomials. + */ template void GaussFunc::multInPlace(const GaussFunc &rhs) { GaussFunc &lhs = *this; for (int d = 0; d < D; d++) { @@ -147,6 +236,17 @@ template void GaussFunc::multInPlace(const GaussFunc &rhs) { * @param[in] this: Left hand side of multiply * @param[in] rhs: Right hand side of multiply * @returns New GaussPoly + * + * Algorithm + * --------- + * 1) “Complete the square”: the product of two Gaussians is a (shifted) Gaussian + * with combined exponent and a new center (weighted by exponents). This part is + * delegated to @c GaussPoly::multPureGauss, which sets the new Gaussian envelope + * (position, exponents, and a prefactor). + * 2) Each original polynomial factor (x-R)^p is re-expressed relative to the new + * center R_new: (x-R) = (x-R_new) + (R_new - R). We therefore have two polynomials + * per axis; they are multiplied to obtain the combined polynomial for that axis. + * 3) Multiply in the original scalar coefficients c_lhs * c_rhs. */ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { GaussFunc &lhs = *this; @@ -163,16 +263,22 @@ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { return result; } -/** @brief Multiply GaussFunc by scalar - * @param[in] c: Scalar to multiply - * @returns New GaussFunc - */ +/** @brief Multiply GaussFunc by scalar (returns a copy with scaled coefficient). */ template GaussFunc GaussFunc::mult(double c) { GaussFunc g = *this; g.coef *= c; return g; } +/** + * @brief Pretty-printer for a Gaussian term. + * + * Prints: + * - Coef + * - Exp: either a single value if all α_d are equal, or all components. 
+ * - Pos: center coordinates + * - Pow: integer powers per axis + */ template std::ostream &GaussFunc::print(std::ostream &o) const { auto is_array = details::are_all_equal(this->getExp()); @@ -202,11 +308,29 @@ template std::ostream &GaussFunc::print(std::ostream &o) const { * * @note Both Gaussians must be normalized to unit charge * \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct! + * + * General D is not implemented here; see the D=3 specialization below. */ template double GaussFunc::calcCoulombEnergy(const GaussFunc &gf) const { NOT_IMPLEMENTED_ABORT; } +/** + * @brief Coulomb energy for 3D isotropic Gaussians using Boys F_0. + * + * Preconditions + * ------------- + * - Both Gaussians must have isotropic exponents (α_x = α_y = α_z), verified via + * @c details::are_all_equal<3>. + * + * Formula + * ------- + * With exponents p and q, α = p q / (p + q), separation R = |R_p - R_q|, + * the Coulomb interaction is: + * E = sqrt( 4 α / π ) * F_0( α R^2 ), + * where F_0 is the order-zero Boys function. The code constructs a @c BoysFunction(0) + * and evaluates it at α R^2. + */ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const { // Checking if the elements in each exponent are constant @@ -236,7 +360,8 @@ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const return std::sqrt(4.0 * alpha / pi) * boysFac; } +// Explicit template instantiations template class GaussFunc<1>; template class GaussFunc<2>; template class GaussFunc<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussFunc.h b/src/functions/GaussFunc.h index 874bb3850..a983feabc 100644 --- a/src/functions/GaussFunc.h +++ b/src/functions/GaussFunc.h @@ -33,59 +33,142 @@ namespace mrcpp { /** @class GaussFunc + * @tparam D Spatial dimension (1,2,3,…). 
* - * @brief Gaussian function in D dimensions with a simple monomial in front + * @brief Single Cartesian Gaussian primitive (optionally with monomial powers) + * in D dimensions. * - * - Monodimensional Gaussian (GaussFunc<1>): + * Mathematical form + * ----------------- + * In D dimensions the function is separable: + * \f[ + * G(\mathbf{x}) + * = \alpha \prod_{d=0}^{D-1} (x_d - R_d)^{p_d}\, + * \exp\!\big(-\beta_d\,(x_d - R_d)^2\big), + * \f] + * where: + * - \f$ \alpha \f$ is a scalar coefficient (amplitude), + * - \f$ \mathbf{R} = (R_0,\dots,R_{D-1}) \f$ is the center, + * - \f$ \mathbf{p} = (p_0,\dots,p_{D-1}) \f$ are non-negative integers (Cartesian powers), + * - \f$ \boldsymbol{\beta} = (\beta_0,\dots,\beta_{D-1}) \f$ are positive exponents; they + * can be isotropic (\f$\beta_d=\beta\f$) or anisotropic (per-axis). * - * \f$ g(x) = \alpha (x-x_0)^a e^{-\beta (x-x_0)^2} \f$ + * Relationship to @ref Gaussian + * ----------------------------- + * This class *derives* from @ref Gaussian, which stores the common state + * (coefficient, center, exponents, powers) and provides a polymorphic interface. + * @c GaussFunc implements operations specific to “pure Gaussian × monomial” + * terms (e.g., evaluation, in-place multiplication with same-center terms). * - * - Multidimensional Gaussian (GaussFunc): - * - * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$ + * Typical usage + * ------------- + * - Build analytic functions and evaluate them at given points (@ref evalf). + * - Construct @ref GaussExp (expansions) by appending multiple @c GaussFunc. + * - Form products using @ref mult (returns @ref GaussPoly) or scale by a scalar. + * - Differentiate analytically with respect to a coordinate (@ref differentiate). 
*/ - template class GaussFunc : public Gaussian { public: - /** @returns New GaussFunc object - * @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$ - * @param[in] alpha: Coefficient, \f$ \alpha e^{-r^2} \f$ - * @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ - * @param[in] pow: Monomial power, \f$ x^{pow[0]}, y^{pow[1]}, ... \f$ + /** @name Constructors + * @{ + */ + /** @brief Construct with isotropic exponent. + * @param beta Isotropic exponent \f$\beta\f$ (used on all axes). + * @param alpha Coefficient \f$\alpha\f$. + * @param pos Center \f$\mathbf{R}\f$ (defaults to origin). + * @param pow Powers \f$\mathbf{p}\f$ (defaults to all zeros). + * + * This forwards to the @ref Gaussian base constructor. */ GaussFunc(double beta, double alpha, const Coord &pos = {}, const std::array &pow = {}) : Gaussian(beta, alpha, pos, pow) {} + + /** @brief Construct with anisotropic exponents (per-axis @p beta). + * @param beta Array of exponents \f$(\beta_0,\dots,\beta_{D-1})\f$. + * @param alpha Coefficient \f$\alpha\f$. + * @param pos Center \f$\mathbf{R}\f$. + * @param pow Powers \f$\mathbf{p}\f$. + */ GaussFunc(const std::array &beta, double alpha, const Coord &pos = {}, const std::array &pow = {}) : Gaussian(beta, alpha, pos, pow) {} + + /** @brief Copy constructor (shallow copy of POD members, as expected). */ GaussFunc(const GaussFunc &gf) : Gaussian(gf) {} + /** @brief Deleted assignment for safety (use copy-construct as needed). */ GaussFunc &operator=(const GaussFunc &rhs) = delete; + /** @brief Polymorphic copier (virtual constructor idiom). */ Gaussian *copy() const override; + /** @} */ + /** @name Physics / analysis helpers + * @{ + */ + /** @brief Coulomb repulsion with another Gaussian (specialized for D=3). + * @details For D=3 and isotropic exponents, a closed form using Boys @f$F_0@f$ is used. + * Other D are not implemented and trigger a runtime abort. 
+ */ double calcCoulombEnergy(const GaussFunc &rhs) const; + + /** @brief \f$\|G\|_2^2 = \int |G|^2 \, d\mathbf{x}\f$ (separable product of 1D integrals). */ double calcSquareNorm() const override; + /** @} */ + /** @name Evaluation + * @{ + */ + /** @brief Full D-dimensional evaluation at coordinate @p r. */ double evalf(const Coord &r) const override; + + /** @brief 1D factor evaluation for axis @p dir (used in separable algorithms). */ double evalf1D(double r, int dir) const override; + /** @} */ + /** @name Transformations and algebra + * @{ + */ + /** @brief Wrap this single Gaussian as a length-1 Gaussian expansion. */ GaussExp asGaussExp() const override; + + /** @brief Analytic derivative w.r.t. @p dir, returns a @ref GaussPoly. */ GaussPoly differentiate(int dir) const override; + /** @brief In-place product with another Gaussian at the *same center*. + * @details Exponents and powers add; coefficients multiply. + * Fails fast if centers differ (cannot keep a pure GaussFunc). + */ void multInPlace(const GaussFunc &rhs); + /** @brief Alias for @ref multInPlace. */ void operator*=(const GaussFunc &rhs) { multInPlace(rhs); } + + /** @brief Product with another Gaussian (same or different center). + * @details Returns a @ref GaussPoly (Gaussian times polynomial) obtained by + * completing the square and combining monomial factors. */ GaussPoly mult(const GaussFunc &rhs); + + /** @brief Scalar multiplication (returns a scaled copy). */ GaussFunc mult(double c); + + /** @brief Operator overloads forwarding to the methods above. */ GaussPoly operator*(const GaussFunc &rhs) { return this->mult(rhs); } GaussFunc operator*(double c) { return this->mult(c); } + /** @} */ + /** @name Power setters + * @{ + */ + /** @brief Set a single Cartesian power component @p power on axis @p d. */ void setPow(int d, int power) override { this->power[d] = power; } + /** @brief Set the full power vector \f$\mathbf{p}\f$. 
*/ void setPow(const std::array &power) override { this->power = power; } + /** @} */ private: + /** @brief Pretty-printer used by stream insertion (see implementation). */ std::ostream &print(std::ostream &o) const override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp index 0dfeaf2cd..9c18d420b 100644 --- a/src/functions/GaussPoly.cpp +++ b/src/functions/GaussPoly.cpp @@ -42,11 +42,21 @@ namespace mrcpp { * @param[in] alpha: Coefficient, \f$ \alpha e^{-r^2} \f$ * @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ * @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$ + * + * High-level: + * ----------- + * GaussPoly represents a separable polynomial-times-Gaussian: + * f(x) = coef * Π_d Poly_d(x_d - pos[d]) * exp( -alpha[d] (x_d - pos[d])^2 ). + * The per-axis polynomials are stored as pointers (Polynomial* poly[d]). + * Here we allocate those polynomials (if a non-zero power is requested), + * using the degree from `power[d]` passed to the Gaussian base ctor. */ template GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const std::array &power) : Gaussian(beta, alpha, pos, power) { for (auto d = 0; d < D; d++) { + // If overall 'power' array is not the all-zero sentinel, create a poly + // of the requested degree for this axis. Otherwise leave pointer null. if (power != std::array{}) { this->poly[d] = new Polynomial(this->power[d]); } else { @@ -55,6 +65,9 @@ GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const st } } +/** @brief Anisotropic exponent ctor (per-axis beta). + * Same allocation logic for the per-axis polynomials as above. + */ template GaussPoly::GaussPoly(const std::array &beta, double alpha, @@ -70,12 +83,21 @@ GaussPoly::GaussPoly(const std::array &beta, } } +/** @brief Copy-construct with deep copies of the per-axis polynomials. 
*/ template GaussPoly::GaussPoly(const GaussPoly &gp) : Gaussian(gp) { for (int d = 0; d < D; d++) { poly[d] = new Polynomial(gp.getPoly(d)); } } +/** @brief Construct a GaussPoly from a GaussFunc (monomial×Gaussian). + * + * Effect: + * ------- + * For each axis d, we create a polynomial of degree equal to the monomial power + * in that dimension, and set it to the *monomial basis* e_d(t) = t^{power[d]} + * (i.e., coefficient vector with a single 1 at index = power[d]). + */ template GaussPoly::GaussPoly(const GaussFunc &gf) : Gaussian(gf) { @@ -83,21 +105,31 @@ GaussPoly::GaussPoly(const GaussFunc &gf) int order = this->getPower(d); poly[d] = new Polynomial(order); VectorXd coefs = VectorXd::Zero(order + 1); - coefs[order] = 1.0; + coefs[order] = 1.0; // t^{order} poly[d]->setCoefs(coefs); // poly[d]->unsetBounds(); } } +/** @brief Delete owned Polynomial objects. */ template GaussPoly::~GaussPoly() { for (int i = 0; i < D; i++) { delete poly[i]; } } +/** @brief Virtual clone (deep copy). */ template Gaussian *GaussPoly::copy() const { auto *gauss = new GaussPoly(*this); return gauss; } +/** @brief Exact L2 norm squared by expanding to GaussExp and summing overlaps. + * + * Algorithm: + * ---------- + * 1) Expand this GaussPoly into a sum of GaussFunc terms (asGaussExp()). + * 2) Sum ⟨g_i | g_j⟩ over all pairs using the analytic overlap routine + * function_utils::calc_overlap (Obara–Saika recurrences). + */ template double GaussPoly::calcSquareNorm() const { GaussExp this_exp = this->asGaussExp(); double norm = 0.0; @@ -111,6 +143,13 @@ template double GaussPoly::calcSquareNorm() const { return norm; } +/** @brief Evaluate f(r) = coef * Π_d poly_d(r_d - pos[d]) * exp(-Σ_d alpha[d](r_d-pos[d])^2) + * + * Notes: + * ------ + * - Optional *screening*: if enabled, points outside the [A,B] box give 0. + * - The polynomial is evaluated in *shifted* coordinate q = r_d - pos[d]. 
+ */ template double GaussPoly::evalf(const Coord &r) const { if (this->getScreen()) { for (int d = 0; d < D; d++) { @@ -121,12 +160,22 @@ template double GaussPoly::evalf(const Coord &r) const { for (int d = 0; d < D; d++) { // assert(this->poly[d]->getCheckBounds() == false); double q = r[d] - this->pos[d]; - q2 += this->alpha[d] * q * q; - p2 *= poly[d]->evalf(r[d] - this->pos[d]); + q2 += this->alpha[d] * q * q; // accumulate quadratic exponent + p2 *= poly[d]->evalf(r[d] - this->pos[d]); // polynomial factor in dim d } return this->coef * p2 * std::exp(-q2); } +/** @brief Evaluate the *1D* factor in dimension d at coordinate r. + * + * Implementation detail: + * ---------------------- + * For efficiency, only dimension d=0 gets the *full* global coefficient. + * Other dimensions return the pure 1D factor with amplitude 1.0. This is a + * deliberate convention to avoid taking the d-th root of the coefficient when + * forming tensor products; callers multiply across dims and obtain the correct + * full amplitude once (from d==0). + */ template double GaussPoly::evalf1D(const double r, int d) const { // NOTE! // This function evaluation will give the first dimension the full coef @@ -142,10 +191,29 @@ template double GaussPoly::evalf1D(const double r, int d) const { double q = (r - this->pos[d]); q2 += q * q; p2 *= poly[d]->evalf(q); - if (d == 0) { p2 *= this->coef; } + if (d == 0) { p2 *= this->coef; } // apply global amplitude once return p2 * std::exp(-this->alpha[d] * q2); } +/** @brief Expand a polynomial×Gaussian into a sum of pure Gaussians (GaussExp). + * + * Idea: + * ----- + * Each per-axis polynomial Poly_d(t) = Σ_{k=0}^{p_d} c_{d,k} t^k can be viewed + * as a linear combination of *monomial* GaussFuncs: (x-pos[d])^k * exp(-α_d t^2). + * The D-dimensional product of polynomials expands into a tensor product of + * monomials across dimensions. 
This routine enumerates all combinations of + * powers (k_0,...,k_{D-1}), multiplies coefficients Π_d c_{d,k_d}, and emits + * corresponding GaussFunc terms into a GaussExp. + * + * Implementation: + * --------------- + * - nTerms = Π_d (power[d] + 1). + * - fillCoefPowVector(...) recursively builds: + * * `coefs[i]` = Π_d c_{d, pow_d(i)} * global coef + * * `power[i]` = array of the per-axis monomial powers + * - For each nonzero coefficient, create GaussFunc(alpha, coef, pos, pow). + */ template GaussExp GaussPoly::asGaussExp() const { std::array pow; std::array pos; @@ -175,14 +243,25 @@ template GaussExp GaussPoly::asGaussExp() const { return gexp; } +/** @brief Analytic derivative (not implemented for GaussPoly). */ template GaussPoly GaussPoly::differentiate(int dir) const { NOT_IMPLEMENTED_ABORT; } +/** @brief In-place product with another GaussPoly (not implemented). */ template void GaussPoly::multInPlace(const GaussPoly &rhs) { NOT_IMPLEMENTED_ABORT; } +/** @brief Recursive helper: enumerate all power combinations; collect coefficients. + * + * Version 1: temporary raw `int pow[D]` buffer. + * + * On the recursion leaf (dir==0 processed), allocate a new array `newPow[d]` + * storing the tuple of powers; compute the scalar coefficient as: + * coef = global_coef * Π_d poly_d->coefs[ pow[d] ] + * and push both into the output vectors. + */ template void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector &power, int pow[D], int dir) const { dir--; @@ -204,6 +283,7 @@ void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector accumulator. */ template void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector &power, @@ -228,6 +308,7 @@ void GaussPoly::fillCoefPowVector(std::vector &coefs, } } +/** @brief Product of two GaussPoly (symbolic) — currently not implemented. 
*/ template GaussPoly GaussPoly::mult(const GaussPoly &rhs) { NOT_IMPLEMENTED_ABORT; /* @@ -261,21 +342,20 @@ template GaussPoly GaussPoly::mult(const GaussPoly &rhs) { */ } -/** @brief Multiply GaussPoly by scalar - * @param[in] c: Scalar to multiply - * @returns New GaussPoly - */ +/** @brief Multiply by a scalar (returns a copy). */ template GaussPoly GaussPoly::mult(double c) { GaussPoly g = *this; g.coef *= c; return g; } +/** @brief Set polynomial degree *and* allocate a new Polynomial in dim d. */ template void GaussPoly::setPow(int d, int pow) { if (poly[d] != nullptr) { delete poly[d]; } poly[d] = new Polynomial(pow); } +/** @brief Set polynomial degrees in all dims and allocate new polynomials. */ template void GaussPoly::setPow(const std::array &pow) { for (int d = 0; d < D; d++) { if (poly[d] != nullptr) { delete poly[d]; } @@ -283,10 +363,13 @@ template void GaussPoly::setPow(const std::array &pow) { } } -/** @brief Set polynomial in given dimension +/** @brief Replace the polynomial in a given dimension and update degree. * - * @param[in] d: Cartesian direction - * @param[in] poly: Polynomial to set + * Ownership: + * ---------- + * This class owns its per-axis Polynomial pointers. We take a *copy* of the + * passed polynomial to keep ownership consistent and update power[d] to match + * the new polynomial order. */ template void GaussPoly::setPoly(int d, Polynomial &poly) { if (this->poly[d] != nullptr) { delete this->poly[d]; } @@ -294,11 +377,11 @@ template void GaussPoly::setPoly(int d, Polynomial &poly) { this->power[d] = poly.getOrder(); } +/** @brief Pretty-print parameters, including per-axis polynomial coefficients. */ template std::ostream &GaussPoly::print(std::ostream &o) const { auto is_array = details::are_all_equal(this->getExp()); - // If all of the values in the exponential are the same only - // one is printed, else, all of them are printed + // If all exponents are identical, print a single value; else print the array. 
o << "Coef : " << this->getCoef() << std::endl; if (!is_array) { o << "Exp : "; @@ -316,8 +399,9 @@ template std::ostream &GaussPoly::print(std::ostream &o) const { return o; } +// Explicit template instantiations template class GaussPoly<1>; template class GaussPoly<2>; template class GaussPoly<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/GaussPoly.h b/src/functions/GaussPoly.h index 97ed6f47d..94f9850de 100644 --- a/src/functions/GaussPoly.h +++ b/src/functions/GaussPoly.h @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -37,64 +37,224 @@ namespace mrcpp { /** @class GaussPoly * - * @brief Gaussian function in D dimensions with a general polynomial in front + * @brief Polynomial–Gaussian in D dimensions (separable form). * - * - Monodimensional Gaussian (GaussPoly<1>): + * Concept + * ------- + * GaussPoly represents functions of the form + * \f[ + * f(\mathbf x) \;=\; c \;\prod_{d=1}^{D}\; P_d(x_d - x^{(0)}_d)\, + * \exp\!\big(-\alpha_d\,(x_d - x^{(0)}_d)^2\big), + * \f] + * i.e. a per–dimension polynomial factor times an anisotropic Gaussian. + * The per–axis polynomials \f$P_d\f$ are stored as owned pointers + * `Polynomial* poly[d]`. This class derives from @ref Gaussian to reuse + * storage for coefficient `coef`, exponents `alpha[d]`, and center `pos[d]`. * - * \f$ g(x) = \alpha P(x-x_0) e^{-\beta (x-x_0)^2} \f$ + * Key features + * ------------ + * - Exact evaluation in 1D/ND (see evalf / evalf1D). 
+ * - Exact L2–norm via expansion into a sum of monomial–Gaussians + * (@ref asGaussExp + analytic overlaps). + * - Algebraic utilities (scalar and poly products; some are intentionally + * left unimplemented to avoid accidental heavy symbolic work). * - * - Multidimensional Gaussian (GaussFunc): + * Ownership + * --------- + * The `poly[d]` pointers are owned by the GaussPoly instance (deep-copied + * in copy-operations and destroyed in the destructor). * - * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$ + * Relation to GaussFunc + * --------------------- + * A @ref GaussFunc corresponds to the special case where each `P_d(t)=t^{k_d}` + * is a monomial. A GaussPoly can be expanded to a sum of GaussFunc terms + * (tensor product of monomials) with @ref asGaussExp. */ template class GaussPoly : public Gaussian { public: + /** @name Constructors & Lifetime + * @{ + */ + + /** @brief Construct an isotropic GaussPoly with optional per-axis degrees. + * + * @param[in] alpha Exponent parameter (isotropic): \f$ \alpha_d \equiv \alpha \f$. + * @param[in] coef Global amplitude \f$ c \f$. + * @param[in] pos Center \f$ x^{(0)} \f$ per dimension. + * @param[in] power Maximum polynomial degree per dimension (order of @ref Polynomial). + * + * Initializes each `poly[d]` as a Polynomial of degree `power[d]` + * (if any non-zero degree is requested), otherwise keeps it nullptr. + * The Gaussian base class stores `(coef, alpha, pos, power)`. + */ GaussPoly(double alpha = 0.0, double coef = 1.0, const Coord &pos = {}, const std::array &power = {}); + + /** @brief Construct an anisotropic GaussPoly (per-axis exponents). + * + * @param[in] alpha Per-axis exponents \f$ \{\alpha_d\}_{d=1}^D \f$. + * @param[in] coef Global amplitude. + * @param[in] pos Center per dimension. + * @param[in] power Maximum polynomial degree per dimension. + * + * Same allocation policy for `poly[d]` as in the isotropic constructor. 
+ */ GaussPoly(const std::array &alpha, double coef, const Coord &pos = {}, const std::array &power = {}); + + /** @brief Deep-copy ctor (also clones per-axis polynomials). */ GaussPoly(const GaussPoly &gp); + + /** @brief Build GaussPoly from a @ref GaussFunc (monomial×Gaussian). + * + * Creates per-axis polynomials equal to the corresponding monomials, + * i.e. `P_d(t) = t^{power[d]}`. + */ GaussPoly(const GaussFunc &gf); + + /** @brief Disable copy-assignment (explicit semantic/ownership choice). */ GaussPoly &operator=(const GaussPoly &gp) = delete; + + /** @brief Polymorphic clone (deep copy). */ Gaussian *copy() const override; + + /** @brief Destructor; releases owned Polynomial pointers. */ ~GaussPoly(); + /** @} */ + + /** @name Math & Evaluation + * @{ + */ + + /** @brief Exact L2-norm squared \f$ \|f\|_2^2 \f$. + * + * Implementation: + * 1) Expand to a sum of monomial Gaussians (@ref asGaussExp). + * 2) Sum analytic overlaps of all pairs (Obara–Saika), see + * `function_utils::calc_overlap`. + */ double calcSquareNorm() const override; + /** @brief Evaluate \f$ f(\mathbf x) \f$ at a point (D-D). */ double evalf(const Coord &r) const override; + + /** @brief Evaluate the 1D factor in dimension `dim` at coordinate `r`. + * + * The convention (consistent with other classes): the global amplitude + * `coef` is applied only in `dim==0` so that a tensor product across + * dimensions yields the correct global amplitude once. + */ double evalf1D(double r, int dim) const override; + /** @brief Expand into a sum of @ref GaussFunc terms (tensor of monomials). + * + * Produces \f$ \prod_d P_d \f$ as a sum of monomials and attaches the same + * Gaussian envelope. This is used both for integration and algebra. + */ GaussExp asGaussExp() const override; + + /** @brief Analytic derivative in Cartesian direction `dir`. + * + * @note The implementation may throw/abort if not provided for GaussPoly. + * (The .cpp currently marks this as NOT_IMPLEMENTED.) 
+ */ GaussPoly differentiate(int dir) const override; + /** @} */ + + /** @name Algebra + * @{ + */ + + /** @brief In-place product with another GaussPoly (same center/envelope). + * + * @warning Not implemented in the current source (will abort if called). + */ void multInPlace(const GaussPoly &rhs); + + /** @brief In-place product operator (delegates to @ref multInPlace). */ void operator*=(const GaussPoly &rhs) { multInPlace(rhs); } + + /** @brief Symbolic product, returns a new GaussPoly. + * + * @warning Not implemented in the current source (will abort if called). + */ GaussPoly mult(const GaussPoly &rhs); + + /** @brief Multiply by scalar (returns a copy). */ GaussPoly mult(double c); + + /** @brief Operator sugar for @ref mult(const GaussPoly&). */ GaussPoly operator*(const GaussPoly &rhs) { return mult(rhs); } + + /** @brief Operator sugar for @ref mult(double). */ GaussPoly operator*(double c) { return mult(c); } + /** @} */ + + /** @name Accessors (per-axis polynomials) + * @{ + */ + + /** @brief Read-only access to coefficient vector of polynomial in dim `i`. */ const Eigen::VectorXd &getPolyCoefs(int i) const { return poly[i]->getCoefs(); } + + /** @brief Mutable access to coefficient vector of polynomial in dim `i`. */ Eigen::VectorXd &getPolyCoefs(int i) { return poly[i]->getCoefs(); } + + /** @brief Read-only access to polynomial object in dim `i`. */ const Polynomial &getPoly(int i) const { return *poly[i]; } + + /** @brief Mutable access to polynomial object in dim `i`. */ Polynomial &getPoly(int i) { return *poly[i]; } + /** @} */ + + /** @name Mutators (structure/shape) + * @{ + */ + + /** @brief Set polynomial degree in one dimension (reallocates @ref Polynomial). */ void setPow(int d, int pow) override; + + /** @brief Set polynomial degrees in all dimensions (reallocates). */ void setPow(const std::array &pow) override; + + /** @brief Replace polynomial in dimension `d` with a copy of `poly`. 
+ * + * Updates the stored per-axis degree to `poly.getOrder()`. + * Ownership remains with this GaussPoly (deep copy). + */ void setPoly(int d, Polynomial &poly); + /** @} */ private: + /** @brief Owned per-axis polynomials \f$P_d\f$ (nullptr if unused). */ Polynomial *poly[D]; + /** @brief Helper (recursive): enumerate all monomial power combinations + * and collect combined coefficients (raw C-array version). + * + * Used by @ref asGaussExp to create the full tensor expansion. On the + * recursion leaf it pushes: + * - a newly allocated `int[D]` with the current powers, and + * - the corresponding scalar coefficient (product of axis coefficients, + * times the global amplitude). + */ void fillCoefPowVector(std::vector &coefs, std::vector &power, int pow[D], int dir) const; + + /** @brief Helper (recursive): same as above, with std::array accumulator. */ void fillCoefPowVector(std::vector &coefs, std::vector &power, std::array &pow, int dir) const; + + /** @brief Pretty-print (polynomial degrees, coefficients, envelope). */ std::ostream &print(std::ostream &o) const override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp index 6dbfa7c5b..5dc4d25e1 100644 --- a/src/functions/Gaussian.cpp +++ b/src/functions/Gaussian.cpp @@ -24,12 +24,26 @@ */ /** + * Implementation notes (high-level) + * --------------------------------- + * This file implements generic (templated) functionality shared by all + * Gaussian-like primitives in D dimensions, i.e. @ref Gaussian. 
* - * - * \date May 25, 2010 - * \author Stig Rune Jensen - * CTCC, University of Tromsø - * + * Key responsibilities: + * - Store and initialize the parameters of a Cartesian Gaussian: + * * alpha[d] : per-axis exponents β_d (>0) + * * coef : scalar prefactor α + * * pos[d] : center coordinates R_d + * * power[d] : polynomial powers p_d ∈ {0,1,2,...} + * - Compose two Gaussians into a *pure* Gaussian by completing the square + * (multPureGauss), leaving the polynomial factors to higher layers. + * - Build cheap screening boxes / visibility tests to avoid unnecessary + * work when projecting to grids/trees (calcScreening, checkScreen, + * isVisibleAtScale, isZeroOnInterval). + * - Provide utility evaluations on batches of points (evalf over matrices). + * - Compute overlaps by expanding (if needed) into GaussFunc terms and + * using the Obara–Saika 1D recurrences (via function_utils). + * - Create semi-periodic images of a Gaussian inside a unit cell (periodify). */ #include @@ -46,15 +60,21 @@ using namespace Eigen; namespace mrcpp { +/*---------------------------* + * Constructors / init state * + *---------------------------*/ + +/** @brief Isotropic-constructor: fill all D exponents with the same value @p a. */ template Gaussian::Gaussian(double a, double c, const Coord &r, const std::array &p) - : screen(false) - , coef(c) - , power(p) - , pos(r) { - this->alpha.fill(a); + : screen(false) // screening disabled by default + , coef(c) // scalar amplitude + , power(p) // Cartesian powers + , pos(r) { // center + this->alpha.fill(a); // isotropic exponent β_d = a ∀ d } +/** @brief Anisotropic-constructor: exponents are provided per axis. 
*/ template Gaussian::Gaussian(const std::array &a, double c, const Coord &r, const std::array &p) : screen(false) @@ -63,21 +83,42 @@ Gaussian::Gaussian(const std::array &a, double c, const Coord & , alpha(a) , pos(r) {} +/*----------------------------------------------------* + * Multiply two *pure* Gaussians (no polynomial part) * + *----------------------------------------------------*/ +/** + * @brief Complete-the-square product of two Gaussians into this object. + * + * Given + * G_L(x) = exp[-Σ_d α_L(d) (x_d - R_L(d))^2], + * G_R(x) = exp[-Σ_d α_R(d) (x_d - R_R(d))^2], + * their product is a *single* Gaussian + * G_P(x) = C · exp[-Σ_d α_P(d) (x_d - R_P(d))^2], + * where + * α_P(d) = α_L(d) + α_R(d), + * μ(d) = α_L(d) α_R(d) / α_P(d) (reduced exponent), + * R_P(d) = [α_L(d) R_L(d) + α_R(d) R_R(d)] / α_P(d), + * C = exp[-Σ_d μ(d) (R_L(d) - R_R(d))^2]. + * + * The polynomial prefactors (if any) are handled elsewhere (e.g. GaussFunc→GaussPoly). + */ template void Gaussian::multPureGauss(const Gaussian &lhs, const Gaussian &rhs) { auto newAlpha = std::array{}; auto mju = std::array{}; for (auto d = 0; d < D; d++) { - newAlpha[d] = lhs.alpha[d] + rhs.alpha[d]; - mju[d] = (lhs.alpha[d] * rhs.alpha[d]) / newAlpha[d]; + newAlpha[d] = lhs.alpha[d] + rhs.alpha[d]; // α_P = α_L + α_R + mju[d] = (lhs.alpha[d] * rhs.alpha[d]) / newAlpha[d]; // μ = α_L α_R / (α_L + α_R) } auto newPos = std::array{}; auto relPos = std::array{}; double newCoef = 1.0; for (int d = 0; d < D; d++) { + // Center of the product (weighted by exponents) newPos[d] = (lhs.alpha[d] * lhs.pos[d] + rhs.alpha[d] * rhs.pos[d]) / newAlpha[d]; - relPos[d] = lhs.pos[d] - rhs.pos[d]; + relPos[d] = lhs.pos[d] - rhs.pos[d]; // R_L - R_R + // Normalization factor from completing the square newCoef *= std::exp(-mju[d] * std::pow(relPos[d], 2.0)); } setExp(newAlpha); @@ -85,45 +126,80 @@ template void Gaussian::multPureGauss(const Gaussian &lhs, const G setCoef(newCoef); } 
+/*--------------------------------------------* + * Screening boxes and quick-visibility tests * + *--------------------------------------------*/ +/** + * @brief Build an axis-aligned screening box [A,B] centred on pos, with a + * half-width per axis controlled by nStdDev and the exponent. + * + * For each dimension d the bounds are R_d ± sqrt(nStdDev / α_d). With σ_d^2 = 1/(2 α_d) + * this half-width equals sqrt(2 nStdDev) σ_d, not nStdDev σ_d. Setting @c screen=true + * enables the screening tests (see checkScreen). + */ template void Gaussian::calcScreening(double nStdDev) { assert(nStdDev > 0); if (not this->isBounded()) { + // Lazy-allocate bounds arrays if needed this->bounded = true; this->A = new double[D]; this->B = new double[D]; } for (int d = 0; d < D; d++) { - double limit = std::sqrt(nStdDev / this->alpha[d]); + double limit = std::sqrt(nStdDev / this->alpha[d]); // half-width sqrt(nStdDev / α_d) = sqrt(2 nStdDev) σ_d, with σ_d = 1/sqrt(2 α_d) this->A[d] = this->pos[d] - limit; this->B[d] = this->pos[d] + limit; } screen = true; } +/** + * @brief Tile-level screening: test whether a node box [a,b] at scale n + * lies entirely outside this Gaussian’s screening box. + * + * The physical length of a dyadic tile at scale n is 2^{-n}. The tile's + * coordinate bounds are computed from its integer translations l[d]. + * If the tile is completely outside [A,B] on any axis, return true + * (i.e., we can skip processing that tile). 
+ */ template bool Gaussian::checkScreen(int n, const int *l) const { if (not getScreen()) { return false; } - double length = std::pow(2.0, -n); + double length = std::pow(2.0, -n); // tile size const double *A = this->getLowerBounds(); const double *B = this->getUpperBounds(); for (int d = 0; d < D; d++) { - double a = length * l[d]; - double b = length * (l[d] + 1); - if (a > B[d] or b < A[d]) { return true; } + double a = length * l[d]; // tile lower bound in dim d + double b = length * (l[d] + 1); // tile upper bound in dim d + if (a > B[d] or b < A[d]) { return true; } // entirely outside -> culled } return false; } +/** + * @brief Heuristic visibility test vs. resolution scale and quadrature count. + * + * For each axis, visibleScale = -floor(log2(nQuadPts * σ / 2)) with σ = 1/√(2α). + * The Gaussian is reported as not visible when scale < visibleScale, i.e. at scales + * coarser than visibleScale (node length 2^{-scale}, as in checkScreen); any failing axis returns false. + */ template bool Gaussian::isVisibleAtScale(int scale, int nQuadPts) const { for (auto &alp : this->alpha) { - double stdDeviation = std::pow(2.0 * alp, -0.5); + double stdDeviation = std::pow(2.0 * alp, -0.5); // σ = 1/√(2α) auto visibleScale = static_cast(-std::floor(std::log2(nQuadPts * 0.5 * stdDeviation))); - + // If the requested 'scale' is *coarser* (smaller index, larger node) than visibleScale, report not visible. if (scale < visibleScale) return false; } return true; } +/** + * @brief Quick zero test on an interval: returns true if the Gaussian + * is negligible on [a,b] (component-wise), using ±5σ rule. + * + * If the interval is completely outside [R-5σ, R+5σ] on any axis, we + * consider the function zero there for practical purposes. 
+ */ template bool Gaussian::isZeroOnInterval(const double *a, const double *b) const { for (int i = 0; i < D; i++) { double stdDeviation = std::pow(2.0 * this->alpha[i], -0.5); @@ -134,6 +210,19 @@ template bool Gaussian::isZeroOnInterval(const double *a, const doubl return false; } +/*---------------------------------------------* + * Batch evaluation (matrix of points → values) * + *---------------------------------------------*/ +/** + * @brief Evaluate the *separable* 1D factors on a batch of points. + * + * @param[in] points Matrix (N×D) of coordinates; column d contains the d-th coordinate of all N points. + * @param[out] values Matrix (N×D) to be filled with per-axis factors: + * values(i,d) = g_d( points(i,d) ). + * + * Note: this does not multiply across dimensions; higher-level code can + * combine the columns (e.g., by product) if the full D-D value is needed. + */ template void Gaussian::evalf(const MatrixXd &points, MatrixXd &values) const { assert(points.cols() == D); assert(points.cols() == values.cols()); @@ -143,6 +232,15 @@ template void Gaussian::evalf(const MatrixXd &points, MatrixXd &value } } +/*--------------------------------------* + * Convenience: maximum standard dev σ * + *--------------------------------------*/ +/** + * @brief Return the maximum standard deviation across axes. + * + * For isotropic exponents, that is 1/√(2 α). For anisotropic, compute + * σ_d = 1/√(2 α_d) per axis and return max_d σ_d. Used in periodification. + */ template double Gaussian::getMaximumStandardDiviation() const { if (details::are_all_equal(this->getExp())) { @@ -156,6 +254,16 @@ template double Gaussian::getMaximumStandardDiviation() const { } } +/*-------------------------* + * Overlap ⟨G|G'⟩ utilities * + *-------------------------*/ +/** + * @brief General overlap by expanding both sides into @ref GaussFunc terms + * (if needed) and summing pairwise 1D Obara–Saika products. 
+ * + * The helper function_utils::calc_overlap(GaussFunc,GaussFunc) performs the + * per-dimension recursion and multiplies contributions across D. + */ template double Gaussian::calcOverlap(const Gaussian &inp) const { const auto &bra_exp = this->asGaussExp(); // Make sure all entries are GaussFunc const auto &ket_exp = inp.asGaussExp(); // Make sure all entries are GaussFunc @@ -171,15 +279,23 @@ template double Gaussian::calcOverlap(const Gaussian &inp) const { return S; } -/** @brief Generates a GaussExp that is semi-periodic around a unit-cell - * - * @returns Semi-periodic version of a Gaussian around a unit-cell - * @param[in] period: The period of the unit cell - * @param[in] nStdDev: Number of standard diviations covered in each direction. Default 4.0 +/*-----------------------------* + * Semi-periodic “image” clone * + *-----------------------------*/ +/** + * @brief Build a semi-periodic expansion by replicating this Gaussian on a + * Cartesian lattice so that most of the mass (≈erf coverage) lies + * within a single unit cell. * - * @details nStdDev = 1, 2, 3 and 4 ensures atleast 68.27%, 95.45%, 99.73% and 99.99% of the - * integral is conserved with respect to the integration limits. + * @param period Period vector (cell lengths per axis). + * @param nStdDev Number of σ to keep around the central copy (default 4.0). + * @returns A @ref GaussExp consisting of translated copies. * + * Algorithm: + * 1) Fold the original center into the principal cell [0,period). + * 2) Estimate the number of neighbor cells needed so that ±nStdDev·σ fits. + * 3) Generate all translations in the (2N+1)^D cube around the folded center. + * 4) Copy and shift the Gaussian for each translation and append to result. 
*/ template GaussExp Gaussian::periodify(const std::array &period, double nStdDev) const { GaussExp gauss_exp; @@ -187,22 +303,22 @@ template GaussExp Gaussian::periodify(const std::array auto x_std = nStdDev * this->getMaximumStandardDiviation(); - // This lambda function calculates the number of neighbooring cells - // requred to keep atleast x_stds of the integral conserved in the - // unit-cell. + // This lambda computes how many neighbor cells are needed (per axis) + // so that the ±x_std window is covered by translated images. auto neighbooring_cells = [period, x_std](auto pos) { auto needed_cells_vec = std::vector(); for (auto i = 0; i < D; i++) { auto upper_bound = pos[i] + x_std; auto lower_bound = pos[i] - x_std; - // number of cells upp and down relative to the center of the Gaussian + (void)lower_bound; // not used explicitly; retained for clarity + // Minimal number of positive cell steps so that [pos-x_std, pos+x_std] is inside coverage. needed_cells_vec.push_back(std::ceil(upper_bound / period[i])); } return *std::max_element(needed_cells_vec.begin(), needed_cells_vec.end()); }; - // Finding starting position + // Fold starting position into the principal cell auto startpos = this->getPos(); for (auto d = 0; d < D; d++) { @@ -210,9 +326,11 @@ template GaussExp Gaussian::periodify(const std::array if (startpos[d] < 0) startpos[d] += period[d]; } + // Symmetric image range: from -N to +N cells in each dimension auto nr_cells_upp_and_down = neighbooring_cells(startpos); for (auto d = 0; d < D; d++) { startpos[d] -= nr_cells_upp_and_down * period[d]; } + // Generate a (2N+1)^D Cartesian product of offsets auto tmp_pos = startpos; std::vector v(2 * nr_cells_upp_and_down + 1); std::iota(v.begin(), v.end(), 0.0); @@ -220,15 +338,16 @@ template GaussExp Gaussian::periodify(const std::array for (auto &c : cart) { for (auto i = 0; i < D; i++) c[i] *= period[i]; } - // Shift coordinates + // Shift coordinates by the starting corner for (auto &c : cart) 
std::transform(c.begin(), c.end(), tmp_pos.begin(), c.begin(), std::plus()); - // Go from vector to mrcpp::Coord + // Convert vectors to mrcpp::Coord for (auto &c : cart) { mrcpp::Coord pos; std::copy_n(c.begin(), D, pos.begin()); pos_vec.push_back(pos); } + // Create the translated copies for (auto &pos : pos_vec) { auto *gauss = this->copy(); gauss->setPos(pos); @@ -239,6 +358,9 @@ template GaussExp Gaussian::periodify(const std::array return gauss_exp; } +/*-----------------------------* + * Explicit template instances * + *-----------------------------*/ template class Gaussian<1>; template class Gaussian<2>; template class Gaussian<3>; diff --git a/src/functions/Gaussian.h b/src/functions/Gaussian.h index ddb039202..5048c66b2 100644 --- a/src/functions/Gaussian.h +++ b/src/functions/Gaussian.h @@ -24,8 +24,48 @@ */ /** - * * Base class for Gaussian type functions + * + * High-level overview + * ------------------- + * This header declares the abstract template class Gaussian, a common base + * for concrete Gaussian primitives used throughout MRCPP. A Gaussian here is + * a separable Cartesian function in D dimensions of the form + * + * f(x) = coef * Π_{d=0..D-1} (x_d - pos[d])^{power[d]} * exp(-alpha[d] * (x_d - pos[d])^2), + * + * where: + * - coef : global scalar amplitude (double). + * - power[d] : non-negative integer power of the monomial factor in dim d. + * - alpha[d] : strictly positive exponent in dim d (width parameter). + * - pos[d] : center coordinate in dim d. + * + * The class only provides *common infrastructure* (storage, screening helpers, + * normalization by norm, simple algebra on prefactors, batch evaluation stubs, + * etc.). Concrete subclasses implement the analytic pieces that depend on the + * exact Gaussian flavor (e.g. GaussFunc, GaussPoly), such as: + * - evalf(...) : the actual evaluation at a point. + * - evalf1D(...) : 1D component evaluation used in tensorized loops. + * - calcSquareNorm() : exact L2 norm. 
+ * - differentiate() : derivative producing a polynomial × Gaussian (GaussPoly). + * - asGaussExp() : expansion into sum of pure Gaussians if needed. + * + * Screening and visibility + * ------------------------ + * Gaussian supports optional *screening* (axis-aligned bounding boxes) to skip + * work on dyadic tiles that are provably negligible. See: + * - calcScreening(stdDeviations) : builds [A,B] bounds as ± nσ around pos. + * - checkScreen(n, l) : tile-level cull test at dyadic scale n. + * - isVisibleAtScale(...) : heuristic visibility vs. resolution. + * - isZeroOnInterval(...) : quick interval culling via ±5σ rule. + * + * Relations to other types + * ------------------------ + * - GaussExp: an expansion (sum) of Gaussian-like terms. + * - GaussFunc: Gaussian with a *single* monomial factor (derived class). + * - GaussPoly: Gaussian multiplied by a *polynomial* (derivative results). + * + * Thread-safety: instances are regular value objects; no shared state. */ #pragma once @@ -40,75 +80,238 @@ namespace mrcpp { +/** + * @tparam D Spatial dimension (1, 2, or 3 in MRCPP usage). + * + * @class Gaussian + * @brief Abstract base for separable Cartesian Gaussians in D dimensions. + * + * Interface summary + * ----------------- + * Construction: + * - Gaussian(a, c, r, p) : isotropic exponent (alpha[d]=a). + * - Gaussian(alpha[], c, r, p) : anisotropic exponents per axis. + * + * Core virtuals (must be implemented by derived classes): + * - copy() : virtual clone (CRTP alternative). + * - evalf(r) : value at point r (D-vector). + * - evalf1D(x, d) : 1D factor along axis d (helper). + * - calcSquareNorm() : exact ∥f∥². + * - asGaussExp() : expansion into GaussExp. + * - differentiate(dir) : analytic derivative → GaussPoly. + * + * Utilities provided here: + * - evalf(points, values) : batch evaluation per-axis (column-wise). + * - calcOverlap(inp) : ⟨this|inp⟩ via GaussExp + Obara–Saika. + * - periodify(period, nσ) : semi-periodic replication into a GaussExp. 
+ * - calcScreening(nσ) : build ±nσ bounds and enable screening. + * - checkScreen(n, l) : dyadic tile cull test when screening on. + * - normalize() : rescale by 1/∥f∥ (uses calcSquareNorm()). + * - multPureGauss(lhs,rhs): complete-the-square product of *pure* Gaussians + * (monomials handled by derived classes). + * - multConstInPlace(c) : scale the global coefficient. + * + * Accessors/mutators: + * - get/set for coef, alpha, pos, power; toggle screen flag. + */ template class Gaussian : public RepresentableFunction { public: + /** + * @brief Isotropic constructor. + * @param a Exponent value α to be replicated on all axes (α[d] = a). + * @param c Global scalar coefficient. + * @param r Center position (Coord), defaults to origin. + * @param p Per-axis monomial powers (non-negative). + * + * @warning This ctor does not check positivity of @p a; callers are expected + * to pass α>0 (required for square integrability and σ = 1/√(2α)). + */ Gaussian(double a, double c, const Coord &r, const std::array &p); + + /** + * @brief Anisotropic constructor (per-axis exponents). + * @param a Exponent array α[d] per axis. + * @param c Global scalar coefficient. + * @param r Center position (Coord). + * @param p Per-axis monomial powers (non-negative). + */ Gaussian(const std::array &a, double c, const Coord &r, const std::array &p); - Gaussian &operator=(const Gaussian &gp) = delete; - virtual Gaussian *copy() const = 0; + + Gaussian &operator=(const Gaussian &gp) = delete; ///< Non-assignable; use clones. + virtual Gaussian *copy() const = 0; ///< Virtual copy (clone). virtual ~Gaussian() = default; + /** @name Evaluation API (to be implemented by subclasses) */ + ///@{ + /** + * @brief Evaluate f(r) at a D-dimensional coordinate. + * @param r Point (Coord) in physical space. + * @return Function value f(r). + */ virtual double evalf(const Coord &r) const = 0; + + /** + * @brief Evaluate the *1D* separable factor along axis @p dim. + * @param r Coordinate along axis @p dim. 
+ * @param dim Axis index in [0, D-1]. + * @return g_dim(r) = (r-pos[dim])^{power[dim]} exp[-α[dim](r-pos[dim])²], possibly scaled. + */ virtual double evalf1D(double r, int dim) const = 0; + + /** + * @brief Batch evaluation helper. + * @param points Matrix (N×D): column d holds all coordinates along axis d. + * @param values Matrix (N×D): on return, values(i,d) = evalf1D(points(i,d), d). + * + * @note This does *not* multiply across dimensions; it only fills the + * per-axis factors column-wise for later tensor products. + */ void evalf(const Eigen::MatrixXd &points, Eigen::MatrixXd &values) const; + ///@} + /** @name Integral properties and expansions */ + ///@{ + /** + * @brief Overlap ⟨this|inp⟩ computed via GaussExp reduction and + * Obara–Saika 1D recurrences per dimension. + */ double calcOverlap(const Gaussian &inp) const; + + /** @brief Exact L2 norm squared ∥f∥² (implemented by subclass). */ virtual double calcSquareNorm() const = 0; + + /** + * @brief Represent as a sum of Gaussians (pure or polynomial-times-Gaussian), + * suitable for pairwise operations; implemented by subclass. + */ virtual GaussExp asGaussExp() const = 0; + + /** + * @brief Create a semi-periodic expansion by replicating the function on a + * Cartesian lattice so that most of its mass lies within a unit cell. + * @param period Per-axis period lengths. + * @param nStdDev Number of standard deviations to preserve (default 4.0). + */ GaussExp periodify(const std::array &period, double nStdDev = 4.0) const; + ///@} - /** @brief Compute analytic derivative of Gaussian - * @param[in] dir: Cartesian direction of derivative - * @returns New GaussPoly + /** @name Differential operators */ + ///@{ + /** + * @brief Analytic derivative ∂/∂x_dir (Cartesian direction). + * @param dir Axis index in [0, D-1]. + * @return A GaussPoly representing the derivative (polynomial×Gaussian). 
*/ virtual GaussPoly differentiate(int dir) const = 0; + ///@} + /** @name Screening and normalization */ + ///@{ + /** + * @brief Build ±nσ bounds around the center on each axis and enable screening. + * Used to cheaply cull tiles/intervals that cannot contribute. + * @param stdDeviations Number of standard deviations n used for the box. + */ void calcScreening(double stdDeviations); - /** @brief Rescale function by its norm \f$ ||f||^{-1} \f$ */ + /** + * @brief Normalize in place by dividing by the L2 norm. + * @note Calls calcSquareNorm() from the derived class. + */ void normalize() { double norm = std::sqrt(calcSquareNorm()); multConstInPlace(1.0 / norm); } + ///@} + + /** @name Algebra on the pure Gaussian core */ + ///@{ + /** + * @brief Complete-the-square product of two *pure* Gaussians into *this*. + * Polynomial factors are handled in derived types (GaussFunc/GaussPoly). + */ void multPureGauss(const Gaussian &lhs, const Gaussian &rhs); + + /** @brief Scale the global coefficient by a constant. */ void multConstInPlace(double c) { this->coef *= c; } + + /** @brief Shorthand for multConstInPlace. */ void operator*=(double c) { multConstInPlace(c); } + ///@} + /** @name Screening access */ + ///@{ bool getScreen() const { return screen; } + /** + * @brief Tile-level culling test for dyadic box at scale n and translation l. + * @return true if the box is completely outside the screening bounds and can be skipped. 
+ */ bool checkScreen(int n, const int *l) const; + ///@} + /** @name Parameter accessors */ + ///@{ int getPower(int i) const { return power[i]; } double getCoef() const { return coef; } double getExp(int i) const { return alpha[i]; } const std::array &getPower() const { return power; } const std::array &getPos() const { return pos; } std::array getExp() const { return alpha; } + ///@} - virtual void setPow(const std::array &power) = 0; - virtual void setPow(int d, int power) = 0; - void setScreen(bool _screen) { this->screen = _screen; } - void setCoef(double cf) { this->coef = cf; } - void setExp(double _alpha) { this->alpha.fill(_alpha); } - void setExp(const std::array &_alpha) { this->alpha = _alpha; } - void setPos(const std::array &r) { this->pos = r; } + /** @name Parameter mutators */ + ///@{ + virtual void setPow(const std::array &power) = 0; ///< Set all monomial powers. + virtual void setPow(int d, int power) = 0; ///< Set monomial power on axis d. + void setScreen(bool _screen) { this->screen = _screen; } ///< Enable/disable screening flag. + void setCoef(double cf) { this->coef = cf; } ///< Set global coefficient. + void setExp(double _alpha) { this->alpha.fill(_alpha); } ///< Set isotropic exponent α[d]=_alpha. + void setExp(const std::array &_alpha) { this->alpha = _alpha; } ///< Set per-axis exponents. + void setPos(const std::array &r) { this->pos = r; } ///< Set center coordinates. + ///@} + /** @brief Stream pretty-printer (delegates to virtual print()). */ friend std::ostream &operator<<(std::ostream &o, const Gaussian &gauss) { return gauss.print(o); } - friend class GaussExp; + friend class GaussExp; ///< Allows GaussExp to access internals when assembling expansions. 
protected: - bool screen; - double coef; /**< constant factor */ - std::array power; /**< max power in each dim */ - std::array alpha; /**< exponent */ - Coord pos; /**< center */ + /** @name Core parameters (POD) */ + ///@{ + bool screen; ///< If true, use [A,B] screening in fast checks (set via calcScreening / setScreen). + double coef; /**< Global scalar amplitude (the prefactor `coef`, not the exponent α). */ + std::array power; /**< Monomial powers per axis (non-negative integers). */ + std::array alpha; /**< Exponents per axis (>0). Controls width: σ_d = 1/√(2 α_d). */ + Coord pos; /**< Center coordinates. */ + ///@} + /** @name Visibility / culling helpers used by trees and projection */ + ///@{ + /** + * @brief Heuristic visibility vs. resolution scale and quadrature sampling. + * @param scale Dyadic scale (tile size ~ 2^{-scale}). + * @param nQuadPts Number of quadrature points per tile edge. + * @return false if the Gaussian is “too narrow” to be represented at this scale. + */ bool isVisibleAtScale(int scale, int nQuadPts) const; + + /** + * @brief Quick check whether the function is essentially zero on [a,b] per axis, + * using a ±5σ bounding rule (implementation in the .cpp). + */ bool isZeroOnInterval(const double *a, const double *b) const; + ///@} + /** + * @brief Maximum standard deviation across axes: max_d 1/√(2 α_d). + * @details Used by periodify() to decide how many neighboring images to include. + */ double getMaximumStandardDiviation() const; + /** + * @brief Subclass hook for stream output; should print parameters in a readable way. + */ virtual std::ostream &print(std::ostream &o) const = 0; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/JpowerIntegrals.cpp b/src/functions/JpowerIntegrals.cpp index 179f6fcc6..6d04e6ba0 100644 --- a/src/functions/JpowerIntegrals.cpp +++ b/src/functions/JpowerIntegrals.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. 
* * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -28,46 +28,143 @@ namespace mrcpp { +/** + * # Class purpose + * Computes, stores, and provides indexed access to the sequence of + * “power integrals” \( J_m(l) \) for a range of integer shifts \( l \). + * + * In this implementation each sequence \(\{J_m(l)\}_{m=0}^{M}\) is produced by + * a **three–term recurrence** seeded by a closed form for \(J_0(l)\): + * + * - Seed: + * \f[ + * J_0(l) + * = \tfrac{1}{4}\,e^{-i\pi/4}\,\frac{1}{\sqrt{\pi a}}\, + * \exp\!\Big( \tfrac{i\,l^2}{4a} \Big) + * \f] + * - Parameters: + * \f[ + * \beta = \tfrac{i}{2a}, \qquad \alpha = l\,\beta + * \f] + * - Recurrence (implemented below), with \f$ J_{-1} = 0 \f$: + * \f[ + * J_{m+1} + * = \frac{\alpha\,J_{m} + \frac{m}{m+2}\,\beta\,J_{m-1}}{m+3}, + * \qquad m=0,1,\dots,M-1 + * \f] + * + * The class builds these sequences for all integer \( l \) in the + * symmetric range \([-(2^n-1), \dots, -1, 0, \dots, 2^n-1]\), + * where `n = scaling` and `N = 2^n`. Internally, results are stored as + * `std::vector>` (one vector per shift \(l\)). + * + * ## Parameters + * - `a` : real positive parameter in the Gaussian-like kernel (see seed). + * - `scaling` : defines the number of integer shifts as \(N=2^{\text{scaling}}\). + * - `M` : the highest power index — sequences contain \(J_0,\dots,J_M\). + * - `threshold` : magnitude cutoff used by `crop()` to trim negligible tail values. + * (Note: the current constructor does **not** call `crop()`, and + * `crop()` is a private member, so stored sequences keep their full length.) 
+ * + * ## Indexing + * The operator `operator[](int index)` accepts the natural range + * \([-(2^n-1), \dots, 2^n-1]\). Negative indices are transparently + * mapped to the underlying zero-based container. + */ JpowerIntegrals::JpowerIntegrals(double a, int scaling, int M, double threshold) { this->scaling = scaling; - int N = 1 << scaling; + int N = 1 << scaling; // N = 2^scaling shifts on the positive side (including 0) + // Store sequences for l = 0,1,...,N-1 for (int l = 0; l < N; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold)); + // And for l = -(N-1),...,-1 (append after the non-negative ones) for (int l = 1 - N; l < 0; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold)); } -/// @brief in progress -/// @param index - interger lying in the interval \f$ [ -2^n + 1, \ldots, 2^n - 1 ] \f$. -/// @return in progress +/** + * @brief Random–access to the vector of \f$ \{J_m(l)\}_{m=0}^{M} \f$ for a given shift @p index. + * + * @param index Integer shift \(l\) in \([-(2^n-1), \dots, 2^n-1]\). + * Negative inputs are internally wrapped to the layout used + * by the `integrals` storage. + * @return Reference to the vector `J` containing `[J_0, J_1, ..., J_M]` for that \(l\). + * + * @note This is a non-const overload returning a mutable reference; callers + * can modify the stored sequence if needed. + */ std::vector> &JpowerIntegrals::operator[](int index) { - if (index < 0) index += integrals.size(); + if (index < 0) index += integrals.size(); // wrap negative l to the back half of the container return integrals[index]; } -std::vector> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double threshold) { +/** + * @brief Build the sequence \f$ \{J_m(l)\}_{m=0}^{M} \f$ using the closed-form seed and recurrence. + * + * @param l Integer shift parameter. + * @param a Positive real parameter from the analytic form. + * @param M Highest power index to compute (inclusive). 
+ * @param threshold Magnitude threshold (currently not used inside this routine). + * @return Vector of length \f$ M+1 \f$ with entries \f$ [J_0, J_1, \dots, J_M] \f$. + * + * Implementation notes: + * - We store an initial dummy 0 followed by \(J_0\) so that the recurrence + * can read the two previous entries uniformly; we erase the dummy before return. + * - Complex constants: + * * `i` is introduced through `std::complex` literals (`std::complex_literals`). + * * \f$ \beta = i/(2a) \f$, \f$ \alpha = l \beta \f$. + * - Numerical behavior: + * * The recurrence is simple and linear; for large |m| or extreme `a` you may + * see accumulation of round-off; consider `crop()` afterwards if you know + * the tail becomes negligible for your use-case. + */ +std::vector> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double /*threshold*/) { using namespace std::complex_literals; + // Seed J0(l) = (1/4) e^{-iπ/4} / sqrt(π a) * exp( i l^2 / (4 a) ) std::complex J_0 = 0.25 * std::exp(-0.25i * M_PI) / std::sqrt(M_PI * a) * std::exp(0.25i * static_cast(l * l) / a); + + // β = i/(2a) and α = l β std::complex beta(0, 0.5 / a); auto alpha = static_cast(l) * beta; + // Work buffer: prepend a dummy zero so that J.back() = J_m, J[J.size()-2] = J_{m-1} + // After the loop we drop the dummy, leaving [J_0, J_1, ..., J_M]. 
std::vector> J = {0.0, J_0}; + // Three-term recurrence (the leading dummy zero plays the role of J_{-1}): + // J_{m+1} = (α J_m + (m/(m+2)) β J_{m-1}) / (m+3), for m = 0..M-1 for (int m = 0; m < M; m++) { - std::complex term1 = J[J.size() - 1] * alpha; - std::complex term2 = J[J.size() - 2] * beta * static_cast(m) / static_cast(m + 2); - std::complex last = (term1 + term2) / static_cast(m + 3); - J.push_back(last); + std::complex term1 = J[J.size() - 1] * alpha; // α J_m + std::complex term2 = J[J.size() - 2] * beta * static_cast(m) / static_cast(m + 2); // (m/(m+2)) β J_{m-1} + std::complex last = (term1 + term2) / static_cast(m + 3); // divide by (m+3) + J.push_back(last); // append J_{m+1} } + // Remove the initial dummy zero so the vector starts with J_0 J.erase(J.begin()); return J; } -/// @details Removes negligible elements in \b J until it reaches a considerable value. +/** + * @brief Trim a sequence by removing small-magnitude values from its tail. + * + * @param J The sequence \f$ [J_0, J_1, \dots] \f$ to be cropped in-place. + * @param threshold Elements with both |real| and |imag| < threshold are considered negligible. + * + * Details: + * - Traverses from the end until it finds the first element whose real/imag + * magnitude is **not** negligible and erases everything past that point. + * - Use this to keep only the “significant” prefix \f$ J_0,\dots,J_{m^\*} \f$ + * if you know the tail rapidly vanishes for your parameters. + * + * @warning The constructor does not call this automatically. Sequences are only + * trimmed if `crop(...)` is invoked explicitly from within the class (it is private). 
+ */ void JpowerIntegrals::crop(std::vector> &J, double threshold) { - // Lambda function to check if an element is negligible - auto isNegligible = [threshold](const std::complex &c) { return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; }; - // Remove negligible elements from the end of the vector + // Predicate: element is negligible if both real and imaginary parts are below threshold + auto isNegligible = [threshold](const std::complex &c) { + return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; + }; + // Erase the trailing run of negligible entries J.erase(std::find_if_not(J.rbegin(), J.rend(), isNegligible).base(), J.end()); } diff --git a/src/functions/JpowerIntegrals.h b/src/functions/JpowerIntegrals.h index dea01305f..eab08a50a 100644 --- a/src/functions/JpowerIntegrals.h +++ b/src/functions/JpowerIntegrals.h @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -34,116 +34,143 @@ namespace mrcpp { /** @class JpowerIntegrals * - * @brief A class needed for construction Schrodinger time evolution operator + * @brief Precompute and store families of power–type integrals \f$ \{\widetilde J_m(l,a)\}_{m\ge 0} \f$ + * for integer shifts \f$ l \f$, used in the Schrödinger time–evolution operator. * - * @details A two dimensional array consisting of integrals \f$ J_m \f$ as follows. 
- * Our main operator has the following expansion + * @details + * This helper class generates the sequences + * \f$ \big(\widetilde J_m(l,a)\big)_{m=0}^{M} \f$ for a finite set of integer + * translations \f$ l \in \{-(2^n-1),\ldots,-1,0,1,\ldots,2^n-1\} \f$, where + * \f$ n=\texttt{scaling} \f$ and \f$ a>0 \f$ is the time–scaled parameter + * (typically \f$ a = t\,\mathfrak N^2 = t\,4^{\mathfrak n} \f$). + * + * The integrals appear in the expansion of the (matrix–valued) operator + * \f[ + * \big[ \sigma_l^{\mathfrak n} \big]_{pj}(a) + * = + * \sum_{k=0}^{\infty} C_{jp}^{2k}\, + * \widetilde J_{\,2k + j + p}(l,a), + * \f] + * where the scalar building blocks are * \f[ - * \left[ \sigma_l^{\mathfrak n} \right]_{pj} - * (a) + * \widetilde J_m(l,a) * = - * \sum_{k = 0}^{\infty} - * C_{jp}^{2k} - * \widetilde J_{2k + j + p}(l, a) - * , + * \frac{e^{i\frac{\pi}{4}(m-1)}}{2\pi\,(m+2)!} + * \int_{\mathbb R} + * \exp\!\Big( + * \rho\,l\,e^{i\pi/4} - a\,\rho^2 + * \Big)\, + * \rho^m\, d\rho . * \f] - * where \f$ a = t \mathfrak N^2 = t 4^{\mathfrak n} \f$ - * and + * + * In the code, \f$ \widetilde J_m \f$ are produced by the three–term recurrence + * (valid for \f$ m=0,1,2,\ldots \f$) * \f[ - * \widetilde J_m - * = - * \frac - * { - * I_m - * e^{ i \frac {\pi}4 (m - 1) } - * } - * { - * 2 \pi ( m + 2 )! - * } - * = - * \frac - * { - * e^{ i \frac {\pi}4 (m - 1) } - * } - * { - * 2 \pi ( m + 2 )! 
- * } - * \int_{\mathbb R} - * \exp - * \left( - * \rho l \exp \left( i \frac \pi 4 \right) - a \rho^2 - * \right) - * \rho^m - * d \rho + * \widetilde J_{m+1} + * = + * \frac{i}{2a\,(m+3)}\left( + * l\,\widetilde J_m + \frac{m}{m+2}\,\widetilde J_{m-1} + * \right), + * \qquad \widetilde J_{-1}=0, * \f] - * satisfying the following relation + * with the closed–form seed * \f[ - * \widetilde J_{m+1} - * = - * \frac - * { - * il - * } - * { - * 2a (m + 3) - * } - * \widetilde J_m - * + - * \frac {im}{2a(m + 2)(m + 3)} - * \widetilde J_{m-1} - * = - * \frac - * { - * i - * } - * { - * 2a (m + 3) - * } - * \left( - * l - * \widetilde J_m - * + - * \frac {m}{(m + 2)} - * \widetilde J_{m-1} - * \right) - * , \quad - * m = 0, 1, 2, \ldots, + * \widetilde J_0(l,a) + * = + * \frac{e^{-i\pi/4}}{4\sqrt{\pi a}}\, + * \exp\!\left(\frac{i\,l^2}{4a}\right). * \f] - * with \f$ \widetilde J_{-1} = 0 \f$ and + * + * ### Storage layout + * For convenience of iteration, the container `integrals` is filled + * in the following order: * \f[ - * \label{power_integral_0} - * \widetilde J_0 - * = - * \frac{ e^{ -i \frac{\pi}4 } }{ 4 \sqrt{ \pi a } } - * \exp - * \left( - * \frac{il^2}{4a} - * \right) - * . + * l = 0, 1, \ldots, 2^n-1,\; 1-2^n, 2-2^n, \ldots, -2, -1. * \f] + * Each entry is a vector + * \code + * integrals[k] == { J_0(l), J_1(l), ..., J_M(l) } + * \endcode + * of complex values for a fixed shift \f$ l \f$. + * + * ### Intended use + * - Construct once for a given \f$ a \f$, \f$ n \f$ and \f$ M \f$. + * - Access the sequence for a particular shift via `operator[](l)`. + * - Combine with precomputed correlation coefficients \f$ C_{jp}^{2k} \f$ + * to assemble \f$ [\sigma_l^{\mathfrak n}]_{pj}(a) \f$. * - * + * @note The class offers an internal @ref crop routine to trim negligible + * tail entries of a sequence (based on a magnitude threshold). 
Whether and + * when cropping is used is an implementation detail; sequences are always + * returned in full length \f$ M\!+\!1 \f$ from the constructor path. */ class JpowerIntegrals { public: - /// @brief creates an array of power integrals - /// @param a : parameter a - /// @param scaling : scaling level - /// @param M : maximum amount of integrals for each l - /// @param threshold : lower limit for neglecting the integrals - /// @details The array is orginised as a vector ordered as \f$l = 0, 1, 2, \ldots, 2^n - 1, 1 - 2^n, 2 - 2^n, \ldots, -2, -1 \f$. + /// @brief Construct and precompute all \f$ \widetilde J_m(l,a) \f$ for + /// \f$ l\in[-(2^n-1),\ldots,2^n-1] \f$ and \f$ m=0,\ldots,M \f$. + /// + /// @param a Time–scaled parameter (typically \f$ a=t\,4^{\mathfrak n} \f$), must be positive. + /// @param scaling Level \f$ n \f$ that defines \f$ N=2^n \f$ distinct nonnegative shifts + /// (the negative ones are added symmetrically after them). + /// @param M Highest power index in the sequence (inclusive). Each stored vector + /// has length \f$ M+1 \f$ starting at \f$ m=0 \f$. + /// @param threshold Magnitude cutoff used by the private @ref crop routine to remove + /// negligible tail entries (if cropping is applied internally). + /// + /// @details + /// The internal ordering of the outer container is + /// \f$ l=0,1,\ldots,2^n-1, 1-2^n,\ldots,-1 \f$. This ordering is mirrored by + /// the @ref operator[] which will map negative indices to the appropriate + /// position of the storage. JpowerIntegrals(double a, int scaling, int M, double threshold = 1.0e-15); //JpowerIntegrals(const JpowerIntegrals& other); - - int scaling; //it is probably not used + /// @brief Scaling level \f$ n \f$ (kept for reference; not used directly in lookups). + int scaling; + + /// @brief Container of sequences \f$ \{\widetilde J_m(l,a)\}_{m=0}^M \f$ for all shifts \f$ l \f$. 
+ /// Each element is a vector of length \f$ M+1 \f$: + /// \code + /// integrals[idx_for_l] = { J_0(l), J_1(l), ..., J_M(l) } + /// \endcode std::vector>> integrals; + /// @brief Mutable access to the precomputed sequence for a given shift \f$ l \f$. + /// + /// @param index Integer shift \f$ l \in [-(2^n-1), \ldots, 2^n-1] \f$. + /// @return Reference to the vector \f$ [J_0(l), \ldots, J_M(l)] \f$. + /// + /// @details + /// Negative indices are transparently remapped to the internal storage order + /// (see the constructor’s documentation). This allows natural use like `obj[-3]`. std::vector> & operator[](int index); + private: + /// @brief Build one full sequence \f$ \{\widetilde J_m(l,a)\}_{m=0}^M \f$ for a fixed shift @p l. + /// + /// @param l Shift index. + /// @param a Time–scaled parameter (positive). + /// @param M Highest power index. + /// @param threshold Magnitude cutoff passed to @ref crop (if enabled). + /// + /// @return A vector with entries \f$ [J_0(l), J_1(l), \ldots, J_M(l)] \f$. + /// + /// @details + /// The routine uses the closed–form seed \f$ \widetilde J_0(l,a) \f$ and the + /// recurrence relation to fill the sequence up to \f$ m=M \f$. std::vector> calculate_J_power_integrals(int l, double a, int M, double threshold); + + /// @brief Remove negligible tail entries from a sequence in place. + /// + /// @param J The sequence to be cropped (modified in place). + /// @param threshold Entries with both real and imaginary parts below @p threshold + /// in absolute value are considered negligible. + /// + /// @details + /// Cropping can be used to shrink \f$ [J_0,\ldots,J_M] \f$ to + /// \f$ [J_0,\ldots,J_{m^\*}] \f$ once the tail has decayed under the requested tolerance. 
void crop(std::vector> & J, double threshold); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/LegendrePoly.cpp b/src/functions/LegendrePoly.cpp index 4dce049b0..4f95af763 100644 --- a/src/functions/LegendrePoly.cpp +++ b/src/functions/LegendrePoly.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -24,12 +24,24 @@ */ /* + * File overview + * ------------- + * Implements LegendrePoly, a Polynomial subclass that builds the (shifted/scaled) + * Legendre polynomial of a given order k using the standard three-term recurrence. * - * \date Jul 5, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * Key ideas: + * - Coefficients for P_k on the canonical interval [-1, 1] are computed once + * (in the Polynomial's coefficient storage) by combining cached lower orders. + * - The resulting polynomial is then *affinely transformed* by an internal + * mapping x ↦ N·x + L via Polynomial::translate(l) and Polynomial::dilate(n), + * so that users obtain a Legendre polynomial defined on the transformed domain. + * - A lightweight cache (ObjectCache) avoids recomputing lower + * orders repeatedly when constructing higher ones. * - * \breif + * Extras: + * - firstDerivative(x) returns both P_k(x) and P'_k(x) evaluated at x (w.r.t. the + * current affine mapping). + * - secondDerivative(x) is declared but not implemented (calls NOT_IMPLEMENTED_ABORT). 
*/ #include "LegendrePoly.h" @@ -40,45 +52,96 @@ using namespace Eigen; namespace mrcpp { -using LegendreCache = ObjectCache; +using LegendreCache = ObjectCache; // Cache of LegendrePoly objects keyed by order -/** Legendre polynomial constructed on [-1,1] and - * scaled by n and translated by l */ +/** @brief Construct the order-k Legendre polynomial and apply an affine transform. + * + * @param k Polynomial order (degree). + * @param n Dilation factor applied after construction (see Polynomial::dilate). + * @param l Translation applied before dilation (see Polynomial::translate). + * + * Details: + * - The raw Legendre polynomial P_k is constructed on [-1, 1] using the standard + * recurrence: + * P_0(q) = 1, + * P_1(q) = q, + * P_k(q) = ((2k-1) q P_{k-1}(q) - (k-1) P_{k-2}(q)) / k, k ≥ 2. + * Here q is the canonical variable on [-1, 1]. + * - Lower-order polynomials P_{k-1}, P_{k-2} are fetched (or created once and + * cached) through LegendreCache to avoid recomputation. + * - After coefficients are set, the base interval [-1, 1] is recorded via + * setBounds, then the polynomial is translated by l and dilated by n, + * effectively producing P_k(N·x + L) in the Polynomial base class, where + * N and L are the stored affine parameters. + */ LegendrePoly::LegendrePoly(int k, double n, double l) : Polynomial(k) { - // Since we create Legendre polynomials recursively on [-1,1] - // we cache all lower order polynomilas for future use. + // Ensure lower orders are cached: creating P_k requires P_{k-1} and P_{k-2}. + // We preload P_{k-1} so the subsequent compute can fetch both from cache. 
LegendreCache &Cache = LegendreCache::getInstance(); if (k >= 1) { if (not Cache.hasId(k - 1)) { auto *lp = new LegendrePoly(k - 1); + // Rough memory accounting: 2*(k+1) doubles (heuristic) for the cache Cache.load(k - 1, lp, 2 * sizeof(double) * (k + 1)); } } + + // Compute P_k on the canonical domain [-1, 1] computeLegendrePolynomial(k); + + // Record canonical bounds for sanity checks in eval/derivatives double a = -1.0; double b = 1.0; setBounds(&a, &b); + + // Apply affine map x ↦ N·x + L via translate(l) then dilate(n) translate(l); dilate(n); } -/** Compute Legendre polynomial coefs on interval [-1,1] */ +/** @brief Populate this->coefs with the coefficients of P_k on [-1,1]. + * + * Implements the standard three-term Legendre recurrence in coefficient space: + * P_0(q) = 1, + * P_1(q) = q, + * P_k(q) = ((2k-1) q P_{k-1}(q) - (k-1) P_{k-2}(q)) / k, for k ≥ 2. + * + * Coefficient layout: + * - The Polynomial base stores coefficients in ascending powers: + * coefs[j] corresponds to q^j. + * - To form P_k, we combine cached P_{k-1} and P_{k-2} term-by-term. + * + * Edge cases: + * - k=0 and k=1 are assigned explicitly. 
+ */ void LegendrePoly::computeLegendrePolynomial(int k) { assert(this->size() >= k); + if (k == 0) { + // P_0(q) = 1 this->coefs[0] = 1.0; } else if (k == 1) { + // P_1(q) = q this->coefs[0] = 0.0; this->coefs[1] = 1.0; } else { + // Fetch lower-order polynomials from the cache LegendreCache &Cache = LegendreCache::getInstance(); - LegendrePoly &Lm1 = Cache.get(k - 1); - LegendrePoly &Lm2 = Cache.get(k - 2); + LegendrePoly &Lm1 = Cache.get(k - 1); // P_{k-1} + LegendrePoly &Lm2 = Cache.get(k - 2); // P_{k-2} auto K = (double)k; + + // Constant term (j=0): + // coef0 = -(k-1)/k * (coef from P_{k-2} at j=0) double cm2_0 = Lm2.getCoefs()[0]; this->coefs[0] = -(K - 1.0) * cm2_0 / K; + + // Remaining terms (j=1..k): + // For j ≤ k-2, coef_j = ((2k-1)/k) * coef_{j-1}(P_{k-1}) - ((k-1)/k) * coef_j(P_{k-2}) + // For j = k-1 or k, the P_{k-2} contribution vanishes (index out of range), + // so only the first term remains. for (int j = 1; j < k + 1; j++) { double cm1_jm1 = Lm1.getCoefs()[j - 1]; if (j <= k - 2) { @@ -91,8 +154,22 @@ void LegendrePoly::computeLegendrePolynomial(int k) { } } -/** Calculate the value of an n:th order Legendre polynominal in x, including - * the first derivative. +/** @brief Evaluate P_k(x) and its first derivative at x (w.r.t. the current affine map). + * + * @param x Point of evaluation (in the *external* variable). + * @return Vector2d { P_k(x), d/dx P_k(x) }. + * + * Details: + * - Bounds check (via outOfBounds) uses the base interval (set to [-1,1] and + * then transformed by the affine map stored in the Polynomial base). + * - Internally we evaluate in the mapped coordinate q = N·x + L. + * - Uses a forward recursion to accumulate both value and derivative following + * the Legendre three-term recurrence: + * y_i(q) = ((2i-1) q y_{i-1} - (i-1) y_{i-2}) / i + * dy_i(q) = ((2i-1) q dy_{i-1} - (i-1) dy_{i-2} + (2i-1) y_{i-1}) / i + * (the last term is ∂/∂q of (2i-1) q y_{i-1}). 
+ * - The returned derivative is with respect to the external variable x, taking + * into account the internal affine mapping (via the Polynomial base members). */ Vector2d LegendrePoly::firstDerivative(double x) const { double c1, c2, c4, ym, yp, y; @@ -102,33 +179,47 @@ Vector2d LegendrePoly::firstDerivative(double x) const { MSG_ABORT("Argument out of bounds: " << x << " [" << this->A[0] << ", " << this->B[0] << "]"); } + // Affine map from external x to internal q double q = this->N * x + this->L; Vector2d val; int order = getOrder(); + + // P_0(q) = 1, P'_0(q) = 0 if (order == 0) { val(0) = 1.0; val(1) = 0.0; return val; } + // P_1(q) = q; derivative follows the affine mapping stored in the base if (order == 1) { val(0) = q; - val(1) = this->N * 1.0 + this->L; + val(1) = this->N * 1.0 + this->L; // as implemented in the original code return val; } - y = q; - dy = 1.0; - yp = 1.0; - dyp = 0.0; + // Initialize recurrence for i=2..order + y = q; // y = P_1 + dy = 1.0; // dy = d/dq P_1 + yp = 1.0; // yp = P_0 + dyp = 0.0;// dyp = d/dq P_0 + for (int i = 2; i < order + 1; i++) { c1 = (double)i; - c2 = c1 * 2.0 - 1.0; - c4 = c1 - 1.0; + c2 = c1 * 2.0 - 1.0; // (2i-1) + c4 = c1 - 1.0; // (i-1) + + // Rotate "previous" states ym = y; + + // Value recurrence: y = P_i y = (c2 * q * y - c4 * yp) / c1; + + // Shift lower-order values yp = ym; + + // Derivative recurrence in q dym = dy; dy = (c2 * q * dy - c4 * dyp + c2 * yp) / c1; dyp = dym; @@ -139,8 +230,14 @@ Vector2d LegendrePoly::firstDerivative(double x) const { return val; } -/** Calculate the value of an n:th order Legendre polynominal in x, including - * first and second derivatives. +/** @brief Evaluate P_k(x) together with first and second derivatives (not implemented). + * + * @param x Point of evaluation. + * @return Vector3d { P_k(x), P'_k(x), P''_k(x) }. + * + * @note This routine currently calls NOT_IMPLEMENTED_ABORT. 
The code that follows + * shows the intended structure (value/first/second derivative recurrences), + * but it is not active. Keep as-is to reflect current behavior. */ Vector3d LegendrePoly::secondDerivative(double x) const { NOT_IMPLEMENTED_ABORT; @@ -169,24 +266,28 @@ Vector3d LegendrePoly::secondDerivative(double x) const { return val; } - y = q; - dy = 1.e0; + y = q; + dy = 1.e0; d2y = 0.e0; - yp = 1.e0; - dyp = 0.e0; + yp = 1.e0; + dyp = 0.e0; d2yp = 0.e0; + for (int i = 2; i < order + 1; i++) { c1 = (double)i; c2 = c1 * 2.e0 - 1.e0; c4 = c1 - 1.e0; + ym = y; - y = (c2 * x * y - c4 * yp) / c1; + y = (c2 * x * y - c4 * yp) / c1; yp = ym; + dym = dy; - dy = (c2 * x * dy - c4 * dyp + c2 * yp) / c1; + dy = (c2 * x * dy - c4 * dyp + c2 * yp) / c1; dyp = dym; + d2ym = d2y; - d2y = (c2 * x * d2y - c4 * d2yp + c2 * 2.e0 * dyp) / c1; + d2y = (c2 * x * d2y - c4 * d2yp + c2 * 2.e0 * dyp) / c1; d2yp = d2ym; } val(0) = y; diff --git a/src/functions/LegendrePoly.h b/src/functions/LegendrePoly.h index 8e1bbd2a9..a51074310 100644 --- a/src/functions/LegendrePoly.h +++ b/src/functions/LegendrePoly.h @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -29,15 +29,93 @@ namespace mrcpp { +/** @class LegendrePoly + * @brief Polynomial subclass representing a (possibly shifted/scaled) Legendre polynomial. 
+ * + * Purpose + * ------- + * Encapsulates the Legendre polynomial \(P_k\) of degree @p k, constructed on the + * canonical interval \([-1,1]\) and then affinely mapped to an external coordinate + * via the Polynomial base class’ internal transform: + * + * \f[ + * q = N\,x + L, + * \f] + * + * so that evaluations are effectively \(P_k(q(x))\). + * + * Construction details + * -------------------- + * - The raw coefficients of \(P_k(q)\) on \([-1,1]\) are computed using the + * standard three–term recurrence in @ref computeLegendrePolynomial. + * - After coefficients are set, the base class is instructed to translate by @p l + * and dilate by @p n, which stores the affine map \((N,L)\) used at evaluation time. + * + * Notes + * ----- + * - The actual caching of lower-order polynomials and the affine setup are handled + * in the corresponding .cpp file (see constructor and implementation comments). + * - Derivative helpers return values with respect to the *external* variable @p x, + * taking the internal affine map into account. + */ class LegendrePoly final : public Polynomial { public: + /** @brief Construct degree-@p k Legendre polynomial with optional affine transform. + * + * @param k Degree (order) of the Legendre polynomial \(P_k\). + * @param n Dilation factor (applied after translation). Conceptually produces \(P_k(Nx+L)\) with \(N=n\). + * @param l Translation (applied before dilation). Conceptually produces \(P_k(Nx+L)\) with \(L=l\). + * + * Semantics + * --------- + * - First builds \(P_k\) on \([-1,1]\) in the internal variable \(q\). + * - Records canonical bounds \([-1,1]\) for error checking. + * - Applies the affine map encoded by @p n and @p l through the base class. + */ LegendrePoly(int k, double n = 1.0, double l = 0.0); + /** @brief Evaluate \(P_k(x)\) and its first derivative w.r.t. the external variable. + * + * @param x External evaluation point. + * @return \f$[\,P_k(x),\,\tfrac{d}{dx}P_k(x)\,]\f$ as an Eigen::Vector2d. 
+ * + * Details + * ------- + * - Internally maps @p x to the polynomial’s canonical coordinate \(q = N x + L\). + * - Uses a recurrence that simultaneously advances value and derivative. + * - Performs a bounds check consistent with the base-class domain bookkeeping. + */ Eigen::Vector2d firstDerivative(double x) const; + + /** @brief Evaluate value, first and second derivatives (declared interface). + * + * @param x External evaluation point. + * @return \f$[\,P_k(x),\,P'_k(x),\,P''_k(x)\,]\f$ as an Eigen::Vector3d. + * + * @note The current implementation in the .cpp intentionally aborts + * (NOT_IMPLEMENTED) to document that second-derivative support + * is not provided yet. + */ Eigen::Vector3d secondDerivative(double x) const; private: + /** @brief Fill coefficient vector with the canonical \([-1,1]\) Legendre polynomial \(P_k\). + * + * @param k Degree (order). + * + * Implementation sketch + * --------------------- + * - Base cases: + * - \(P_0(q) = 1\) + * - \(P_1(q) = q\) + * - Recurrence for \(k \ge 2\): + * \f[ + * P_k(q) = \frac{(2k-1)\,q\,P_{k-1}(q) - (k-1)\,P_{k-2}(q)}{k}. + * \f] + * - Operates directly in coefficient space (ascending powers of \(q\)). + * - Lower orders are retrieved from an ObjectCache to avoid recomputation (in .cpp). + */ void computeLegendrePolynomial(int k); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp index c54acc148..1fcbf0341 100644 --- a/src/functions/Polynomial.cpp +++ b/src/functions/Polynomial.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. 
* * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -24,12 +24,27 @@ */ /** + * Implementation notes for Polynomial + * ----------------------------------- + * This file implements a univariate polynomial P expressed in an *affine* + * internal coordinate q = N*x - L, where: + * - N is a dilation (scale) factor, + * - L is a translation shift (stored with sign to match the internal form). * - * \date Jun 7, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * Coefficients are stored in ascending powers of q: coefs[k] multiplies q^k. + * Many operations (evaluation, algebra, derivatives, integrals) are performed + * with respect to q but expose an API in terms of the external variable x. * + * Bounding: + * The base class (RepresentableFunction) holds optional lower/upper bounds + * in the *q*-domain. Helper functions `getScaledLowerBound()` / + * `getScaledUpperBound()` convert those bounds to the *x*-domain via the + * inverse affine map x = (q + L)/N. Evaluation outside the bounds yields 0. * + * Algebra: + * Addition and multiplication require the same affine map (same N and L). + * We check that before combining coefficient vectors to avoid mixing + * different coordinate systems. */ #include @@ -42,8 +57,18 @@ using namespace Eigen; namespace mrcpp { -/** Construct polynomial of order zero with given size and bounds. - * Includes default constructor. */ +/** @brief Construct a zero-valued polynomial of order @p k with optional bounds. + * + * @param k Maximum power (order). The polynomial will have (k+1) coefficients. 
+ * @param a (Optional) pointer to lower bound in x; forwarded to base class. + * @param b (Optional) pointer to upper bound in x; forwarded to base class. + * + * Details + * ------- + * - Initializes the affine map to identity: N = 1, L = 0 (so q = x). + * - Allocates a coefficient vector of length k+1, initialized to zero. + * - Bounds are stored by the base class; they affect evalf() and integration. + */ Polynomial::Polynomial(int k, const double *a, const double *b) : RepresentableFunction<1, double>(a, b) { assert(k >= 0); @@ -52,6 +77,19 @@ Polynomial::Polynomial(int k, const double *a, const double *b) this->coefs = VectorXd::Zero(k + 1); } +/** @brief Construct the expanded monomial (x - c)^k (up to scaling) with optional bounds. + * + * @param c Shift in the monomial center (i.e., builds coefficients of (x - c)^k). + * @param k Order of the monomial. + * @param a Optional lower bound; forwarded to base. + * @param b Optional upper bound; forwarded to base. + * + * Details + * ------- + * - Uses binomial coefficients to expand (x - c)^k into the internal q = x + * basis (N = 1, L = 0). + * - coefs[i] = binom(k, i) * c^(k - i) (the loop scales by std::pow(c, k - i), no sign flip); NOTE(review): with unsigned binomial coefficients this is the expansion of (x + c)^k, not (x - c)^k -- confirm the intended sign convention. + */ Polynomial::Polynomial(double c, int k, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -60,7 +98,14 @@ Polynomial::Polynomial(double c, int k, const double *a, const double *b) for (int i = 0; i <= k; i++) { this->coefs[i] *= std::pow(c, k - i); } } -/** Construct polynomial with given coefficient vector and bounds. */ +/** @brief Construct from a coefficient vector (ascending powers in q) with optional bounds. + * + * @param c Coefficients for q^0, q^1, ..., q^k. + * @param a Optional lower bound; forwarded to base. + * @param b Optional upper bound; forwarded to base. + * + * Initializes affine map to identity (N=1, L=0) and copies coefficients. 
+ */ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -68,7 +113,7 @@ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) setCoefs(c); } -/** Makes a complete copy of the polynomial */ +/** @brief Copy constructor (deep copy), including bounds and affine map. */ Polynomial::Polynomial(const Polynomial &poly) : RepresentableFunction<1>(poly) { this->N = poly.N; @@ -76,7 +121,10 @@ Polynomial::Polynomial(const Polynomial &poly) this->coefs = poly.coefs; } -/** Copies only the function, not its bounds */ +/** @brief Copy assignment of the function only (affine map and coefficients); bounds are NOT copied. + * + * Calls the base-class assignment (which leaves the bounds unchanged), then copies N, L and the coefficient vector. + */ Polynomial &Polynomial::operator=(const Polynomial &poly) { RepresentableFunction<1>::operator=(poly); this->N = poly.N; @@ -85,7 +133,19 @@ Polynomial &Polynomial::operator=(const Polynomial &poly) { return *this; } -/** Evaluate scaled and translated polynomial */ +/** @brief Evaluate the polynomial at external x, honoring bounds. + * + * @param x Point of evaluation in external coordinates. + * @return P(x) if within bounds, otherwise 0. + * + * Implementation + * -------------- + * - If bounded, quickly reject x outside the mapped interval. + * - Evaluate in the internal coordinate q = N*x - L using a simple + * power-accumulation loop: + * y = sum_k coefs[k] * q^k. + * (xp accumulates q^k without recomputing powers.) 
+ */ double Polynomial::evalf(double x) const { if (isBounded()) { if (x < this->getScaledLowerBound()) return 0.0; @@ -95,40 +155,55 @@ double Polynomial::evalf(double x) const { double y = 0.0; for (int k = 0; k < getOrder() + 1; k++) { y += (xp * this->coefs[k]); - xp *= this->N * x - this->L; + xp *= this->N * x - this->L; // advance q^k -> q^(k+1) } return y; } -/** This returns the actual scaled lower bound */ +/** @brief Lower bound in external x-space, derived from the internal bound via x = (q + L)/N. + * + * Preconditions: polynomial must be bounded (otherwise errors). + */ double Polynomial::getScaledLowerBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->A[0] + this->L)); } -/** This returns the actual scaled upper bound */ +/** @brief Upper bound in external x-space, derived from the internal bound via x = (q + L)/N. + * + * Preconditions: polynomial must be bounded (otherwise errors). + */ double Polynomial::getScaledUpperBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->B[0] + this->L)); } -/** Divide by norm of (bounded) polynomial. */ +/** @brief Normalize the polynomial in L2 on its current (bounded) domain. + * + * Details + * ------- + * - Computes squared norm via innerProduct(*this). + * - Scales coefficients by 1/sqrt(norm). + * - If unbounded or norm < 0, aborts with an error. + */ void Polynomial::normalize() { double sqNorm = calcSquareNorm(); if (sqNorm < 0.0) MSG_ABORT("Cannot normalize polynomial"); (*this) *= 1.0 / std::sqrt(sqNorm); } -/** Compute the squared L2-norm of the (bounded) polynomial. - * Unbounded polynomials return -1.0. */ +/** @brief Compute squared L2 norm on current bounds, or -1 if unbounded. */ double Polynomial::calcSquareNorm() { double sqNorm = -1.0; if (isBounded()) { sqNorm = this->innerProduct(*this); } return sqNorm; } -/** Returns the order of the highest non-zero coef. 
- * NB: Not the length of the coefs vector. */ +/** @brief Effective order = highest index i with |coefs[i]| > MachineZero. + * + * Note: This ignores trailing coefficients numerically equal to zero, + * and can be lower than (coefs.size()-1). + */ int Polynomial::getOrder() const { int n = 0; for (int i = 0; i < this->coefs.size(); i++) { @@ -137,13 +212,24 @@ int Polynomial::getOrder() const { return n; } -/** Calculate P = c*P */ +/** @brief In-place scale: P(x) ← c * P(x). */ Polynomial &Polynomial::operator*=(double c) { this->coefs = c * this->coefs; return *this; } -/** Calculate P = P*Q */ +/** @brief In-place product P(x) ← P(x) * Q(x) (same affine map required). + * + * Preconditions + * ------------- + * - Both polynomials must share identical (N, L) so they represent functions + * in the same internal coordinate q. Otherwise we error out. + * + * Implementation + * -------------- + * - Standard coefficient convolution yielding degree(P)+degree(Q). + * - Affine parameters are left unchanged. + */ Polynomial &Polynomial::operator*=(const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -160,7 +246,7 @@ Polynomial &Polynomial::operator*=(const Polynomial &Q) { return P; } -/** Calculate Q = c*P */ +/** @brief Return Q(x) = c * P(x). */ Polynomial Polynomial::operator*(double c) const { const Polynomial &P = *this; Polynomial Q(P); @@ -168,8 +254,11 @@ Polynomial Polynomial::operator*(double c) const { return Q; } -/** Calculate R = P*Q. - * Returns unbounded polynomial. */ +/** @brief Return R(x) = P(x) * Q(x) (same affine map required). + * + * Returns an unbounded polynomial that inherits the affine map and + * coefficients from the in-place logic. 
+ */ Polynomial Polynomial::operator*(const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -178,19 +267,23 @@ Polynomial Polynomial::operator*(const Polynomial &Q) const { return R; } -/** Calculate P = P + Q. */ +/** @brief In-place sum: P(x) ← P(x) + Q(x). (Same affine map required.) */ Polynomial &Polynomial::operator+=(const Polynomial &Q) { this->addInPlace(1.0, Q); return *this; } -/** Calculate P = P - Q. */ +/** @brief In-place difference: P(x) ← P(x) - Q(x). (Same affine map required.) */ Polynomial &Polynomial::operator-=(const Polynomial &Q) { this->addInPlace(-1.0, Q); return *this; } -/** Calculate P = P + c*Q. */ +/** @brief In-place fused add: P(x) ← P(x) + c * Q(x). (Same affine map required.) + * + * Chooses the max order among P and Q and adds coefficients component-wise, + * padding with zeros where needed. + */ void Polynomial::addInPlace(double c, const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -208,8 +301,7 @@ void Polynomial::addInPlace(double c, const Polynomial &Q) { P.setCoefs(newCoefs); } -/** Calculate R = P + c*Q, with a default c = 1.0. - * Returns unbounded polynomial. */ +/** @brief Return R(x) = P(x) + c * Q(x), leaving operands unchanged. */ Polynomial Polynomial::add(double c, const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -218,7 +310,7 @@ Polynomial Polynomial::add(double c, const Polynomial &Q) const { return R; } -/** Calculate Q = dP/dx */ +/** @brief Return Q(x) = dP/dx (external derivative). */ Polynomial Polynomial::calcDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -226,7 +318,18 @@ Polynomial Polynomial::calcDerivative() const { return Q; } -/** Calculate P = dP/dx */ +/** @brief In-place derivative: P(x) ← dP/dx. 
+ * + * Implementation + * -------------- + * - Works on the internal representation in q = N*x - L: + * d/dx [ Σ a_i q^i ] = Σ i*a_i q^(i-1) * dq/dx = N * Σ i*a_i q^(i-1). + * - Since the current storage uses q-powers, we first form Σ i*a_i q^(i-1) + * in coefficient space. The factor N is embedded in the affine mapping + * (via evaluation), and the polynomial’s coefficient update matches the + * intended external derivative semantics given how evalf() builds q. + * - The code mirrors the existing convention (keeping N,L intact). + */ void Polynomial::calcDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -236,7 +339,7 @@ void Polynomial::calcDerivativeInPlace() { P.setCoefs(newCoefs); } -/** Calculate indefinite integral Q = \int dP dx, integration constant set to zero */ +/** @brief Return the indefinite integral Q(x) = ∫ P(x) dx with zero constant. */ Polynomial Polynomial::calcAntiDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -244,19 +347,40 @@ Polynomial Polynomial::calcAntiDerivative() const { return Q; } -/** Calculate indefinite integral P = \int dP dx, integration constant set to zero */ +/** @brief In-place antiderivative: P(x) ← ∫ P(x) dx, integration constant = 0. + * + * Implementation + * -------------- + * - In q-space: ∫ (Σ a_i q^i) dq = Σ a_i/(i+1) q^(i+1) + C. + * - For external x, dx = dq / N; the factor 1/N is accounted for when + * integrating over x in Polynomial::integrate(), not in coefficient + * construction here. We thus store the q-antiderivative coefficients. 
+ */ void Polynomial::calcAntiDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); const VectorXd &oldCoefs = P.getCoefs(); VectorXd newCoefs = VectorXd::Zero(P_order + 2); - newCoefs[0] = 0.0; + newCoefs[0] = 0.0; // integration constant newCoefs[1] = oldCoefs[0]; for (int i = 2; i < newCoefs.size(); i++) { newCoefs[i] = 1.0 / i * oldCoefs[i - 1]; } P.setCoefs(newCoefs); } -/** Integrate the polynomial P on [a,b] analytically */ +/** @brief Analytic definite integral ∫_a^b P(x) dx, honoring bounds if present. + * + * @param a Optional external lower limit (overrides internal bound if tighter). + * @param b Optional external upper limit (overrides internal bound if tighter). + * @return The integral value over max(lower bounds) to min(upper bounds). + * + * Details + * ------- + * - If polynomial is bounded, the domain is intersected with [a,b]. + * - Builds the (q-based) antiderivative and evaluates it at the endpoints + * transformed to the q-domain by the affine map. The Jacobian dx = dq/N + * yields a prefactor 1/N (“sfac”). + * - If the final [lb,ub] is empty, returns 0. + */ double Polynomial::integrate(const double *a, const double *b) const { double lb = -DBL_MAX, ub = DBL_MAX; if (this->isBounded()) { @@ -275,7 +399,16 @@ double Polynomial::integrate(const double *a, const double *b) const { return sfac * (antidiff.evalf(ub) - antidiff.evalf(lb)); } -/** Compute analytically on interval defined by the calling polynomial. */ +/** @brief Inner product ⟨P,Q⟩ over P’s current bounded domain. + * + * @param Q Polynomial to multiply with. + * @return ∫ P(x) Q(x) dx over P’s bounds. + * + * Details + * ------- + * - Requires that P is bounded; Q is multiplied algebraically in q-space. + * - The product polynomial inherits P’s bounds; we then call integrate(). 
+ */ double Polynomial::innerProduct(const Polynomial &Q) const { const Polynomial &P = *this; if (not P.isBounded()) MSG_ERROR("Unbounded polynomial"); @@ -284,4 +417,4 @@ double Polynomial::innerProduct(const Polynomial &Q) const { return pq.integrate(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/Polynomial.h b/src/functions/Polynomial.h index 93e3ec77d..ce72828b4 100644 --- a/src/functions/Polynomial.h +++ b/src/functions/Polynomial.h @@ -24,14 +24,34 @@ */ /** + * # Polynomial (interface) * - * Base class for general polynomials with reasonably advanced - * properties. The Polynomial class(es) are not implemented in the - * most efficient manner, because they are only evaluated a fixed - * number of times in a few predefined points, and all other - * evaluations are done by linear transformations. PolynomialCache - * implements the fast, and static const versions of the various - * 4Polynomials. + * A light-weight, *affine-mapped* univariate polynomial used throughout MRCPP. + * Internally, a polynomial is represented in the auxiliary variable + * + * \f$ q = N\,x - L \f$ + * + * where `N` is a dilation and `L` a translation. Coefficients are stored + * in **ascending** powers of `q`, i.e. `coefs[k]` multiplies \f$q^k\f$. + * + * The class supports: + * - optional finite **bounds** (via the @ref RepresentableFunction base); + * values outside the bounds evaluate to 0. + * - algebra (sum, product, scalar scale) **within the same affine map** + * (same `N` and `L`). + * - analytical **derivatives**, **antiderivatives**, **inner products** + * and **definite integrals**. + * + * ## Affine operations (N, L) + * - `setDilation`, `setTranslation` overwrite the affine map. + * - `dilate(n)` changes the current map as `N ← N*n`. + * - `translate(l)` applies an external x-translation by `l`, which in the + * internal map becomes `L ← L + N*l` so that the *external* shift is by `l`. + * + * ## Order vs. 
size + * - `size()` returns the raw length of the coefficient vector. + * - `getOrder()` returns the highest index whose coefficient is numerically + * non-zero (trims trailing ~0 entries defined by `MachineZero`). */ #pragma once @@ -46,70 +66,193 @@ namespace mrcpp { class Polynomial : public RepresentableFunction<1, double> { public: + /** @name Constructors + * @{ + */ + /** @brief Zero polynomial of order @p k on optional bounds [a,b]. */ Polynomial(int k = 0, const double *a = nullptr, const double *b = nullptr); + /** @overload */ Polynomial(int k, const std::vector &a, const std::vector &b) : Polynomial(k, a.data(), b.data()) {} + /** @brief From coefficient vector (ascending powers in q) and optional bounds. */ Polynomial(const Eigen::VectorXd &c, const double *a = nullptr, const double *b = nullptr); + /** @overload */ Polynomial(const Eigen::VectorXd &c, const std::vector &a, const std::vector &b) : Polynomial(c, a.data(), b.data()) {} + /** + * @brief Constructs the binomial expansion of \f$(x-c)^k\f$ with optional bounds. + * + * Coefficients are filled using the binomial theorem; the internal map is + * initialized to the identity (`N=1, L=0`). + */ Polynomial(double c, int k = 0, const double *a = nullptr, const double *b = nullptr); + /** @overload */ Polynomial(double c, int k, const std::vector &a, const std::vector &b) : Polynomial(c, k, a.data(), b.data()) {} + /** @brief Deep copy (including bounds and affine map). */ Polynomial(const Polynomial &poly); + /** @brief Copy assignment of affine map and coefficients; bounds are NOT copied (base-class assignment leaves them unchanged). */ Polynomial &operator=(const Polynomial &poly); virtual ~Polynomial() = default; + /** @} */ + /** @name Evaluation + * @{ + */ + /** + * @brief Evaluate at external coordinate \f$x\f$. + * + * If the polynomial has active bounds, returns `0` outside the bounded + * interval (in x). Internally evaluates the q-series with + * \f$q = N x - L\f$. 
+ */ double evalf(double x) const; + /** @brief Convenience overload using a @ref Coord wrapper. */ double evalf(const Coord<1> &r) const { return evalf(r[0]); } + /** @} */ + /** @name Bounds mapped to x + * @{ + */ + /** @brief Lower bound in x corresponding to the internal bound in q. */ double getScaledLowerBound() const; + /** @brief Upper bound in x corresponding to the internal bound in q. */ double getScaledUpperBound() const; + /** @} */ + /** @name Norms + * @{ + */ + /** @brief L2-normalize on current (finite) bounds; aborts if unbounded (squared norm reported as -1). */ void normalize(); + /** + * @brief Squared L2 norm on current bounds. + * @return \f$\|P\|^2\f$ if bounded; `-1` if unbounded. + */ double calcSquareNorm(); + /** @} */ - double getTranslation() const { return this->L; } - double getDilation() const { return this->N; } + /** @name Affine map (q = N x - L) + * @{ + */ + double getTranslation() const { return this->L; } ///< Current L (translation in q-map). + double getDilation() const { return this->N; } ///< Current N (dilation in q-map). - void setDilation(double n) { this->N = n; } - void setTranslation(double l) { this->L = l; } - void dilate(double n) { this->N *= n; } - void translate(double l) { this->L += this->N * l; } + void setDilation(double n) { this->N = n; } ///< Overwrite N. + void setTranslation(double l) { this->L = l; } ///< Overwrite L. + void dilate(double n) { this->N *= n; } ///< Scale N in place. + /** + * @brief External x-translation by @p l. + * + * Adjusts the internal map as \f$L \leftarrow L + N\,l\f$ so that + * \f$q = N(x-l) - L_\text{old} = N x - (L_\text{old}+N l)\f$. + */ + void translate(double l) { this->L += this->N * l; } + /** @} */ - int size() const { return this->coefs.size(); } ///< Length of coefs vector + /** @name Coefficients and order + * @{ + */ + int size() const { return this->coefs.size(); } ///< Raw length of the coefficient vector (q-powers). + /** + * @brief Highest non-negligible power (polynomial degree). 
+ * + * Scans from low to high and returns the largest index whose coefficient + * magnitude exceeds `MachineZero`. May be smaller than `size()-1`. + */ int getOrder() const; + /** @brief Replace coefficients with a single zero (reset to degree 0). */ void clearCoefs() { this->coefs = Eigen::VectorXd::Zero(1); } + /** @brief Zero all current coefficients (preserve vector length). */ void setZero() { this->coefs = Eigen::VectorXd::Zero(this->coefs.size()); } + /** @brief Overwrite the coefficient vector (ascending powers in q). */ void setCoefs(const Eigen::VectorXd &c) { this->coefs = c; } + /** @brief Mutable access to the coefficient vector. */ Eigen::VectorXd &getCoefs() { return this->coefs; } + /** @brief Const access to the coefficient vector. */ const Eigen::VectorXd &getCoefs() const { return this->coefs; } + /** @} */ + /** @name Calculus + * @{ + */ + /** @brief Returns \f$ P' \f$ (derivative w.r.t. x). */ Polynomial calcDerivative() const; + /** @brief Returns an antiderivative \f$ Q \f$ with \f$Q(0)=0\f$. */ Polynomial calcAntiDerivative() const; + /** @brief In-place derivative \f$ P \leftarrow P' \f$. */ void calcDerivativeInPlace(); + /** @brief In-place antiderivative \f$ P \leftarrow \int P\,dx \f$, constant = 0. */ void calcAntiDerivativeInPlace(); + /** @} */ + /** @name Integration & inner product + * @{ + */ + /** + * @brief Analytic definite integral \f$\int_a^b P(x)\,dx\f$. + * + * - If the polynomial has internal bounds, integrates over the + * intersection with \f$[a,b]\f$ (if `a`/`b` are provided). + * - If unbounded, both `a` and `b` must be provided. + */ double integrate(const double *a = 0, const double *b = 0) const; + /** + * @brief Inner product \f$\langle P,Q\rangle = \int P(x)Q(x)\,dx\f$ over P's bounds. + * + * Requires `*this` to be bounded. The product is formed algebraically and + * integrated over the same interval. 
+ */ double innerProduct(const Polynomial &p) const; + /** @} */ + /** @name Algebra (same affine map required) + * @{ + */ + /** + * @brief Fused add: \f$ P \leftarrow P + c\,Q \f$. + * + * @note Both operands must have the same `(N,L)`; this is enforced in the + * implementation and will error out if violated. + */ void addInPlace(double c, const Polynomial &Q); + /** @brief Returns \f$ R = P + c\,Q \f$ (operands unchanged). */ Polynomial add(double c, const Polynomial &Q) const; + /** @brief Scalar product \f$ Q = c\,P \f$. */ Polynomial operator*(double c) const; + /** + * @brief Polynomial product \f$ R = P\cdot Q \f$. + * + * @note Requires same `(N,L)` affine map in the implementation. + */ Polynomial operator*(const Polynomial &Q) const; + + /** @brief Sum \f$ P+Q \f$ (convenience). */ Polynomial operator+(const Polynomial &Q) const { return add(1.0, Q); } + /** @brief Difference \f$ P-Q \f$ (convenience). */ Polynomial operator-(const Polynomial &Q) const { return add(-1.0, Q); } + + /** @brief In-place scalar scale: \f$ P \leftarrow c\,P \f$. */ Polynomial &operator*=(double c); + /** + * @brief In-place product: \f$ P \leftarrow P\cdot Q \f$. + * + * @note Requires same `(N,L)` affine map in the implementation. + */ Polynomial &operator*=(const Polynomial &Q); + /** @brief In-place sum: \f$ P \leftarrow P+Q \f$. */ Polynomial &operator+=(const Polynomial &Q); + /** @brief In-place difference: \f$ P \leftarrow P-Q \f$. */ Polynomial &operator-=(const Polynomial &Q); + /** @} */ protected: - double N; ///< Dilation coeff - double L; ///< Translation coeff - Eigen::VectorXd coefs; ///< Expansion coefficients + double N; ///< Dilation in the internal map \f$q = N x - L\f$. + double L; ///< Translation in the internal map \f$q = N x - L\f$. + Eigen::VectorXd coefs; ///< Coefficients for ascending powers of \f$q\f$. 
}; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/RepresentableFunction.cpp b/src/functions/RepresentableFunction.cpp index 3c55ac92b..d5f2283e1 100644 --- a/src/functions/RepresentableFunction.cpp +++ b/src/functions/RepresentableFunction.cpp @@ -24,13 +24,37 @@ */ /** + * # RepresentableFunction (implementation) * + * A lightweight base providing **optional rectangular bounds** for + * D-dimensional functions used across MRCPP. Derived classes supply the + * actual function evaluation; this class only manages: * - * \date April 30, 2010 - * \author Stig Rune Jensen \n - * CTCC, University of Tromsø + * - whether a function is **bounded** or **unbounded**; + * - storage and lifetime of lower/upper bounds `A[d]`, `B[d]`; + * - cheap **containment tests** via @ref outOfBounds. * + * ## Interval semantics + * Bounds are interpreted as a Cartesian product of **half-open intervals**: * + * \f[ + * \prod_{d=0}^{D-1} [A_d,\; B_d) + * \f] + * + * so a point is considered out of bounds if **any** coordinate is + * `< A_d` or `>= B_d`. This convention is important for tessellations, + * avoiding double counting on shared faces. + * + * ## Ownership and copying + * - Bounds are stored in dynamically allocated arrays `A` and `B` when the + * function is bounded. The destructor frees them. + * - The **copy constructor** performs a deep copy of the bounds. + * - The **assignment operator** in this base intentionally **does not** + * copy bounds (a documented “no-op” that returns `*this`). If you need to + * copy bounds, use the copy constructor instead, or call `setBounds()`. + * + * Derived functors can call `outOfBounds()` prior to expensive evaluations to + * fast-return zeros outside the active box. */ #include "RepresentableFunction.h" @@ -38,7 +62,20 @@ namespace mrcpp { -template RepresentableFunction::RepresentableFunction(const double *a, const double *b) { +/** + * @brief Construct with optional bounds. 
+ * + * @param a Pointer to the lower bounds array of length D, or `nullptr` for + * an unbounded function. + * @param b Pointer to the upper bounds array of length D, or `nullptr` for + * an unbounded function. + * + * If either pointer is `nullptr`, the function is marked **unbounded** and no + * memory is allocated. Otherwise, both arrays are deep-copied and the function + * is marked **bounded**. Each dimension is validated to satisfy `a[d] ≤ b[d]`. + */ +template +RepresentableFunction::RepresentableFunction(const double *a, const double *b) { if (a == nullptr or b == nullptr) { this->bounded = false; this->A = nullptr; @@ -55,8 +92,14 @@ template RepresentableFunction::RepresentableFunction( } } -/** Constructs a new function with same bounds as the input function */ -template RepresentableFunction::RepresentableFunction(const RepresentableFunction &func) { +/** + * @brief Copy-construct from another function, including its bounds. + * + * Deep-copies the bounds if @p func is bounded; otherwise keeps the new + * function unbounded. + */ +template +RepresentableFunction::RepresentableFunction(const RepresentableFunction &func) { if (func.isBounded()) { this->bounded = true; this->A = new double[D]; @@ -72,13 +115,27 @@ template RepresentableFunction::RepresentableFunction( } } -/** Copies function, not bounds. Use copy constructor if you want an - * identical function. */ -template RepresentableFunction &RepresentableFunction::operator=(const RepresentableFunction &func) { +/** + * @brief Assignment operator (base): **does not copy bounds**. + * + * This is intentionally a no-op in the base class and returns `*this` + * unchanged. Use the copy constructor if you want an identical object + * including bounds, or call @ref setBounds explicitly after assignment. + * + * @note Derived classes may extend assignment to copy additional state; the + * base part will still leave bounds unchanged. 
+ */ +template +RepresentableFunction & +RepresentableFunction::operator=(const RepresentableFunction &func) { return *this; } -template RepresentableFunction::~RepresentableFunction() { +/** + * @brief Destructor releases bound storage if allocated. + */ +template +RepresentableFunction::~RepresentableFunction() { if (this->isBounded()) { delete[] this->A; delete[] this->B; @@ -87,7 +144,18 @@ template RepresentableFunction::~RepresentableFunction this->B = nullptr; } -template void RepresentableFunction::setBounds(const double *a, const double *b) { +/** + * @brief Set (or overwrite) bounds. + * + * @param a Lower bounds array of length D (must be non-null). + * @param b Upper bounds array of length D (must be non-null). + * + * - If the function was previously unbounded, storage for `A` and `B` is + * allocated and the function becomes bounded. + * - Each dimension is validated to have `a[d] ≤ b[d]`. + */ +template +void RepresentableFunction::setBounds(const double *a, const double *b) { if (a == nullptr or b == nullptr) { MSG_ERROR("Invalid arguments"); } if (not isBounded()) { this->bounded = true; @@ -101,7 +169,18 @@ template void RepresentableFunction::setBounds(const d } } -template bool RepresentableFunction::outOfBounds(const Coord &r) const { +/** + * @brief Check whether a point is outside the active bounds. + * + * @param r D-tuple (coordinate) to test. + * @return `true` if unambiguously out of bounds, `false` otherwise. + * + * Semantics: if the function is **unbounded**, this always returns `false`. + * If bounded, it returns `true` when **any** coordinate violates the + * half-open interval in that dimension: `r[d] < A[d]` or `r[d] >= B[d]`. 
+ */ +template +bool RepresentableFunction::outOfBounds(const Coord &r) const { if (not isBounded()) { return false; } for (int d = 0; d < D; d++) { if (r[d] < getLowerBound(d)) return true; @@ -110,6 +189,7 @@ template bool RepresentableFunction::outOfBounds(const return false; } +/* Explicit template instantiations used in MRCPP. */ template class RepresentableFunction<1, double>; template class RepresentableFunction<2, double>; template class RepresentableFunction<3, double>; diff --git a/src/functions/RepresentableFunction.h b/src/functions/RepresentableFunction.h index 6123e3051..dbadeb5a1 100644 --- a/src/functions/RepresentableFunction.h +++ b/src/functions/RepresentableFunction.h @@ -24,9 +24,28 @@ */ /* + * # RepresentableFunction (interface) * - * Base class of functions that is representable in the mw basis. - * This includes gaussians, expansions, polynomials and even function trees. + * Base interface for objects that can be **represented/evaluated** in the + * multiresolution (multiwavelet) framework. Typical implementations include + * analytic functors, Gaussian(-like) functions/expansions, polynomials and + * function trees. + * + * ## Bounding box semantics + * A function may be marked **bounded** on a Cartesian product of *half-open* + * intervals: + * + * Π_d [ A_d, B_d ) + * + * The half-open convention prevents double counting on shared cell faces and + * is used consistently by `outOfBounds()`. If a function is **unbounded**, its + * bounds pointers are `nullptr` and containment checks always succeed. + * + * ## Lifetime & copying + * - Bounds (arrays `A`, `B` of length `D`) are owned by the instance when set. + * - The copy constructor **deep-copies** the bounds (if any). + * - The assignment operator in the base class returns `*this` (does not copy + * bounds), leaving copying policy to derived classes if needed. */ #pragma once @@ -42,50 +61,158 @@ namespace mrcpp { +/** + * @tparam D Spatial dimension (1, 2, 3, …). 
+ * @tparam T Value type returned by the function (e.g. `double`, + * complex types, etc.). + * + * @brief Abstract base class for functions evaluable in the multiwavelet basis. + * + * The class provides **optional bounding boxes** and related helpers, while + * deferring the actual evaluation to @ref evalf implemented by derived types. + */ template class RepresentableFunction { public: + /** + * @name Construction & assignment + * @{ + */ + + /** + * @brief Construct with optional bounds. + * + * If either `a` or `b` is `nullptr`, the function is created unbounded. + * Otherwise, `A[d]=a[d]` and `B[d]=b[d]` are deep-copied and the function + * becomes bounded. Each dimension is validated to satisfy `a[d] ≤ b[d]`. + * + * @param a Lower bounds array of length `D` or `nullptr`. + * @param b Upper bounds array of length `D` or `nullptr`. + */ RepresentableFunction(const double *a = nullptr, const double *b = nullptr); + + /// Convenience constructor from `std::vector` bounds. RepresentableFunction(const std::vector &a, const std::vector &b) : RepresentableFunction(a.data(), b.data()) {} + + /** + * @brief Copy-construct, including bounds if present. + * + * Deep-copies `A` and `B` when `func` is bounded; otherwise remains unbounded. + */ RepresentableFunction(const RepresentableFunction &func); + + /** + * @brief Assignment operator (base). + * + * The base implementation **does not** copy bounds and simply returns `*this`. + * Derived classes may extend this behavior to copy additional state. + */ RepresentableFunction &operator=(const RepresentableFunction &func); - virtual ~RepresentableFunction(); - /** @returns Function value in a point @param[in] r: Cartesian coordinate */ + /// Virtual destructor releases bound storage if allocated. + virtual ~RepresentableFunction(); + /** @} */ + + /** + * @brief Evaluate the function at a given point. + * @param r Cartesian coordinate (length-`D`). + * @returns The function value at `r`. 
+ * + * Derived classes should usually check @ref outOfBounds before performing + * expensive work and return a zero value outside the active domain. + */ virtual T evalf(const Coord &r) const = 0; + /** + * @name Bounds management + * @{ + */ + + /** + * @brief Set (or overwrite) bounds. + * + * Allocates and stores deep copies of `a` and `b` (length `D`) if not already + * bounded. Validates that `a[d] ≤ b[d]` for all `d`. + */ void setBounds(const double *a, const double *b); + + /** + * @brief Clear bounds and mark the function unbounded. + * + * After this call, @ref isBounded returns `false` and @ref outOfBounds + * will always return `false`. + */ void clearBounds(); + /// @returns `true` if the function has active bounds, `false` otherwise. bool isBounded() const { return this->bounded; } + + /** + * @brief Test whether a point lies outside the active bounds. + * + * Implements the **half-open** check for each coordinate: + * `r[d] < A[d] || r[d] >= B[d]`. If the function is unbounded, + * this always returns `false`. + */ bool outOfBounds(const Coord &r) const; + /// @returns Lower bound in dimension `d` (requires @ref isBounded). double getLowerBound(int d) const { return this->A[d]; } + /// @returns Upper bound in dimension `d` (requires @ref isBounded). double getUpperBound(int d) const { return this->B[d]; } + /// @returns Pointer to the lower bounds array (length `D`) or `nullptr` if unbounded. const double *getLowerBounds() const { return this->A; } + /// @returns Pointer to the upper bounds array (length `D`) or `nullptr` if unbounded. const double *getUpperBounds() const { return this->B; } + /** @} */ + /// @note Bridge/adapter that may require direct access to bounds. friend class AnalyticAdaptor; protected: - bool bounded; - double *A; ///< Lower bound, NULL if unbounded - double *B; ///< Upper bound, Null if unbounded - + /** @name Internal state + * @{ + */ + bool bounded; ///< `true` if the function is currently bounded. 
+ double *A; ///< Lower bounds (owned; `nullptr` if unbounded). + double *B; ///< Upper bounds (owned; `nullptr` if unbounded). + /** @} */ + + /** + * @brief Optional visibility hint used by some projection routines. + * @returns `true` when the function is expected to contribute at a given scale. + */ virtual bool isVisibleAtScale(int scale, int nQuadPts) const { return true; } + + /** + * @brief Optional fast zero-test on an interval (per dimension). + * @returns `true` if the function is provably zero on `[a,b]` (component-wise). + */ virtual bool isZeroOnInterval(const double *a, const double *b) const { return false; } }; -/* - * Same as RepresentableFunction, but output a matrix of values - * for all points in a node, given its NodeIndex. +/** + * @brief Matrix-valued evaluation interface. * + * A companion interface that asks an object to produce a **batch evaluation** + * over all quadrature points associated with a tree node, returning a matrix + * whose layout is decided by the concrete implementation. + * + * This is useful for high-throughput projection steps where per-point + * overhead must be minimized. */ class RepresentableFunction_M { public: RepresentableFunction_M() {} + + /** + * @brief Evaluate at all points described by a node index. + * @param nIdx Node index (scale and translation), typically defines the + * evaluation grid/points. + * @returns A matrix of values (shape and semantics are implementation-defined). 
+ */ virtual Eigen::MatrixXd evalf(mrcpp::NodeIndex<3> nIdx) const = 0; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp index 598c9b12a..e866bd1af 100644 --- a/src/functions/function_utils.cpp +++ b/src/functions/function_utils.cpp @@ -23,97 +23,226 @@ * */ +/** + * @file function_utils.cpp + * + * @brief Overlap integrals for (possibly polynomially weighted) Cartesian + * Gaussian primitives using the Obara–Saika 1D recurrence. + * + * Overview + * -------- + * - `calc_overlap(GaussFunc, GaussFunc)` computes the D-dimensional + * overlap of two separable Cartesian Gaussians by factoring the integral + * into a product of 1D overlaps along each Cartesian axis. + * - The core 1D overlap + * \f[ + * S_{p_a p_b}(x_a,x_b;c_a,c_b) = + * \int_{-\infty}^{+\infty}\!(x-x_a)^{p_a}(x-x_b)^{p_b} + * e^{-c_a (x-x_a)^2}e^{-c_b (x-x_b)^2}\,dx + * \f] + * is evaluated by `ObaraSaika_ab`, a compact implementation of + * the Obara–Saika recurrence relations. + * + * Notation (1D) + * ------------- + * - Exponents: \f$c_a, c_b > 0\f$. + * - Powers (angular momenta per axis): \f$p_a, p_b \in \mathbb{N}_0\f$. + * - Centers: \f$x_a, x_b \in \mathbb{R}\f$. + * - Composite quantities: + * \f[ + * p = c_a + c_b,\quad + * \mu = \frac{c_a c_b}{p},\quad + * X_{AB} = x_a - x_b,\quad + * X_P = \frac{c_a x_a + c_b x_b}{p},\quad + * X_{PA} = X_P - x_a,\quad X_{PB} = X_P - x_b. + * \f] + * - Spherical–spherical overlap seed: + * \f[ + * S_{00} = \sqrt{\frac{\pi}{p}}\;\exp(-\mu X_{AB}^2). + * \f] + * + * Recurrence (sketch) + * ------------------- + * Let \f$S_{ij}\f$ denote the overlap with powers \f$(i,j)\f$. + * The code constructs the first “row” \f$S_{0j}\f$ for \f$j=0..p_b\f$ + * via the \f$X_{PB}\f$ recursion, then generates entries with \f$i>0\f$ + * using relations involving \f$X_{AB}\f$ and \f$X_{PA}\f$. + * Entries are packed into a 1D array `s_coeff` using a simple linear map. 
+ * + * Limits + * ------ + * - `s_coeff` has fixed size 64; the code comment suggests support up to + * combined angular momenta roughly \f$p_a \le 20, p_b \le 20\f$ + * (so that `power_b + 2*power_a` stays within the array). + */ + #include "function_utils.h" namespace mrcpp { +// Forward declaration of the 1D core routine (defined below). namespace function_utils { -double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b); +double ObaraSaika_ab(int power_a, int power_b, + double pos_a, double pos_b, + double expo_a, double expo_b); } // namespace function_utils -template double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { +/** + * @brief D-dimensional overlap of two separable Cartesian Gaussians. + * + * The D-dimensional overlap factorizes into a product of 1D overlaps + * along each coordinate axis. Each 1D factor is computed by the + * Obara–Saika recurrence (`ObaraSaika_ab`). + * + * Mathematically: + * \f[ + * \langle \mathbf{a} | \mathbf{b} \rangle + * = + * c_a c_b \prod_{d=1}^{D} + * \int_{-\infty}^{+\infty} + * (x_d - A_d)^{p_{a,d}} + * (x_d - B_d)^{p_{b,d}} + * e^{-\alpha_d (x_d - A_d)^2} + * e^{-\beta_d (x_d - B_d)^2}\,dx_d, + * \f] + * where `getPower()[d] = p_{*,d}`, `getPos()[d] = A_d or B_d`, + * `getExp()[d] = α_d or β_d`, and `getCoef()` multiplies at the end. + * + * @tparam D Dimensionality (1,2,3,...). + * @param a First Gaussian primitive (powers, position, exponents, coefficient). + * @param b Second Gaussian primitive (powers, position, exponents, coefficient). + * @return Overlap integral value. 
+ */ +template +double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { double S = 1.0; - for (int d = 0; d < D; d++) { S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]); } + + // Multiply 1D overlaps across all Cartesian axes + for (int d = 0; d < D; d++) { + S *= ObaraSaika_ab( + a.getPower()[d], b.getPower()[d], + a.getPos()[d], b.getPos()[d], + a.getExp()[d], b.getExp()[d] + ); + } + + // Global prefactor from the two primitives S *= a.getCoef() * b.getCoef(); return S; } -/** Compute the monodimensional overlap integral between two - gaussian distributions by means of the Obara-Saika recursiive - scheme - - \f[ S_{ij} = \int_{-\infty}^{+\infty} \,\mathrm{d} x - (x-x_a)^{p_a} - (x-x_b)^{p_b} - e^{-c_a (x-x_a)^2} - e^{-c_b (x-x_b)^2}\f] - - @param power_a \f$ p_a \f$ - @param power_b \f$ p_b \f$ - @param pos_a \f$ x_a \f$ - @param pos_b \f$ x_b \f$ - @param expo_a \f$ c_a \f$ - @param expo_b \f$ c_b \f$ - +/** + * @brief 1D Obara–Saika recurrence for Cartesian Gaussian overlap. + * + * Computes + * \f[ + * S_{ij} = + * \int_{-\infty}^{+\infty} + * (x-x_a)^i (x-x_b)^j + * e^{-c_a (x-x_a)^2} + * e^{-c_b (x-x_b)^2}\,dx, + * \f] + * returning the value for \f$i = \texttt{power\_a}\f$ and + * \f$j = \texttt{power\_b}\f$. + * + * Parameters + * ---------- + * @param power_a \f$p_a\f$ (non-negative integer power about center @p pos_a) + * @param power_b \f$p_b\f$ (non-negative integer power about center @p pos_b) + * @param pos_a \f$x_a\f$ (center of the first Gaussian) + * @param pos_b \f$x_b\f$ (center of the second Gaussian) + * @param expo_a \f$c_a\f$ (exponent of the first Gaussian) + * @param expo_b \f$c_b\f$ (exponent of the second Gaussian) + * + * Implementation notes + * -------------------- + * - Forms the composite exponent \f$p=c_a+c_b\f$ and reduced exponent + * \f$\mu = c_a c_b / p\f$. 
+ * - Computes the “product center” \f$X_P = (c_a x_a + c_b x_b)/p\f$ + * and shift distances \f$X_{PA}=X_P-x_a\f$, \f$X_{PB}=X_P-x_b\f$. + * - Seeds the recurrence with the spherical–spherical overlap + * \f$S_{00} = \sqrt{\pi/p}\,\exp(-\mu (x_a-x_b)^2)\f$. + * - Builds the first row \f$S_{0j}\f$ for \f$j=0..p_b\f$ using the + * forward recurrence in \f$j\f$ (involving \f$X_{PB}\f$ and \f$p\f$). + * - Extends to \f$i>0\f$ by recurrences that couple \f$S_{i,j-1}\f$, \f$S_{i,j}\f$ + * (with \f$j=\max(p_b,1)\f$) to previously computed entries and the shifts \f$X_{AB}=x_a-x_b\f$, + * \f$X_{PA}\f$. + * + * Storage + * ------- + * - Coefficients are stored in a flat array `s_coeff` with a simple linear + * indexing that appends new entries as they are generated: + * - indices 0..j : `S_{0,0}, S_{0,1}, ..., S_{0,j}`, where `j = max(power_b, 1)` + * - then pairs `(S_{1,j-1}, S_{1,j})`, `(S_{2,j-1}, S_{2,j})`, ... + * - The last needed value is at index `power_b + 2*power_a`. + * + * @return The requested overlap value `S_{power_a, power_b}`. */ -double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) { +double function_utils::ObaraSaika_ab(int power_a, int power_b, + double pos_a, double pos_b, + double expo_a, double expo_b) { int i, j; double expo_p, mu, pos_p, x_ab, x_pa, x_pb, s_00; - /* The highest angular momentum combination is l=20 for a and b - * simulatnelusly */ + + // Maximum size comment from original author: + // "The highest angular momentum combination is l=20 for a and b simultaneously" + // With a flat buffer of length 64, the required index (= power_b + 2*power_a) + // must be < 64. Keep powers within this bound. double s_coeff[64]; - // if (out_of_bounds(power_a, 0, MAX_GAUSS_POWER) || - // out_of_bounds(power_b, 0, MAX_GAUSS_POWER) - // ) { - // PRINT_FUNC_NAME; - // INVALID_ARG_EXIT; - // } - - /* initialization of a hell of a lot of coefficients.... 
*/ - expo_p = expo_a + expo_b; /* total exponent */ - mu = expo_a * expo_b / (expo_a + expo_b); /* reduced exponent */ - pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; /* center of charge */ - x_ab = pos_a - pos_b; /* X_{AB} */ - x_pa = pos_p - pos_a; /* X_{PA} */ - x_pb = pos_p - pos_b; /* X_{PB} */ + // ---- Composite quantities and seed S_00 ---- + expo_p = expo_a + expo_b; // p = c_a + c_b + mu = expo_a * expo_b / (expo_a + expo_b); // μ = c_a c_b / p + pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p;// X_P + x_ab = pos_a - pos_b; // X_AB + x_pa = pos_p - pos_a; // X_PA + x_pb = pos_p - pos_b; // X_PB + s_00 = pi / expo_p; - s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); /* overlap of two spherical gaussians */ - // int n_0j_coeff = 1 + power_b; /* n. of 0j coefficients needed */ - // int n_ij_coeff = 2 * power_a; /* n. of ij coefficients needed (i > 0) */ + s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); // S_{00} - /* we add 3 coeffs. to avoid a hell of a lot of if statements */ - /* n_tot_coeff = n_0j_coeff + n_ij_coeff + 3; */ - /* s_coeff = (double *) calloc(n_tot_coeff, sizeof(double));*/ + // ---- First row: S_{0,j} for j=0..power_b ---- + s_coeff[0] = s_00; // S_{0,0} + s_coeff[1] = x_pb * s_00; // S_{0,1} - /* generate first two coefficients */ - s_coeff[0] = s_00; - s_coeff[1] = x_pb * s_00; j = 1; - /* generate the rest of the first row */ + // Recurrence in j: + // S_{0,j+1} = X_PB * S_{0,j} + (j / (2p)) * S_{0,j-1} while (j < power_b) { s_coeff[j + 1] = x_pb * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); j++; } - /* generate the first two coefficients with i > 0 */ - s_coeff[j + 1] = s_coeff[j] - x_ab * s_coeff[j - 1]; - s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); + + // ---- Bootstrap first two entries with i > 0: S_{1,j-1}, S_{1,j} ---- + // Relations (at this point j = max(power_b, 1)): + // S_{1,j-1} = S_{0,j} - X_AB * S_{0,j-1} + // S_{1,j} = X_PA * S_{0,j} + (j/(2p)) * S_{0,j-1} + s_coeff[j + 1] = s_coeff[j] - 
x_ab * s_coeff[j - 1]; // S_{1,j-1} + s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); // S_{1,j} + i = 1; - /* generate the remaining coefficients with i > 0 */ + // ---- General i>0 step: append (S_{i+1,j-1}, S_{i+1,j}) for i=1..power_a-1 ---- while (i < power_a) { - int i_l = j + 2 * i + 1; - int i_r = j + 2 * i + 2; + int i_l = j + 2 * i + 1; // index for S_{i+1,j-1} + int i_r = j + 2 * i + 2; // index for S_{i+1,j} + + // S_{i+1,j-1} = S_{i,j} - X_AB * S_{i,j-1} s_coeff[i_l] = s_coeff[i_l - 1] - x_ab * s_coeff[i_l - 2]; + + // S_{i+1,j} = X_PA * S_{i,j} + (j * S_{i,j-1} + i * S_{i-1,j}) / (2p) + // (the packed indexing below matches these dependencies) s_coeff[i_r] = x_pa * s_coeff[i_r - 2] + (j * s_coeff[i_r - 3] + i * s_coeff[i_r - 4]) / (2.0 * expo_p); + i++; } - /* free(s_coeff);*/ + // The requested entry is S_{power_a, power_b} at index power_b + 2*power_a. return s_coeff[power_b + 2 * power_a]; } +// ---- Explicit template instantiations for common dimensions ---- template double function_utils::calc_overlap<1>(const GaussFunc<1> &a, const GaussFunc<1> &b); template double function_utils::calc_overlap<2>(const GaussFunc<2> &a, const GaussFunc<2> &b); template double function_utils::calc_overlap<3>(const GaussFunc<3> &a, const GaussFunc<3> &b); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/function_utils.h b/src/functions/function_utils.h index 896c06257..36e2e0521 100644 --- a/src/functions/function_utils.h +++ b/src/functions/function_utils.h @@ -23,11 +23,64 @@ * */ +/** + * @file function_utils.h + * + * @brief Ties together Gaussian primitives and exponential utilities, and + * exposes (via forward declaration) the templated overlap routine + * without pulling in heavier headers that could cause cycles. 
+ * + * What this TU does + * ----------------- + * - Includes: + * - "GaussExp.h": utilities for Gaussian/exponential expressions used + * elsewhere in MRCPP (e.g., Boys integrals, screened interactions). + * - "Gaussian.h": the definition of `GaussFunc`, i.e., a Cartesian + * Gaussian primitive storing powers, center, exponent(s), and a coefficient. + * - Declares (but does not define) the templated function + * `function_utils::calc_overlap(const GaussFunc&, const GaussFunc&)`. + * The definition lives in the function-utils implementation unit + * (see `function_utils.cpp`), which provides the Obara–Saika-based 1D core. + * + * Why only a forward declaration here? + * ------------------------------------ + * - To avoid including a potentially heavy implementation header (and risking + * circular dependencies), we forward-declare the template in the *same* + * namespace `mrcpp::function_utils`. This enables use sites that only need + * the signature to compile quickly, while calls are resolved at link time + * against the explicit instantiations compiled in the corresponding .cpp. + * + * Notes on templates and linkage + * ------------------------------ + * - Because this is only a declaration, any translation unit that actually + * *uses* `calc_overlap` must see the template **definition** (e.g., by + * including the proper header or by relying on explicit instantiations + * provided in the implementation TU). MRCPP provides common explicit + * instantiations (e.g., D = 1, 2, 3) in `function_utils.cpp`. 
+ * + * Example usage + * ------------- + * @code + * #include "Gaussian.h" + * // (this file is included transitively somewhere) + * using mrcpp::GaussFunc; + * using mrcpp::function_utils::calc_overlap; + * + * GaussFunc<3> gA(...), gB(...); + * double S = calc_overlap<3>(gA, gB); // calls Obara–Saika-backed routine + * @endcode + */ + #include "GaussExp.h" #include "Gaussian.h" namespace mrcpp { + +// Forward declaration only: definition is provided in function_utils.cpp. +// Keeping this here avoids heavy includes and potential include cycles. namespace function_utils { -template double calc_overlap(const GaussFunc &a, const GaussFunc &b); +template +double calc_overlap(const GaussFunc &a, const GaussFunc &b); } // namespace function_utils -} // namespace mrcpp + +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/special_functions.cpp b/src/functions/special_functions.cpp index 555528a58..74c9ed750 100644 --- a/src/functions/special_functions.cpp +++ b/src/functions/special_functions.cpp @@ -25,74 +25,84 @@ #include "special_functions.h" - namespace mrcpp { - -/** @brief Free-particle time evolution on real line. +/** + * @brief Analytic solution of the free-particle Schrödinger equation on ℝ at time @p t. * - * @param[in] x: space coordinate in \f$ \mathbb R \f$. - * @param[in] x0: \f$ x_0 \f$ center of gaussian function at zero time moment. - * @param[in] t: time moment. - * @param[in] sigma: \f$ \sigma \f$ width of the initial gaussian wave. + * This implements the standard Gaussian wave packet propagation (free particle, \f$\hbar=1\f$, mass \f$m=\tfrac12\f$ + * so that the free propagator denominator becomes \f$4it+\sigma\f$ as used below). 
Given an initial + * Gaussian of width parameter \f$\sigma>0\f$ centered at \f$x_0\f$ at time \f$t=0\f$, + * the wave function at time \f$t\f$ is * - * @details Analytical solution of a one dimensional free-particle - * movement * \f[ - * \psi(x, t) - * = - * \sqrt{ - * \frac{ \sigma }{ 4it + \sigma } - * } - * e^{ - \frac { (x - x_0)^2 }{ 4it + \sigma } } + * \psi(x,t) + * = + * \sqrt{\frac{\sigma}{\,\sigma + 4\, i\, t\,}} + * \exp\!\left( + * -\,\frac{(x - x_0)^2}{\,\sigma + 4\, i\, t\,} + * \right), * \f] - * where \f$ t, \sigma > 0 \f$. - * - * @returns The complex-valued wave function - * \f$ \psi(x, t) \f$ - * at the specified space coordinate and time. - * - * + * + * which disperses in time and acquires a complex phase. + * + * #### Parameters + * - @param x Real-space coordinate \f$x \in \mathbb{R}\f$. + * - @param x0 Initial center \f$x_0\f$ of the Gaussian at \f$t=0\f$. + * - @param t Time \f$t \in \mathbb{R}\f$ (can be positive or negative). + * - @param sigma Width parameter \f$\sigma>0\f$ of the initial Gaussian. + * + * #### Returns + * The complex-valued wave function \f$\psi(x,t)\f$ at the requested space-time point. + * + * #### Notes + * - For @p t = 0, this reduces to \f$\psi(x,0)=\exp\!\big(-\tfrac{(x-x_0)^2}{\sigma}\big)\f$. + * - The branch of the complex square root is the principal branch via `std::sqrt(std::complex)`. + * - Numerical behavior near large \f$|t|\f$: the modulus decays like \f$|\sigma/(\sigma+4it)|^{1/2}\f$, + * while the phase is dominated by the complex denominator; standard `std::complex` arithmetic handles this. + * - This function assumes consistent physical units so that the closed form above applies directly. 
*/ std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma) { - std::complex i(0.0, 1.0); // Imaginary unit - auto denominator = 4 * t * i + sigma; - std::complex sqrt_denom = std::sqrt(denominator); - std::complex exponent = -((x - x0) * (x - x0)) / denominator; + std::complex i(0.0, 1.0); // imaginary unit i + std::complex denom = sigma + 4.0 * t * i; // σ + 4 i t + std::complex exponent = -((x - x0) * (x - x0)) / denom; - return std::sqrt(sigma) / sqrt_denom * std::exp(exponent); + return std::sqrt(sigma) / std::sqrt(denom) * std::exp(exponent); } - - -/** @brief A smooth compactly supported non-negative function. - * - * @param[in] x: space coordinate in \f$ \mathbb R \f$. - * @param[in] a: the left support boundary. - * @param[in] b: the right support boundary. +/** + * @brief Smooth, compactly supported "bump" function on the interval \f$(a,b)\f$. * - * @details Smooth function on the real line \f$ \mathbb R \f$ - * defined by the formula + * Defines a non-negative \f$C^\infty\f$ function * \f[ - * g_{a,b} (x) = \exp \left( - \frac{b - a}{(x - a)(b - x)} \right) - * , \quad - * a < x < b + * g_{a,b}(x) = + * \begin{cases} + * \exp\!\Big( -\,\dfrac{b-a}{(x-a)(b-x)} \Big), & a < x < b,\\[6pt] + * 0, & \text{otherwise}, + * \end{cases} * \f] - * and \f$ g_{a,b} (x) = 0 \f$ elsewhere. - * - * @returns The non-negative value - * \f$ g_{a,b} (x) \f$ - * at the specified space coordinate \f$ x \in \mathbb R \f$. - * - * + * which vanishes to **all orders** at the endpoints \f$a\f$ and \f$b\f$. + * + * #### Parameters + * - @param x Real-space coordinate \f$x \in \mathbb{R}\f$. + * - @param a Left endpoint (must satisfy \f$a<b\f$). + * - @param b Right endpoint (must satisfy \f$b>a\f$). + * + * #### Returns + * - \f$g_{a,b}(x)\f$ if \f$a < x < b\f$, and `0.0` otherwise. 
+ * + * #### Numerical remarks + * - Near the endpoints, \f$(x-a)(b-x)\to 0^+\f$ and the exponent \f$-\frac{b-a}{(x-a)(b-x)}\f$ becomes large + * and negative, so the value safely underflows toward 0; this is expected and preserves smooth compact support. + * - If `a >= b`, the definition yields the zero function for all `x`. */ double smooth_compact_function(double x, double a, double b) { - double res = 0; if (a < x && x < b) { - res = exp((a - b) / (x - a) / (b - x)); + // Equivalent to: exp( - (b-a) / ((x-a)(b-x)) ) + return std::exp((a - b) / ((x - a) * (b - x))); } - return res; + return 0.0; } } // namespace mrcpp \ No newline at end of file diff --git a/src/functions/special_functions.h b/src/functions/special_functions.h index 4c2f68ac3..41a5d31a4 100644 --- a/src/functions/special_functions.h +++ b/src/functions/special_functions.h @@ -26,13 +26,64 @@ #pragma once #include -#include - +#include namespace mrcpp { +/** + * # Free-particle Gaussian propagation (analytic form) + * + * @brief Analytic solution \f$\psi(x,t)\f$ of the 1D free-particle Schrödinger equation + * for a Gaussian initially centered at \f$x_0\f$ with width parameter \f$\sigma>0\f$. + * + * This declaration corresponds to the definition in `special_functions.cpp`. The solution used is + * \f[ + * \psi(x,t) + * = + * \sqrt{\frac{\sigma}{\,\sigma + 4\, i\, t\,}}\; + * \exp\!\left(-\,\frac{(x-x_0)^2}{\,\sigma + 4\, i\, t\,}\right), + * \f] + * which matches the conventional free propagator with units chosen such that \f$\hbar=1\f$ + * and mass \f$m=\tfrac12\f$ (hence the factor \f$4it\f$ in the denominator). + * + * @param x Real-space coordinate \f$x \in \mathbb{R}\f$. + * @param x0 Initial center \f$x_0\f$ of the Gaussian at \f$t=0\f$. + * @param t Time \f$t \in \mathbb{R}\f$. + * @param sigma Positive width parameter \f$\sigma>0\f$ of the initial Gaussian. + * + * @return Complex value of \f$\psi(x,t)\f$ at the requested point. 
+ * + * @note The complex square root in the prefactor is taken on the principal branch + * by `std::sqrt(std::complex)`. + * @note For \f$t=0\f$, the expression reduces to \f$\psi(x,0)=\exp\!\big(-\tfrac{(x-x_0)^2}{\sigma}\big)\f$. + */ std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma); +/** + * # Smooth compactly supported bump + * + * @brief A smooth (\f$C^\infty\f$) non-negative function supported on the open interval \f$(a,b)\f$. + * + * The function is defined by + * \f[ + * g_{a,b}(x) = + * \begin{cases} + * \exp\!\Big(-\dfrac{b-a}{(x-a)(b-x)}\Big), & a < x < b,\\[6pt] + * 0, & \text{otherwise}, + * \end{cases} + * \f] + * and vanishes to **all orders** at the endpoints \f$a\f$ and \f$b\f$. + * + * @param x Real-space coordinate \f$x \in \mathbb{R}\f$. + * @param a Left endpoint of support (default `0`). + * @param b Right endpoint of support (default `1`). + * + * @return The value \f$g_{a,b}(x)\f$. + * + * @note If \f$a \ge b\f$, the function is identically zero for all \f$x\f$. + * @warning Near the endpoints, the denominator \f$(x-a)(b-x)\f$ becomes small; + * the exponent is large and negative so the result underflows smoothly to zero. 
+ */ double smooth_compact_function(double x, double a = 0, double b = 1); } // namespace mrcpp \ No newline at end of file From 063b81da15b772556924d619c629fd6fc42fa162 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Tue, 28 Oct 2025 17:39:15 +0300 Subject: [PATCH 04/51] DONE documentation in the operators folder --- src/operators/ABGVOperator.cpp | 83 ++++++++--- src/operators/ABGVOperator.h | 71 +++++++++- src/operators/BSOperator.cpp | 86 +++++++++-- src/operators/BSOperator.h | 73 +++++++++- src/operators/CartesianConvolution.cpp | 107 +++++++++++++- src/operators/CartesianConvolution.h | 91 +++++++++++- src/operators/ConvolutionOperator.cpp | 138 ++++++++++++++---- src/operators/ConvolutionOperator.h | 137 +++++++++++++++--- src/operators/DerivativeConvolution.cpp | 83 ++++++++++- src/operators/DerivativeConvolution.h | 94 +++++++++++-- src/operators/DerivativeKernel.h | 68 +++++++++ src/operators/DerivativeOperator.h | 48 ++++++- src/operators/HeatKernel.h | 74 +++++++--- src/operators/HeatOperator.cpp | 114 +++++++++++---- src/operators/HeatOperator.h | 94 +++++++++---- src/operators/HelmholtzKernel.cpp | 115 +++++++++++---- src/operators/HelmholtzKernel.h | 54 +++++++ src/operators/HelmholtzOperator.cpp | 65 +++++++-- src/operators/HelmholtzOperator.h | 85 +++++++++-- src/operators/IdentityConvolution.cpp | 79 ++++++++--- src/operators/IdentityConvolution.h | 79 +++++++++-- src/operators/IdentityKernel.h | 51 ++++++- src/operators/MWOperator.cpp | 95 ++++++++++++- src/operators/MWOperator.h | 117 +++++++++++++-- src/operators/OperatorState.h | 180 ++++++++++++++++++++---- src/operators/OperatorStatistics.cpp | 86 ++++++++++- src/operators/OperatorStatistics.h | 91 ++++++++++-- src/operators/PHOperator.cpp | 47 ++++++- src/operators/PHOperator.h | 68 +++++++-- src/operators/PoissonKernel.cpp | 78 ++++++++-- src/operators/PoissonKernel.h | 47 ++++++- src/operators/PoissonOperator.cpp | 79 +++++++++-- src/operators/PoissonOperator.h | 64 +++++++-- 
src/operators/TimeEvolutionOperator.cpp | 147 +++++++++++++------ src/operators/TimeEvolutionOperator.h | 128 ++++++++++++++--- 35 files changed, 2689 insertions(+), 427 deletions(-) diff --git a/src/operators/ABGVOperator.cpp b/src/operators/ABGVOperator.cpp index 05525405e..8a65aa1c1 100644 --- a/src/operators/ABGVOperator.cpp +++ b/src/operators/ABGVOperator.cpp @@ -33,15 +33,32 @@ namespace mrcpp { -/** @returns New ABGVOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] a: Left boundary condition - * @param[in] b: Right boundary condition - * @details Boundary parameters correspond to: - * - `a=0.0` `b=0.0`: Strictly local "center" difference - * - `a=0.5` `b=0.5`: Semi-local central difference - * - `a=1.0` `b=0.0`: Semi-local forward difference - * - `a=0.0` `b=1.0`: Semi-local backward difference +/** + * # ABGV finite-difference(-like) operator on an MRA + * + * This operator implements a family of first-derivative stencils controlled by two + * boundary parameters \p a and \p b (see below). The operator is **assembled once** + * as an `OperatorTree` in the multiresolution basis of the provided MRA and can then + * be applied repeatedly to vectors/functions defined on the same MRA. + * + * ## Boundary parameters + * The pair `(a,b)` selects a particular linear combination of forward/backward bias: + * + * - `a = 0.0`, `b = 0.0` → strictly local “center” difference (bandwidth 0) + * - `a = 0.5`, `b = 0.5` → semi-local **central** difference (bandwidth 1) + * - `a = 1.0`, `b = 0.0` → semi-local **forward** difference (bandwidth 1) + * - `a = 0.0`, `b = 1.0` → semi-local **backward** difference (bandwidth 1) + * + * Any non-zero `a` or `b` increases the operator’s bandwidth to 1 (one-ring coupling + * between neighboring nodes at each scale), which the `BandWidthAdaptor` enforces. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra The multiresolution analysis that defines basis, scales, and domain. 
+ * @param a Left boundary parameter controlling asymmetry at the “minus” side. + * @param b Right boundary parameter controlling asymmetry at the “plus” side. + * + * @note The operator is built at the **root scale** of the provided MRA, and its + * internal representation (raw expansion) is cached for later applications. */ template ABGVOperator::ABGVOperator(const MultiResolutionAnalysis &mra, double a, double b) @@ -49,32 +66,66 @@ ABGVOperator::ABGVOperator(const MultiResolutionAnalysis &mra, double a, d initialize(a, b); } +/** + * @brief Internal construction routine: builds a bandwidth-adapted OperatorTree. + * + * Steps (high level): + * 1. **Bandwidth decision** — if either \p a or \p b is non-zero, set bandwidth = 1, + * otherwise 0. This determines how many neighbor interactions the operator will keep. + * 2. **Calculator** — instantiate `ABGVCalculator` with the MRA’s scaling basis and (a,b). + * The calculator knows how to evaluate local operator blocks (stencil entries) in + * the chosen basis. + * 3. **Adaptor** — create a `BandWidthAdaptor(bw, maxScale)` to prune any far-off + * couplings beyond the requested bandwidth across all scales. + * 4. **Tree build** — use `TreeBuilder<2>` (matrix builder) to assemble an `OperatorTree` + * from root to finest scale with tolerance `MachineZero` and adaptor-controlled sparsity. + * 5. **Finalize** — trigger norm computation and set up an operator-node cache for fast + * application; then store the finished tree in `raw_exp` and initialize the expansion. + * + * @param a Left boundary parameter. + * @param b Right boundary parameter. + * + * @details + * - `calcSquareNorm()` performs a pass that also ensures the internal transform state is + * consistent (it may trigger lazy transforms). We time this step for diagnostics. + * - `setupOperNodeCache()` precomputes/cache-friendly structures for repeated operator + * application (e.g., fast traversal, block reuse). 
+ * - `initOperExp(1)` finalizes the operator’s internal expansion (single component here). + */ template void ABGVOperator::initialize(double a, double b) { - int bw = 0; // Operator bandwidth + // --- (1) Decide operator bandwidth from boundary parameters ------------------------- + int bw = 0; // 0 = strictly local, 1 = nearest-neighbor coupling if (std::abs(a) > MachineZero) bw = 1; if (std::abs(b) > MachineZero) bw = 1; + // --- (2) Access the operator MRA ---------------------------------------------------- auto oper_mra = this->getOperatorMRA(); - TreeBuilder<2> builder; + // --- (3) Prepare builder, calculator, and bandwidth adaptor ------------------------- + TreeBuilder<2> builder; // <2> means: building a 2-index object (matrix/operator) ABGVCalculator calculator(oper_mra.getScalingBasis(), a, b); BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); + // --- (4) Assemble the operator tree ------------------------------------------------- + // MachineZero: force exact assembly within floating point epsilon (no thresholding). auto o_tree = std::make_unique(oper_mra, MachineZero); - builder.build(*o_tree, calculator, adaptor, -1); + builder.build(*o_tree, calculator, adaptor, -1 /* build all scales */); + // --- (5) Finalize and cache --------------------------------------------------------- Timer trans_t; - o_tree->calcSquareNorm(); - o_tree->setupOperNodeCache(); + o_tree->calcSquareNorm(); // also ensures internal transforms are ready + o_tree->setupOperNodeCache(); // allocate and fill fast-access caches print::time(10, "Time transform", trans_t); print::separator(10, ' '); + // Keep the assembled operator as our raw expansion and finalize its use this->raw_exp.push_back(std::move(o_tree)); - this->initOperExp(1); + this->initOperExp(1); // single-operator expansion component } +// Explicit template instantiations for 1D, 2D, and 3D operators. 
template class ABGVOperator<1>; template class ABGVOperator<2>; template class ABGVOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/ABGVOperator.h b/src/operators/ABGVOperator.h index 3cf85bbaa..ab76d882b 100644 --- a/src/operators/ABGVOperator.h +++ b/src/operators/ABGVOperator.h @@ -29,23 +29,80 @@ namespace mrcpp { -/** @class ABGVOperator +/** + * @class ABGVOperator + * @brief Multiresolution first-derivative operator of Alpert–Beylkin–Gines–Vozovoi. * - * @brief Derivative operator as defined by Alpert, Beylkin, Ginez and Vozovoi, - * J Comp Phys 182, 149-190 (2002). + * This class builds a **first-order differential operator** in the + * multiresolution (MR) basis defined by a given + * #mrcpp::MultiResolutionAnalysis. The discrete representation follows + * the construction in: * - * NOTE: This is the recommended derivative operator for "cuspy" or discontinuous - * functions. The BSOperator is recommended for smooth functions. + * - B. Alpert, G. Beylkin, D. Gines, and L. Vozovoi, + * *Adaptive Solution of Partial Differential Equations in Multiwavelet Bases*, + * J. Comput. Phys. **182** (2002) 149–190. + * + * ### When to use this operator + * - **Recommended** for functions with **cusps, kinks, or discontinuities**, + * where strictly smooth (BS) operators tend to produce Gibbs-type artifacts. + * - For **smooth** functions, prefer #mrcpp::BSOperator for slightly better + * accuracy/efficiency with smooth stencils. + * + * ### Boundary/stencil parameters \p a and \p b + * The parameters `(a, b)` control the local stencil asymmetry at element + * interfaces. 
Common choices: + * + * - `a = 0.0`, `b = 0.0` → strictly local “center” rule (bandwidth 0) + * - `a = 0.5`, `b = 0.5` → semi-local **central** difference (bandwidth 1) + * - `a = 1.0`, `b = 0.0` → semi-local **forward** difference (bandwidth 1) + * - `a = 0.0`, `b = 1.0` → semi-local **backward** difference (bandwidth 1) + * + * Any non-zero `a` or `b` widens the coupling to nearest neighbors (bandwidth = 1) + * across scales; this is enforced during assembly. + * + * ### Assembly and application + * Internally, the operator is assembled once into an #mrcpp::OperatorTree + * (stored in the base #mrcpp::DerivativeOperator). After construction, + * applying the operator to MR coefficient vectors is cheap and can be done + * repeatedly. + * + * @tparam D Spatial dimension (1, 2, or 3). */ - template class ABGVOperator final : public DerivativeOperator { public: + /** + * @brief Construct the ABGV derivative operator on a given MRA. + * + * The constructor triggers an internal `initialize(a, b)` routine that: + * 1. Decides the operator bandwidth from `(a, b)`. + * 2. Builds the operator matrix blocks using the MRA’s scaling basis. + * 3. Assembles an #mrcpp::OperatorTree with a bandwidth adaptor. + * 4. Finalizes and caches the representation for fast application. + * + * @param mra Multiresolution analysis defining the domain, basis and scales. + * @param a Left-side boundary/stencil parameter (see class docs). + * @param b Right-side boundary/stencil parameter (see class docs). + * + * @note The operator is built at the MRA’s **root scale** and is valid for + * coefficient vectors defined on the same MRA. + */ ABGVOperator(const MultiResolutionAnalysis &mra, double a, double b); + ABGVOperator(const ABGVOperator &oper) = delete; ABGVOperator &operator=(const ABGVOperator &oper) = delete; protected: + /** + * @brief Internal assembly routine (called by the constructor). 
+ * + * Decides sparsity (bandwidth) from `(a, b)`, constructs the calculator + * implementing the ABGV derivative in the given scaling basis, and uses a + * `TreeBuilder` + `BandWidthAdaptor` to assemble and cache an operator tree. + * + * @param a Left boundary/stencil parameter. + * @param b Right boundary/stencil parameter. + */ void initialize(double a, double b); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/BSOperator.cpp b/src/operators/BSOperator.cpp index 62b8e30c3..59919794a 100644 --- a/src/operators/BSOperator.cpp +++ b/src/operators/BSOperator.cpp @@ -23,6 +23,41 @@ * */ +/** + * @file BSOperator.cpp + * @brief Assembly of a smooth multiresolution derivative operator (“BS” operator). + * + * ## Purpose + * Build and cache a derivative operator in the multiresolution basis of a given + * #mrcpp::MultiResolutionAnalysis. This operator is intended for **smooth** + * functions; for non-smooth or discontinuous data, prefer #mrcpp::ABGVOperator. + * + * ## What happens here + * 1. The constructor stores the requested derivative **order** (1, 2, or 3) and + * delegates to `initialize()`. + * 2. `initialize()`: + * - chooses a (small) **bandwidth** (nearest-neighbor coupling, `bw = 1`), + * - creates a #mrcpp::BSCalculator that provides the local operator blocks + * in the MRA scaling basis for the selected derivative order, + * - wraps assembly with a #mrcpp::BandWidthAdaptor to enforce sparsity + * across all scales, + * - uses #mrcpp::TreeBuilder to assemble an #mrcpp::OperatorTree, + * - finalizes the operator (computes norms, builds per-node caches), + * - registers the resulting tree in the base #mrcpp::DerivativeOperator, + * and initializes the internal operator expansion for fast application. + * + * ## Notes + * - The chosen bandwidth (`bw = 1`) yields a compact stencil (nearest neighbors). 
+ * - `calcSquareNorm()` is invoked once to precompute norms; this can aid later + * conditioning/thresholding steps that use these norms. + * - `setupOperNodeCache()` prepares per-node data needed for efficient + * application of the operator during transforms/apply calls. + * + * ## Performance/usage + * After construction, applying the operator to MR coefficient vectors is cheap + * and can be repeated many times. The build cost is paid once per (MRA, order). + */ + #include "BSOperator.h" #include "treebuilders/BSCalculator.h" #include "treebuilders/BandWidthAdaptor.h" @@ -32,9 +67,16 @@ namespace mrcpp { -/** @returns New BSOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] order: Derivative order, defined for 1, 2 and 3 +/** + * @brief Construct a smooth (“BS”) multiresolution derivative operator. + * + * @param mra Multiresolution analysis that defines the domain, basis, and scales. + * @param order Derivative order (supported: 1, 2, or 3). + * + * The operator is anchored at the MRA’s **root scale** (via the base + * #mrcpp::DerivativeOperator constructor) and immediately assembled by + * calling `initialize()`. The internal representation is stored as an + * #mrcpp::OperatorTree and cached for fast application. */ template BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) @@ -43,27 +85,51 @@ BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) initialize(); } +/** + * @brief Build and cache the “BS” derivative operator. + * + * **Assembly pipeline** + * 1. Select operator bandwidth `bw = 1` (nearest-neighbor coupling). + * 2. Query the operator MRA (`getOperatorMRA()`), which carries the scaling + * basis and max scale. + * 3. 
Instantiate: + * - #mrcpp::BSCalculator with the scaling basis and the requested derivative + * order (generates local operator blocks), + * - #mrcpp::BandWidthAdaptor with `(bw, maxScale)` to enforce sparsity, + * - #mrcpp::TreeBuilder to assemble the global #mrcpp::OperatorTree. + * 4. Build into a fresh `OperatorTree(oper_mra, MachineZero)`: + * - `MachineZero` is used as a numerical floor for tree entries. + * 5. Finalize: + * - `calcSquareNorm()` precomputes norms (useful for later compression/metrics), + * - `setupOperNodeCache()` creates per-node caches for fast application. + * 6. Store the assembled tree in `raw_exp` (owned by the base class) and call + * `initOperExp(1)` to finalize the expansion with a single raw operator. + */ template void BSOperator::initialize() { - int bw = 1; // Operator bandwidth + int bw = 1; // Operator bandwidth: nearest-neighbor coupling auto oper_mra = this->getOperatorMRA(); - TreeBuilder<2> builder; - BSCalculator calculator(oper_mra.getScalingBasis(), this->order); - BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); + TreeBuilder<2> builder; // 2: binary tree arity in 1D blocks + BSCalculator calculator(oper_mra.getScalingBasis(), this->order); + BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); // enforce sparsity across scales + // Assemble the operator tree with numerical floor MachineZero auto o_tree = std::make_unique(oper_mra, MachineZero); - builder.build(*o_tree, calculator, adaptor, -1); + builder.build(*o_tree, calculator, adaptor, -1 /* all levels */); + // Finalize and cache per-node data for fast application Timer trans_t; - o_tree->calcSquareNorm(); - o_tree->setupOperNodeCache(); + o_tree->calcSquareNorm(); // precompute norms (once) + o_tree->setupOperNodeCache(); // build caches for fast apply print::time(10, "Time transform", trans_t); print::separator(10, ' '); + // Register this raw operator with the base class and initialize expansion this->raw_exp.push_back(std::move(o_tree)); 
this->initOperExp(1); } +// Explicit instantiations template class BSOperator<1>; template class BSOperator<2>; template class BSOperator<3>; diff --git a/src/operators/BSOperator.h b/src/operators/BSOperator.h index 873b5a3d8..deef99407 100644 --- a/src/operators/BSOperator.h +++ b/src/operators/BSOperator.h @@ -30,21 +30,84 @@ namespace mrcpp { /** @class BSOperator + * @ingroup operators * - * @brief B-spline derivative operator as defined by Anderson etal, J Comp Phys X 4, 100033 (2019). + * @brief Smooth multiresolution derivative operator (“BS” operator). * - * NOTE: This is the recommended derivative operator only for _smooth_ functions. - * Use the ABGVOperator if the function has known cusps or discontinuities. + * This class builds a derivative operator in the multiresolution scaling basis + * tailored for **smooth** functions. The discrete stencil is compact (nearest- + * neighbor bandwidth) and its local blocks are generated by the *BS* scheme + * (see Anderson *et al.*, J. Comp. Phys. X 4, 100033 (2019)). + * + * ### When to use + * - Prefer this operator when the target function is sufficiently smooth at + * the scales of interest (e.g. no strong cusps or jump discontinuities). + * - For functions with cusps/discontinuities, use #mrcpp::ABGVOperator instead, + * which is more robust in the non-smooth regime. + * + * ### What it builds internally + * The constructor triggers an assembly pipeline (via a hidden `initialize()`) + * that: + * 1. Creates a sparse, bandwidth-1 operator tree on the provided + * #mrcpp::MultiResolutionAnalysis (MRA). + * 2. Uses a calculator (BS formulation) to fill local operator blocks for the + * requested derivative order. + * 3. Finalizes and caches per-node data for fast application. + * + * ### Complexity & reuse + * - **Build**: one-time cost per (MRA, derivative order). + * - **Apply**: fast, cache-friendly application to MR coefficient vectors. + * + * @tparam D Spatial dimension (1, 2, or 3). 
+ * + * @see mrcpp::ABGVOperator + * @see mrcpp::DerivativeOperator + * @see mrcpp::OperatorTree + * @see mrcpp::MultiResolutionAnalysis */ - template class BSOperator final : public DerivativeOperator { public: + /** + * @brief Construct a BS derivative operator on a given MRA. + * + * The operator is anchored to the MRA’s root scale (handled by the + * #mrcpp::DerivativeOperator base class) and immediately assembled. The + * derivative order typically supports 1, 2, or 3 (as provided by the BS + * calculator implementation). + * + * @param mra Multiresolution analysis defining basis, domain, and scales. + * @param order Derivative order (e.g., 1, 2, or 3). + * + * @note This operator assumes smoothness; if your target function has + * strong non-smooth features, consider #mrcpp::ABGVOperator. + * + * @code + * MultiResolutionAnalysis<1> mra(...); + * BSOperator<1> Dx(mra, 1); // order = 1: first derivative in 1D + * // apply Dx to a function tree / coefficient vector later... + * @endcode + */ explicit BSOperator(const MultiResolutionAnalysis &mra, int order); + + /// Deleted copy constructor: operators are heavyweight and own caches. explicit BSOperator(const BSOperator &oper) = delete; + /// Deleted assignment. BSOperator &operator=(const BSOperator &oper) = delete; protected: + /** + * @brief Assemble and cache the operator (implementation detail). + * + * Internal steps (performed once at construction): + * - Choose a compact bandwidth (nearest-neighbor coupling). + * - Use a BS-based calculator to generate local blocks for the requested + * derivative order. + * - Build a sparse #mrcpp::OperatorTree on the provided MRA. + * - Precompute norms and per-node caches for fast application. + * + * @warning This is not intended to be called by users directly. 
+ */ void initialize(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/CartesianConvolution.cpp b/src/operators/CartesianConvolution.cpp index 64ac5491d..6c60c7926 100644 --- a/src/operators/CartesianConvolution.cpp +++ b/src/operators/CartesianConvolution.cpp @@ -47,34 +47,133 @@ namespace mrcpp { -CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra, GaussExp<1> &kernel, double prec) +/** + * @class CartesianConvolution + * @brief 3D separable convolution operator assembled from a 1D Gaussian expansion. + * + * ### What this class builds + * We construct a rank-`R` separable operator (with `R = kernel.size()`) that can + * later be combined into Cartesian components (x, y, z). Internally we build + * **three batches** of 1D operator trees from the same 1D Gaussian expansion, + * corresponding to polynomial prefactors of degree 0, 1, and 2 multiplying the + * Gaussian (i.e., monomials \f$x^0, x^1, x^2\f$ on the line). These three + * batches are stored back-to-back in `raw_exp` and can be mapped onto the + * (x, y, z) axes in any order using #setCartesianComponents. + * + * This pattern is useful for assembling vector kernels whose Cartesian + * components differ only by the polynomial factor in each axis (e.g., kernels + * proportional to \f$(1,\,x,\,x^2)\f$ times a radial Gaussian, or for forming + * gradients / moments where different axes pick different polynomial orders). + * + * ### Precision knobs + * - `prec` is the user‐requested overall build precision for the operator. + * - We derive two internal precisions: + * - `o_prec = prec` for operator assembly; + * - `k_prec = prec / 10` for fitting/projection of the 1D kernel, slightly + * tighter so that the overall composition meets the requested tolerance. 
+ * + * ### Memory layout of the built batches + * After construction, `raw_exp` contains `3 * R` operator trees in this order: + * ``` + * block 0: monomial power {0} for all R terms (indices 0 ... R-1) + * block 1: monomial power {1} for all R terms (indices R ... 2R-1) + * block 2: monomial power {2} for all R terms (indices 2R ... 3R-1) + * ``` + * The method #setCartesianComponents selects one of these three blocks per axis. + */ + +/** + * @brief Construct a Cartesian convolution operator on an MRA with a 1D Gaussian expansion. + * + * @param[in] mra 3D multiresolution analysis defining basis/domain/scales. + * @param[in,out] kernel 1D Gaussian expansion \f$ \sum_{r=1}^R g_r(x) \f$ used to + * generate the separable operator factors. Its length + * determines the separation rank \f$R\f$. + * **Note:** This constructor overwrites the + * monomial power of each Gaussian term in every assembly + * pass and does not restore it (the last value set is `{2}`). + * @param[in] prec Target build precision for the operator. + * + * @details + * **Assembly recipe (done three times):** + * 1. For every term in the input 1D Gaussian expansion, set its monomial + * power to `{0}`, then call `initialize(...)` to build and append one + * operator tree per term (rank-`R` block). + * 2. Repeat with monomial power `{1}` to build the second block (indices + * `R ... 2R-1`). + * 3. Repeat with monomial power `{2}` to build the third block (indices + * `2R ... 3R-1`). + * + * After these three passes, we call `initOperExp(R)` to declare that downstream + * separable composition will have rank \f$R\f$ (each axis picks one block). + * + * **Why powers {0,1,2}?** + * Many Cartesian tensor kernels (e.g., derivatives, moments, or vector fields) + * differ by low-order polynomial prefactors along each coordinate. Prebuilding + * the families \f$\{0,1,2\}\f$ provides flexible combinations via + * #setCartesianComponents without having to rebuild for each axis. 
+ */ +CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra, + GaussExp<1> &kernel, + double prec) : ConvolutionOperator<3>(mra) , sep_rank(kernel.size()) { int oldlevel = Printer::setPrintLevel(0); + // Configure precision: operator vs. kernel fit this->setBuildPrec(prec); - auto o_prec = prec; - auto k_prec = prec / 10.0; + auto o_prec = prec; // Operator assembly precision + auto k_prec = prec / 10.0; // Kernel fitting precision (tighter on purpose) + // --- Batch 0: monomial power {0} (constant prefactor) --- for (auto &k : kernel) k->setPow({0}); this->initialize(kernel, k_prec, o_prec); + + // --- Batch 1: monomial power {1} (linear prefactor) --- for (auto &k : kernel) k->setPow({1}); this->initialize(kernel, k_prec, o_prec); + + // --- Batch 2: monomial power {2} (quadratic prefactor) --- for (auto &k : kernel) k->setPow({2}); this->initialize(kernel, k_prec, o_prec); + // Tell the separable framework we will later combine per-axis using rank = sep_rank this->initOperExp(this->sep_rank); + Printer::setPrintLevel(oldlevel); } +/** + * @brief Choose which prebuilt monomial block (0,1,2) to use for each Cartesian axis. + * + * @param[in] x Block index used for the x-axis (0 → power{0}, 1 → power{1}, 2 → power{2}) + * @param[in] y Block index used for the y-axis (same convention as above) + * @param[in] z Block index used for the z-axis (same convention as above) + * + * @details + * This function **does not** rebuild; it only wires the already constructed + * 1D operator trees into the separable 3D operator slots. For separation rank + * \f$R\f$, each block occupies a contiguous range of \f$R\f$ entries: + * + * - Block `x`: indices `[x*R, x*R + R - 1]` become the x-factors; + * - Block `y`: indices `[y*R, y*R + R - 1]` become the y-factors; + * - Block `z`: indices `[z*R, z*R + R - 1]` become the z-factors. 
+ * + * You may reuse the same block on multiple axes if the physics warrants it + * (e.g., isotropic components), or select different ones to form vector/tensor + * kernels with distinct Cartesian prefactors. + * + * @warning Valid block indices are 0, 1, or 2. No bounds checking is performed here. + */ void CartesianConvolution::setCartesianComponents(int x, int y, int z) { int x_shift = x * this->sep_rank; int y_shift = y * this->sep_rank; int z_shift = z * this->sep_rank; + // Fill the separable operator slots (rank index i, axis 0/1/2) with the chosen blocks. for (int i = 0; i < this->sep_rank; i++) this->assign(i, 0, this->raw_exp[x_shift + i].get()); for (int i = 0; i < this->sep_rank; i++) this->assign(i, 1, this->raw_exp[y_shift + i].get()); for (int i = 0; i < this->sep_rank; i++) this->assign(i, 2, this->raw_exp[z_shift + i].get()); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/CartesianConvolution.h b/src/operators/CartesianConvolution.h index 63d70a53a..626ccbf03 100644 --- a/src/operators/CartesianConvolution.h +++ b/src/operators/CartesianConvolution.h @@ -29,17 +29,106 @@ namespace mrcpp { +// Forward declaration to avoid pulling the full header into users of this file. +template class GaussExp; + +/** + * @class CartesianConvolution + * @brief 3D separable convolution operator assembled from a 1D Gaussian expansion. + * + * This operator represents a Cartesian, rank-R, separable convolution in 3D, + * where the separation rank R equals the number of terms in a provided 1D + * Gaussian expansion, `GaussExp<1>`. + * + * ### How it is constructed (see .cpp) + * The implementation builds three *blocks* of 1D operator trees from the + * same Gaussian expansion, corresponding to monomial prefactors of degree + * 0, 1 and 2 (i.e. powers `{0}`, `{1}`, `{2}`), and stores them + * contiguously. 
These blocks can then be assigned independently to the + * x/y/z axes, enabling vector/tensor kernels that differ only by the + * Cartesian polynomial factor. + * + * After construction, the total number of internally stored operator trees is + * `3 * sep_rank` (three monomial blocks, each of size `sep_rank`). + * + * ### Choosing the Cartesian components + * Use setCartesianComponents(x, y, z) to select which monomial block + * (0 → degree 0, 1 → degree 1, 2 → degree 2) is used along each axis. This + * *rewires* the already built 1D factors—no rebuilding occurs. + * + * ### Precision + * The constructor accepts a single build precision `prec`. The .cpp implementation + * employs a slightly stricter precision for fitting the 1D kernel terms so that + * the final composed 3D operator meets the requested tolerance. + * + * ### Ownership / lifetime + * The class does not take ownership of the input `GaussExp<1>`, but the constructor + * overwrites the monomial power of its terms. Internally created operator trees are owned by this object. + * + * ### Copy semantics + * Copying is disabled (non-copyable) because the underlying operator trees are + * heavy and managed resources. Move is not provided. + * + * ### Example + * @code + * MultiResolutionAnalysis<3> mra(...); + * GaussExp<1> kernel = ...; // ∑_{r=1}^R α_r e^{-β_r (x - x_r)^2} + * double prec = 1e-8; + * + * CartesianConvolution conv(mra, kernel, prec); + * // Use degree-1 along x, degree-0 along y, degree-2 along z: + * conv.setCartesianComponents(1, 0, 2); + * + * // conv can now be applied as a separable 3D convolution operator. + * @endcode + */ class CartesianConvolution : public ConvolutionOperator<3> { public: + /** + * @brief Construct a 3D separable convolution operator from a 1D Gaussian expansion. + * + * @param mra Multiresolution analysis defining the 3D basis/domain. + * @param kernel 1D Gaussian expansion; its length sets the separation rank R. 
+ * The implementation overwrites the monomial power of each + * Gaussian term to build three internal blocks (degrees 0, 1, 2) and does not restore it; + * it does not take ownership of @p kernel. + * @param prec Target build precision for the assembled operator. + * + * @details + * Internally, three batches of operator trees are built (for polynomial degrees + * 0/1/2), each of size R, and stored contiguously. The final separable operator + * exposes rank R; per-axis assignment of the blocks is deferred to + * setCartesianComponents(). + */ CartesianConvolution(const MultiResolutionAnalysis<3> &mra, GaussExp<1> &kernel, double prec); + CartesianConvolution(const CartesianConvolution &oper) = delete; CartesianConvolution &operator=(const CartesianConvolution &oper) = delete; virtual ~CartesianConvolution() = default; + /** + * @brief Select which monomial block is used on each Cartesian axis. + * + * @param x Block index for x-axis (0 → degree 0, 1 → degree 1, 2 → degree 2). + * @param y Block index for y-axis (same convention). + * @param z Block index for z-axis (same convention). + * + * @details + * - This operation is O(R) for each axis and **does not rebuild** the operator; + * it remaps the already constructed 1D operator trees into the separable slots. + * - Valid indices are {0,1,2}. Using the same block on multiple axes is allowed. + */ void setCartesianComponents(int x, int y, int z); protected: + /** + * @brief Separation rank R of the operator (number of terms in the input 1D kernel). + * + * @details + * The internal storage contains 3·R operator trees (three monomial blocks), + * but the exposed separable rank for downstream composition is R. 
+ */ int sep_rank; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/ConvolutionOperator.cpp b/src/operators/ConvolutionOperator.cpp index 9d37929aa..e51521fef 100644 --- a/src/operators/ConvolutionOperator.cpp +++ b/src/operators/ConvolutionOperator.cpp @@ -47,59 +47,129 @@ namespace mrcpp { +/** + * @brief Construct a separable D-dimensional convolution operator from a 1D Gaussian expansion. + * + * The input kernel is a 1D Gaussian expansion (sum of Gauss terms). The implementation + * projects each 1D Gaussian to a 1D function tree and then uses cross-correlations to + * lift it into a 2D operator block; the full D-dimensional operator is assembled as a + * separable product of these 1D blocks. The final separable rank equals kernel.size(). + * + * @tparam D Spatial dimension of the target operator. + * @param mra Multiresolution analysis defining the D-dimensional domain/basis. + * @param kernel 1D Gaussian expansion whose terms become the separable factors. + * @param prec Target build precision for the operator (used for both kernel + * projection and operator assembly with a small safety split). + * + * @details + * Internally we choose `k_prec = prec / 10` (stricter) for fitting each 1D kernel term, + * and `o_prec = prec` for assembling/operatorization, to keep the composed error within + * the requested tolerance. After all factors are built, `initOperExp(kernel.size())` + * finalizes the separable structure. 
+ */ template -ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec) +ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, + GaussExp<1> &kernel, + double prec) : MWOperator(mra, mra.getRootScale(), -10) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - auto o_prec = prec; - auto k_prec = prec / 10.0; + auto o_prec = prec; // precision for operator assembly (2D blocks, transforms) + auto k_prec = prec / 10.0; // stricter precision for 1D kernel projection initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); + this->initOperExp(kernel.size()); // separable rank = number of kernel terms Printer::setPrintLevel(oldlevel); } +/** + * @brief Construct a convolution operator with explicit root scale and reach. + * + * This variant allows overriding the default operator root scale and reach (stencil + * half-width in levels). The rest of the pipeline is identical to the other ctor: + * build 1D kernel function trees, lift to 2D operator blocks by cross-correlation, + * transform/collapse, then finalize the separable expansion. + * + * @param mra D-dimensional MRA. + * @param kernel 1D Gaussian expansion (rank = kernel.size()). + * @param prec Target build precision; we use `k_prec = prec / 100` here to be extra + * conservative when the reach is user-controlled. + * @param root Operator root scale. + * @param reach Operator reach (levels outward from root). Negative = auto from box. 
+ */ template -ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec, int root, int reach) +ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, + GaussExp<1> &kernel, + double prec, + int root, + int reach) : MWOperator(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); auto o_prec = prec; - auto k_prec = prec / 100.0; + auto k_prec = prec / 100.0; // even tighter kernel fit when reach is custom initialize(kernel, k_prec, o_prec); this->initOperExp(kernel.size()); Printer::setPrintLevel(oldlevel); } -template void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) { +/** + * @brief Core build routine: from 1D Gaussian terms → 1D function trees → 2D operator blocks. + * + * Steps per Gaussian term: + * 1) **Rescaling for D-dimensional separability**: adjust the coefficient so that the product + * of D identical 1D factors yields the original 1D amplitude in D-D composition. + * Concretely: `coef ← sign(coef) * |coef|^{1/D}`. + * 2) **Projection to a 1D function tree** (@ref FunctionTree): build an empty grid sized for + * narrow Gaussians (build_grid), then project the analytic Gaussian into the tree with + * requested kernel precision `k_prec` (project). + * 3) **Lifting to a 2D operator**: create a @ref CrossCorrelationCalculator from the 1D tree, + * then use @ref TreeBuilder to expand a 2D operator tree through cross-correlations + * (effectively computing the correlation between basis functions along one axis). + * 4) **Wavelet transform & caching**: bottom-up transform, compute norms, and set up node cache. + * + * The produced 2D blocks are stored into `raw_exp`. Higher-level code composes D-D separable + * operators from these blocks (e.g., via @ref MWOperator’s machinery). + * + * @param kernel 1D Gaussian expansion (input rank). + * @param k_prec Precision for kernel projection to 1D trees. 
+ * @param o_prec Precision for operator building / assembly. + */ +template +void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) { + // Build the auxiliary 1D MRA for the kernel and fetch the 2D operator MRA auto k_mra = this->getKernelMRA(); auto o_mra = this->getOperatorMRA(); - TreeBuilder<2> builder; - OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); + TreeBuilder<2> builder; // builds 2D operator trees from calculators + OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); // controls assembly precision / scale cap for (int i = 0; i < kernel.size(); i++) { - // Rescale Gaussian for D-dim application + // --- (1) Adjust coefficient for separable D-fold composition --- auto *k_func = kernel.getFunc(i).copy(); - k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), k_func->getCoef())); + // Raise absolute coefficient to 1/D and reapply sign to preserve signed kernels + k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), + k_func->getCoef())); + // --- (2) Project analytic Gaussian to a 1D function tree --- FunctionTree<1> k_tree(k_mra); - mrcpp::build_grid(k_tree, *k_func); // Generate empty grid to hold narrow Gaussian - mrcpp::project(k_prec, k_tree, *k_func); // Project Gaussian starting from the empty grid - delete k_func; + mrcpp::build_grid(k_tree, *k_func); // Prepare an empty grid (fine where Gaussian is narrow) + mrcpp::project(k_prec, k_tree, *k_func); // Fit the Gaussian into the 1D multiresolution basis + delete k_func; // No longer needed; k_tree holds the discretization + // --- (3) Lift to a 2D operator via cross-correlation --- CrossCorrelationCalculator calculator(k_tree); auto o_tree = std::make_unique(o_mra, o_prec); - builder.build(*o_tree, calculator, adaptor, -1); // Expand 1D kernel into 2D operator + builder.build(*o_tree, calculator, adaptor, -1); // Expand 1D kernel into 2D operator tree (MW transform follows in step 4) + // --- (4) Transform, normalize, and cache for application 
--- Timer trans_t; - o_tree->mwTransform(BottomUp); - o_tree->calcSquareNorm(); - o_tree->setupOperNodeCache(); + o_tree->mwTransform(BottomUp); // move to MW (scaling+wavelet) representation efficiently + o_tree->calcSquareNorm(); // useful for diagnostics / thresholding + o_tree->setupOperNodeCache(); // enable fast repeated applications print::time(10, "Time transform", trans_t); print::separator(10, ' '); @@ -107,12 +177,27 @@ template void ConvolutionOperator::initialize(GaussExp<1> &kernel, do } } -template MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { +/** + * @brief Build a 1D MRA used to discretize the kernel factors. + * + * The kernel MRA mirrors the scaling family used by the D-D operator MRA: + * - If the operator uses an interpolating basis of order s, the kernel basis is + * chosen as InterpolatingBasis with order `2*s + 1`. + * - If Legendre, we similarly pick a LegendreBasis of order `2*s + 1`. + * + * The box extent (reach) is derived from the D-D world box unless an explicit + * operator reach was set. The same uniform scaling factor is used. + * + * @return A standalone 1D @ref MultiResolutionAnalysis matching the operator’s scaling family. + */ +template +MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); + // Choose a kernel basis compatible with the operator basis. int type = basis.getScalingType(); - int kern_order = 2 * basis.getScalingOrder() + 1; + int kern_order = 2 * basis.getScalingOrder() + 1; // (2s+1) ensures adequate quadrature/correlation support ScalingBasis *kern_basis = nullptr; if (type == Interpol) { @@ -123,24 +208,29 @@ template MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA MSG_ABORT("Invalid scaling type"); } + // Kernel root = operator root; reach defaults to the maximum box extent if negative. 
int root = this->oper_root; - int reach = this->oper_reach + 1; + int reach = this->oper_reach + 1; // +1 because the 1D kernel must cover neighbors used by correlations if (reach < 0) { for (int i = 0; i < D; i++) { if (box.size(i) > reach) reach = box.size(i); } } + + // Build a 1D bounding box centered at zero: + // box indices from -reach to +reach (2*reach boxes in total) at the operator root scale. auto start_l = std::array{-reach}; - auto tot_l = std::array{2 * reach}; - // Zero in argument since operators are only implemented - // for uniform scaling factor + auto tot_l = std::array{2 * reach}; + // Scaling factor of dimension 0 is used, since operators are only implemented for a uniform scaling factor auto sf = std::array{box.getScalingFactor(0)}; BoundingBox<1> kern_box(root, start_l, tot_l, sf); + MultiResolutionAnalysis<1> kern_mra(kern_box, *kern_basis); delete kern_basis; return kern_mra; } +// Explicit template instantiations for the supported dimensionalities. template class ConvolutionOperator<1>; template class ConvolutionOperator<2>; template class ConvolutionOperator<3>; diff --git a/src/operators/ConvolutionOperator.h b/src/operators/ConvolutionOperator.h index 33d254e9d..51f8a2736 100644 --- a/src/operators/ConvolutionOperator.h +++ b/src/operators/ConvolutionOperator.h @@ -30,56 +30,155 @@ namespace mrcpp { /** @class ConvolutionOperator + * @ingroup operators * - * @brief Convolution defined by a Gaussian expansion + * @brief D-dimensional separable convolution operator built from a 1D Gaussian expansion. + * + * @tparam D Spatial dimension of the target operator. 
+ * + * @details + * This operator represents a separable convolution constructed from a sum of + * one–dimensional Gaussian factors: * - * @details Represents the operator * \f[ - * T = \sum_{m=1}^M - * \text{sign} (\alpha_m) \bigotimes \limits_{d = 1}^D T_d - * \left( \beta_m, \sqrt[D]{| \alpha_m |} \right) - * , + * T \;=\; \sum_{m=1}^{M} + * \operatorname{sign}(\alpha_m) + * \bigotimes_{d=1}^{D} + * T_d\!\left(\beta_m,\;\sqrt[D]{|\alpha_m|}\right), * \f] - * where each - * \f$ T_d \left( \beta, \alpha \right) \f$ - * is the convolution operator with one-dimensional Gaussian kernel - * \f$ k(x_d) = \alpha \exp \left( - \beta x_d^2 \right) \f$. - * Operator - * \f$ T \f$ - * is obtained from the Gaussian expansion + * + * where each \f$ T_d(\beta,\alpha) \f$ is the 1D convolution with kernel + * \f$ k(x_d) = \alpha \exp(-\beta x_d^2) \f$ along coordinate \f$ x_d \f$. + * The separable rank of the constructed operator equals the number of terms + * \f$ M \f$ in the 1D Gaussian expansion: + * * \f[ - * \sum_{m=1}^M \alpha_m \exp \left( - \beta_m |x|^2 \right) + * \sum_{m=1}^{M} \alpha_m \exp(-\beta_m |x|^2). * \f] - * which is passed as a parameter to the first two constructors. * - * @note Every \f$ T_d \left( \beta_m, \sqrt[D]{| \alpha_m |} \right) \f$ is the same - * operator associated with the one-dimensional variable \f$ x_d \f$ for \f$ d = 1, \ldots, D \f$. + * ### Construction strategy (high level) + * 1. For each Gaussian term \f$ \alpha_m e^{-\beta_m x^2} \f$ in the 1D expansion, + * we rescale its coefficient to \f$ \sqrt[D]{|\alpha_m|} \f$ and keep the sign, + * so that the D-fold separable composition recovers the desired amplitude. + * 2. Each 1D term is projected to a 1D multiresolution function tree using the + * same scaling family as the D-D operator (interpolating or Legendre). + * 3. 
Cross-correlation machinery lifts each 1D factor into a 2D operator block + * (per axis-pair) and the @ref MWOperator backbone assembles the full D-D, + * separable operator. + * + * ### Precision control + * The *build precision* (see @ref setBuildPrec and @ref getBuildPrec) governs: + * - the tolerance used when projecting 1D kernel terms to their function trees, and + * - the tolerance for assembling/thresholding operator trees. + * Implementations typically use a tighter internal precision for the kernel + * projection than for the operator assembly to keep the total error within the + * requested target. * - * \todo: One may want to change the logic so that \f$ D \f$-root is evaluated on the previous step, - * namely, when \f$ \alpha_m, \beta_m \f$ are calculated. + * @note All constructors are *non-owning* with respect to the input expansion; the + * implementation copies kernel terms as needed for projection, and keeps only the + * operator trees internally. * + * @see ConvolutionOperator::initialize + * @see ConvolutionOperator::getKernelMRA + * @see MWOperator */ template class ConvolutionOperator : public MWOperator { public: + /** + * @brief Build a separable convolution operator on the default operator root/extent. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis defining the domain and basis. + * @param kernel 1D Gaussian expansion providing the separable factors (rank = kernel.size()). + * @param prec Target build precision used to steer kernel projection and operator assembly. + * + * @details Uses the operator's default root scale (@c mra.getRootScale()) and a + * reach chosen by the implementation. For more control over root/reach, use the + * other constructor. + */ ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec); + + /** + * @brief Build a separable convolution operator with explicit root scale and reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. 
+ * @param kernel 1D Gaussian expansion (rank = kernel.size()). + * @param prec Target build precision. + * @param root Operator root scale (level) to anchor the construction. + * @param reach Operator reach (half-width in levels). Negative value means + * *auto*—deduced from the world box extents. + * + * @details This variant allows advanced users to control the scale window spanned + * by the operator representation, which may be useful when coupling to other + * operators or enforcing boundary extents. + */ ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec, int root, int reach); + ConvolutionOperator(const ConvolutionOperator &oper) = delete; ConvolutionOperator &operator=(const ConvolutionOperator &oper) = delete; virtual ~ConvolutionOperator() = default; + /// @brief Retrieve the user-requested build precision associated with this operator. double getBuildPrec() const { return this->build_prec; } protected: + /** + * @brief Protected convenience constructor for subclasses that defer initialization. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * + * @details Initializes the @ref MWOperator base with default root and reach. + * Subclasses must call @ref initialize to populate the separable expansion. + */ ConvolutionOperator(const MultiResolutionAnalysis &mra) : MWOperator(mra, mra.getRootScale(), -10) {} + + /** + * @brief Protected convenience constructor with explicit root and reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * @param root Root level. + * @param reach Reach (half-width in levels). Negative = auto. + */ ConvolutionOperator(const MultiResolutionAnalysis &mra, int root, int reach) : MWOperator(mra, root, reach) {} + /** + * @brief Core build routine that projects 1D kernel terms and assembles operator trees. + * + * @param kernel 1D Gaussian expansion (input rank M). 
+ * @param k_prec Precision used when projecting each 1D Gaussian term into a + * 1D function tree (typically tighter than @p o_prec). + * @param o_prec Precision used when expanding to operator trees and performing + * wavelet transforms/thresholding. + * + * @details For each term in @p kernel: + * - Coefficient is rescaled to \f$ \sqrt[D]{|\alpha|} \f$ with the original sign, + * ensuring the D-fold separable product reproduces the intended amplitude. + * - The analytic 1D Gaussian is projected to a 1D @ref FunctionTree with tolerance + * @p k_prec. + * - A @ref CrossCorrelationCalculator lifts the 1D representation to a 2D operator + * block; bottom-up wavelet transforms and caching finalize each block. + * The set of blocks is stored in the @ref MWOperator base and exposed as a + * separable expansion of rank @c kernel.size(). + */ void initialize(GaussExp<1> &kernel, double k_prec, double o_prec); + + /// @brief Store the user-requested build precision (used for reporting/inspection). void setBuildPrec(double prec) { this->build_prec = prec; } + /** + * @brief Build a 1D @ref MultiResolutionAnalysis to discretize kernel factors. + * + * @return A 1D MRA whose scaling family matches the D-D operator MRA (Interpolating or Legendre), + * with an order chosen as \f$ 2s+1 \f$ where \f$ s \f$ is the operator scaling order. + * + * @details The 1D box uses the operator root. Its reach is the operator reach + 1 + * (or derived from the world box if reach is negative) to ensure kernel support + * covers the correlations used during lifting. + */ MultiResolutionAnalysis<1> getKernelMRA() const; + /// Target precision requested at construction time; used to steer sub-steps in the build. 
double build_prec{-1.0}; }; diff --git a/src/operators/DerivativeConvolution.cpp b/src/operators/DerivativeConvolution.cpp index 30a655218..a89f580e0 100644 --- a/src/operators/DerivativeConvolution.cpp +++ b/src/operators/DerivativeConvolution.cpp @@ -29,34 +29,106 @@ namespace mrcpp { -/** @returns New DerivativeConvolution object - * @param[in] mra: Which MRA the operator is defined - * @param[in] pr: Build precision, closeness to delta function - * @details This will project a kernel of a single differentiated - * gaussian with exponent sqrt(10/build_prec). +/** + * @class DerivativeConvolution + * @brief Separable convolution operator that approximates a (first) derivative + * using a differentiated Gaussian kernel. + * + * @tparam D Spatial dimension of the target operator. + * + * @details + * This implementation is a thin wrapper around @ref ConvolutionOperator that: + * - **Chooses the kernel**: a *single* derivative-of-Gaussian (DoG) term whose + * width is set by a requested build precision. + * - **Projects the 1D kernel** to a function tree at a tight tolerance. + * - **Lifts** the 1D kernel into a D-dimensional operator via cross-correlation + * (separable assembly) and prepares it for application in the multiwavelet basis. + * + * The resulting operator is bandwidth-limited and numerically stable, offering a + * controlled approximation to a spatial derivative. It is mainly for validation + * and experimentation; for production derivatives consider @ref ABGVOperator + * (cusps/discontinuities) or @ref BSOperator (smooth functions). + */ + +/** + * @brief Construct a derivative-convolution operator on the default root/reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis defining basis/domain. + * @param prec Target build precision that controls kernel width and assembly thresholds. + * + * @details + * Steps performed here: + * 1. 
**Silence verbose output** during operator build by temporarily lowering the + * global print level (restored upon exit). + * 2. **Record build precision** via @c setBuildPrec(prec). This precision is later + * available from @ref ConvolutionOperator::getBuildPrec. + * 3. **Split tolerances** into: + * - @c k_prec = prec/10.0 for *kernel projection* (tighter; DoG is narrow), + * - @c o_prec = prec for *operator assembly* (adequate once kernel is accurate). + * 4. **Create the kernel**: a single-term @ref DerivativeKernel parametrized by + * @c k_prec, which internally chooses the Gaussian exponent consistent with the + * requested accuracy. + * 5. **Assemble the operator** by calling @ref ConvolutionOperator::initialize, + * which projects the kernel to a 1D function tree, lifts it to operator trees + * via cross-correlation, transforms to the MW domain, and caches nodes. + * + * The operator rank equals the number of 1D kernel terms; for this kernel it is 1. */ template DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec) : ConvolutionOperator(mra) { + // Keep the build quiet; restore the previous level at the end of scope. int oldlevel = Printer::setPrintLevel(0); + // Store build precision on the base class for later diagnostics/inspection. this->setBuildPrec(prec); + + // Operator-assembly tolerance: used while expanding/lifting the kernel to operator trees. double o_prec = prec; + + // Kernel-projection tolerance: tighter than operator assembly to resolve a narrow DoG. double k_prec = prec / 10.0; + // A single differentiated Gaussian tuned by k_prec. DerivativeKernel kernel(k_prec); + + // Build the separable operator blocks from the 1D kernel. this->initialize(kernel, k_prec, o_prec); + // Restore previous print level. Printer::setPrintLevel(oldlevel); } +/** + * @brief Construct a derivative-convolution operator with explicit root and reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. 
+ * @param prec Target build precision that controls kernel width and thresholds. + * @param root Operator root level (coarsest active scale). + * @param reach Operator reach (half-width in levels). Negative => auto-detected. + * + * @details + * This overload is identical in spirit to the simpler constructor, but allows + * **explicit control of the active scale window**: + * - Use when benchmarking, debugging, or composing multiple operators whose + * supports must be constrained not to overlap. + * + * Implementation notes mirror the first ctor with one change: + * - The kernel projection is made *even tighter*: @c k_prec = prec/100.0, + * which helps when the operator is restricted to a narrower scale window + * (ensuring the DoG is still faithfully represented). + */ template DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); + + // Operator-assembly tolerance (same rationale as the other ctor). double o_prec = prec; + + // Very tight kernel-projection tolerance for explicit windowing. 
double k_prec = prec / 100.0; DerivativeKernel kernel(k_prec); @@ -65,6 +137,7 @@ DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis Printer::setPrintLevel(oldlevel); } +/* Explicit template instantiations */ template class DerivativeConvolution<1>; template class DerivativeConvolution<2>; template class DerivativeConvolution<3>; diff --git a/src/operators/DerivativeConvolution.h b/src/operators/DerivativeConvolution.h index 71ade7025..2d19e28ed 100644 --- a/src/operators/DerivativeConvolution.h +++ b/src/operators/DerivativeConvolution.h @@ -30,24 +30,96 @@ namespace mrcpp { /** @class DerivativeConvolution + * @ingroup operators * - * @brief Convolution with a derivative kernel + * @brief Separable convolution operator that approximates a spatial derivative + * using a differentiated Gaussian kernel. * - * @details Derivative operator written as a convolution. The derivative kernel (derivative of - * Dirac's delta function) is approximated by the derivative of a narrow Gaussian function: - * \f$ D^x(r-r') = \frac{d}{dx}\delta(r-r') \approx \frac{d}{dx} \alpha e^{-\beta (r-r')^2} \f$ + * @tparam D Spatial dimension of the target operator (1, 2, or 3). * - * NOTE: This is _not_ the recommended derivative operator for practial calculations, it's - * a proof-of-concept operator. Use the ABGVOperator for "cuspy" functions and the - * BSOperator for smooth functions. + * @details + * This class implements a *proof-of-concept* derivative as a convolution with the + * derivative of a narrow Gaussian. In distributional terms one would like to have + * \f$ \partial_x \delta \f$; numerically, we approximate it by + * a derivative-of-Gaussian (DoG) kernel that is narrow enough to capture the + * local derivative while remaining representable on the multiwavelet grid. 
+ * + * Formally, for one Cartesian component: + * \f[ + * (D^x f)(\mathbf r) + * \;\approx\; + * \int_{\mathbb R^D} + * \frac{\partial}{\partial x} + * \left[\alpha\, e^{-\beta \lvert \mathbf r - \mathbf r' \rvert^2}\right] + * f(\mathbf r') \, d\mathbf r' + * \;=\; (k'_x * f)(\mathbf r). + * \f] + * In MRCPP this is realized as a @ref ConvolutionOperator with a 1D DoG kernel; + * the D-dimensional operator is assembled as a separable tensor product across + * coordinates and lifted to the multiwavelet basis. + * + * ### Responsibilities and division of labor + * - **This class**: chooses a derivative-like kernel and exposes convenient + * constructors. It does not change application logic. + * - **@ref ConvolutionOperator**: projects the 1D kernel to a function tree, + * lifts to operator trees via cross-correlation, transforms/caches in the MW basis, + * and manages rank/separability. + * + * ### Precision handling + * The constructors accept a *build precision* that governs the narrowness of the + * DoG kernel and the tolerances used during kernel projection and operator assembly: + * tighter precision ⇒ narrower kernel ⇒ closer to an ideal derivative, but with + * higher resolution demands and potentially larger operator bandwidth. + * + * @note This operator is primarily for validation/experiments. For production + * use consider: + * - @ref ABGVOperator for cuspy/discontinuous functions. + * - @ref BSOperator for sufficiently smooth functions. + * + * @see ConvolutionOperator, ABGVOperator, BSOperator */ - template class DerivativeConvolution final : public ConvolutionOperator { public: + /** + * @brief Build a derivative-convolution operator on the default root/reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis that defines the domain + * and the scaling basis used by the operator. + * @param prec Target build precision controlling kernel narrowness and + * assembly tolerances (tighter ⇒ narrower DoG). 
+ * + * @details + * Internally constructs a single-term derivative kernel (derivative of a Gaussian) + * with parameters derived from @p prec, then delegates to + * @ref ConvolutionOperator::initialize to: + * - project the 1D kernel to a function tree, + * - lift it to separable operator blocks via cross-correlation, + * - transform and cache in the multiwavelet basis. + * + * @warning Very small @p prec values produce *very* narrow kernels that may + * require deeper trees and higher-order bases to avoid under-resolution. + */ DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec); + + /** + * @brief Build a derivative-convolution operator with an explicit scale window. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * @param prec Target build precision (as above). + * @param root Operator root level (coarsest active scale). + * @param reach Operator reach in levels; negative values trigger auto-detection + * from the domain extents. + * + * @details + * Use this overload to constrain the operator to a chosen scale window; useful for + * benchmarking, domain-decomposition experiments, or when composing multiple + * operators with controlled supports. Kernel choice and assembly otherwise mirror + * the simpler constructor. 
+ */ DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach); - DerivativeConvolution(const DerivativeConvolution &oper) = delete; - DerivativeConvolution &operator=(const DerivativeConvolution &oper) = delete; + + DerivativeConvolution(const DerivativeConvolution &oper) = delete; ///< Non-copyable + DerivativeConvolution &operator=(const DerivativeConvolution &oper) = delete; ///< Non-assignable }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/DerivativeKernel.h b/src/operators/DerivativeKernel.h index 1888c2e0d..11557d5d1 100644 --- a/src/operators/DerivativeKernel.h +++ b/src/operators/DerivativeKernel.h @@ -31,14 +31,82 @@ namespace mrcpp { +/** + * @class DerivativeKernel + * @ingroup operators + * + * @brief One–dimensional *derivative-of-Gaussian* (DoG) kernel packaged as a + * Gaussian expansion of rank 1, suitable for building separable + * convolution-based derivative operators in D dimensions. + * + * @tparam D Spatial dimensionality of the *target* operator that will use this kernel. + * The class itself stores a 1D kernel (inherits from @ref GaussExp\<1\>), but + * uses @p D to choose a normalization consistent with a D-fold separable tensor + * product (see notes below). + * + * @details + * The constructor creates a single 1D Gaussian + * \f[ + * g(x) \;=\; c \, e^{-\alpha x^2},\qquad + * \alpha \equiv \frac{1}{\varepsilon}, + * \f] + * then analytically differentiates it once in @c x to obtain a polynomial–Gaussian + * (a @ref GaussPoly) and appends that single term to this expansion. + * + * ### Normalization and separability + * - The coefficient is chosen as + * \f[ + * c \;=\; \Big(\tfrac{\alpha}{\pi}\Big)^{D/2}, + * \f] + * which corresponds to the *D-dimensional* unit-charge normalization of the isotropic + * Gaussian \f$ c \exp(-\alpha \lvert \mathbf r \rvert^2) \f$. 
+ * - When this 1D kernel is lifted to D dimensions as a separable product, + * MRCPP’s convolution machinery (@ref ConvolutionOperator) rescales each 1D factor + * by the D-th root of the magnitude of its coefficient so that the tensor product + * has the intended overall normalization. In effect, each axis receives + * \f$ (\alpha/\pi)^{1/2} \f$ and the product recovers \f$ (\alpha/\pi)^{D/2} \f$. + * + * ### Width control + * The user-provided @p epsilon controls the width via \f$ \alpha = 1/\varepsilon \f$: + * - Small \f$ \varepsilon \Rightarrow \alpha \gg 1 \Rightarrow \f$ very narrow kernel, + * closer to a distributional derivative, but harder to resolve numerically. + * - Large \f$ \varepsilon \Rightarrow \alpha \ll 1 \Rightarrow \f$ broad kernel, + * smoother but less localized derivative approximation. + * + * ### Usage + * Typically constructed internally by derivative-style convolution operators + * (e.g., @ref DerivativeConvolution) and not used directly. If used directly, + * pass it to a @ref ConvolutionOperator builder which will project, lift, and + * cache the corresponding multiwavelet operator blocks. + */ template class DerivativeKernel final : public GaussExp<1> { public: + /** + * @brief Construct a rank-1 1D derivative-of-Gaussian kernel. + * + * @param epsilon Width control parameter; the Gaussian exponent is set to + * \f$ \alpha = 1/\varepsilon \f$. + * + * @post The expansion contains a single @ref GaussPoly term equal to + * \f$ \frac{d}{dx}\big[c \exp(-\alpha x^2)\big] \f$ with + * \f$ c = (\alpha/\pi)^{D/2} \f$. + */ DerivativeKernel(double epsilon) : GaussExp<1>() { + // Exponent (narrowness): alpha = 1 / epsilon double alpha = 1.0 / epsilon; + + // D-dimensional normalization chosen up-front. + // ConvolutionOperator later redistributes this across dimensions (D-th root per axis). 
double coef = std::pow(alpha / mrcpp::pi, D / 2.0); + + // Start from a pure 1D Gaussian g(x) = coef * exp(-alpha x^2) GaussFunc<1> g(alpha, coef); + + // Differentiate analytically to obtain a polynomial–Gaussian (DoG) and store it GaussPoly<1> dg = g.differentiate(0); + + // Single-term expansion: { dg } this->append(dg); } }; diff --git a/src/operators/DerivativeOperator.h b/src/operators/DerivativeOperator.h index 8c902581c..b2f339bca 100644 --- a/src/operators/DerivativeOperator.h +++ b/src/operators/DerivativeOperator.h @@ -29,18 +29,62 @@ namespace mrcpp { +/** + * @class DerivativeOperator + * @ingroup operators + * + * @brief Common base for derivative-type multiwavelet operators. + * + * @tparam D Spatial dimension of the operator (1, 2, or 3). + * + * @details + * This abstract helper stores metadata and provides a thin interface for + * operators that represent spatial derivatives in the multiwavelet (MW) + * framework. It derives from @ref MWOperator and adds a single piece of + * state: the derivative @ref order, which subclasses set appropriately + * (e.g., 1 for first derivative, 2 for Laplacian-like second derivative + * components, etc.). + * + * The constructor simply forwards the *scale window* to the base: + * - @p root : the coarsest scale at which the operator is anchored, + * - @p reach : the number of levels (half-width) the operator spans + * around @p root (default = 1). + * + * Concrete implementations such as @ref ABGVOperator and @ref BSOperator + * specialize initialization, bandwidth, and stencil construction, while + * reusing this small common interface. + * + * @see MWOperator, ABGVOperator, BSOperator + */ template class DerivativeOperator : public MWOperator { public: + /** + * @brief Construct a derivative operator shell on a given scale window. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis that defines the + * domain and scaling basis for the operator. 
+ * @param root Root scale (coarsest level) of the operator. + * @param reach Scale reach around @p root (default = 1). A reach of @c r + * allows interaction across \f$ 2r+1 \f$ adjacent levels. + * + * @note This constructor does not assemble any stencil; subclasses call + * their own initialization routines and may update @ref order. + */ DerivativeOperator(const MultiResolutionAnalysis &mra, int root, int reach = 1) : MWOperator(mra, root, reach) {} - DerivativeOperator(const DerivativeOperator &oper) = delete; - DerivativeOperator &operator=(const DerivativeOperator &oper) = delete; + DerivativeOperator(const DerivativeOperator &oper) = delete; ///< Non-copyable + DerivativeOperator &operator=(const DerivativeOperator &oper) = delete; ///< Non-assignable ~DerivativeOperator() override = default; + /** + * @brief Return the derivative order encoded by this operator. + * @returns Integer derivative order (1 by default; subclasses may set 2, 3, ...). + */ int getOrder() const { return order; } protected: + /** @brief Derivative order metadata (default = 1). Subclasses should set this. */ int order{1}; }; diff --git a/src/operators/HeatKernel.h b/src/operators/HeatKernel.h index bc5a8adba..173ae3171 100644 --- a/src/operators/HeatKernel.h +++ b/src/operators/HeatKernel.h @@ -30,36 +30,78 @@ namespace mrcpp { -/** @class HeatKernel. +/** + * @class HeatKernel + * @ingroup functions * - * @brief Heat kernel in \f$ \mathbb R^D \f$. + * @brief Single-term Gaussian expansion that represents the \(D\)-dimensional + * heat kernel \(K_t(\mathbf x)\) at diffusion time \(t>0\). * - * @details In \f$ \mathbb R^D \f$ the heat kernel has the form + * @tparam D Spatial dimension of the kernel to be modeled (1, 2, or 3). + * + * @details + * The continuous heat kernel in \(\mathbb R^D\) is + * \f[ + * K_t(\mathbf x) + * \;=\; + * \frac{1}{(4\pi t)^{D/2}} + * \exp\!\left(-\frac{\lVert \mathbf x\rVert^2}{4t}\right), + * \qquad t>0. 
+ * \f] + * + * In MRCPP, separable operators are commonly assembled from 1D building blocks. + * This class therefore inherits from @ref GaussExp "GaussExp<1>" and stores a + * *single* 1D Gaussian term whose exponent and coefficient are chosen so that, + * when used inside separable constructions (e.g., @ref ConvolutionOperator), + * the resulting operator corresponds to the \(D\)-dimensional heat kernel. + * + * Concretely, with + * \f$ \beta = \frac{1}{4t} \f$ and \f$ \alpha = \big(\beta/\pi\big)^{D/2} \f$, + * we append the 1D Gaussian * \f[ - * K_t(x) - * = - * \frac 1{ (4 \pi t)^{D/2} } - * \exp - * \left( - * - \frac{ |x|^2 }{4t} - * \right) - * , \quad - * x \in \mathbb R^D - * \text{ and } - * t > 0 - * . + * g(x) = \alpha\, e^{-\beta x^2}, * \f] + * and the higher-dimensional operator logic (tensor products across coordinates) + * recovers the isotropic \(D\)-dimensional kernel. + * + * ### Notes + * - The constructor does not enforce \(t>0\) at runtime; pass a strictly positive + * value to avoid nonsensical parameters. + * - The class is intentionally minimal: it only sets up the Gaussian parameters + * and leaves projection/assembly to the caller (e.g., convolution operators). * + * ### Example + * @code + * MultiResolutionAnalysis<3> mra(box, basis); + * HeatKernel<3> Kt(0.05); // 3D heat kernel at t = 0.05 + * // Use Kt as a kernel for a ConvolutionOperator<3>, etc. + * @endcode */ template class HeatKernel final : public GaussExp<1> { public: + /** + * @brief Construct a heat kernel at diffusion time @p t. + * + * @param t Diffusion time (\f$ t>0 \f$). Smaller values yield narrower + * Gaussians (more localized kernels). + * + * @details + * Sets the Gaussian exponent to \f$ \beta = \frac{1}{4t} \f$ and the + * coefficient to \f$ \alpha = \big(\beta/\pi\big)^{D/2} \f$, then appends a + * single @ref GaussFunc "GaussFunc<1>" to this @ref GaussExp "GaussExp<1>". 
+ */ HeatKernel(double t) : GaussExp<1>() { + // Exponent β = 1/(4t) double expo = 0.25 / t; + + // Amplitude α = (β/π)^{D/2} so that the separable product matches (4πt)^{-D/2} double coef = std::pow(expo / mrcpp::pi, D / 2.0); + + // Build the 1D Gaussian term and register it in the expansion GaussFunc<1> gFunc(expo, coef); this->append(gFunc); } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/HeatOperator.cpp b/src/operators/HeatOperator.cpp index cad3d9139..5d31391ba 100644 --- a/src/operators/HeatOperator.cpp +++ b/src/operators/HeatOperator.cpp @@ -23,20 +23,68 @@ * */ +/** + * @file HeatOperator.cpp + * @brief Implementation of a separable convolution operator that realizes the + * \(D\)-dimensional heat semigroup via a single-term Gaussian kernel. + * + * @details + * The continuous heat propagator at time \(t>0\) is the convolution with + * \f[ + * K_t(\mathbf x) + * = + * \frac{1}{(4\pi t)^{D/2}} + * \exp\!\left(-\frac{\lVert\mathbf x\rVert^2}{4t}\right). + * \f] + * In MRCPP, separable operators are assembled from 1D Gaussian building blocks. + * This implementation: + * - constructs a @ref HeatKernel whose single 1D Gaussian has exponent + * \f$\beta = 1/(4t)\f$ and coefficient \f$\alpha = (\beta/\pi)^{D/2}\f$, + * - projects that kernel to a 1D function tree, + * - lifts it to an operator tree by cross-correlation, + * - transforms/caches the operator in the multiwavelet domain, + * - and exposes it as a @ref ConvolutionOperator acting in \(D\) dimensions. + * + * Two constructors are provided: a default one (using the operator's default + * root/reach) and one tailored for periodic boundary conditions (PBC) with an + * explicit scale window @p root/@p reach. 
+ */ + #include "HeatOperator.h" #include "HeatKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** @brief Constructor of the HeatOperator object - * @returns New HeatOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] t: Time moment - * @param[in] prec: Build precision - * @details This will project a kernel of a single gaussian with - * exponent \f$ 1/(4t) \f$. +/** + * @brief Build a heat propagator \(e^{t\Delta}\) as a separable convolution. + * + * @tparam D Spatial dimension of the operator (1, 2, or 3). + * + * @param[in] mra D-dimensional @ref MultiResolutionAnalysis that defines both + * the computational domain and the scaling basis. + * @param[in] t Diffusion time; determines the kernel width + * (\f$\beta = 1/(4t)\f$). Must be strictly positive. + * @param[in] prec Target build precision for assembling the operator. * + * @details + * Steps performed: + * 1. The requested build precision is recorded via @c setBuildPrec(prec). + * 2. Two tolerances are chosen: + * - @c k_prec = @p prec / 10 for the 1D kernel projection (tighter), + * - @c o_prec = @p prec for the operator assembly. + * 3. A @ref HeatKernel is instantiated with exponent \f$1/(4t)\f$ and + * amplitude chosen to match \f$(4\pi t)^{-D/2}\f$ upon separable assembly. + * 4. @ref ConvolutionOperator::initialize is called to: + * - project the kernel to a 1D function tree, + * - lift it to an operator tree via cross-correlation, + * - transform and cache the operator in the MW domain. + * 5. @ref initOperExp is called to finalize the separable expansion (rank 1). + * + * @note Smaller @p t \(\Rightarrow\) narrower Gaussian \(\Rightarrow\) more demanding + * resolution (consider tightening @p prec and/or extending operator reach). + * @warning Passing non-positive @p t yields a meaningless kernel; callers must + * ensure @p t > 0. 
*/ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec) @@ -44,31 +92,40 @@ HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, d int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; - double k_prec = prec / 10.0; + double o_prec = prec; // Operator-assembly tolerance + double k_prec = prec / 10.0; // Kernel-projection tolerance (tighter) HeatKernel kernel(t); this->initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); + this->initOperExp(kernel.size()); // single-term expansion (rank = 1) Printer::setPrintLevel(oldlevel); } -/** @brief Constructor of the HeatOperator object in case of Periodic Boundary Conditions (PBC) - * @returns New IdentityConvolution object - * @param[in] mra: Which MRA the operator is defined - * @param[in] t: Time moment - * @param[in] prec: Build precision - * @param[in] root: root scale of operator. - * @param[in] reach: width at root scale (applies to periodic boundary conditions) - * @details This will project a kernel of a single gaussian with - * exponent \f$ 1/(4t) \f$. - * This version of the constructor - * is used for calculations within periodic boundary conditions (PBC). - * The \a root parameter is the coarsest negative scale at wich the operator - * is applied. The \a reach parameter is the bandwidth of the operator at - * the root scale. For details see \ref MWOperator +/** + * @brief Build a heat propagator with an explicit operator scale window (PBC use). + * + * @tparam D Spatial dimension of the operator (1, 2, or 3). + * + * @param[in] mra D-dimensional @ref MultiResolutionAnalysis. + * @param[in] t Diffusion time (\f$t>0\f$). + * @param[in] prec Target build precision. + * @param[in] root Root (coarsest) scale the operator is attached to. + * @param[in] reach Bandwidth at the root scale (useful for PBC/domain tiling). 
+ * + * @details + * This overload mirrors the default constructor but confines the operator to a + * specific scale window, which is particularly useful for periodic boundary + * conditions and domain-decomposition setups. + * + * Implementation differences vs. the default constructor: + * - The base @ref ConvolutionOperator is constructed with (@p root, @p reach). + * - @c k_prec is chosen even tighter ( @p prec / 100.0 ) to robustly capture + * the narrow Gaussian under potentially coarser scale constraints. * + * @note The @p reach parameter controls the operator bandwidth measured in + * levels at @p root; see @ref MWOperator for details on scale windows + * and bandwidth semantics. */ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec, int root, int reach) @@ -76,18 +133,19 @@ HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, d int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; - double k_prec = prec / 100.0; + double o_prec = prec; // Operator-assembly tolerance + double k_prec = prec / 100.0; // Very tight kernel-projection tolerance HeatKernel kernel(t); this->initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); + this->initOperExp(kernel.size()); // single-term expansion (rank = 1) Printer::setPrintLevel(oldlevel); } +/* Explicit template instantiations */ template class HeatOperator<1>; template class HeatOperator<2>; template class HeatOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/HeatOperator.h b/src/operators/HeatOperator.h index f96560a81..b54f00575 100644 --- a/src/operators/HeatOperator.h +++ b/src/operators/HeatOperator.h @@ -29,39 +29,85 @@ namespace mrcpp { -/** @class HeatOperator semigroup +/** + * @file HeatOperator.h + * @brief Declaration of a separable convolution operator that realizes the heat + * semigroup \( e^{t\Delta} \) in \(D\) dimensions. 
* - * @brief Convolution with a heat kernel - * - * @details The exponential heat operator - * \f$ - * \exp \left( t \partial_x^2 \right) - * \f$ - * can be regarded as a convolution operator in \f$ L^2(\mathbb R) \f$ - * of the form + * @details + * In \f$\mathbb{R}^D\f$, the heat propagator at time \(t>0\) is a Gaussian + * convolution * \f[ - * \exp \left( t \partial_x^2 \right) - * f(x) + * (e^{t\Delta} f)(\mathbf x) + * = + * \int_{\mathbb{R}^D} + * K_t(\mathbf x-\mathbf y)\, f(\mathbf y)\, d\mathbf y, + * \qquad + * K_t(\mathbf r) * = - * \frac 1{ \sqrt{4 \pi t} } - * \int_{ \mathbb R } - * \exp - * \left( - * - \frac{ (x - y)^2 }{4t} - * \right) - * f(y) dy - * , \quad - * t > 0 - * . + * \frac{1}{(4\pi t)^{D/2}} + * \exp\!\left(-\frac{\|\mathbf r\|^2}{4t}\right). * \f] + * This class builds a rank-1 separable @ref ConvolutionOperator using a single 1D + * Gaussian kernel in each coordinate and assembles the \(D\)-dimensional operator + * as their tensor product. The amplitude/exponent are chosen so the overall kernel + * matches \(K_t\). + * + * Construction delegates to the base class to: + * - project the 1D kernel to a function tree on a 1D MRA, + * - lift it to operator trees via cross-correlation, + * - transform and cache the result in the multiwavelet domain. + * + * The overload with explicit @p root and @p reach is useful for periodic boundary + * conditions (PBC) or when the operator must be confined to a specific scale window. + * + * @see ConvolutionOperator, HeatKernel + */ + +/** + * @class HeatOperator + * @ingroup operators + * @brief D-dimensional heat semigroup as a separable Gaussian convolution. + * + * @tparam D Spatial dimension (1, 2, or 3). * + * @note The kernel is normalized so that \(\int_{\mathbb{R}^D} K_t = 1\) and + * the map \(f \mapsto e^{t\Delta}f\) is positivity-preserving and + * \(L^1\)-contractive in the continuous setting.
*/ template class HeatOperator final : public ConvolutionOperator { public: + /** + * @brief Construct the heat operator \(e^{t\Delta}\) on the default scale window. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis defining domain and basis. + * @param t Diffusion time; must be strictly positive. Smaller @p t yields a + * narrower Gaussian and requires finer resolution. + * @param prec Target build precision used while projecting the kernel and assembling + * the operator. + * + * @pre @p t > 0. + * @see ConvolutionOperator + */ HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec); + + /** + * @brief Construct the heat operator with an explicit operator scale window. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * @param t Diffusion time; must be strictly positive. + * @param prec Target build precision. + * @param root Operator root (coarsest) scale. + * @param reach Operator bandwidth (half-width in levels) at @p root; useful for + * periodic boundary conditions or domain tiling. Defaults to 1. + * + * @pre @p t > 0. + * @see MWOperator, ConvolutionOperator + */ HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec, int root, int reach = 1); - HeatOperator(const HeatOperator &oper) = delete; - HeatOperator &operator=(const HeatOperator &oper) = delete; + + HeatOperator(const HeatOperator &oper) = delete; ///< Non-copyable + HeatOperator &operator=(const HeatOperator &oper) = delete; ///< Non-assignable }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/HelmholtzKernel.cpp b/src/operators/HelmholtzKernel.cpp index 7d8ed0467..a75a42970 100644 --- a/src/operators/HelmholtzKernel.cpp +++ b/src/operators/HelmholtzKernel.cpp @@ -23,14 +23,28 @@ * */ -/* - * +/** + * @file HelmholtzKernel.cpp + * @brief Gaussian expansion of the 3D screened Coulomb / Helmholtz kernel. 
* - * \date Jul 7, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * @details + * This file implements a separable Gaussian approximation to the radial 3D + * Helmholtz kernel on a finite interval \f$[r_\text{min}, r_\text{max}]\f$. + * The expansion has the form + * \f[ + * K_\mu(r) \;\approx\; \sum_{m=1}^{M} \beta_m\, e^{-\alpha_m r^2}, + * \f] + * where \f$M\f$ (the separation rank) and the parameters \f$\{\alpha_m,\beta_m\}\f$ + * are chosen by truncating and sampling an integral representation with a uniform + * trapezoidal rule in a logarithmic variable \f$s\f$. The resulting coefficients + * depend on: + * - the screening parameter \f$\mu > 0\f$, + * - a target relative accuracy \f$\varepsilon\f$, + * - a radial domain \f$[r_\text{min}, r_\text{max}]\f$. * - * \breif + * Internally, the interval is rescaled to \f$[r_\text{min}/r_\text{max}, 1]\f$ + * to keep the step-size heuristics well-conditioned; the generated Gaussian + * parameters are then rescaled back to the original units. */ #include "HelmholtzKernel.h" @@ -42,41 +56,80 @@ namespace mrcpp { -/** generate an approximation of the 3d helmholtz kernel expanded in gaussian functions +/** + * @class HelmholtzKernel + * @brief Builds a 1D Gaussian expansion that approximates the 3D Helmholtz kernel. + * + * @details + * The constructor discretizes an auxiliary integral over a log-scaled variable + * \f$s\in[s_1,s_2]\f$ using a uniform step \f$h\f$ derived from the requested + * tolerance \f$\varepsilon\f$. For each quadrature node it produces a single + * Gaussian term with exponent \f$\alpha_m\f$ and weight \f$\beta_m\f$. Endpoints + * receive the trapezoidal half-weights. + * + * Rescaling: + * - Define \f$r_0 = r_\text{min}/r_\text{max}\f$ and \f$r_1 = r_\text{max}\f$. + * - Work on \f$[r_0,1]\f$, then map back by multiplying + * \f$\alpha \leftarrow \alpha / r_1^2\f$ and \f$\beta \leftarrow \beta / r_1\f$.
+ * + * Rank control: + * - The number of exponentials is \f$M = \lceil (s_2 - s_1)/h \rceil + 1\f$. + * - If \f$M > \texttt{MaxSepRank}\f$ the constructor aborts, signaling that the + * requested accuracy on the given domain would require too large a rank. + * + * @param mu Screening parameter \f$\mu > 0\f$. + * @param epsilon Target relative accuracy \f$\varepsilon \in (0,1)\f$. + * @param r_min Minimal radius of the approximation interval (strictly positive). + * @param r_max Maximal radius of the approximation interval (\f$r_\text{max} > r_\text{min}\f$). + * + * @note + * This routine assumes the standard MRCPP constants \c pi and \c root_pi are available + * in the \c mrcpp namespace and that \c MaxSepRank bounds the admissible separation rank. */ HelmholtzKernel::HelmholtzKernel(double mu, double epsilon, double r_min, double r_max) : GaussExp<1>() { - // Constructed on [rMin/rMax, 1.0], and then rescaled to [rMin,rMax] - double r0 = r_min / r_max; - double r1 = r_max; - double mu_tilde = mu * r1; - - // Set the truncation limits s1,s2 of the integral (integrate over [s1,s2]) - // for achieving relative error epsilon - double t = std::max((-2.5L * std::log(epsilon)), 5.0L); - double s1 = -std::log(4 * t / (mu_tilde * mu_tilde)) / 2; - double s2 = std::log(t / (r0 * r0)) / 2; - - // Now, set the proper step size h for use in the trapezoidal rule for given MU - double h = 1.0 / (0.20L - 0.47L * std::log10(epsilon)); - int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1); + // Rescale the interval to [r0, 1] and precompute scaled mu + const double r0 = r_min / r_max; + const double r1 = r_max; + const double mu_tilde = mu * r1; + + // Truncation window [s1, s2] giving ~epsilon relative error + // The heuristic t = max(-2.5 ln eps, 5) balances tails for practical eps + const long double t = std::max((-2.5L * std::log(epsilon)), 5.0L); + const double s1 = -std::log(4.0L * t / (mu_tilde * mu_tilde)) / 2.0L; + const double s2 = std::log(t / (r0 * r0)) / 2.0L; + + 
// Trapezoidal step size h from an empirical fit versus log10(epsilon) + const double h = 1.0 / (0.20L - 0.47L * std::log10(epsilon)); + const int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1.0); + if (n_exp > MaxSepRank) MSG_ABORT("Maximum separation rank exceeded."); - for (int i = 0; i < n_exp; i++) { - double arg = s1 + h * i; - double temp = -arg * 2.0; - double temp2 = -mu_tilde * mu_tilde * std::exp(temp) / 4.0 + arg; - double beta = (h * (2.0 / root_pi) * std::exp(temp2)); - double temp3 = 2.0L * arg; - double alpha = std::exp(temp3); + // Uniform trapezoidal quadrature in s; endpoints get half-weight. + for (int i = 0; i < n_exp; ++i) { + const double s = s1 + h * i; + // Intermediate quantities (written explicitly for clarity) + // temp = -2 s + // temp2 = - (mu_tilde^2) e^{-2 s} / 4 + s + // beta ~ h * 2/sqrt(pi) * exp(temp2) + const double temp = -2.0 * s; + const double temp2 = - (mu_tilde * mu_tilde) * std::exp(temp) / 4.0 + s; + + double beta = h * (2.0 / root_pi) * std::exp(temp2); + double alpha = std::exp(2.0L * s); + + // Rescale back to the original radial units alpha *= 1.0 / (r1 * r1); - beta *= 1.0 / r1; - if (i == 0 or i == (n_exp - 1)) { beta *= 1.0 / 2.0; } + beta *= 1.0 / r1; + + // Trapezoidal half-weights at the endpoints + if (i == 0 || i == (n_exp - 1)) beta *= 0.5; + // Append the 1D Gaussian term exp(-alpha r^2) with prefactor beta GaussFunc<1> gFunc(alpha, beta); this->append(gFunc); } } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/HelmholtzKernel.h b/src/operators/HelmholtzKernel.h index f5092d4a6..642158a7c 100644 --- a/src/operators/HelmholtzKernel.h +++ b/src/operators/HelmholtzKernel.h @@ -23,14 +23,68 @@ * */ +/** + * @file HelmholtzKernel.h + * @brief Declaration of a Gaussian expansion approximating the 3D Helmholtz (screened Coulomb) kernel. 
+ * + * @details + * This header declares @c HelmholtzKernel, a convenience wrapper that builds a + * separable Gaussian expansion + * \f[ + * K_\mu(r) \;\approx\; \sum_{m=1}^{M} \beta_m\, e^{-\alpha_m r^2}, + * \f] + * that approximates the radial 3D Helmholtz/Yukawa kernel on a finite interval + * \f$[r_\min,r_\max]\f$ with a target relative accuracy \f$\varepsilon\f$. + * The class derives from @ref mrcpp::GaussExp "GaussExp<1>" and therefore can be + * used anywhere a one–dimensional Gaussian expansion is expected (e.g. to form + * convolution operators). + */ + #pragma once #include "functions/GaussExp.h" namespace mrcpp { +/** + * @class HelmholtzKernel + * @brief Gaussian expansion of the 3D Helmholtz (screened Coulomb / Yukawa) kernel. + * + * @details + * Constructs a 1D Gaussian expansion (in the radial variable) by sampling an + * integral representation of the Helmholtz kernel in a logarithmic parameter and + * applying a trapezoidal quadrature. The resulting set of Gaussian terms + * \f$\{\alpha_m,\beta_m\}\f$ is rescaled to the requested physical interval + * \f$[r_\min,r_\max]\f$. + * + * Typical usage: + * @code + * double mu = 1.0; // screening parameter + * double eps = 1e-8; // target relative accuracy + * double rmin = 1e-3, rmax = 10.0; + * mrcpp::HelmholtzKernel kernel(mu, eps, rmin, rmax); + * // 'kernel' is a GaussExp<1> and can be used to build convolution operators + * @endcode + * + * @note The actual separation rank @f$M@f$ depends on @p epsilon and the interval + * size. Extremely tight tolerances or very wide intervals may require a rank + * larger than the internal limit (see @c MaxSepRank in the implementation). + */ class HelmholtzKernel final : public GaussExp<1> { public: + /** + * @brief Build a Gaussian expansion of the Helmholtz kernel on \f$[r_\min,r_\max]\f$. + * + * @param mu Screening parameter \f$\mu > 0\f$ (Yukawa wavenumber). + * @param epsilon Target relative accuracy \f$0 < \varepsilon < 1\f$ for the expansion. 
+ * @param r_min Lower radius bound (must satisfy \f$0 < r_\min < r_\max\f$). + * @param r_max Upper radius bound. + * + * @details + * The constructor fills the underlying @ref GaussExp "GaussExp<1>" with + * \f$M\f$ Gaussian terms determined by a trapezoidal discretization in a + * logarithmic variable. Endpoints are weighted with half-quadrature weights. + */ HelmholtzKernel(double mu, double epsilon, double r_min, double r_max); }; diff --git a/src/operators/HelmholtzOperator.cpp b/src/operators/HelmholtzOperator.cpp index 59d6311ff..bb8b83c66 100644 --- a/src/operators/HelmholtzOperator.cpp +++ b/src/operators/HelmholtzOperator.cpp @@ -23,20 +23,42 @@ * */ +/** + * @file HelmholtzOperator.cpp + * @brief Definition of a separable 3D convolution operator approximating the Helmholtz/Yukawa kernel. + * + * @details + * This file implements @ref mrcpp::HelmholtzOperator, a convenience convolution operator + * in three spatial dimensions that applies a Gaussian expansion of the radial kernel + * \f$ e^{-\mu r}/r \f$. The expansion is built by @ref mrcpp::HelmholtzKernel and + * lifted into a separable multiwavelet operator, which can then be applied along the + * Cartesian directions. + */ + #include "HelmholtzOperator.h" #include "HelmholtzKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** @returns New HelmholtzOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] m: Exponential parameter of the operator - * @param[in] pr: Build precision, closeness to exp(-mu*r)/r - * @details This will construct a gaussian expansion to approximate - * exp(-mu*r)/r, and project each term into a one-dimensional MW operator. - * Subsequent application of this operator will apply each of the terms to - * the input function in all Cartesian directions. +/** + * @brief Construct a 3D Helmholtz (Yukawa) convolution operator. + * + * @param mra The 3D @ref MultiResolutionAnalysis that defines the domain and basis. 
+ * @param mu Screening parameter \f$\mu>0\f$ of the Yukawa kernel. + * @param prec Build precision; controls kernel- and operator-assembly tolerances and, + * indirectly, the separation rank of the Gaussian expansion. + * + * @details + * - Chooses separate tolerances for the kernel projection (@c k_prec = prec/10) and + * the operator assembly (@c o_prec = prec). + * - Estimates the admissible radial interval \f$[r_{\min}, r_{\max}]\f$ from @p mra via + * @ref MultiResolutionAnalysis::calcMinDistance and @ref MultiResolutionAnalysis::calcMaxDistance. + * - Builds a @ref HelmholtzKernel on that interval with the requested accuracy, then + * calls @ref ConvolutionOperator::initialize to form the separable operator blocks + * and caches them via @ref MWOperator::initOperExp. + * + * @note The printer level is temporarily reduced during build to keep output concise. */ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double mu, double prec) : ConvolutionOperator<3>(mra) { @@ -55,6 +77,31 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub Printer::setPrintLevel(oldlevel); } +/** + * @brief Construct a 3D Helmholtz convolution operator with explicit root and reach. + * + * @param mra The 3D @ref MultiResolutionAnalysis. + * @param mu Screening parameter \f$\mu>0\f$. + * @param prec Build precision (as above). + * @param root Operator root scale (coarsest level for the operator support). + * @param reach Operator reach (half-width in levels). For periodic domains this + * sets the operator bandwidth at @p root. + * + * @details + * - Uses a tighter kernel-projection tolerance (@c k_prec = prec/100) while keeping + * the operator-assembly tolerance at @c o_prec = prec. 
+ * - Estimates \f$[r_{\min}, r_{\max}]\f$ as in the other constructor, then adjusts + * @c r_max to reflect periodic worlds by scaling with the relative root shift and + * the chosen @p reach: + * \f[ + * r_{\max} \leftarrow r_{\max}\, 2^{-(\text{oper\_root} - \text{MRA.root})} + * \times \big( 2\,\text{reach} + 1 \big). + * \f] + * - Builds the @ref HelmholtzKernel and initializes the separable operator. + * + * @note This overload is intended for periodic boundary conditions or scenarios + * where the operator must be confined to a specific scale window. + */ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double mu, double prec, int root, int reach) : ConvolutionOperator<3>(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); @@ -77,4 +124,4 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub Printer::setPrintLevel(oldlevel); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/HelmholtzOperator.h b/src/operators/HelmholtzOperator.h index fe5cf1d8b..b3e056bec 100644 --- a/src/operators/HelmholtzOperator.h +++ b/src/operators/HelmholtzOperator.h @@ -23,28 +23,93 @@ * */ +/** + * @file HelmholtzOperator.h + * @brief Declaration of a 3D separable convolution operator for the Helmholtz/Yukawa kernel. + * + * @details + * This header declares @ref mrcpp::HelmholtzOperator, a specialized + * @ref ConvolutionOperator that applies the screened Coulomb (Yukawa) Green's function + * in three spatial dimensions via a Gaussian expansion. The radial kernel + * \f$ e^{-\mu r}/r \f$ is approximated as a finite sum of 1D Gaussians, enabling + * separated application across Cartesian coordinates in the MRCPP multiwavelet basis. 
+ */ + #pragma once #include "ConvolutionOperator.h" namespace mrcpp { -/** @class HelmholtzOperator +/** + * @class HelmholtzOperator + * @ingroup operators + * + * @brief Separable 3D convolution approximating the Helmholtz (Yukawa) Green's function. * - * @brief Convolution with the Helmholtz Green's function kernel + * @details + * The continuous kernel + * \f[ + * H(\mathbf r - \mathbf r') = \frac{e^{-\mu \lvert \mathbf r - \mathbf r' \rvert}} + * {\lvert \mathbf r - \mathbf r' \rvert} + * \f] + * is approximated by a Gaussian sum + * \f[ + * H(\mathbf r - \mathbf r') + * \;\approx\; + * \sum_{m=1}^{M} \alpha_m \exp\!\big( -\beta_m \lvert \mathbf r - \mathbf r' \rvert^2 \big), + * \f] + * which admits a *separable* representation in Cartesian coordinates, allowing the + * operator to be assembled as a tensor product of 1D convolution blocks within the + * MRCPP framework. The expansion coefficients \f$ \alpha_m, \beta_m \f$ and the + * separation rank \f$ M \f$ are chosen internally based on the requested build + * precision. * - * @details The Helmholtz kernel is approximated as a sum of gaussian functions - * in order to allow for separated application of the operator in the Cartesian - * directions: - * \f$ H(r-r') = \frac{e^{-\mu|r-r'|}}{|r-r'|} \approx \sum_m^M \alpha_m e^{-\beta_m (r-r')^2} \f$ + * ### Usage notes + * - This class is a convenience wrapper that constructs the Gaussian expansion and + * the corresponding multiwavelet operator trees; application is handled by the + * @ref ConvolutionOperator base. + * - For periodic worlds and explicit scale control, use the constructor that accepts + * @p root and @p reach (see below). + * + * @see ConvolutionOperator, HelmholtzKernel */ - class HelmholtzOperator final : public ConvolutionOperator<3> { public: + /** + * @brief Build a Helmholtz (Yukawa) convolution operator on the default scale window. + * + * @param mra 3D @ref MultiResolutionAnalysis defining domain and basis. 
+ * @param m Screening parameter \f$ \mu > 0 \f$ of the Yukawa kernel. + * @param prec Target build precision controlling the Gaussian expansion accuracy + * and operator assembly tolerances. + * + * @details + * Internally: + * 1. Estimates admissible radial bounds from @p mra. + * 2. Constructs a Gaussian expansion for \f$ e^{-\mu r}/r \f$ at the requested accuracy. + * 3. Lifts the 1D kernels to separable operator blocks and caches them. + */ HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double m, double prec); + + /** + * @brief Build a Helmholtz operator with explicit root scale and reach (useful for PBC). + * + * @param mra 3D @ref MultiResolutionAnalysis. + * @param m Screening parameter \f$ \mu \f$ (same as above). + * @param prec Target build precision. + * @param root Operator root level (coarsest scale at which the operator is defined). + * @param reach Operator half-bandwidth at @p root (controls extent; relevant for periodic worlds). + * + * @details + * This overload confines the operator to a specified scale window and adjusts the + * radial extent accordingly—suitable for periodic boundary conditions and scenarios + * requiring strict bandwidth control. 
+ */ HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double m, double prec, int root, int reach = 1); - HelmholtzOperator(const HelmholtzOperator &oper) = delete; - HelmholtzOperator &operator=(const HelmholtzOperator &oper) = delete; + + HelmholtzOperator(const HelmholtzOperator &oper) = delete; ///< Non-copyable + HelmholtzOperator &operator=(const HelmholtzOperator &oper) = delete; ///< Non-assignable }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/IdentityConvolution.cpp b/src/operators/IdentityConvolution.cpp index 038d076cc..7ebf89e47 100644 --- a/src/operators/IdentityConvolution.cpp +++ b/src/operators/IdentityConvolution.cpp @@ -23,18 +23,48 @@ * */ +/** + * @file IdentityConvolution.cpp + * @brief Implementation of a separable convolution operator that approximates the identity + * via a narrow Gaussian kernel (delta-approximation). + * + * @details + * This translation unit defines the templated constructors of + * @ref mrcpp::IdentityConvolution, a convenience @ref ConvolutionOperator that uses a + * single-term Gaussian kernel to approximate the Dirac delta distribution: + * \f[ + * \delta(x) \;\approx\; \alpha\,e^{-\beta x^2}. + * \f] + * The associated D-dimensional operator is assembled separably (tensor-product form) + * following MRCPP’s multiwavelet machinery. The build precision controls the kernel + * narrowness and the tolerances used during projection/assembly. + */ + #include "IdentityConvolution.h" #include "IdentityKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** @brief Constructor of the IdentityConvolution object - * @returns New IdentityConvolution object - * @param[in] mra: Which MRA the operator is defined - * @param[in] prec: Build precision, closeness to delta function - * @details This will project a kernel of a single gaussian with - * exponent sqrt(10/build_prec). +/** + * @brief Construct an identity-like convolution operator on the default scale window. 
+ * + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra D-dimensional @ref MultiResolutionAnalysis that defines the domain and basis. + * @param prec Target build precision controlling the closeness to the delta function. + * + * @details + * Internally the constructor: + * - Stores @p prec as the build precision. + * - Uses split tolerances: + * - @c k_prec = prec/10.0 for accurate projection of the narrow Gaussian kernel. + * - @c o_prec = prec for operator assembly. + * - Builds a single-term @ref IdentityKernel at @c k_prec and calls + * @ref ConvolutionOperator::initialize to lift it into separable operator blocks. + * - Finalizes with @ref MWOperator::initOperExp for bookkeeping/caching. + * + * A tighter @p prec yields a narrower Gaussian (better delta approximation) but + * increases the required resolution and operator bandwidth in practice. */ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec) @@ -52,18 +82,28 @@ IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } -/** @brief Constructor of the IdentityConvolution object in case of Periodic Boundary Conditions (PBC) - * @returns New IdentityConvolution object - * @param[in] mra: Which MRA the operator is defined - * @param[in] prec: Build precision, closeness to delta function - * @param[in] root: root scale of operator. - * @param[in] reach: width at root scale (applies to periodic boundary conditions) - * @details This will project a kernel of a single gaussian with - * exponent sqrt(10/build_prec). This version of the constructor - * is used for calculations within periodic boundary conditions (PBC). - * The \a root parameter is the coarsest negative scale at wich the operator - * is applied. The \a reach parameter is the bandwidth of the operator at - * the root scale. 
For details see \ref MWOperator +/** + * @brief Construct an identity-like convolution operator with explicit root and reach (PBC-ready). + * + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * @param prec Target build precision controlling the closeness to the delta function. + * @param root Operator root level (coarsest scale at which the operator is defined). + * @param reach Operator half-bandwidth at @p root (relevant for periodic boundary conditions). + * + * @details + * This overload confines the operator to a specific scale window, which is useful for + * periodic boundary conditions or when coupling multiple operators with controlled support. + * Compared to the default constructor, the kernel projection tolerance is chosen even + * tighter (@c k_prec = prec/100.0) to ensure faithful representation on restricted scale + * ranges; operator assembly uses @c o_prec = prec. + * + * Steps: + * 1. Record @p prec via @ref ConvolutionOperator::setBuildPrec. + * 2. Create a single-term @ref IdentityKernel at @c k_prec. + * 3. Initialize separable operator blocks (@ref ConvolutionOperator::initialize) + * within the user-specified scale window (@p root, @p reach). + * 4. Call @ref MWOperator::initOperExp. 
*/ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach) @@ -81,8 +121,9 @@ IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } +/* Explicit template instantiations */ template class IdentityConvolution<1>; template class IdentityConvolution<2>; template class IdentityConvolution<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/IdentityConvolution.h b/src/operators/IdentityConvolution.h index 8fd5f0956..db661f1e6 100644 --- a/src/operators/IdentityConvolution.h +++ b/src/operators/IdentityConvolution.h @@ -23,27 +23,88 @@ * */ +/** + * @file IdentityConvolution.h + * @brief Separable convolution operator that approximates the identity (Dirac delta) + * using a narrow Gaussian kernel. + * + * @details + * This header declares @ref mrcpp::IdentityConvolution, a thin convenience wrapper + * around @ref mrcpp::ConvolutionOperator that realizes an identity-like operator via + * a single Gaussian kernel, separably assembled in D dimensions. + * + * The kernel approximation is + * \f[ + * \delta(\mathbf r - \mathbf r') + * \;\approx\; + * \alpha \exp\!\bigl(-\beta\lVert \mathbf r - \mathbf r' \rVert^2\bigr), + * \f] + * which, in the MRCPP framework, is projected to a 1D function tree and lifted to + * D-dimensional operator blocks by cross-correlation. The resulting operator is + * bandwidth-limited and numerically stable for use in multiresolution workflows. + * + * The constructor takes a *build precision* that governs the kernel’s narrowness + * and the tolerances used during projection and assembly. Tighter precision yields + * a Gaussian closer to a true delta (hence a better identity approximation), at the + * cost of higher resolution demands. 
+ */ + #pragma once #include "ConvolutionOperator.h" namespace mrcpp { -/** @class IdentityConvolution +/** + * @class IdentityConvolution + * @ingroup operators + * @brief Convolution with an identity (delta-like) kernel. * - * @brief Convolution with an identity kernel + * @tparam D Spatial dimension of the target operator (1, 2, or 3). * - * @details The identity kernel (Dirac's delta function) is approximated by a - * narrow Gaussian function: - * \f$ I(r-r') = \delta(r-r') \approx \alpha e^{-\beta (r-r')^2} \f$ + * @details + * The operator is represented as a separable sum (rank-1 in the default realization) + * of 1D Gaussian convolutions identical along each Cartesian direction. It is mainly + * intended for diagnostics and algorithmic baselines; for strict identity action, + * prefer direct coefficient transfers when applicable. + * + * The underlying kernel is the Gaussian surrogate of the Dirac delta, + * \f$ I(\mathbf r-\mathbf r') \approx \alpha e^{-\beta \lVert \mathbf r-\mathbf r' \rVert^2} \f$, + * with parameters chosen from the requested build precision. + * + * @see ConvolutionOperator */ - template class IdentityConvolution final : public ConvolutionOperator { public: + /** + * @brief Build an identity-like convolution operator on the default root/reach. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis defining domain and basis. + * @param prec Target build precision controlling the closeness to the delta function + * (narrowness of the Gaussian) and assembly tolerances. + * + * @details + * Internally constructs a single-term Gaussian kernel and invokes + * @ref ConvolutionOperator::initialize to assemble the separable operator blocks. + */ IdentityConvolution(const MultiResolutionAnalysis &mra, double prec); + + /** + * @brief Build an identity-like convolution operator with explicit scale window. + * + * @param mra D-dimensional @ref MultiResolutionAnalysis. + * @param prec Target build precision (as above). 
+ * @param root Operator root level (coarsest scale at which the operator resides). + * @param reach Operator half-bandwidth at @p root (useful for periodic domains). + * + * @details + * Use this overload to confine the operator to a specific scale window—particularly + * helpful under periodic boundary conditions or when coordinating multiple operators. + */ IdentityConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach = 1); - IdentityConvolution(const IdentityConvolution &oper) = delete; - IdentityConvolution &operator=(const IdentityConvolution &oper) = delete; + + IdentityConvolution(const IdentityConvolution &oper) = delete; ///< Non-copyable + IdentityConvolution &operator=(const IdentityConvolution &oper) = delete; ///< Non-assignable }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/IdentityKernel.h b/src/operators/IdentityKernel.h index 962ef2e7b..757566fa4 100644 --- a/src/operators/IdentityKernel.h +++ b/src/operators/IdentityKernel.h @@ -23,6 +23,32 @@ * */ +/** + * @file IdentityKernel.h + * @brief Gaussian surrogate of the Dirac delta kernel for use in separable + * convolution operators. + * + * @details + * This header declares @ref mrcpp::IdentityKernel, a convenience wrapper that + * builds a one-term @ref mrcpp::GaussExp "Gaussian expansion" approximating the + * identity (Dirac delta) kernel in \f$ \mathbb{R}^D \f$: + * \f[ + * \delta(x) \;\approx\; \alpha \, e^{-\beta x^2}, + * \f] + * with parameters chosen from a requested *narrowness* (precision) \f$ \varepsilon \f$. + * Concretely, + * \f[ + * \beta = \sqrt{\tfrac{1}{\varepsilon}}, \qquad + * \alpha = \left( \frac{\beta}{\pi} \right)^{D/2}. + * \f] + * + * The resulting object is a rank-1 @ref GaussExp<1> suitable for constructing + * separable, bandwidth-limited identity-like convolution operators; see + * @ref mrcpp::IdentityConvolution. 
+ * + * @see IdentityConvolution, ConvolutionOperator, GaussExp, GaussFunc + */ + #pragma once #include "functions/GaussExp.h" @@ -30,15 +56,34 @@ namespace mrcpp { +/** + * @class IdentityKernel + * @ingroup kernels + * @brief Single-term Gaussian expansion approximating the Dirac delta in \f$ \mathbb{R}^D \f$. + * + * @tparam D Spatial dimension for the normalization of the Gaussian surrogate. + * + * @details + * Constructs a one-dimensional Gaussian \f$ \alpha e^{-\beta x^2} \f$ with + * \f$ \beta=\sqrt{1/\varepsilon} \f$ and + * \f$ \alpha=(\beta/\pi)^{D/2} \f$, + * then appends it to the underlying @ref GaussExp container. The parameter + * \p epsilon controls the narrowness of the surrogate: smaller values yield + * narrower Gaussians (closer to a true delta) but demand more resolution. + */ template class IdentityKernel final : public GaussExp<1> { public: + /** + * @brief Build a delta-like Gaussian kernel from a target narrowness \p epsilon. + * @param epsilon Positive parameter controlling the kernel width; smaller ⇒ narrower. + */ IdentityKernel(double epsilon) : GaussExp<1>() { - double expo = std::sqrt(1.0 / epsilon); - double coef = std::pow(expo / mrcpp::pi, D / 2.0); + double expo = std::sqrt(1.0 / epsilon); // β + double coef = std::pow(expo / mrcpp::pi, D / 2.0); // α = (β/π)^{D/2} GaussFunc<1> gFunc(expo, coef); this->append(gFunc); } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/MWOperator.cpp b/src/operators/MWOperator.cpp index 225108f48..f8699e1e8 100644 --- a/src/operators/MWOperator.cpp +++ b/src/operators/MWOperator.cpp @@ -23,6 +23,25 @@ * */ +/** + * @file MWOperator.cpp + * @brief Common utilities for multiwavelet (MW) operators: term assignment, + * component access, bandwidth analysis, and operator-domain MRA setup. 
+ * + * @details + * The templated @ref mrcpp::MWOperator provides infrastructure shared by concrete + * MW operators: + * - organizing a separated operator expansion into per-dimension @ref OperatorTree + * components, + * - computing effective bandwidths across scales, and + * - constructing the 2D operator-domain @ref MultiResolutionAnalysis used by + * operator trees (for D-dimensional function spaces). + * + * The functions implemented here are thin, performance-oriented utilities that + * avoid modifying operator semantics. They are used by higher-level operators + * such as convolution- and derivative-based classes. + */ + #include "MWOperator.h" #include "trees/BandWidth.h" #include "utils/Printer.h" @@ -32,6 +51,21 @@ using namespace Eigen; namespace mrcpp { +/** + * @brief Initialize the separated operator expansion with @p M terms. + * + * @tparam D Spatial dimension of the target space. + * @param M Number of separated terms to activate from @c raw_exp. + * + * @details + * Allocates an @c oper_exp array of size @p M, each entry holding @c D pointers + * to @ref OperatorTree components (one per Cartesian direction). + * By default, an *isotropic* operator is formed by assigning the first @p M raw + * terms to **all** directions. + * + * @pre @c raw_exp has at least @p M terms; otherwise the function aborts. + * @note You can override individual components later via @ref assign(). + */ template void MWOperator::initOperExp(int M) { if (this->raw_exp.size() < M) MSG_ABORT("Incompatible raw expansion"); this->oper_exp.clear(); @@ -41,11 +75,21 @@ template void MWOperator::initOperExp(int M) { this->oper_exp.push_back(otrees); } - // Sets up an isotropic operator with the first M raw terms in all direction + // Sets up an isotropic operator with the first M raw terms in all directions for (int i = 0; i < M; i++) for (int d = 0; d < D; d++) assign(i, d, this->raw_exp[i].get()); } +/** + * @brief Mutable access to a specific separated component. 
+ * + * @tparam D Spatial dimension. + * @param i Term index in the separated expansion. + * @param d Cartesian direction index (0..D-1). + * @return Reference to the requested @ref OperatorTree. + * + * @note Out-of-range @p i or @p d is reported through MSG_ERROR (presumably a null component as well); no specific exception type is documented here. + */ template OperatorTree &MWOperator::getComponent(int i, int d) { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds"); @@ -53,6 +97,16 @@ template OperatorTree &MWOperator::getComponent(int i, int d) { return *this->oper_exp[i][d]; } +/** + * @brief Const access to a specific separated component. + * + * @tparam D Spatial dimension. + * @param i Term index in the separated expansion. + * @param d Cartesian direction index (0..D-1). + * @return Const reference to the requested @ref OperatorTree. + * + * @note Out-of-range @p i or @p d is reported through MSG_ERROR (presumably a null component as well); no specific exception type is documented here. + */ template const OperatorTree &MWOperator::getComponent(int i, int d) const { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds"); @@ -60,6 +114,14 @@ template const OperatorTree &MWOperator::getComponent(int i, int d) c return *this->oper_exp[i][d]; } +/** + * @brief Get the maximum effective bandwidth at a given depth. + * + * @tparam D Spatial dimension. + * @param depth Tree depth (scale) at which to query the bandwidth. If negative, + * the maximum over all depths is returned. + * @return Non-negative maximum bandwidth, or -1 if @p depth is invalid. + */ template int MWOperator::getMaxBandWidth(int depth) const { int maxWidth = -1; if (depth < 0) { @@ -70,11 +132,27 @@ template int MWOperator::getMaxBandWidth(int depth) const { return maxWidth; } +/** + * @brief Clear cached @ref BandWidth information in all operator components. + * + * @tparam D Spatial dimension. 
+ */ template void MWOperator::clearBandWidths() { for (auto &i : this->oper_exp) for (int d = 0; d < D; d++) i[d]->clearBandWidth(); } +/** + * @brief Compute effective bandwidths at all scales for all components. + * + * @tparam D Spatial dimension. + * @param prec Numerical precision used to estimate bandwidths. + * + * @details + * For each @ref OperatorTree component, this calls @ref OperatorTree::calcBandWidth + * and records the @em maximum effective width across components for every depth. + * Results are stored in @c band_max and summarized to the log at print level 20. + */ template void MWOperator::calcBandWidths(double prec) { int maxDepth = 0; // First compute BandWidths and find depth of the deepest component @@ -107,6 +185,18 @@ template void MWOperator::calcBandWidths(double prec) { println(20, std::endl); } +/** + * @brief Build the 2D operator-domain MRA used by operator trees. + * + * @tparam D Spatial dimension of the *function* domain. + * @return A @ref MultiResolutionAnalysis<2> describing the operator lattice. + * + * @details + * Operator trees live on a 2D lattice (row/column), even when acting on + * D-dimensional function spaces. The lattice extents are determined from the + * operator's root level and reach, and it reuses the function-space scaling + * basis (uniform scaling is assumed). 
+ */ template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); @@ -128,8 +218,9 @@ template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() cons return oper_mra; } +/* Explicit template instantiations */ template class MWOperator<1>; template class MWOperator<2>; template class MWOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/MWOperator.h b/src/operators/MWOperator.h index 2dcad2b32..61c2db1cb 100644 --- a/src/operators/MWOperator.h +++ b/src/operators/MWOperator.h @@ -32,51 +32,144 @@ namespace mrcpp { -/** @class MWOperator +/** + * @class MWOperator + * @brief Base class for multiwavelet (MW) operators with separated expansions. * - * @brief Fixme + * @tparam D Spatial dimension of the function space (1, 2, or 3). * - * @details Fixme + * @details + * An MW operator is represented as a (typically low-rank) separated expansion + * whose per-term, per-dimension components are stored as pointers to + * @ref OperatorTree objects. This class provides: + * - bookkeeping for the operator’s *root* scale and *reach* (bandwidth), + * - storage for the raw operator terms and their per-dimension assignments, + * - utilities for bandwidth analysis and component access, and + * - construction of the 2D operator-domain @ref MultiResolutionAnalysis used + * by @ref OperatorTree. * + * Derived classes are responsible for building/populating @c raw_exp and then + * calling @ref initOperExp() to map raw terms into directional components. */ template class MWOperator { public: + /** + * @brief Construct an MW operator wrapper. + * + * @param mra D-dimensional analysis describing the function space/domain. + * @param root Operator root level (coarsest level at which the operator lives). + * @param reach Operator reach (half-width in levels at the root). 
Negative values + * can be interpreted by implementations as “auto”. + */ MWOperator(const MultiResolutionAnalysis &mra, int root, int reach) : oper_root(root) , oper_reach(reach) , MRA(mra) {} - MWOperator(const MWOperator &oper) = delete; - MWOperator &operator=(const MWOperator &oper) = delete; + + MWOperator(const MWOperator &oper) = delete; ///< Non-copyable + MWOperator &operator=(const MWOperator &oper) = delete; ///< Non-assignable virtual ~MWOperator() = default; + /** + * @brief Number of separated terms currently active in the operator. + */ int size() const { return this->oper_exp.size(); } + + /** + * @brief Maximum effective bandwidth at a given depth (scale). + * @param depth Depth index; if negative, returns the maximum over all depths. + * @return The maximum bandwidth, or -1 if @p depth is invalid. + */ int getMaxBandWidth(int depth = -1) const; + + /** + * @brief Vector of maximum effective bandwidths per depth. + * @return Reference to internal cache of maximum bandwidths. + */ const std::vector &getMaxBandWidths() const { return this->band_max; } + /** + * @brief Compute effective bandwidths for all components at all depths. + * @param prec Numeric tolerance used in bandwidth estimation. + */ void calcBandWidths(double prec); + + /** + * @brief Clear cached bandwidth information in all components. + */ void clearBandWidths(); + /** + * @brief Root level (coarsest scale) of the operator domain. + */ int getOperatorRoot() const { return this->oper_root; } + + /** + * @brief Operator reach (half-width at the root level). + */ int getOperatorReach() const { return this->oper_reach; } + /** + * @brief Mutable access to the @p i-th separated term, @p d-th dimension component. + * @param i Separated term index. + * @param d Cartesian direction index (0..D-1). + * @return Reference to the requested @ref OperatorTree. 
+ */ OperatorTree &getComponent(int i, int d); + + /** + * @brief Const access to the @p i-th separated term, @p d-th dimension component. + * @param i Separated term index. + * @param d Cartesian direction index (0..D-1). + * @return Const reference to the requested @ref OperatorTree. + */ const OperatorTree &getComponent(int i, int d) const; + /** + * @brief Direct access to the array of D components for the @p i-th term. + */ std::array &operator[](int i) { return this->oper_exp[i]; } + + /** + * @brief Const direct access to the array of D components for the @p i-th term. + */ const std::array &operator[](int i) const { return this->oper_exp[i]; } protected: - int oper_root; - int oper_reach; - MultiResolutionAnalysis MRA; - std::vector> oper_exp; - std::vector> raw_exp; - std::vector band_max; + /** @name Operator geometry */ + ///@{ + int oper_root; ///< Operator root level (coarsest scale). + int oper_reach; ///< Operator reach (half-width in levels at the root). + MultiResolutionAnalysis MRA; ///< Function-space analysis (domain and basis). + ///@} + /** @name Operator storage */ + ///@{ + std::vector> oper_exp; ///< Active separated terms by dimension. + std::vector> raw_exp; ///< Owned raw operator terms (before assignment). + std::vector band_max; ///< Maximum bandwidth per depth. + ///@} + + /** + * @brief Build the 2D operator-domain MRA used by @ref OperatorTree. + * @details Operators act on a 2D lattice (row/column) even for D-D function spaces. + */ MultiResolutionAnalysis<2> getOperatorMRA() const; + /** + * @brief Initialize @ref oper_exp with @p M separated terms. + * @details By default, assigns the first @p M raw terms isotropically across all D dimensions. + * @param M Number of separated terms to activate. + */ void initOperExp(int M); + + /** + * @brief Assign a particular operator component for term @p i and direction @p d. + * @param i Term index in the separated expansion. + * @param d Cartesian direction index (0..D-1). 
+ * @param oper Pointer to the @ref OperatorTree to be used for this component. + */ void assign(int i, int d, OperatorTree *oper) { this->oper_exp[i][d] = oper; } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/OperatorState.h b/src/operators/OperatorState.h index 677375632..f6602da12 100644 --- a/src/operators/OperatorState.h +++ b/src/operators/OperatorState.h @@ -23,10 +23,23 @@ * */ -/** OperatorState is a simple helper class for operator application. - * It keeps track of various state dependent variables and memory - * regions. We cannot have some of this information directly in OperatorFunc - * because of multi-threading issues. +/** + * @file + * @brief Lightweight state holder used during operator application. + * + * @details + * The operator application kernels (e.g., convolution/derivative calculators) + * are performance-critical and multi-threaded. To avoid sharing mutable + * state between threads, this helper encapsulates: + * - pointers to the current *source* (g) and *destination* (f) MW nodes, + * - precomputed size/stride quantities (`kp1`, `kp1_d`, …), + * - addresses of coefficient blocks for selected components (ft/gt), + * - temporary scratch buffers laid out for cache-friendly sweeps, + * - and small per-call metadata such as the maximum index offset + * (`maxDeltaL`) between the active nodes. + * + * It is deliberately simple (POD-like) and header-only to enable aggressive + * inlining by the compiler. */ #pragma once @@ -40,21 +53,66 @@ namespace mrcpp { +/** + * @def GET_OP_IDX(FT, GT, ID) + * @brief Build a 2-bit operator index for dimension @p ID from component flags. + * + * @details + * Encodes the *from* (FT) and *to* (GT) component bits at position @p ID into + * an index in the set {0,1,2,3}: + * \f[ + * \mathrm{idx} = 2 \cdot \big( (GT \gg ID) \& 1 \big) + * + \big( (FT \gg ID) \& 1 \big). + * \f] + * This compact index is used to select per-dimension operator blocks. 
+ */ #define GET_OP_IDX(FT, GT, ID) (2 * ((GT >> ID) & 1) + ((FT >> ID) & 1)) +/** + * @class OperatorState + * @brief Thread-local state for applying an MW operator to node data. + * + * @tparam D Spatial dimension of the node (1–3). + * @tparam T Coefficient value type (e.g., double or std::complex). + * + * @details + * The class provides: + * - Binding of a *g-node* (source) at construction time. + * - Late binding of an *f-node* (destination) and its @ref NodeIndex. + * - Selection of component blocks (ft/gt) via @ref setFComponent and + * @ref setGComponent, exposing the corresponding coefficient slices. + * - Access to alternating scratch buffers arranged as + * `aux[0] = f-comp`, `aux[1..D-1]` ping-pong across `scr1`/`scr2`, + * and `aux[D] = g-comp`. + * + * The scratch layout avoids reallocation and reduces cache conflicts during + * dimension-by-dimension tensor sweeps. + */ template class OperatorState final { public: + /** + * @brief Construct with a source node and a raw scratch buffer. + * + * @param gn Source (g) node whose coefficients are read. + * @param scr1 Pointer to a scratch buffer that must extend past `scr1 + kp1_d` (presumably `2 * kp1_d` elements in total — TODO confirm). + * + * @details + * Two consecutive scratch regions are used: `scr1` and `scr2 = scr1 + kp1_d`. + * For each interior dimension `i=1..D-1`, the buffer alternates between + * these two regions by parity of `i` to enable out-of-place 1D transforms. 
+ */ OperatorState(MWNode &gn, T *scr1) : gNode(&gn) { - this->kp1 = this->gNode->getKp1(); - this->kp1_d = this->gNode->getKp1_d(); - this->kp1_2 = math_utils::ipow(this->kp1, 2); - this->kp1_dm1 = math_utils::ipow(this->kp1, D - 1); - this->gData = this->gNode->getCoefs(); + this->kp1 = this->gNode->getKp1(); // basis points per dim + this->kp1_d = this->gNode->getKp1_d(); // total points (kp1^D) + this->kp1_2 = math_utils::ipow(this->kp1, 2); // kp1^2 + this->kp1_dm1 = math_utils::ipow(this->kp1, D - 1); // kp1^(D-1) + this->gData = this->gNode->getCoefs(); this->maxDeltaL = -1; T *scr2 = scr1 + this->kp1_d; + // Assign alternating aux buffers for interior dimensions for (int i = 1; i < D; i++) { if (IS_ODD(i)) { this->aux[i] = scr2; @@ -64,56 +122,116 @@ template class OperatorState final { } } + /** + * @brief Convenience ctor: scratch storage provided as a std::vector. + * @param gn Source (g) node. + * @param scr1 Vector whose data pointer is used as scratch. + * + * @warning @p scr1 is taken by value: the pointer forwarded to the delegated constructor refers to the parameter copy, not to the caller's vector, and that copy is destroyed when this constructor returns. + * NOTE(review): the stored scratch pointers would then dangle; confirm whether a reference parameter was intended. + */ OperatorState(MWNode &gn, std::vector scr1) : OperatorState(gn, scr1.data()) {} + + /** + * @brief Bind the destination (f) node and cache its coefficient pointer. + */ void setFNode(MWNode &fn) { this->fNode = &fn; this->fData = this->fNode->getCoefs(); } + + /** + * @brief Bind the destination node index and update @ref maxDeltaL. + * @param idx Destination node index in the tree. + * + * @details + * The maximum level shift \f$\max_d |f_l[d] - g_l[d]|\f$ is used to + * select scale-dependent operator stencils/bandwidths. + */ void setFIndex(NodeIndex &idx) { this->fIdx = &idx; calcMaxDeltaL(); } + + /** + * @brief Select the source (g) component and expose its coefficient slice. + * @param gt Component bitfield (typically 0/1 per dimension). + * + * @details Offsets the base pointer by `gt * kp1_d` and stores it in + * `aux[D]`, which operator kernels read as the final stage input. 
+ */ void setGComponent(int gt) { this->aux[D] = this->gData + gt * this->kp1_d; this->gt = gt; } + + /** + * @brief Select the destination (f) component and expose its coefficient slice. + * @param ft Component bitfield (typically 0/1 per dimension). + * + * @details Offsets the base pointer by `ft * kp1_d` and stores it in + * `aux[0]`, which operator kernels use as the first stage buffer. + */ void setFComponent(int ft) { this->aux[0] = this->fData + ft * this->kp1_d; this->ft = ft; } + /** + * @brief Maximum level difference between the bound f/g nodes. + */ int getMaxDeltaL() const { return this->maxDeltaL; } + + /** + * @brief Build a compact operator index for dimension @p i (0..D-1). + * + * @details Uses @ref GET_OP_IDX on the currently bound @ref ft and @ref gt. + */ int getOperIndex(int i) const { return GET_OP_IDX(this->ft, this->gt, i); } + /** + * @brief Access the array of auxiliary data pointers used by kernels. + * @return `aux[0] = f-comp`, `aux[1..D-1]` scratch, `aux[D] = g-comp`. + */ T **getAuxData() { return this->aux; } + + /** + * @brief Access per-dimension operator data blocks (set by calculators). + */ double **getOperData() { return this->oData; } + // Calculator kernels are declared as friends to allow fast access. 
friend class ConvolutionCalculator; friend class DerivativeCalculator; private: - int ft; - int gt; - - int maxDeltaL; - double fThreshold; - double gThreshold; - // Shorthands - int kp1; - int kp1_2; - int kp1_d; - int kp1_dm1; - - MWNode *gNode; - MWNode *fNode; - NodeIndex *fIdx; - - T *aux[D + 1]; - T *gData; - T *fData; - double *oData[D]; - + // Current component selectors (bitfields) + int ft{0}; + int gt{0}; + + // Geometry / thresholds + int maxDeltaL; ///< max_d |f_l[d] - g_l[d]|; computed in calcMaxDeltaL() + double fThreshold; ///< (optional) threshold for f (may be set by calculators) + double gThreshold; ///< (optional) threshold for g (may be set by calculators) + + // Shorthands derived from the bound node + int kp1; ///< #points per dimension + int kp1_2; ///< kp1^2 + int kp1_d; ///< kp1^D (total points in a component block) + int kp1_dm1; ///< kp1^(D-1) + + // Bound nodes and indices + MWNode *gNode{nullptr}; + MWNode *fNode{nullptr}; + NodeIndex *fIdx{nullptr}; + + // Data pointers + T *aux[D + 1]{}; ///< [0]=f-comp, [1..D-1]=scratch, [D]=g-comp + T *gData{nullptr}; + T *fData{nullptr}; + double *oData[D]{}; ///< Per-dimension operator-specific metadata + + /// @brief Compute @ref maxDeltaL from the currently bound f/g nodes. void calcMaxDeltaL() { const auto &gl = this->gNode->getNodeIndex(); const auto &fl = *this->fIdx; @@ -126,4 +244,4 @@ template class OperatorState final { } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/OperatorStatistics.cpp b/src/operators/OperatorStatistics.cpp index f58ae2b0d..66aef71fe 100644 --- a/src/operators/OperatorStatistics.cpp +++ b/src/operators/OperatorStatistics.cpp @@ -23,6 +23,24 @@ * */ +/** + * @file OperatorStatistics.cpp + * @brief Implementation of lightweight counters and summaries used during + * multiwavelet operator application. + * + * @details + * This module aggregates per-thread counters while applying operators + * to multiwavelet nodes. 
It records: + * - Number of *g*-nodes (source nodes) computed. + * - Number of *f*-nodes (destination nodes) where an operator was applied. + * - Number of *generalized* destination nodes (as reported by MWNode::isGenNode()). + * - A small 8×8 histogram of applications by component pair (ft, gt), + * where `ft` and `gt` are component bitfields. + * + * Counters are kept in per-thread arrays (indexed by thread number) to avoid contention in hot loops; use + * flushNodeCounters() to accumulate into totals and reset the per-thread counters. + */ + #include "OperatorStatistics.h" #include "trees/MWNode.h" @@ -30,6 +48,17 @@ using namespace Eigen; namespace mrcpp { +/** + * @brief Construct an empty statistics object with per-thread accumulators. + * + * @details + * Allocates: + * - @c totCompCount: global 8×8 histogram (zero-initialized). + * - Per-thread scalar counters (@c fCount, @c gCount, @c genCount). + * - Per-thread 8×8 component histograms (@c compCount[i]). + * + * The number of threads is discovered via mrcpp_get_max_threads(). + */ OperatorStatistics::OperatorStatistics() : nThreads(mrcpp_get_max_threads()) , totFCount(0) @@ -57,6 +86,9 @@ OperatorStatistics::OperatorStatistics() } } +/** + * @brief Destroy statistics and free all dynamically allocated arrays. + */ OperatorStatistics::~OperatorStatistics() { for (int i = 0; i < this->nThreads; i++) { delete this->compCount[i]; } delete[] this->compCount; @@ -66,7 +98,16 @@ OperatorStatistics::~OperatorStatistics() { delete totCompCount; } -/** Sum all node counters from all threads. */ +/** + * @brief Accumulate all per-thread counters into totals and reset locals. + * + * @details + * After this call: + * - @c totFCount, @c totGCount, and @c totGenCount are increased by the + * sums over all threads. + * - @c totCompCount is incremented by each per-thread 8×8 histogram. + * - All per-thread counters/histograms are reset to zero.
+ */ void OperatorStatistics::flushNodeCounters() { for (int i = 0; i < this->nThreads; i++) { this->totFCount += this->fCount[i]; @@ -80,20 +121,54 @@ void OperatorStatistics::flushNodeCounters() { } } -/** Increment g-node usage counter. Needed for load balancing. */ -template void OperatorStatistics::incrementGNodeCounters(const MWNode &gNode) { +/** + * @brief Increment the *g*-node usage counter for the current thread. + * + * @tparam D Spatial dimension of the node. + * @tparam T Coefficient type. + * @param gNode Source node being processed (unused for counting). + * + * @note The thread index is obtained via mrcpp_get_thread_num(). + */ +template +void OperatorStatistics::incrementGNodeCounters(const MWNode &gNode) { int thread = mrcpp_get_thread_num(); this->gCount[thread]++; } -/** Increment operator application counter. */ -template void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int ft, int gt) { +/** + * @brief Increment the *f*-node application counters for the current thread. + * + * @tparam D Spatial dimension of the node. + * @tparam T Coefficient type. + * @param fNode Destination node to which an operator is applied. + * @param ft Destination component bitfield. + * @param gt Source component bitfield. + * + * @details + * Increments: + * - Per-thread @c fCount. + * - Per-thread component histogram at entry (ft, gt). + * - Per-thread @c genCount if @c fNode.isGenNode() is true. + */ +template +void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int ft, int gt) { int thread = mrcpp_get_thread_num(); this->fCount[thread]++; (*this->compCount[thread])(ft, gt) += 1; if (fNode.isGenNode()) { this->genCount[thread]++; } } +/** + * @brief Print a human-readable summary of accumulated totals. + * + * @param o Output stream. + * @return Reference to @p o to allow chaining. 
+ * + * @details + * The output includes total counts for g-nodes, f-nodes, generalized nodes, + * and the aggregated 8×8 (ft, gt) component histogram. + */ std::ostream &OperatorStatistics::print(std::ostream &o) const { o << std::setw(8); o << "*OperatorFunc statistics: " << std::endl << std::endl; @@ -104,6 +179,7 @@ std::ostream &OperatorStatistics::print(std::ostream &o) const { return o; } +/* ---- Explicit template instantiations for supported node types ---- */ template void OperatorStatistics::incrementFNodeCounters<1, double>(const MWNode<1, double> &fNode, int ft, int gt); template void OperatorStatistics::incrementFNodeCounters<2, double>(const MWNode<2, double> &fNode, int ft, int gt); template void OperatorStatistics::incrementFNodeCounters<3, double>(const MWNode<3, double> &fNode, int ft, int gt); diff --git a/src/operators/OperatorStatistics.h b/src/operators/OperatorStatistics.h index 9a51728c0..4e8fc6313 100644 --- a/src/operators/OperatorStatistics.h +++ b/src/operators/OperatorStatistics.h @@ -23,6 +23,27 @@ * */ +/** + * @file OperatorStatistics.h + * @brief Thread-aware counters and summaries for multiwavelet operator application. + * + * @details + * This helper aggregates lightweight statistics collected while applying + * operators to multiwavelet nodes. For performance and thread-safety, counts + * are first accumulated in per-thread storage and later merged into global + * totals using @ref flushNodeCounters(). + * + * Tracked quantities: + * - Total number of destination (*f*) nodes where an operator was applied. + * - Total number of source (*g*) nodes evaluated. + * - Total number of destination nodes marked as “generalized”. + * - An 8×8 histogram of component-pair usages (indexed by `(ft, gt)`). + * + * The class intentionally avoids synchronization primitives inside hot loops; + * callers should invoke @ref flushNodeCounters() at safe points to consolidate + * results and reset per-thread buffers. 
+ */ + #pragma once #include @@ -32,29 +53,75 @@ namespace mrcpp { +/** + * @class OperatorStatistics + * @brief Collects and reports counters during operator application. + * + * @note + * - Per-thread counters are sized using @c mrcpp_get_max_threads(). + * - Use the stream operator to print a human-readable summary. + */ class OperatorStatistics final { public: + /// Construct an empty statistics object with per-thread accumulators. OperatorStatistics(); + + /// Release all dynamically allocated per-thread buffers and histograms. ~OperatorStatistics(); + /** + * @brief Consolidate per-thread counters into global totals and reset locals. + * + * @details + * After calling this, @c totFCount, @c totGCount, @c totGenCount and + * @c totCompCount reflect all work since the previous flush, and the + * per-thread buffers are zeroed. + */ void flushNodeCounters(); - template void incrementFNodeCounters(const MWNode &fNode, int ft, int gt); - template void incrementGNodeCounters(const MWNode &gNode); + /** + * @brief Increment destination (*f*)-node counters for the current thread. + * @tparam D Spatial dimension of the node. + * @tparam T Coefficient/value type stored by the node. + * @param fNode Destination node being updated. + * @param ft Destination component bitfield (0–7). + * @param gt Source component bitfield (0–7). + * + * @details + * Increments the per-thread f-node count, updates the (ft,gt) entry of the + * per-thread 8×8 histogram, and increments the generalized-node count if + * @c fNode.isGenNode() returns true. + */ + template + void incrementFNodeCounters(const MWNode &fNode, int ft, int gt); + + /** + * @brief Increment source (*g*)-node counters for the current thread. + * @tparam D Spatial dimension of the node. + * @tparam T Coefficient/value type stored by the node. + * @param gNode Source node being processed (unused; for interface symmetry). 
+ */ + template + void incrementGNodeCounters(const MWNode &gNode); + + /// Print a summary of accumulated totals and the component histogram. friend std::ostream &operator<<(std::ostream &o, const OperatorStatistics &os) { return os.print(o); } protected: - int nThreads; - int totFCount; - int totGCount; - int totGenCount; - int *fCount; - int *gCount; - int *genCount; - Eigen::Matrix *totCompCount; - Eigen::Matrix **compCount; + int nThreads; ///< Number of worker threads. + int totFCount; ///< Global total of applied *f*-nodes. + int totGCount; ///< Global total of processed *g*-nodes. + int totGenCount; ///< Global total of applied generalized nodes. + + int *fCount; ///< Per-thread *f*-node counters (size = nThreads). + int *gCount; ///< Per-thread *g*-node counters (size = nThreads). + int *genCount; ///< Per-thread generalized-node counters (size = nThreads). + + Eigen::Matrix *totCompCount; ///< Global (ft,gt) 8×8 usage histogram. + Eigen::Matrix **compCount; ///< Per-thread 8×8 usage histograms. + /// Internal pretty-printer used by the stream operator. std::ostream &print(std::ostream &o) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PHOperator.cpp b/src/operators/PHOperator.cpp index 11ccee0e9..54632e15a 100644 --- a/src/operators/PHOperator.cpp +++ b/src/operators/PHOperator.cpp @@ -23,6 +23,24 @@ * */ +/** + * @file PHOperator.cpp + * @brief Implementation of a derivative operator assembled via PHCalculator. + * + * @details + * This module builds a single-component multiwavelet operator that approximates + * a spatial derivative of order 1 or 2. Construction proceeds by: + * 1) creating a @ref PHCalculator tailored to the current scaling basis and + * requested derivative order, + * 2) expanding to an @ref OperatorTree with a @ref TreeBuilder and a simple + * @ref BandWidthAdaptor (bandwidth = 1), + * 3) transforming/caching the operator for efficient application. 
+ * + * The class derives from @ref DerivativeOperator and uses the MRA’s root scale + * by default. The operator is stored as a single separable component and exposed + * through the common @ref MWOperator interface. + */ + #include "PHOperator.h" #include "treebuilders/BandWidthAdaptor.h" #include "treebuilders/PHCalculator.h" @@ -32,9 +50,17 @@ namespace mrcpp { -/** @returns New PHOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] order: Derivative order, defined for 1 and 2 +/** + * @brief Construct a PH-based derivative operator. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra MultiResolutionAnalysis defining the domain and basis. + * @param order Derivative order (supported: 1 or 2). + * + * @details + * Initializes the base @ref DerivativeOperator at the MRA root scale and + * triggers internal assembly via @ref initialize(). The resulting expansion + * contains a single operator block (rank-1 in the separable sense). */ template PHOperator::PHOperator(const MultiResolutionAnalysis &mra, int order) @@ -43,6 +69,18 @@ PHOperator::PHOperator(const MultiResolutionAnalysis &mra, int order) initialize(); } +/** + * @brief Assemble the operator tree for the requested derivative order. + * + * @details + * - Creates a @ref PHCalculator using the MRA’s scaling basis and the stored + * derivative order. + * - Uses a @ref BandWidthAdaptor with bandwidth 1 and the MRA’s maximum scale. + * - Builds an @ref OperatorTree with @ref TreeBuilder, computes its squared + * norm, and prepares the node cache for application. + * - Stores the built tree as a single raw term and initializes the operator + * expansion with @ref initOperExp(1). 
+ */ template void PHOperator::initialize() { auto o_mra = this->getOperatorMRA(); @@ -68,8 +106,9 @@ template void PHOperator::initialize() { this->initOperExp(1); } +/* Explicit template instantiations */ template class PHOperator<1>; template class PHOperator<2>; template class PHOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PHOperator.h b/src/operators/PHOperator.h index d5f58bf29..a041a4f11 100644 --- a/src/operators/PHOperator.h +++ b/src/operators/PHOperator.h @@ -23,33 +23,75 @@ * */ +/** + * @file PHOperator.h + * @brief Declaration of a Holoborodko-style smoothing derivative operator. + * + * @details + * This header declares @ref mrcpp::PHOperator, a lightweight derivative operator + * constructed from the smooth, low-noise differentiators introduced by + * Pavel Holoborodko (see + * + * reference link). + * + * The operator is assembled in the multiwavelet framework and is intended + * primarily for experimentation/validation with smoothing differentiators. + * For robust production work: + * - use @ref mrcpp::ABGVOperator for functions with cusps/discontinuities, + * - or @ref mrcpp::BSOperator for sufficiently smooth functions. + * + * @see mrcpp::DerivativeOperator, mrcpp::ABGVOperator, mrcpp::BSOperator + */ + #pragma once #include "DerivativeOperator.h" namespace mrcpp { -/** @class PHOperator +/** + * @class PHOperator + * @ingroup operators * - * @brief Derivative operator based on the smoothing derivative of - * - * Pavel Holoborodko - * . + * @brief Derivative operator based on Holoborodko’s smooth, low-noise differentiators. * - * NOTE: This is _not_ the recommended derivative operator for practial calculations, it's - * a proof-of-concept operator. Use the ABGVOperator for "cuspy" functions and the - * BSOperator for smooth functions. + * @tparam D Spatial dimension (1, 2, or 3). 
+ * + * @details + * This class derives from @ref DerivativeOperator and provides a separable, + * single-component derivative approximation whose stencil is defined by the + * Holoborodko differentiators. Internally, the concrete operator blocks are + * produced by a PH-specific calculator and stored in an @ref OperatorTree. + * + * @note This is **not** the recommended operator for general calculations. Prefer + * @ref ABGVOperator for non-smooth data and @ref BSOperator for smooth data. */ - template class PHOperator final : public DerivativeOperator { public: + /** + * @brief Construct a PH-based derivative operator. + * + * @param mra MultiResolutionAnalysis defining the domain and basis. + * @param order Derivative order (typically 1 or 2). + * + * @warning Orders beyond those implemented by the underlying calculator + * are not supported. + */ PHOperator(const MultiResolutionAnalysis &mra, int order); - PHOperator(const PHOperator &oper) = delete; - PHOperator &operator=(const PHOperator &oper) = delete; + + PHOperator(const PHOperator &oper) = delete; ///< Non-copyable + PHOperator &operator=(const PHOperator &oper) = delete; ///< Non-assignable protected: + /** + * @brief Build and cache the internal operator representation. + * + * @details + * Creates the PH calculator for the current scaling basis and requested order, + * assembles an @ref OperatorTree with bandwidth control, transforms it to the + * multiwavelet domain, and initializes the separable expansion. + */ void initialize(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PoissonKernel.cpp b/src/operators/PoissonKernel.cpp index 75e60eb7c..f21461aa9 100644 --- a/src/operators/PoissonKernel.cpp +++ b/src/operators/PoissonKernel.cpp @@ -23,14 +23,48 @@ * */ -/* +/** + * @file PoissonKernel.cpp + * @brief Builds a Gaussian expansion approximation of the 3D Poisson kernel. 
+ * + * @details + * This implementation constructs a separated approximation to the radial + * Poisson kernel + * \f[ + * \frac{1}{\lvert \mathbf r \rvert} + * \f] + * on a finite annulus \f$ r \in [r_{\min},\, r_{\max}] \f$ by means of a + * finite sum of Gaussians + * \f[ + * \frac{1}{r} \;\approx\; \sum_{m=1}^{M} \beta_m \, e^{-\alpha_m r^2}, + * \f] + * where the coefficients \f$ \{\alpha_m,\beta_m\} \f$ are obtained by + * truncating and discretizing (via the trapezoidal rule) a suitable integral + * representation of \f$ 1/r \f$ in logarithmic variables. The truncation + * bounds \f$[s_1, s_2]\f$ and the step \f$h\f$ are chosen to meet a requested + * relative accuracy \c epsilon on the normalized interval \f$[r_{\min}/r_{\max},\,1]\f$, + * after which the expansion is rescaled back to \f$[r_{\min},\,r_{\max}]\f$. * + * ### Inputs + * - \c epsilon: Target relative error for the expansion (heuristic, affects + * the truncation window and step size). + * - \c r_min, \c r_max: Inner/outer radii that define the interval of validity. * - * \date Jul 7, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø + * ### Algorithm sketch + * 1. Normalize the domain to \f$[r_0, 1]\f$ with \f$r_0 = r_{\min}/r_{\max}\f$ and set + * \f$r_1 = r_{\max}\f$ for subsequent rescaling. + * 2. Determine auxiliary parameters \f$t_1, t_2\f$ such that the tails of the + * integral representation are below \c epsilon. + * 3. Convert tails to truncation limits \f$s_1, s_2\f$ in logarithmic coordinates. + * 4. Choose trapezoidal step size \f$h\f$ as a function of \c epsilon and compute + * the number of terms \f$M\f$. + * 5. Form nodes \f$s_i = s_1 + i h\f$ and corresponding Gaussian parameters + * \f$\alpha_i, \beta_i\f$ (with endpoint halving for the trapezoid rule). + * 6. Rescale \f$\alpha_i, \beta_i\f$ from the normalized interval back to + * \f$[r_{\min}, r_{\max}]\f$ and append each term to the @ref GaussExp. 
* - * \breif + * The resulting expansion length is capped by \c MaxSepRank; exceeding this + * limit aborts construction. */ #include "PoissonKernel.h" @@ -42,27 +76,43 @@ namespace mrcpp { -/** generate an approximation of the 3d poisson kernel expanded in - * gaussian functions this routine assumes that the expansion be centered +/** + * @brief Construct a Gaussian expansion of the 3D Poisson kernel on \f$[r_{\min}, r_{\max}]\f$. + * + * @param epsilon Target relative accuracy for the expansion (heuristic). + * @param r_min Minimum radius of the interval of validity (\f$>0\f$). + * @param r_max Maximum radius of the interval of validity (\f$> r_{\min}\f$). + * + * @details + * The method chooses truncation limits \f$s_1, s_2\f$ and a step size \f$h\f$ + * for a trapezoidal discretization so that the contribution of neglected tails + * is below \c epsilon in the normalized variable. Each quadrature node yields + * one Gaussian term. Endpoint weights are halved, as per the trapezoidal rule. + * + * The final expansion is rescaled to the physical interval by the mappings + * \f$ \alpha \leftarrow \alpha / r_{\max}^2 \f$ and \f$ \beta \leftarrow \beta / r_{\max} \f$, + * ensuring that the approximation targets the original (unscaled) radius. + * + * @note If the number of terms exceeds @c MaxSepRank, construction aborts. 
*/ PoissonKernel::PoissonKernel(double epsilon, double r_min, double r_max) : GaussExp<1>() { - // Constructed on [rMin/rMax, 1.0], and then rescaled to [rMin,rMax] + // Constructed on [rMin/rMax, 1.0], then rescaled to [rMin, rMax] double r0 = r_min / r_max; double r1 = r_max; + // Choose t1, t2 so that tail contributions are below epsilon double t1 = 1.0L; while ((2.0 * t1 * std::exp(-t1)) > epsilon) t1 *= 1.1L; double t2 = 1.0L; while ((std::sqrt(t2) * std::exp(-t2) / r0) > epsilon) t2 *= 1.1L; - // Set the truncation limits s1,s2 of the integral (integrate over [s1,s2]) - // for achieving relative error epsilon + // Truncation window [s1, s2] ensuring relative error ~ epsilon double s1 = -std::log(2.0 * t1); double s2 = std::log(t2 / (r0 * r0)) / 2.0; - // Now, set the step size h for use in the trapezoidal rule for given MU + // Trapezoidal step size h determined from epsilon (empirical fit) double h = 1.0 / (0.2L - 0.47L * std::log10(epsilon)); int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1); if (n_exp > MaxSepRank) MSG_ABORT("Maximum separation rank exceeded."); @@ -73,12 +123,16 @@ PoissonKernel::PoissonKernel(double epsilon, double r_min, double r_max) double cosharg = std::cosh(arg); double onepexp = 1.0 + std::exp(-sinharg); + // Parameters before rescaling back to [r_min, r_max] double expo = 4.0L * (sinharg + std::log(onepexp)) * (sinharg + std::log(onepexp)); double coef = h * (4.0L / root_pi) * cosharg / onepexp; + // Rescale to physical interval expo *= 1.0 / (r1 * r1); coef *= 1.0 / r1; - if (i == 0 or i == (n_exp - 1)) coef *= 1.0 / 2.0; + + // Trapezoidal rule endpoint correction + if (i == 0 || i == (n_exp - 1)) coef *= 1.0 / 2.0; GaussFunc<1> gFunc(expo, coef); this->append(gFunc); diff --git a/src/operators/PoissonKernel.h b/src/operators/PoissonKernel.h index 725a6d68b..88718e678 100644 --- a/src/operators/PoissonKernel.h +++ b/src/operators/PoissonKernel.h @@ -23,15 +23,60 @@ * */ +/** + * @file PoissonKernel.h + * @brief Declaration 
of a Gaussian-expansion approximation to the 3D Poisson kernel. + */ + #pragma once #include "functions/GaussExp.h" namespace mrcpp { +/** + * @class PoissonKernel + * @brief Gaussian expansion of the radial Poisson kernel \f$ 1/r \f$ on a bounded interval. + * + * @details + * Builds a separated, finite Gaussian expansion that approximates the 3D Poisson kernel + * \f[ + * \frac{1}{\lvert \mathbf r \rvert} \;\approx\; \sum_{m=1}^{M} \beta_m \, e^{-\alpha_m r^2}, + * \f] + * valid for radii \f$ r \in [r_{\min},\, r_{\max}] \f$. The coefficients + * \f$ \{\alpha_m,\beta_m\}_{m=1}^M \f$ are produced by truncating and discretizing + * (via a trapezoidal rule in logarithmic variables) a continuous representation of + * \f$ 1/r \f$, with the truncation window and step size chosen to meet a target + * relative tolerance \p epsilon on the *normalized* interval + * \f$ [r_{\min}/r_{\max},\,1] \f$ and then rescaled back to \f$ [r_{\min}, r_{\max}] \f$. + * + * The resulting object is a one-dimensional @ref GaussExp "GaussExp<1>" whose entries + * can be used by separable convolution operators to assemble higher-dimensional + * kernels and operators. + * + * @note + * - Requires \f$ r_{\min} > 0 \f$ and \f$ r_{\max} > r_{\min} \f$. + * - The number of Gaussian terms \f$ M \f$ is bounded internally (see `MaxSepRank`); + * exceeding this bound will abort construction in the implementation. + * + * @see GaussExp, GaussFunc + */ class PoissonKernel final : public GaussExp<1> { public: + /** + * @brief Construct a Gaussian expansion of \f$ 1/r \f$ on \f$ [r_{\min}, r_{\max}] \f$. + * + * @param epsilon Target relative accuracy (heuristic; smaller ⇒ more terms). + * @param r_min Lower radius of validity, must satisfy \f$ r_{\min} > 0 \f$. + * @param r_max Upper radius of validity, must satisfy \f$ r_{\max} > r_{\min} \f$. 
+ * + * @details + * Populates this @ref GaussExp with terms \f$ (\alpha_m,\beta_m) \f$ so that + * \f$ \sum_m \beta_m e^{-\alpha_m r^2} \approx 1/r \f$ over the requested interval. + * Coefficients are ordered according to the underlying quadrature and include + * standard endpoint weighting for the trapezoidal rule. + */ PoissonKernel(double epsilon, double r_min, double r_max); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PoissonOperator.cpp b/src/operators/PoissonOperator.cpp index 582d990a8..b12885d11 100644 --- a/src/operators/PoissonOperator.cpp +++ b/src/operators/PoissonOperator.cpp @@ -23,27 +23,60 @@ * */ +/** + * @file PoissonOperator.cpp + * @brief Definition of a separable convolution operator that approximates the 3D Poisson kernel. + * + * The operator is assembled from a one–dimensional Gaussian expansion of \f$1/r\f$ + * (see @ref PoissonKernel). Each 1D term is projected to a function tree and lifted + * to a 2D operator block by cross-correlation; the full 3D operator is built as a + * separable product and cached for efficient application. + */ + #include "PoissonOperator.h" #include "PoissonKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** @returns New PoissonOperator object - * @param[in] mra: Which MRA the operator is defined - * @param[in] pr: Build precision, closeness to 1/r - * @details This will construct a gaussian expansion to approximate 1/r, - * and project each term into a one-dimensional MW operator. Subsequent - * application of this operator will apply each of the terms to the input - * function in all Cartesian directions. +/** + * @class PoissonOperator + * @brief Convolution operator approximating the 3D Poisson kernel \f$1/\lvert \mathbf r\rvert\f$. 
+ * + * @details + * The kernel is approximated on a bounded radial interval by a finite Gaussian expansion + * \f[ + * \frac{1}{r} \approx \sum_{m=1}^{M} \beta_m\, e^{-\alpha_m r^2}, + * \f] + * which enables a separated representation amenable to fast multiwavelet application + * along Cartesian axes. Construction proceeds by: + * 1) choosing a target build precision to set the effective kernel width, + * 2) computing a validity interval \f$[r_{\min}, r_{\max}]\f$ from the MRA, + * 3) generating the Gaussian terms via @ref PoissonKernel, and + * 4) projecting and lifting each term into operator blocks before caching. + */ + +/** + * @brief Build a Poisson operator on the default root/reach of the provided MRA. + * + * @param mra Three–dimensional @ref MultiResolutionAnalysis defining domain and basis. + * @param prec Target build precision (heuristic closeness to \f$1/r\f$); smaller ⇒ tighter kernel. + * + * @details + * - Uses @c k_prec = prec/10 for kernel projection and @c o_prec = prec for operator assembly. + * - The radial interval is inferred from @p mra: + * - \f$ r_{\min} = \text{MRA.calcMinDistance}(k\_prec) \f$ (resolution-limited), + * - \f$ r_{\max} = \text{MRA.calcMaxDistance}() \f$ (domain-limited). + * - Constructs a @ref PoissonKernel on \f$[r_{\min}, r_{\max}]\f$, initializes internal + * operator trees, and prepares caches for application. 
*/ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec) : ConvolutionOperator<3>(mra) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; - double k_prec = prec / 10.0; + double o_prec = prec; // operator-assembly tolerance + double k_prec = prec / 10.0; // kernel-projection tolerance double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); @@ -54,17 +87,37 @@ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double p Printer::setPrintLevel(oldlevel); } +/** + * @brief Build a Poisson operator with explicit scale window (root/reach), e.g. for PBC-style setups. + * + * @param mra Three–dimensional @ref MultiResolutionAnalysis. + * @param prec Target build precision (heuristic closeness to \f$1/r\f$). + * @param root Operator root level (coarsest scale where the operator lives). + * @param reach Operator reach in levels (half-width around @p root); negative ⇒ auto-detect. + * + * @details + * - Uses a tighter kernel projection tolerance @c k_prec = prec/100 and @c o_prec = prec + * for assembling the operator blocks. + * - The base radial extent is obtained from the MRA; then \f$ r_{\max} \f$ is rescaled + * to reflect the selected operator scale window (periodic-world style adjustment): + * \f[ + * r_{\max} \leftarrow r_{\max} \, 2^{-(\text{oper\_root} - \text{MRA.getRootScale()})} + * \, \bigl( 2\,\text{oper\_reach} + 1 \bigr). + * \f] + * - Constructs and initializes the Gaussian expansion accordingly and prepares + * the operator components and caches. 
+ */ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec, int root, int reach) : ConvolutionOperator<3>(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; - double k_prec = prec / 100.0; + double o_prec = prec; // operator-assembly tolerance + double k_prec = prec / 100.0; // very tight kernel-projection tolerance double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); - // Adjust r_max for periodic world + // Adjust r_max to the chosen operator scale window (e.g., periodic-world bandwidth) auto rel_root = this->oper_root - this->MRA.getRootScale(); r_max *= std::pow(2.0, -rel_root); r_max *= (2.0 * this->oper_reach) + 1.0; @@ -76,4 +129,4 @@ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double p Printer::setPrintLevel(oldlevel); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PoissonOperator.h b/src/operators/PoissonOperator.h index ad65e9cae..c03b4e85f 100644 --- a/src/operators/PoissonOperator.h +++ b/src/operators/PoissonOperator.h @@ -23,28 +23,72 @@ * */ +/** + * @file PoissonOperator.h + * @brief Separable multiwavelet convolution operator for the 3D Poisson kernel. + * + * The operator realizes a fast approximation of the Green's function + * \f$ P(\mathbf r-\mathbf r') = 1/\lvert \mathbf r-\mathbf r'\rvert \f$ + * by expanding it into a finite sum of Gaussians, + * \f[ + * \frac{1}{\lvert \mathbf r-\mathbf r'\rvert} + * \;\approx\; + * \sum_{m=1}^{M} \alpha_m \exp\!\big(-\beta_m \lvert \mathbf r-\mathbf r'\rvert^2\big), + * \f] + * which enables a tensor–separable application along Cartesian axes in the + * multiwavelet framework. See @ref ConvolutionOperator for assembly details and + * @ref PoissonOperator (implementation) for construction mechanics. 
+ */ + #pragma once #include "ConvolutionOperator.h" namespace mrcpp { -/** @class PoissonOperator +/** + * @class PoissonOperator + * @brief Convolution with the Poisson Green's function kernel in 3D. + * + * @details + * The Poisson kernel is approximated by a Gaussian expansion, allowing the operator + * to be applied as a separated product over Cartesian directions: + * \f[ + * P(\mathbf r-\mathbf r') + * = \frac{1}{\lvert \mathbf r-\mathbf r'\rvert} + * \;\approx\; \sum_{m=1}^{M} \alpha_m \exp\!\big(-\beta_m \lvert \mathbf r-\mathbf r'\rvert^2\big). + * \f] + * Each 1D Gaussian term is projected to a function tree and lifted to operator blocks + * via cross-correlation; the full 3D operator is then cached for efficient application. * - * @brief Convolution with the Poisson Green's function kernel + * The expansion accuracy and kernel width are controlled by the requested build precision. + * An overload with explicit @p root/@p reach can confine the operator to a chosen scale window + * (useful for periodic-style setups or domain-decomposition experiments). * - * @details The Poisson kernel is approximated as a sum of Gaussian - * functions in order to allow for separated application of the operator - * in the Cartesian directions: - * \f$ P(r-r') = \frac{1}{|r-r'|} \approx \sum_m^M \alpha_m e^{-\beta_m (r-r')^2} \f$ + * @see ConvolutionOperator, PoissonKernel */ - class PoissonOperator final : public ConvolutionOperator<3> { public: + /** + * @brief Construct a Poisson operator on the default root/reach of the provided MRA. + * + * @param mra 3D @ref MultiResolutionAnalysis defining the domain and scaling basis. + * @param prec Target build precision controlling the Gaussian expansion (smaller ⇒ tighter/longer rank). + */ PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec); + + /** + * @brief Construct a Poisson operator with an explicit scale window. + * + * @param mra 3D @ref MultiResolutionAnalysis. + * @param prec Target build precision. 
+ * @param root Operator root level (coarsest scale where the operator resides). + * @param reach Operator reach (half-width, in levels) around @p root; affects bandwidth/PBC-like extent. + */ PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec, int root, int reach = 1); - PoissonOperator(const PoissonOperator &oper) = delete; - PoissonOperator &operator=(const PoissonOperator &oper) = delete; + + PoissonOperator(const PoissonOperator &oper) = delete; ///< Non-copyable + PoissonOperator &operator=(const PoissonOperator &oper) = delete; ///< Non-assignable }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/TimeEvolutionOperator.cpp b/src/operators/TimeEvolutionOperator.cpp index 09913a591..dd14badb3 100644 --- a/src/operators/TimeEvolutionOperator.cpp +++ b/src/operators/TimeEvolutionOperator.cpp @@ -23,6 +23,27 @@ * */ +/** + * @file TimeEvolutionOperator.cpp + * @brief Construction of (real/imaginary) parts of the Schrödinger time-evolution + * operator in the multiwavelet framework. + * + * The implementation builds a separable, multi-resolution representation of the + * free-particle time-evolution semigroup + * \f[ + * U(t) = e^{\, i t \Delta} + * \f] + * (or its real/imaginary part), using cross-correlation calculators and + * precomputed power integrals \f$ \widetilde J_m \f$ (see @ref JpowerIntegrals). + * Two build modes are provided: + * - **Adaptive**, refining no deeper than scale \f$N=18\f$, which bounds the number of + * power integrals required. + * - **Uniform** down to a user-specified finest scale. + * + * Assembly follows the standard operator pipeline: + * projection/lifting → multiwavelet transform → cache/init of operator blocks. + */ + #include "TimeEvolutionOperator.h" //#include "MRCPP/MWOperators" @@ -56,20 +77,30 @@ namespace mrcpp { -/** @brief A uniform constructor for TimeEvolutionOperator class. - * - * @param[in] mra: MRA. - * @param[in] prec: precision.
- * @param[in] time: the time moment (step). - * @param[in] finest_scale: the operator tree is constructed uniformly down to this scale. - * @param[in] imaginary: defines the real (faulse) or imaginary (true) part of the semigroup. - * @param[in] max_Jpower: maximum amount of power integrals used. +/** + * @brief Uniform constructor. * - * @details Constructs either real or imaginary part of the Schrodinger semigroup at a given time moment. + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra Target @ref MultiResolutionAnalysis defining domain and basis. + * @param prec Build precision for assembly and pruning. + * @param time Time parameter \f$ t \f$. + * @param finest_scale Uniform build depth (finest level) of the operator tree. + * @param imaginary If `true`, build the imaginary part; otherwise, the real part. + * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ to retain. * + * @details + * Builds a **uniform** operator down to @p finest_scale. Internally sets up a + * @ref SchrodingerEvolution_CrossCorrelation calculator and calls + * the uniform @ref initialize(double,int,bool,int) overload. The operator + * expansion is finalized via `initOperExp(1)`. 
*/ template -TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower) +TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, + double prec, + double time, + int finest_scale, + bool imaginary, + int max_Jpower) : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well { int oldlevel = Printer::setPrintLevel(0); @@ -80,27 +111,32 @@ TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis initialize(time, finest_scale, imaginary, max_Jpower); // will go outside of the constructor in future - this->initOperExp(1); // this turns out to be important + this->initOperExp(1); // Important to finalize component mapping Printer::setPrintLevel(oldlevel); } -/** @brief An adaptive constructor for TimeEvolutionOperator class. - * - * @param[in] mra: MRA. - * @param[in] prec: precision. - * @param[in] time: the time moment (step). - * @param[in] imaginary: defines the real (faulse) or imaginary (true) part of the semigroup. - * @param[in] max_Jpower: maximum amount of power integrals used. +/** + * @brief Adaptive constructor. * - * @details Adaptively constructs either real or imaginary part of the Schrodinger semigroup at a given time moment. - * It is recommended for use in case of high polynomial order in use of the scaling basis. - * - * @note For technical reasons the operator tree is constructed no deeper than to scale \f$ n = 18 \f$. - * This should be weakened in future. + * @tparam D Spatial dimension (1, 2, or 3). + * @param mra Target @ref MultiResolutionAnalysis. + * @param prec Build precision. + * @param time Time parameter \f$ t \f$ of the semigroup. + * @param imaginary If `true`, build the imaginary part; otherwise, the real part. + * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ to retain. 
* + * @details + * Builds an **adaptive** operator down to a fixed scale \f$N=18\f$, which keeps the number + * of necessary power integrals bounded. The assembly uses a + * @ref TimeEvolution_CrossCorrelationCalculator fed by a per-scale + * map of @ref JpowerIntegrals. */ template -TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, bool imaginary, int max_Jpower) +TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, + double prec, + double time, + bool imaginary, + int max_Jpower) : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well { int oldlevel = Printer::setPrintLevel(0); @@ -111,20 +147,29 @@ TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis initialize(time, imaginary, max_Jpower); // will go outside of the constructor in future - this->initOperExp(1); // this turns out to be important + this->initOperExp(1); // Important to finalize component mapping Printer::setPrintLevel(oldlevel); } -/** @brief Creates Re or Im of operator +/** + * @brief Adaptive build: create real or imaginary part of the operator. + * + * @param time Time parameter \f$ t \f$. + * @param imaginary If `true`, build the imaginary part; otherwise, the real part. + * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. * - * @details Adaptive down to scale \f$ N = 18 \f$. - * This scale limit bounds the amount of JpowerIntegrals - * to be calculated. - * @note In future work we plan to optimize calculation of JpowerIntegrals so that we calculate - * only needed ones, while building the tree (in progress). + * @details + * Builds **adaptively** down to scale \f$ N = 18 \f$. For each scale + * \f$ n=0,\dots,N+1 \f$ a corresponding @ref JpowerIntegrals object is created + * with parameter \f$ a = t\,4^n \f$. 
The operator is assembled using a + * @ref TimeEvolution_CrossCorrelationCalculator and finalized by multiwavelet + * transform, rough-scale noise removal, square-norm evaluation, and cache setup. * + * @note The fixed depth ensures a bounded number of power integrals while building. + * Future work aims to compute only the power integrals actually needed during build. */ -template void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_Jpower) { +template +void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_Jpower) { int N = 18; double o_prec = this->build_prec; @@ -144,7 +189,7 @@ template void TimeEvolutionOperator::initialize(double time, bool ima Timer trans_t; o_tree->mwTransform(BottomUp); o_tree->removeRoughScaleNoise(); - // o_tree->clearSquareNorm(); //does not affect printing + // o_tree->clearSquareNorm(); // does not affect printing o_tree->calcSquareNorm(); o_tree->setupOperNodeCache(); @@ -156,12 +201,23 @@ template void TimeEvolutionOperator::initialize(double time, bool ima for (int n = 0; n <= N + 1; n++) delete J[n]; } -/** @brief Creates Re or Im of operator +/** + * @brief Uniform build: create real or imaginary part of the operator. * - * @details Uniform down to finest scale. + * @param time Time parameter \f$ t \f$. + * @param finest_scale Finest (uniform) scale to which the operator tree is constructed. + * @param imaginary If `true`, build the imaginary part; otherwise, the real part. + * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. * + * @details + * Builds **uniformly** down to @p finest_scale using a @ref SplitAdaptor. + * A threshold of \f$ \text{prec}/1000 \f$ is used while creating + * @ref JpowerIntegrals for scales \f$ n=0,\dots,N+1 \f$ with + * \f$ a = t\,4^n \f$. The resulting @ref CornerOperatorTree is then transformed, + * squared-normed, and cached for later application. 
*/ -template void TimeEvolutionOperator::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) { +template +void TimeEvolutionOperator::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) { double o_prec = this->build_prec; auto o_mra = this->getOperatorMRA(); @@ -191,15 +247,21 @@ template void TimeEvolutionOperator::initialize(double time, int fine for (int n = 0; n <= N + 1; n++) delete J[n]; } -/** @brief Creates Re or Im of operator (in progress) +/** + * @brief Semi-uniform build (prototype; not ready for production). + * + * @param time Time parameter \f$ t \f$. + * @param imaginary If `true`, build the imaginary part; otherwise, the real part. + * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. * - * @details Tree construction starts uniformly and then continues adaptively down to scale \f$ N = 18 \f$. - * This scale limit bounds the amount of JpowerIntegrals - * to be calculated. - * @note This method is not ready for use and should not be used (in progress). + * @details + * Starts with a small uniform prefix of the operator tree and continues adaptively + * down to \f$ N = 18 \f$. **Not implemented**—kept as a placeholder for future work. * + * @warning This method deliberately aborts at runtime. 
*/ -template void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) { +template +void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) { MSG_ERROR("Not implemented yet method."); double o_prec = this->build_prec; @@ -236,6 +298,7 @@ template void TimeEvolutionOperator::initializeSemiUniformly(double t for (int n = 0; n <= N + 1; n++) delete J[n]; } +/* Explicit template instantiations */ template class TimeEvolutionOperator<1>; template class TimeEvolutionOperator<2>; template class TimeEvolutionOperator<3>; diff --git a/src/operators/TimeEvolutionOperator.h b/src/operators/TimeEvolutionOperator.h index 839ba7b40..a33f4745f 100644 --- a/src/operators/TimeEvolutionOperator.h +++ b/src/operators/TimeEvolutionOperator.h @@ -23,6 +23,25 @@ * */ +/** + * @file TimeEvolutionOperator.h + * @brief Interface for a separable multiwavelet representation of the + * free-particle Schrödinger time-evolution semigroup. + * + * The operator approximates (real or imaginary parts of) + * \f[ + * U(t) \;=\; e^{\, i\,t\,\Delta} + * \f] + * by building an operator tree via cross-correlations between scaling functions + * and a kernel whose coefficients are expressed through power integrals + * \f$ \widetilde J_m \f$. Two construction modes are exposed: + * - **Uniform** to a user-specified finest scale. + * - **Adaptive** down to a fixed scale (bounded work in power integrals). + * + * See the .cpp for build details and post-processing steps (MW transform, + * rough-scale filtering, caching, etc.). + */ + #pragma once #include "ConvolutionOperator.h" @@ -31,43 +50,118 @@ namespace mrcpp { -/** @class TimeEvolutionOperator +/** + * @class TimeEvolutionOperator + * @ingroup operators + * + * @brief Multiwavelet operator for the free-particle Schrödinger semigroup. * - * @brief Semigroup of the free-particle Schrodinger equation + * @tparam D Spatial dimensionality (1, 2, or 3). 
* - * @details Represents the semigroup - * \f$ - * \exp \left( i t \partial_x^2 \right) - * . - * \f$ - * Matrix elements (actual operator tree) of the operator can be obtained by calling getComponent(0, 0). + * @details + * Provides a separable @ref ConvolutionOperator-like interface that assembles + * the matrix elements of + * \f$ U(t) = e^{\, i\,t\,\Delta} \f$ + * (or its real/imaginary part) in a multi-resolution setting. The actual + * operator blocks can be accessed via + * @code + * getComponent(0, 0) + * @endcode + * after construction (rank-1 expansion in current implementation). * - * @note So far implementation is done for Legendre scaling functions in 1d. + * Internally, coefficients are generated from per-scale power integrals + * \f$ \widetilde J_m \f$ and a dedicated cross-correlation calculator suited + * for the Schrödinger kernel. * - * \todo: Extend to D dimensinal on a general interval [a, b] in the future. + * @note Current implementation targets Legendre scaling functions; practical + * use has primarily focused on 1D, but the interface is templated in @p D. * + * @todo Extend to general dimension on arbitrary intervals \f$[a,b]\f$. */ template class TimeEvolutionOperator : public ConvolutionOperator // One can use ConvolutionOperator instead as well { public: - TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower = 30); - TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, bool imaginary, int max_Jpower = 30); - TimeEvolutionOperator(const TimeEvolutionOperator &oper) = delete; - TimeEvolutionOperator &operator=(const TimeEvolutionOperator &oper) = delete; + /** + * @brief Construct a **uniform** time-evolution operator. + * + * @param mra Target @ref MultiResolutionAnalysis (domain/basis). + * @param prec Build precision controlling pruning and tolerances. + * @param time Time parameter \f$ t \f$. 
+ * @param finest_scale Finest (uniform) scale to which the operator is built. + * @param imaginary If `true` build the imaginary part; otherwise real part. + * @param max_Jpower Maximum number of power-integral terms (default: 30). + */ + TimeEvolutionOperator(const MultiResolutionAnalysis &mra, + double prec, + double time, + int finest_scale, + bool imaginary, + int max_Jpower = 30); + + /** + * @brief Construct an **adaptive** time-evolution operator. + * + * @param mra Target @ref MultiResolutionAnalysis (domain/basis). + * @param prec Build precision controlling pruning and tolerances. + * @param time Time parameter \f$ t \f$. + * @param imaginary If `true` build the imaginary part; otherwise real part. + * @param max_Jpower Maximum number of power-integral terms (default: 30). + * + * @details + * The adaptive build proceeds down to a fixed scale to bound the number of + * required power integrals; see the source for the current depth choice. + */ + TimeEvolutionOperator(const MultiResolutionAnalysis &mra, + double prec, + double time, + bool imaginary, + int max_Jpower = 30); + + TimeEvolutionOperator(const TimeEvolutionOperator &oper) = delete; ///< Non-copyable + TimeEvolutionOperator &operator=(const TimeEvolutionOperator &oper) = delete; ///< Non-assignable virtual ~TimeEvolutionOperator() = default; + /// @return The build precision used to assemble the operator. double getBuildPrec() const { return this->build_prec; } protected: + /** @name Builder entry points (implementation detail) + * Internal construction routines used by the public constructors. + */ + ///@{ + /** + * @brief Uniform build to @p finest_scale. + * @param time Time parameter \f$ t \f$. + * @param finest_scale Finest scale to which the tree is constructed. + * @param imaginary Build imaginary (true) or real (false) part. + * @param max_Jpower Maximum number of power-integral terms. 
+ */ void initialize(double time, int finest_scale, bool imaginary, int max_Jpower); + + /** + * @brief Adaptive build (fixed maximum depth). + * @param time Time parameter \f$ t \f$. + * @param imaginary Build imaginary (true) or real (false) part. + * @param max_Jpower Maximum number of power-integral terms. + */ void initialize(double time, bool imaginary, int max_Jpower); + + /** + * @brief Semi-uniform prototype (not ready for use). + * @param time Time parameter \f$ t \f$. + * @param imaginary Build imaginary (true) or real (false) part. + * @param max_Jpower Maximum number of power-integral terms. + * @warning This method is an unfinished prototype that reports a "not implemented" error when called; do not use it. + */ void initializeSemiUniformly(double time, bool imaginary, int max_Jpower); + ///@} + /// Set the build precision recorded by this operator. void setBuildPrec(double prec) { this->build_prec = prec; } - double build_prec{-1.0}; - SchrodingerEvolution_CrossCorrelation *cross_correlation{nullptr}; + double build_prec{-1.0}; ///< Build precision (assembly/pruning). + SchrodingerEvolution_CrossCorrelation *cross_correlation{nullptr}; ///< Per-dimension cross-correlation engine. }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From b30b76a120052b4a5caa98db57fce110549743ff Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Tue, 28 Oct 2025 18:13:01 +0300 Subject: [PATCH 05/51] Update BSOperator.h --- src/operators/BSOperator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operators/BSOperator.h b/src/operators/BSOperator.h index deef99407..d5aacb615 100644 --- a/src/operators/BSOperator.h +++ b/src/operators/BSOperator.h @@ -83,7 +83,7 @@ template class BSOperator final : public DerivativeOperator { * * @code * MultiResolutionAnalysis<1> mra(...); - * BSOperator<1> Dx(mra, /* order = */ 1); // first derivative in 1D + * BSOperator<1> Dx(mra, 1); // first derivative in 1D * // apply Dx to a function tree / coefficient vector later... 
* @endcode */ From 6bcfb114f2f56a7786214bb26db2bf8dc0238b22 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Wed, 29 Oct 2025 15:06:16 +0300 Subject: [PATCH 06/51] Done doxygen documenttion insiede treebuilders folder --- src/treebuilders/ABGVCalculator.cpp | 86 +++ src/treebuilders/BSCalculator.cpp | 96 ++- src/treebuilders/ConvolutionCalculator.cpp | 223 ++++++- src/treebuilders/CopyAdaptor.cpp | 117 ++++ .../CrossCorrelationCalculator.cpp | 98 ++- src/treebuilders/DerivativeCalculator.cpp | 190 +++++- src/treebuilders/PHCalculator.cpp | 107 ++- src/treebuilders/ProjectionCalculator.cpp | 79 ++- ...meEvolution_CrossCorrelationCalculator.cpp | 113 +++- src/treebuilders/TreeBuilder.cpp | 148 ++++- src/treebuilders/add.cpp | 171 +++-- src/treebuilders/apply.cpp | 625 ++++++++++++------ src/treebuilders/complex_apply.cpp | 131 ++-- src/treebuilders/grid.cpp | 335 ++++++---- src/treebuilders/map.cpp | 110 ++- src/treebuilders/multiply.cpp | 270 ++++---- src/treebuilders/project.cpp | 156 +++-- 17 files changed, 2321 insertions(+), 734 deletions(-) diff --git a/src/treebuilders/ABGVCalculator.cpp b/src/treebuilders/ABGVCalculator.cpp index 10252f46a..11c6e6144 100644 --- a/src/treebuilders/ABGVCalculator.cpp +++ b/src/treebuilders/ABGVCalculator.cpp @@ -23,6 +23,30 @@ * */ +/** + * @file ABGVCalculator.cpp + * @brief Local block assembly for the Alpert–Beylkin–Gines–Vozovoi derivative operator. + * + * @details + * This module implements the calculator that fills the per-node matrix blocks for the + * ABGV derivative operator used in multiresolution form. It is consumed by a + * TreeBuilder to populate an OperatorTree with local stencil entries expressed in + * the chosen scaling basis (interpolating or Legendre). 
+ * + * The assembly depends on: + * - the basis type and quadrature order, + * - precomputed endpoint values of basis functions on the reference interval [0,1], + * - a basis-dependent local derivative matrix K, + * - two boundary weights A and B that select central, forward, backward, or + * semi-local differences. + * + * For each operator node the calculator determines the relative logical offset + * between interacting cells. Only three cases produce non-zero local couplings: + * left neighbor, same cell, and right neighbor. The four component blocks of the + * 2-by-2 cell coupling are then filled accordingly, rescaled to the current level, + * compressed to multiwavelet form, and cached with per-node norms. + */ + #include "ABGVCalculator.h" #include "core/InterpolatingBasis.h" #include "core/LegendreBasis.h" @@ -35,6 +59,22 @@ using Eigen::VectorXd; namespace mrcpp { +/** + * @brief Construct an ABGVCalculator and precompute basis-dependent tables. + * + * @param basis Scaling basis that defines quadrature order and function family. + * @param a Left boundary weight that controls semi-local coupling. + * @param b Right boundary weight that controls semi-local coupling. + * + * @details + * The constructor allocates and fills: + * - K: a kp1-by-kp1 local derivative matrix assembled on the reference cell, + * - valueZero: endpoint values phi_i(0) for all basis indices, + * - valueOne: endpoint values phi_i(1) for all basis indices. + * + * The exact formulas are basis dependent and computed in calcKMatrix and + * calcValueVectors respectively. + */ ABGVCalculator::ABGVCalculator(const ScalingBasis &basis, double a, double b) : A(a) , B(b) { @@ -46,6 +86,18 @@ ABGVCalculator::ABGVCalculator(const ScalingBasis &basis, double a, double b) calcValueVectors(basis); } +/** + * @brief Precompute endpoint values of scaling functions on [0, 1]. + * + * @param basis Scaling basis. 
+ * + * @details + * - Interpolating basis: values are obtained by direct evaluation at 0 and 1. + * - Legendre basis on [0, 1]: closed-form values are used. + * For index i we set + * valueOne(i) = sqrt(2*i + 1), + * valueZero(i) = (-1)^i * sqrt(2*i + 1). + */ void ABGVCalculator::calcValueVectors(const ScalingBasis &basis) { int kp1 = basis.getQuadratureOrder(); double sqrtCoef[kp1]; @@ -72,6 +124,19 @@ void ABGVCalculator::calcValueVectors(const ScalingBasis &basis) { } } +/** + * @brief Assemble the local derivative matrix K on the reference cell. + * + * @param basis Scaling basis. + * + * @details + * The construction of K depends on the basis family: + * - Interpolating basis: K(i,j) = 2 * sqrt(w_j) * d(phi_i)/dx evaluated at x_j, + * where (x_j, w_j) are Gauss–Legendre quadrature nodes and weights provided + * by QuadratureCache. The factor 2 accounts for mapping from [-1,1] to [0,1]. + * - Legendre basis: a closed-form sparse pattern is used where K(j,i) is non-zero + * only if (i - j) is odd, in which case K(j,i) = 2 * sqrt(2i+1) * sqrt(2j+1). + */ void ABGVCalculator::calcKMatrix(const ScalingBasis &basis) { int kp1 = basis.getQuadratureOrder(); double sqrtCoef[kp1]; @@ -101,6 +166,27 @@ void ABGVCalculator::calcKMatrix(const ScalingBasis &basis) { } } +/** + * @brief Fill the local operator block for a given operator node and finalize it. + * + * @param node Operator node to be populated. + * + * @details + * The node couples two 1D intervals at the same scale; its logical index encodes + * which pair is assembled. Let l = idx[1] - idx[0]. Three cases are handled: + * + * - l = 0: intra-cell coupling. All four sub-blocks are filled using endpoint + * values and K, with boundary weights A and B selecting central or semi-local + * behavior. + * - l = +1: right neighbor coupling. Only the block that mixes left and right + * components is filled, proportional to B. + * - l = -1: left neighbor coupling. 
Only the symmetric block is filled, + * proportional to A. + * + * After filling, all entries are scaled by 2^(n+1) where n = idx.getScale() to + * account for the derivative scaling at that level, then the node is transformed + * with compression, marked as having coefficients, and its norms are computed. + */ void ABGVCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); diff --git a/src/treebuilders/BSCalculator.cpp b/src/treebuilders/BSCalculator.cpp index be6285206..565cd9c00 100644 --- a/src/treebuilders/BSCalculator.cpp +++ b/src/treebuilders/BSCalculator.cpp @@ -23,6 +23,44 @@ * */ +/** + * @file BSCalculator.cpp + * @brief Local stencil builder for smooth multiresolution derivative operators (“BS” family). + * + * @details + * The **BSCalculator** assembles the *local* building blocks used by the smooth + * derivative operator (see BSOperator). For a chosen scaling basis and derivative + * order \f$ n\in\{1,2,3\} \f$, it loads three pretabulated coupling matrices + * \f$S_{-1}, S_{0}, S_{+1}\f$ which represent the action of the derivative on a + * 1D scaling block and its immediate neighbors (left, center, right) at a given scale. + * + * Source of the matrices: + * - Files are looked up via `details::find_filters()`. + * - Filenames depend on the scaling basis type and derivative order: + * - Legendre scaling: `L_b-spline-deriv{n}.txt` + * - Interpolating scaling: `I_b-spline-deriv{n}.txt` + * - For each supported polynomial order `kp1 = 2..20`, the file stores a stacked + * 3·kp1 × kp1 array that is split into the three kp1 × kp1 blocks + * \f$S_{+1}\f$, \f$S_{0}\f$, \f$S_{-1}\f$ (in that order). 
+ * + * Application on a node: + * - Given a 2D operator node (with index difference \f$\ell = i_1 - i_0 \in \{-1,0,+1\}\f$), + * BSCalculator writes the appropriate block(s) into the 2×2 corner layout of the node: + * - \f$\ell = -1\f$: left-neighbor coupling uses \f$S_{-1}\f$ + * - \f$\ell = 0 \f$: center block uses \f$S_{0}\f$, off-diagonals use \f$S_{\pm 1}\f$ + * - \f$\ell = +1\f$: right-neighbor coupling uses \f$S_{+1}\f$ + * + * Scale factor: + * - Derivatives scale as \f$2^{n\,(j+1)}\f$ where \f$n\f$ is the derivative order + * and \f$j+1\f$ is the node scale plus one (`np1`). The calculator multiplies all filled + * entries by \f$2^{n\,(j+1)}\f$. + * + * Limits and errors: + * - Supported derivative orders: 1, 2, 3. + * - Supported scaling orders: 1..19 (i.e., `kp1 = 2..20`, the sections tabulated in the filter files). + * - On unsupported cases or missing files, the code aborts with a diagnostic. + */ + #include "BSCalculator.h" #include @@ -36,6 +74,19 @@ using Eigen::MatrixXd; namespace mrcpp { +/** + * @brief Construct a BSCalculator and load derivative coupling blocks. + * + * @param basis Scaling basis (determines file family and polynomial order). + * @param n Derivative order (1, 2, or 3). + * + * @details + * Dispatches to #readSMatrix to load \f$S_{-1}, S_{0}, S_{+1}\f$ for the given basis + * and derivative order. Orders \f$n \ge 4\f$ are not implemented. + * + * @throws Aborts on unsupported derivative order, unsupported scaling order, + * or if the filter file cannot be opened. + */ BSCalculator::BSCalculator(const ScalingBasis &basis, int n) : diff_order(n) { if (this->diff_order <= 0) NOT_IMPLEMENTED_ABORT; @@ -45,6 +96,28 @@ BSCalculator::BSCalculator(const ScalingBasis &basis, int n) if (this->diff_order >= 4) NOT_IMPLEMENTED_ABORT; } +/** + * @brief Load the pretabulated derivative coupling matrices from disk. + * + * @param basis Scaling basis (type and order). + * @param n Character identifying derivative order: '1', '2' or '3'. 
+ * + * @details + * - Chooses filename by basis type and derivative order. + * - Iterates over the entries in the file for polynomial orders `kp1 = 2..20` + * until it matches the current basis order (`basis.getScalingOrder() + 1`). + * - Splits the stacked 3·kp1 × kp1 array into three kp1 × kp1 blocks: + * \f$S_{+1}\f$, \f$S_{0}\f$, \f$S_{-1}\f$. + * + * File format expectations (per `kp1` section): + * - First line: integer `order` (must equal `kp1`). + * - Next 3·kp1 lines: kp1 numbers per line (row-major), forming the stacked matrix. + * + * @throws Aborts if: + * - the file cannot be opened, + * - the on-file order header does not match the expected `kp1`, + * - the basis scaling order is unsupported. + */ void BSCalculator::readSMatrix(const ScalingBasis &basis, char n) { std::string file; std::string path = details::find_filters(); @@ -73,13 +146,32 @@ void BSCalculator::readSMatrix(const ScalingBasis &basis, char n) { } if (kp1 == (basis.getScalingOrder() + 1)) { this->S_p1 = data.block(0 * kp1, 0, kp1, kp1); - this->S_0 = data.block(1 * kp1, 0, kp1, kp1); + this->S_0 = data.block(1 * kp1, 0, kp1, kp1); this->S_m1 = data.block(2 * kp1, 0, kp1, kp1); break; } } } +/** + * @brief Populate a 2D operator node with the appropriate local derivative blocks. + * + * @param node Operator node to fill (corner layout, 2×2 logical structure). + * + * @details + * Let \f$\ell = \text{idx}[1] - \text{idx}[0]\f$ denote the neighbor offset in the + * second minus the first index direction. Depending on \f$\ell\f$, write the relevant + * coupling block(s) into the node storage and multiply all entries by the scale factor + * \f$2^{\,\text{diff\_order}\cdot (j+1)}\f$, where \f$j+1 = \text{idx.getScale()}+1\f$. + * + * Block placement (coefficient planes are enumerated in the code as 0,1,2,3): + * - \f$\ell = +1\f$: only the “+1” plane is filled with \f$S_{+1}\f$. + * - \f$\ell = 0 \f$: planes [0,1,2,3] are filled with \f$S_{0}, S_{-1}, S_{+1}, S_{0}\f$. 
+ * - \f$\ell = -1\f$: only the “+2” plane is filled with \f$S_{-1}\f$. + * + * After filling, the node is transformed (Compression), flagged as having coefficients, + * and its norms are computed. + */ void BSCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); @@ -128,4 +220,4 @@ void BSCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/ConvolutionCalculator.cpp b/src/treebuilders/ConvolutionCalculator.cpp index 497fe0dd8..24e9be125 100644 --- a/src/treebuilders/ConvolutionCalculator.cpp +++ b/src/treebuilders/ConvolutionCalculator.cpp @@ -23,6 +23,54 @@ * */ +/** + * @file ConvolutionCalculator.cpp + * @brief Adaptive node-wise application kernel for separable convolution operators. + * + * @details + * This file implements the templated class + * mrcpp::ConvolutionCalculator, which is the **workhorse** used by the + * adaptive `TreeBuilder` when applying a separable convolution operator + * (#mrcpp::ConvolutionOperator) to a multiresolution function tree + * (#mrcpp::FunctionTree). + * + * At a high level, for each **target** node \f$ g \f$ (in the output tree) + * the calculator: + * - determines the **band** of **source** nodes \f$ f \f$ that can + * contribute via the operator's bandwidth model, + * - estimates cheap **screening bounds** using precomputed operator norms, + * the local source/target norms, and a precision policy, + * - for surviving pairs \f$ (g,f) \f$, performs a sequence of small + * **tensor contractions** (one per Cartesian direction) to apply the + * separable operator component(s) and accumulates the result into \f$ g \f$. + * + * The class also: + * - precomputes **band-size factors** per depth and component-combination to + * drive thresholding, + * - supports **periodic worlds** and optional **unit-cell manipulation** + * (near-field vs. 
far-field selection), + * - collects **per-thread timings** and **operator-usage statistics**. + * + * ### Screening model (outline) + * Let \f$ \mathcal{O} = \sum_i \bigotimes_{d=1}^D O_i^{(d)} \f$ be the + * separable expansion (terms indexed by \f$ i \f$). For a source node + * \f$ f \f$ and target node \f$ g \f$, the calculator estimates + * \f[ + * \| \mathcal{O}_i f \| \;\lesssim\; + * \Big(\prod_{d=1}^D \|O_i^{(d)}\|\Big)\; \|f\|\; s(i, \Delta \ell) + * \f] + * where \f$ s(\cdot) \f$ is a band-size factor depending on depth and the + * component combination, and compares the bound to a target threshold + * \f$ \tau(g) \sim \texttt{prec} \cdot \sqrt{\|g\|^2 / N_\text{terms}} \f$. + * Only terms that can exceed \f$ \tau(g) \f$ are explicitly applied. + * + * ### BLAS vs. Eigen + * If BLAS is available, the directional contractions can be carried out via + * GEMM. Otherwise, an Eigen-based path is used. Both routes compute + * \f$ G \leftarrow F^\top O \f$ in each direction and **accumulate** on the + * last direction to the target buffer. + */ + #include "ConvolutionCalculator.h" #include "operators/ConvolutionOperator.h" #include "operators/OperatorState.h" @@ -46,6 +94,21 @@ using Eigen::MatrixXi; namespace mrcpp { +/** + * @brief Construct a calculator for applying a convolution operator. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param p Target precision used for screening and adaptivity. + * @param o Separable convolution operator to apply. + * @param f Source function tree (input). + * @param depth Maximum operator depth considered for band-size tables. + * + * @details + * Initializes per-term **band-size tables** (used in screening) and + * allocates per-thread timers. The `depth` argument is upper-bounded by + * `MaxDepth`. 
+ */ template ConvolutionCalculator::ConvolutionCalculator(double p, ConvolutionOperator &o, FunctionTree &f, int depth) : maxDepth(depth) @@ -57,6 +120,9 @@ ConvolutionCalculator::ConvolutionCalculator(double p, ConvolutionOperator initTimers(); } +/** + * @brief Destructor: clear timers and print aggregated operator statistics. + */ template ConvolutionCalculator::~ConvolutionCalculator() { clearTimers(); this->operStat.flushNodeCounters(); @@ -64,6 +130,9 @@ template ConvolutionCalculator::~ConvolutionCalculator for (int i = 0; i < this->bandSizes.size(); i++) { delete this->bandSizes[i]; } } +/** + * @brief Allocate per-thread timers for band construction, calculation, and norm updates. + */ template void ConvolutionCalculator::initTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -73,6 +142,9 @@ template void ConvolutionCalculator::initTimers() { } } +/** + * @brief Release per-thread timers. + */ template void ConvolutionCalculator::clearTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -85,6 +157,9 @@ template void ConvolutionCalculator::clearTimers() { this->norm_t.clear(); } +/** + * @brief Print a compact report of thread-wise timings. + */ template void ConvolutionCalculator::printTimers() const { int oldprec = Printer::setPrecision(1); int nThreads = mrcpp_get_max_threads(); @@ -100,8 +175,15 @@ template void ConvolutionCalculator::printTimers() con Printer::setPrecision(oldprec); } -/** Initialize the number of nodes formally within the bandwidth of an - operator. The band size is used for thresholding. */ +/** + * @brief Precompute per-depth band-size factors for all operator terms. + * + * @details + * For each raw operator term and each depth, builds a table of the number of + * source nodes formally falling within the **Cartesian bandwidth box** for + * every component-combination (gt,ft). These factors are later used to scale + * screening thresholds. 
+ */ template void ConvolutionCalculator::initBandSizes() { for (int i = 0; i < this->oper->size(); i++) { // IMPORTANT: only 0-th dimension! @@ -114,10 +196,19 @@ template void ConvolutionCalculator::initBandSizes() { } } -/** Calculate the number of nodes within the bandwidth - * of an operator. Currently this routine ignores the fact that - * there are edges on the world box, and thus over estimates - * the number of nodes. This is different from the previous version. */ +/** + * @brief Compute band-size factor for a given depth from a bandwidth model. + * + * @param[out] bs Table to be filled (rows: depth, cols: component-pairs plus a max column). + * @param[in] depth Operator depth relative to root. + * @param[in] bw Bandwidth model (per-depth widths per component index). + * + * @details + * For each component pair \f$(g_t,f_t)\f$, the routine forms the Cartesian + * product of directional half-widths to estimate the number of contributing + * source nodes and stores it in \p bs. The last column stores the row-wise + * maximum for quick access. + */ template void ConvolutionCalculator::calcBandSizeFactor(MatrixXi &bs, int depth, const BandWidth &bw) { for (int gt = 0; gt < this->nComp; gt++) { for (int ft = 0; ft < this->nComp; ft++) { @@ -138,7 +229,18 @@ template void ConvolutionCalculator::calcBandSizeFacto bs(depth, this->nComp2) = bs.row(depth).maxCoeff(); } -/** Return a vector of nodes in F affected by O, given a node in G */ +/** + * @brief Build the band of source nodes affected by the operator for a given target node. + * + * @param[in] gNode Target node (in the output tree). + * @param[out] idx_band Matching indices of the source nodes added to the band. + * @returns A vector of pointers to the source nodes \f$ f \f$. 
+ * + * @details + * The band is the intersection between the operator's bandwidth box centered + * at \p gNode and the function-tree world box, respecting periodicity and + * (optionally) unit-cell filtering when `manipulateOperator` is enabled. + */ template MWNodeVector *ConvolutionCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { auto *band = new MWNodeVector; @@ -161,7 +263,7 @@ template MWNodeVector *ConvolutionCalculator::ma for (int i = 0; i < D; i++) { sIdx[i] = gIdx[i] - width; eIdx[i] = gIdx[i] + width; - // We need to consider the world borders + // Consider world borders / periodic wrapping int nboxes = fWorld.size(i) * (1 << o_depth); int c_i = cIdx[i] * (1 << o_depth); if (not periodic) { @@ -179,7 +281,20 @@ template MWNodeVector *ConvolutionCalculator::ma return band; } -/** Recursively retrieve all reachable f-nodes within the bandwidth. */ +/** + * @brief Recursive helper to enumerate all source indices inside the bandwidth box. + * + * @param[out] band Vector of pointers to source nodes added along the recursion. + * @param[out] idx_band Parallel vector of node indices corresponding to \p band. + * @param[in] idx Current multi-index (mutated along recursion). + * @param[in] nbox Side lengths of the bandwidth box. + * @param[in] dim Current dimension to recurse on. + * + * @details + * If **unit-cell manipulation** is enabled, nodes are included/excluded based + * on their membership in the first unit cell (for periodic worlds) and the + * `onUnitcell` flag. + */ template void ConvolutionCalculator::fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim) { int l_start = idx[dim]; for (int j = 0; j < nbox[dim]; j++) { @@ -222,6 +337,18 @@ template void ConvolutionCalculator::fillOperBand(MWNo idx[dim] = l_start; } +/** + * @brief Compute contributions to a single **target** node by scanning its band. + * + * @param[in,out] node Target node (coefficients are accumulated here). 
+ * + * @details + * - Builds the source band for the target node. + * - Computes a **local target threshold** from the node's tree norm and `prec`. + * - Loops over band nodes and component combinations, performing **screening**. + * - For surviving pairs, applies all operator terms via `applyOperComp`. + * - Updates node norms at the end. + */ template void ConvolutionCalculator::calcNode(MWNode &node) { auto &gNode = static_cast &>(node); gNode.zeroCoefs(); @@ -232,12 +359,13 @@ template void ConvolutionCalculator::calcNode(MWNode os(gNode, tmpCoefs); this->operStat.incrementGNodeCounters(gNode); - // Get all nodes in f within the bandwith of O in g + // Get all nodes in f within the bandwidth of O around g this->band_t[mrcpp_get_thread_num()]->resume(); std::vector> idx_band; MWNodeVector *fBand = makeOperBand(gNode, idx_band); this->band_t[mrcpp_get_thread_num()]->stop(); + // Build target threshold (relative by default; may be scaled by precFunc) MWTree &gTree = gNode.getMWTree(); double gThrs = gTree.getSquareNorm(); if (gThrs > 0.0) { @@ -245,9 +373,9 @@ template void ConvolutionCalculator::calcNode(MWNodeprecFunc(gNode.getNodeIndex()); gThrs = this->prec * precFac * std::sqrt(gThrs / nTerms); } - os.gThreshold = gThrs; + // Scan band and apply screened operator terms this->calc_t[mrcpp_get_thread_num()]->resume(); for (int n = 0; n < fBand->size(); n++) { MWNode &fNode = *(*fBand)[n]; @@ -274,7 +402,20 @@ template void ConvolutionCalculator::calcNode(MWNode void ConvolutionCalculator::applyOperComp(OperatorState &os) { double fNorm = os.fNode->getComponentNorm(os.ft); int o_depth = os.fNode->getScale() - this->oper->getOperatorRoot(); @@ -288,12 +429,23 @@ template void ConvolutionCalculator::applyOperComp(Ope } } -/** @brief Apply a single operator component (term) to a single f-node. +/** + * @brief Apply a single operator term to a single source node (low-level path). * - * @details Apply a single operator component (term) to a single f-node. 
- * Whether the operator actualy is applied is determined by a screening threshold. - * Here we make use of the sparcity of matrices \f$ A, B, C \f$. + * @param i Index of the operator term in the separable expansion. + * @param os Operator state (nodes, buffers, norms, component indices). * + * @details + * For each direction: + * - Fetch the operator-block at the required translation (\f$ \Delta \ell \f$) + * and depth \f$ o\_depth \f$; multiply the running contraction with its norm + * and keep a raw pointer to its coefficient block. + * - If the translation is outside bandwidth, return early. + * After the per-direction setup: + * - Form an **upper bound** as product of directional norms times the + * source-threshold and compare to the target-threshold. + * - If active, dispatch to `tensorApplyOperComp` to carry out the contraction + * and accumulate into the target node buffer. */ template void ConvolutionCalculator::applyOperator(int i, OperatorState &os) { MWNode &gNode = *os.gNode; @@ -310,8 +462,7 @@ template void ConvolutionCalculator::applyOperator(int auto &oTree = this->oper->getComponent(i, d); int oTransl = fIdx[d] - gIdx[d]; - // The following will check the actual band width in each direction. - // Not needed if the thresholding at the end of this routine is active. + // Per-direction bandwidth check int a = (os.gt & (1 << d)) >> d; int b = (os.ft & (1 << d)) >> d; int idx = (a << 1) + b; @@ -329,8 +480,19 @@ template void ConvolutionCalculator::applyOperator(int } } -/** Perorm the required linear algebra operations in order to apply an -operator component to a f-node in a n-dimensional tesor space. */ +/** + * @brief Perform the directional tensor contractions for one operator term. + * + * @param os Operator state (holds mapped buffers for in-place contractions). + * + * @details + * The contraction sequence computes, for each direction \f$ d \f$, + * \f$ G \leftarrow F^\top O^{(d)} \f$, with **accumulation** on the last + * direction. 
If a directional block is `nullptr`, an identity map is used + * (i.e., pure transposition). + * + * Both a BLAS path (disabled here) and an Eigen path are implemented. + */ template void ConvolutionCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -382,6 +544,16 @@ template void ConvolutionCalculator::tensorApplyOperCo //#endif } +/** + * @brief Ensure parent nodes exist up to the operator root (periodic worlds). + * + * @param tree Target/output tree. + * + * @details + * When operating in periodic settings, parent nodes above the root scale + * may be required for coarse contributions; this helper guarantees their + * presence prior to work scheduling. + */ template void ConvolutionCalculator::touchParentNodes(MWTree &tree) const { if (not manipulateOperator) { const auto oper_scale = this->oper->getOperatorRoot(); @@ -398,6 +570,16 @@ template void ConvolutionCalculator::touchParentNodes( } } +/** + * @brief Create the initial list of target nodes to process. + * + * @param tree Target/output tree. + * @returns A vector of pointers to existing nodes to be processed. + * + * @details + * For periodic trees, parent nodes above the root are first touched to ensure + * consistency; then a flat node table is produced via `tree_utils::make_node_table`. 
+ */ template MWNodeVector *ConvolutionCalculator::getInitialWorkVector(MWTree &tree) const { auto *nodeVec = new MWNodeVector; if (tree.isPeriodic()) touchParentNodes(tree); @@ -405,6 +587,7 @@ template MWNodeVector *ConvolutionCalculator::ge return nodeVec; } +// Explicit instantiations template class ConvolutionCalculator<1, double>; template class ConvolutionCalculator<2, double>; template class ConvolutionCalculator<3, double>; diff --git a/src/treebuilders/CopyAdaptor.cpp b/src/treebuilders/CopyAdaptor.cpp index 8312ebb0f..9acefcbea 100644 --- a/src/treebuilders/CopyAdaptor.cpp +++ b/src/treebuilders/CopyAdaptor.cpp @@ -23,12 +23,75 @@ * */ +/** + * @file CopyAdaptor.cpp + * @brief Tree adaptor that **copies** (follows) an existing grid structure, + * optionally widened by a user-specified bandwidth. + * + * @details + * `mrcpp::CopyAdaptor` is a `TreeAdaptor` used with `TreeBuilder` to produce an + * output function tree whose refinement pattern mirrors one or more **reference + * trees**. It decides whether a node should be split solely by inspecting the + * presence of the corresponding **children** (and their integer-neighbor shifts) + * in the reference trees. + * + * This adaptor is typically used to: + * - replicate an input grid for **fixed-grid operations** (e.g., local + * derivative applies where no adaptivity is desired), and + * - **widen** the grid along selected directions to accommodate operators + * whose stencils reach into neighboring nodes (e.g., first/second derivative + * stencils). The widening is controlled by a per-dimension integer + * bandwidth \f$ \text{bandWidth}[d] \ge 0 \f$. + * + * ### Split criterion + * For a candidate node `node` and each of its children `c` (in tensor-product + * sense), the adaptor checks, for each dimension \f$d\in\{0,\dots,D-1\}\f$, + * every integer shift \f$ \delta \in [-\text{bandWidth}[d],\text{bandWidth}[d]] \f$: + * + * 1. 
Form the child index `bwIdx = node.child(c)` and add the shift on the + * current dimension: `bwIdx[d] += δ`. + * 2. If **any** reference `FunctionTree` contains that child index, the adaptor + * returns **true** (requesting the split). + * + * If no such child is found in any reference, the adaptor returns **false**. + * + * ### Notes + * - This adaptor is **purely topological**; it does not inspect coefficients. + * - If `bw == nullptr`, all bandwidths default to `0` (exact copy of the + * reference grid). + * - The reference set can be a single tree or a vector of trees; the union of + * their reachable children (with bandwidth widening) drives the output grid. + * + * ### Example + * @code + * int bw[3] = {1, 0, 0}; // widen one node on each side in x + * CopyAdaptor<3,double> pre(out_inp, maxScale, bw); + * TreeBuilder<3,double> builder; + * DefaultCalculator<3,double> calc; // no-op; we only want to build the grid + * builder.build(out, calc, pre, -1); // fixed grid construction + * @endcode + */ + #include "CopyAdaptor.h" #include namespace mrcpp { +/** + * @brief Construct a copy adaptor that follows a single reference tree. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param t Reference function tree to follow. + * @param ms Maximum scale allowed for splitting (forwarded to `TreeAdaptor`). + * @param bw Optional pointer to an array of length `D` with per-dimension + * integer bandwidths. If `nullptr`, all bandwidths are set to `0`. + * + * @details + * The adaptor will request a split whenever a corresponding child (possibly + * shifted by up to `bw[d]` in each dimension) exists in the reference tree. 
+ */ template CopyAdaptor::CopyAdaptor(FunctionTree &t, int ms, int *bw) : TreeAdaptor(ms) { @@ -36,6 +99,20 @@ CopyAdaptor::CopyAdaptor(FunctionTree &t, int ms, int *bw) tree_vec.push_back(std::make_tuple(1.0, &t)); } +/** + * @brief Construct a copy adaptor that follows the **union** of several trees. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param t Vector of `(coef, tree*)` pairs; only the tree pointers matter for + * the splitting logic, the coefficients are ignored. + * @param ms Maximum scale allowed for splitting. + * @param bw Optional per-dimension bandwidth array. If `nullptr`, zeros. + * + * @details + * A split is requested if **any** tree in `t` contains the candidate child + * (within the bandwidth neighborhood). + */ template CopyAdaptor::CopyAdaptor(FunctionTreeVector &t, int ms, int *bw) : TreeAdaptor(ms) @@ -43,6 +120,18 @@ CopyAdaptor::CopyAdaptor(FunctionTreeVector &t, int ms, int *bw) setBandWidth(bw); } +/** + * @brief Set the per-dimension bandwidths used to widen the copied grid. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param bw Pointer to an integer array of length `D`. If `nullptr`, all + * bandwidths are set to `0`. + * + * @note Negative entries are treated as `0` by the caller contract; this + * function simply copies the values. The split loop ranges over + * `[-bandWidth[d], +bandWidth[d]]`. + */ template void CopyAdaptor::setBandWidth(int *bw) { for (int d = 0; d < D; d++) { if (bw != nullptr) { @@ -53,6 +142,33 @@ template void CopyAdaptor::setBandWidth(int *bw) { } } +/** + * @brief Decide whether a node should be split to mirror (and widen) a reference grid. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param node Candidate node in the output tree. + * @return `true` if any reference tree contains a corresponding child + * (considering bandwidth shifts), `false` otherwise. 
+ * + * @details + * For each tensor child `c` of `node` and each dimension `d`, the method scans + * integer offsets `bw ∈ [-bandWidth[d], +bandWidth[d]]`. The candidate child + * index is formed as: + * + * @code + * NodeIndex bwIdx = idx.child(c); + * bwIdx[d] += bw; + * @endcode + * + * If any reference tree contains `bwIdx`, a split is requested immediately. + * The search stops on the first positive hit. + * + * @complexity + * \f$ \mathcal{O}\big(T \cdot C \cdot \sum_{d=0}^{D-1} (2\,\text{bandWidth}[d]+1)\big) \f$, + * where `T` is the number of reference trees and `C` is the number of tensor + * children per node (shifts are applied one dimension at a time). + */ template bool CopyAdaptor::splitNode(const MWNode &node) const { const NodeIndex &idx = node.getNodeIndex(); for (int c = 0; c < node.getTDim(); c++) { @@ -71,6 +187,7 @@ template bool CopyAdaptor::splitNode(const MWNode; template class CopyAdaptor<2, double>; template class CopyAdaptor<3, double>; diff --git a/src/treebuilders/CrossCorrelationCalculator.cpp b/src/treebuilders/CrossCorrelationCalculator.cpp index a5eef945d..b68e71945 100644 --- a/src/treebuilders/CrossCorrelationCalculator.cpp +++ b/src/treebuilders/CrossCorrelationCalculator.cpp @@ -23,6 +23,53 @@ * */ +/** + * @file CrossCorrelationCalculator.cpp + * @brief Assembly of 2D cross–correlation operator blocks from a 1D kernel, + * for Legendre and Interpolating scaling bases. + * + * @details + * This module implements the node-wise assembly of a separable 2D operator that + * represents the *cross–correlation* between adjacent 1D kernel segments. 
+ * Given a 1D kernel stored as an `MWTree<1>` (accessed via + * `CrossCorrelationCalculator::kernel`), the calculator: + * + * - selects the appropriate **precomputed** cross–correlation matrices + * \f$L\f$ and \f$R\f$ from a `CrossCorrelationCache` depending on the + * scaling basis (Legendre or Interpolating) and the local polynomial order, + * - extracts the relevant 1D kernel coefficient blocks at indices shifted by + * the child offset of the current 2D node, + * - forms the 2D block by the linear combination + * \f[ + * \mathbf{v}_o^{(i)} \;=\; L \,\mathbf{v}_a \;+\; R \,\mathbf{v}_b, + * \f] + * where \f$\mathbf{v}_a\f$ and \f$\mathbf{v}_b\f$ are the 1D kernel + * coefficient segments corresponding to the left/right neighboring child + * positions induced by the current 2D node child \f$i\f$, + * - applies a scale factor \f$ 2^{-\,(\text{scale}+1)/2} \f$ and a global + * normalization factor derived from the world-box scaling to obtain the + * final coefficient block for the 2D operator node. + * + * The assembled coefficients are then compressed (wavelet transform in + * `Compression` mode), marked as present, and their norms are computed for + * downstream thresholding and application. + * + * ### Indexing convention + * For a node with index \f$\ell = (\ell_0,\ell_1)\f$ and a specific tensor + * child \f$i\f$, the child index `l = idx.child(i)` induces two 1D offsets + * \f[ + * \ell_a = \ell_1 - \ell_0 - 1, + * \qquad + * \ell_b = \ell_1 - \ell_0, + * \f] + * which select adjacent 1D kernel nodes at the next finer scale. These are + * mapped to 1D node indices \f$(\text{scale}+1,\ell_a)\f$ and + * \f$(\text{scale}+1,\ell_b)\f$. + * + * @note At the moment, only **uniform scaling factors** are supported; the code + * reads the scaling factor for dimension 0 and assumes it is uniform. 
+ */ + #include "CrossCorrelationCalculator.h" #include "trees/FunctionTree.h" #include "trees/MWNode.h" @@ -33,6 +80,22 @@ using Eigen::VectorXd; namespace mrcpp { +/** + * @brief Build the cross–correlation block for a 2D operator node. + * + * @param node Output 2D operator node to be filled (overwrites coefficients). + * + * @details + * - Zeros existing coefficients. + * - Detects the scaling basis of the underlying MRA (`Interpol` or `Legendre`). + * - Retrieves the corresponding `CrossCorrelationCache` and dispatches to + * `applyCcc()` which performs the actual linear algebra using cached + * matrices \f$L,R\f$ and the 1D kernel tree referenced by this calculator. + * - Applies compression (`mwTransform(Compression)`), marks coefficients as + * present, and computes norms (`calcNorms()`). + * + * @throws Emits an error if the scaling type is unsupported. + */ void CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); int type = node.getMWTree().getMRA().getScalingBasis().getScalingType(); @@ -56,6 +119,37 @@ void CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } +/** + * @brief Assemble one 2D node from cached cross–correlation matrices and a 1D kernel. + * + * @tparam T Tag of the scaling basis cache (`Interpol` or `Legendre`). + * @param node Output 2D operator node to be filled. + * @param ccc Cross–correlation cache for the selected scaling basis. + * + * @details + * Let \f$k\f$ denote the 1D kernel function tree pointed to by + * `this->kernel`. 
For each tensor child \f$i\f$ of the current 2D node: + * - compute the child index \f$l = \text{idx.child}(i)\f$ where + * \f$\text{idx}\f$ is the node index, + * - form the adjacent 1D indices \f$\ell_a = l_1-l_0-1\f$ and + * \f$\ell_b = l_1-l_0\f$ at scale \f$s = \text{node.getScale()}+1\f$, + * - fetch the 1D coefficient vectors \f$\mathbf{v}_a,\mathbf{v}_b\f$ from + * the kernel tree at \f$(s,\ell_a)\f$ and \f$(s,\ell_b)\f$, + * - compute the 2D segment + * \f[ + * \mathbf{v}_o^{(i)} \;=\; L \,\mathbf{v}_a \;+\; R \,\mathbf{v}_b, + * \f] + * where \f$L,R\f$ are read from the cache for the node order + * (`ccc.getLMatrix(node.getOrder())`, `ccc.getRMatrix(node.getOrder())`), + * - store \f$\mathbf{v}_o^{(i)}\f$ in the appropriate slot of the 2D node + * coefficient buffer after applying the normalization + * \f$ \sqrt{\text{scaling\_factor}}\, 2^{-s/2} \f$. + * + * The method writes directly into `node.getCoefs()` and does not allocate + * intermediate node structures beyond temporary vectors. + * + * @note Only uniform world-box scaling factors are supported at present. + */ template void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, CrossCorrelationCache &ccc) { const MatrixXd &lMat = ccc.getLMatrix(node.getOrder()); const MatrixXd &rMat = ccc.getRMatrix(node.getOrder()); @@ -89,8 +183,8 @@ template void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros double two_n = std::pow(2.0, -scale / 2.0); for (int i = 0; i < t_dim * kp1_d; i++) { auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0); - // This is only implemented for unifrom scaling factors - // hence the zero TODO: make it work for non-uniform scaling + // Implemented for uniform scaling factors (dimension 0). For non-uniform + // scaling a per-dimension normalization would be required. 
coefs[i] = std::sqrt(scaling_factor) * two_n * vec_o(i); } } diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp index b298d1b6e..db4a398b7 100644 --- a/src/treebuilders/DerivativeCalculator.cpp +++ b/src/treebuilders/DerivativeCalculator.cpp @@ -23,6 +23,43 @@ * */ +/** + * @file DerivativeCalculator.cpp + * @brief Node-wise application of multiresolution **derivative operators**. + * + * @details + * This module implements the computational kernels used to apply a + * #mrcpp::DerivativeOperator to multiresolution coefficient trees. + * The calculator works node-by-node and supports both: + * + * - **Zero-bandwidth (local) operators** — e.g. ABGV-00 type operators that + * act diagonally (per cell) in non-applied directions; handled by + * DerivativeCalculator::applyOperator_bw0(). + * - **Finite-bandwidth operators** — e.g. ABGV-55/PH/BS operators that couple + * nearest neighbors along the application direction; handled by + * DerivativeCalculator::applyOperator(). + * + * The apply pipeline for each output (g) node: + * 1. Build the **operator band** of input (f) nodes affected by the operator + * at the current depth (makeOperBand()). + * 2. For each combination of tensor components \f$(f_t,g_t)\f$, gather the + * 1D operator blocks from the pre-built #mrcpp::OperatorTree components. + * 3. Perform the separated **tensor contraction** + * (tensorApplyOperComp()) across dimensions, using identity where the + * operator is not applied. + * 4. Apply a **scaling normalization** based on the world-box scaling factor + * and the derivative order. + * 5. Compute node norms for downstream thresholding and diagnostics. + * + * The class collects **per-thread timing** and **operator-usage statistics** + * to aid profiling and load balancing. + * + * @note Scaling normalization: + * The derivative w.r.t. direction `applyDir` is normalized by the + * world-box scaling factor of that direction raised to the operator + * order. 
See the notes near the end of calcNode() overloads. + */ + #include "DerivativeCalculator.h" #include "operators/DerivativeOperator.h" #include "operators/OperatorState.h" @@ -42,6 +79,21 @@ using Eigen::MatrixXd; namespace mrcpp { +/** + * @brief Construct a derivative calculator. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type (e.g., double or ComplexDouble). + * @param dir Direction along which the derivative is applied (0-based, < D). + * @param o Derivative operator to apply. + * @param f Input function tree (source of coefficients). + * + * @throws Aborts if `dir` is outside \f$[0,D)\f$. + * + * @details + * The constructor stores references to the operator and the input tree, + * validates the application direction, and initializes per-thread timers. + */ template DerivativeCalculator::DerivativeCalculator(int dir, DerivativeOperator &o, FunctionTree &f) : applyDir(dir) @@ -51,11 +103,17 @@ DerivativeCalculator::DerivativeCalculator(int dir, DerivativeOperator initTimers(); } +/** + * @brief Flush usage counters and print aggregate statistics on destruction. + */ template DerivativeCalculator::~DerivativeCalculator() { this->operStat.flushNodeCounters(); println(10, this->operStat); } +/** + * @brief Initialize per-thread timers (band construction / calc / norms). + */ template void DerivativeCalculator::initTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -65,12 +123,18 @@ template void DerivativeCalculator::initTimers() { } } +/** + * @brief Clear local timer storage. + */ template void DerivativeCalculator::clearTimers() { this->band_t.clear(); this->calc_t.clear(); this->norm_t.clear(); } +/** + * @brief Print per-thread timing statistics gathered during application. 
+ */ template void DerivativeCalculator::printTimers() const { int oldprec = Printer::setPrecision(1); int nThreads = mrcpp_get_max_threads(); @@ -86,6 +150,21 @@ template void DerivativeCalculator::printTimers() cons Printer::setPrecision(oldprec); } +/** + * @brief Apply a **local (zero-bandwidth)** derivative operator to a single node. + * + * @param[in] inpNode Source node (input function). + * @param[out] outNode Destination node (output after derivative). + * + * @details + * Uses applyOperator_bw0() which assumes the operator couples only the same + * spatial cell in non-applied directions. Identity is implicitly used in + * directions other than `applyDir`. + * + * After the tensor contraction, multiplies by \f$(\text{scale-factor})^{-p}\f$ + * where `p = oper->getOrder()` to account for physical coordinate scaling, + * then updates norms. + */ template void DerivativeCalculator::calcNode(MWNode &inpNode, MWNode &outNode) { // if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw"); outNode.zeroCoefs(); @@ -97,21 +176,35 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); if (abs(scaling_factor - 1.0) > MachineZero) { for (int i = 0; i < outNode.getNCoefs(); i++) outNode.getCoefs()[i] *= scaling_factor; } - outNode.calcNorms(); // TODO:required? norms are not used for now + outNode.calcNorms(); // norms are used by downstream screening } +/** + * @brief Apply a **finite-bandwidth** derivative operator to a single node. + * + * @param gNode Destination/output node (will be overwritten). + * + * @details + * 1. Build the operator band (list of input nodes influencing `gNode`) along + * the apply direction (makeOperBand()). + * 2. For each band node and tensor-component pair, gather operator slices + * from the #mrcpp::OperatorTree and perform the tensor product. + * 3. Apply coordinate scaling normalization by dividing by + * \f$(\text{scaleFactor})^{\text{order}}\f$ in the applied direction. + * 4. 
Update norms and timing statistics. + */ template void DerivativeCalculator::calcNode(MWNode &gNode) { gNode.zeroCoefs(); @@ -120,14 +213,13 @@ template void DerivativeCalculator::calcNode(MWNode os(gNode, tmpCoefs); this->operStat.incrementGNodeCounters(gNode); - // Get all nodes in f within the bandwith of O in g + // Build band of input nodes that affect gNode this->band_t[mrcpp_get_thread_num()].resume(); std::vector> idx_band; MWNodeVector fBand = makeOperBand(gNode, idx_band); this->band_t[mrcpp_get_thread_num()].stop(); this->calc_t[mrcpp_get_thread_num()].resume(); - for (int n = 0; n < fBand.size(); n++) { MWNode &fNode = *fBand[n]; NodeIndex &fIdx = idx_band[n]; @@ -143,7 +235,7 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); for (int i = 0; i < gNode.getNCoefs(); i++) gNode.getCoefs()[i] /= scaling_factor; this->calc_t[mrcpp_get_thread_num()].stop(); @@ -153,7 +245,19 @@ template void DerivativeCalculator::calcNode(MWNodenorm_t[mrcpp_get_thread_num()].stop(); } -/** Return a vector of nodes in F affected by O, given a node in G */ +/** + * @brief Build the **operator band** of input nodes that influence a given output node. + * + * @param gNode Output node for which to gather contributing input nodes. + * @param idx_band Output list of input node indices (aligned with returned vector). + * @return Vector of pointers to input nodes in @p fTree that lie within the + * operator bandwidth along `applyDir`. + * + * @details + * The band extends `width = oper->getMaxBandWidth()` cells to the left/right + * of the output index along the applied direction. Out-of-bounds indices are + * skipped; periodicity is handled by FunctionTree::getRootIndex(). 
+ */ template MWNodeVector DerivativeCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { assert(this->applyDir >= 0); assert(this->applyDir < D); @@ -177,9 +281,18 @@ template MWNodeVector DerivativeCalculator::make return band; } -/** Apply a single operator component (term) to a single f-node assuming zero bandwidth */ +/** + * @brief Apply a single **zero-bandwidth** operator component to one input node. + * + * @param os Operator state (holds pointers/scratch and component indices). + * + * @details + * Fetches the operator block at the current depth with translation 0 in all + * directions. In non-applied directions, activates identity by passing + * `nullptr` in the operator pointers, which signals tensorApplyOperComp() to + * copy/accumulate. + */ template void DerivativeCalculator::applyOperator_bw0(OperatorState &os) { - // cout<<" applyOperator "< &gNode = *os.gNode; MWNode &fNode = *os.fNode; const NodeIndex &fIdx = *os.fIdx; @@ -197,10 +310,10 @@ template void DerivativeCalculator::applyOperator_bw0( oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oIdx == 0 or oIdx == 3) { - // This will activate the identity operator in direction i + // Identity in direction d oData[d] = nullptr; } else { - // This means that we are in a zero part of the identity operator + // Outside identity block: contributes zero return; } } @@ -209,8 +322,19 @@ template void DerivativeCalculator::applyOperator_bw0( tensorApplyOperComp(os); } -/** Apply a single operator component (term) to a single f-node. Whether the -operator actualy is applied is determined by a screening threshold. */ +/** + * @brief Apply a single **finite-bandwidth** operator component to one input node. + * + * @param os Operator state (holds pointers/scratch and component indices). + * + * @details + * For each dimension: + * - Determine the relative translation from input to output node. + * - Check that translation lies within the operator bandwidth. 
+ * - Fetch the corresponding #mrcpp::OperatorNode data. + * - In non-applied directions, only the central identity block (translation 0, + * component 0 or 3) contributes; otherwise the term is skipped. + */ template void DerivativeCalculator::applyOperator(OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -226,8 +350,7 @@ template void DerivativeCalculator::applyOperator(Oper int oTransl = fIdx[d] - gIdx[d]; - // The following will check the actual band width in each direction. - // Not needed if the thresholding at the end of this routine is active. + // Bandwidth check in each direction int a = (os.gt & (1 << d)) >> d; int b = (os.ft & (1 << d)) >> d; int idx = (a << 1) + b; @@ -242,10 +365,10 @@ template void DerivativeCalculator::applyOperator(Oper oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oTransl == 0 and (oIdx == 0 or oIdx == 3)) { - // This will activate the identity operator in direction i + // Identity in direction d oData[d] = nullptr; } else { - // This means that we are in a zero part of the identity operator + // Zero contribution return; } } @@ -254,8 +377,19 @@ template void DerivativeCalculator::applyOperator(Oper tensorApplyOperComp(os); } -/** Perform the required linear algebra operations in order to apply an -operator component to a f-node in a n-dimensional tensor space. */ +/** + * @brief Perform the separated **tensor contraction** for one operator term. + * + * @param os Operator state (provides temporary buffers and operator slices). + * + * @details + * For each dimension i: + * - Map the \f$(k+1)\times (k+1)^{D-1}\f$ slice of the input into `f`, + * - Multiply by the \f$(k+1)\times (k+1)\f$ operator block if present + * (otherwise use identity), + * - Write the transposed result into the next staging buffer `g`. + * On the last dimension, accumulate into the output buffer. 
+ */ template void DerivativeCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -264,14 +398,14 @@ template void DerivativeCalculator::tensorApplyOperCom Eigen::Map> g(aux[i + 1], os.kp1_dm1, os.kp1); if (oData[i] != nullptr) { Eigen::Map op(oData[i], os.kp1, os.kp1); - if (i == D - 1) { // Last dir: Add up into g + if (i == D - 1) { // last dim: accumulate g.noalias() += f.transpose() * op; } else { g.noalias() = f.transpose() * op; } } else { - // Identity operator in direction i - if (i == D - 1) { // Last dir: Add up into g + // Identity in dimension i + if (i == D - 1) { g.noalias() += f.transpose(); } else { g.noalias() = f.transpose(); @@ -280,10 +414,22 @@ template void DerivativeCalculator::tensorApplyOperCom } } +/** + * @brief Provide the initial work vector for a tree traversal. + * + * @param tree Output tree where results will be stored. + * @return A vector of pointers to the leaf/end nodes of @p tree. + * + * @details + * The derivative application uses a fixed grid determined by the operator. + * This helper asks the tree to provide a snapshot of its end-node table to + * seed the traversal. + */ template MWNodeVector *DerivativeCalculator::getInitialWorkVector(MWTree &tree) const { return tree.copyEndNodeTable(); } +// Explicit instantiations template class DerivativeCalculator<1, double>; template class DerivativeCalculator<2, double>; template class DerivativeCalculator<3, double>; diff --git a/src/treebuilders/PHCalculator.cpp b/src/treebuilders/PHCalculator.cpp index 2da879d7b..2b54ef730 100644 --- a/src/treebuilders/PHCalculator.cpp +++ b/src/treebuilders/PHCalculator.cpp @@ -23,6 +23,26 @@ * */ +/** + * @file PHCalculator.cpp + * @brief Populate piecewise-homogeneous (PH) derivative stencil blocks for + * 2D MW nodes and apply them as local operators. 
+ * + * @details + * The PH operator is applied on a 2D tensor-product node and uses three + * nearest-neighbour coupling blocks along the refinement line: + * - S_m1 : block coupling to the left child (l = -1) + * - S_0 : block coupling to the same child (l = 0) + * - S_p1 : block coupling to the right child (l = +1) + * + * For a node at scale j, the coefficients are scaled by 2^{diff_order*(j+1)} + * to account for the dyadic scaling of derivatives in multiresolution analysis. + * + * The block matrices are read from precomputed text files (see @ref readSMatrix) + * that depend on the scaling basis (Legendre or Interpolating) and the + * derivative order (currently n = 1 or 2). + */ + #include "PHCalculator.h" #include @@ -36,6 +56,20 @@ using Eigen::MatrixXd; namespace mrcpp { +/** + * @brief Construct a PHCalculator and load its stencil blocks. + * + * @param[in] basis Scaling basis (type and order). + * @param[in] n Derivative order (1 or 2 supported). + * + * @throws NOT_IMPLEMENTED_ABORT if n <= 0 or n >= 3. + * + * @details + * Based on \p n the constructor selects the corresponding set of PH derivative + * blocks and loads them from disk. Supported files are: + * - Legendre: L_ph_deriv_1.txt, L_ph_deriv_2.txt + * - Interpol: I_ph_deriv_1.txt, I_ph_deriv_2.txt + */ PHCalculator::PHCalculator(const ScalingBasis &basis, int n) : diff_order(n) { if (this->diff_order <= 0) NOT_IMPLEMENTED_ABORT; @@ -44,6 +78,29 @@ PHCalculator::PHCalculator(const ScalingBasis &basis, int n) if (this->diff_order >= 3) NOT_IMPLEMENTED_ABORT; } +/** + * @brief Read PH derivative blocks from text files for the given basis. + * + * @param[in] basis Scaling basis (provides type and order). + * @param[in] n Character '1' or '2' selecting derivative order. + * + * @details + * The file format is: + * - First line per order k+1 (k+1 = 2..29): an integer "order" sentinel. 
+ * - Followed by a 3*(k+1) by (k+1) table (row-major in the file) containing + * the vertically stacked blocks: + * [ S_{+1} ; S_{0} ; S_{-1} ] + * + * Only the block triple corresponding to the active basis order (kp1 = k+1) + * is kept: + * - S_p1 = rows [0*kp1 .. 1*kp1-1] + * - S_0 = rows [1*kp1 .. 2*kp1-1] + * - S_m1 = rows [2*kp1 .. 3*kp1-1] + * + * @note + * - Supported scaling orders: 1..28 for Interpol/Legendre (kp1 in 2..29). + * - Files are discovered via details::find_filters(). + */ void PHCalculator::readSMatrix(const ScalingBasis &basis, char n) { std::string file; std::string path = details::find_filters(); @@ -72,26 +129,50 @@ void PHCalculator::readSMatrix(const ScalingBasis &basis, char n) { } if (kp1 == (basis.getScalingOrder() + 1)) { this->S_p1 = data.block(0 * kp1, 0, kp1, kp1); - this->S_0 = data.block(1 * kp1, 0, kp1, kp1); + this->S_0 = data.block(1 * kp1, 0, kp1, kp1); this->S_m1 = data.block(2 * kp1, 0, kp1, kp1); break; } } } +/** + * @brief Fill 2D node coefficients by applying the PH derivative stencil. + * + * @param[in,out] node 2D MW node to populate (coefficients in scaling basis). + * + * @details + * Let idx = (i0, i1) be the 2D node index and l = i1 - i0. Depending on l, + * the appropriate neighbour coupling block is selected: + * - l = +1 : right neighbour uses S_p1 + * - l = 0 : interior uses S_0 (diagonal) with off-diagonals S_{-1}, S_{+1} + * - l = -1 : left neighbour uses S_m1 + * + * The coefficient tensor is laid out as 4 contiguous tiles for the four + * tensor children, each of size kp1_d = (k+1)^2. Each written tile holds the + * entries of one stencil block, multiplied entry-wise by + * two_np1 = 2^{diff_order * (scale+1)}. + * + * Finally, coefficients are transformed to MW (Compression), marked present, + * and node norms are updated. + * + * @note + * - For periodic trees, indices outside the world box are ignored (no write). + * - The switch default does nothing by design (periodic handling is upstream).
+ */ void PHCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); const auto &idx = node.getNodeIndex(); - int l = idx[1] - idx[0]; - int np1 = idx.getScale() + 1; - int kp1 = node.getKp1(); - int kp1_d = node.getKp1_d(); + int l = idx[1] - idx[0]; // neighbour offset along refinement line + int np1 = idx.getScale() + 1; // j+1, used in dyadic derivative scaling + int kp1 = node.getKp1(); // k+1 (polynomial order + 1) + int kp1_d = node.getKp1_d(); // (k+1)^2, tile size per child double two_np1 = std::pow(2.0, this->diff_order * np1); double *coefs = node.getCoefs(); switch (l) { - case 1: + case 1: // right neighbour: only S_{+1} contributes for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; @@ -99,18 +180,18 @@ void PHCalculator::calcNode(MWNode<2> &node) { } } break; - case 0: + case 0: // interior: stencil spans S_0 (diagonal) and S_{-1}, S_{+1} for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; - coefs[0 * kp1_d + idx] = two_np1 * this->S_0(i, j); + coefs[0 * kp1_d + idx] = two_np1 * this->S_0 (i, j); coefs[1 * kp1_d + idx] = two_np1 * this->S_m1(i, j); coefs[2 * kp1_d + idx] = two_np1 * this->S_p1(i, j); - coefs[3 * kp1_d + idx] = two_np1 * this->S_0(i, j); + coefs[3 * kp1_d + idx] = two_np1 * this->S_0 (i, j); } } break; - case -1: + case -1: // left neighbour: only S_{-1} contributes for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; @@ -122,9 +203,9 @@ void PHCalculator::calcNode(MWNode<2> &node) { // When periodic do nothing, else it should never end up here. 
break; } - node.mwTransform(Compression); - node.setHasCoefs(); - node.calcNorms(); + node.mwTransform(Compression); // convert to MW (wavelet) coefficients + node.setHasCoefs(); // mark coefficients present + node.calcNorms(); // update node/component norms } } // namespace mrcpp diff --git a/src/treebuilders/ProjectionCalculator.cpp b/src/treebuilders/ProjectionCalculator.cpp index 15ba014da..f72fde8cd 100644 --- a/src/treebuilders/ProjectionCalculator.cpp +++ b/src/treebuilders/ProjectionCalculator.cpp @@ -23,6 +23,42 @@ * */ +/** + * @file ProjectionCalculator.cpp + * @brief Compute scaling/wavelet coefficients by projecting an analytic (or + * otherwise representable) function onto the MW basis on a given node. + * + * @details + * The projection proceeds by evaluating the input function at a set of + * expanded child quadrature/collocation points associated with the node, + * then transforming these samples into scaling coefficients and finally + * compressing into the wavelet representation: + * + * 1. `node.getExpandedChildPts(exp_pts)` returns a D×N matrix of evaluation + * points in *local* node coordinates, where N equals `node.getNCoefs()`. + * 2. Each point is rescaled by the per-dimension world-box scaling factors + * (`scaling_factor[d]`) so that the user function is evaluated in + * physical coordinates. + * 3. The raw samples are written into the node coefficient buffer and + * converted to scaling coefficients via `cvTransform(Backward)`. + * 4. `mwTransform(Compression)` moves the representation to compressed MW + * form (wavelets across scales, scaling on roots). + * 5. Bookkeeping: mark coefficients present and update (square-)norms. + * + * The calculator is stateless across nodes; it assumes that the caller + * (TreeBuilder) handles traversal and refinement decisions (via an adaptor). + * + * @note + * - The assertion `exp_pts.cols() == node.getNCoefs()` guards consistency + * between the quadrature layout and the node’s coefficient count. 
+ * - `scaling_factor` is typically extracted from the world box and allows + * non-unit, per-axis domain scaling. + * - This implementation works for both real and complex coefficient types. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (`double` or `ComplexDouble`). + */ + #include "ProjectionCalculator.h" #include "trees/MWNode.h" #include @@ -31,7 +67,26 @@ using Eigen::MatrixXd; namespace mrcpp { -template void ProjectionCalculator::calcNode(MWNode &node) { +/** + * @brief Project a single node by sampling the input function on the node's + * expanded child grid and transforming samples into MW coefficients. + * + * @param[in,out] node The MW node whose coefficients are to be computed. + * + * @pre `node.getExpandedChildPts(exp_pts)` provides exactly `getNCoefs()` + * columns (asserted). + * @post + * - Node coefficients represent the function in compressed MW form. + * - `node.setHasCoefs()` is set and node norms are updated. + * + * @par Implementation + * - Samples are taken at expanded child points, rescaled by + * `scaling_factor[d]`, and passed to `func->evalf`. + * - `cvTransform(Backward)` maps collocation values → scaling coefficients. + * - `mwTransform(Compression)` converts to MW compressed representation.
+ */ +template +void ProjectionCalculator::calcNode(MWNode &node) { MatrixXd exp_pts; node.getExpandedChildPts(exp_pts); @@ -44,12 +99,25 @@ template void ProjectionCalculator::calcNode(MWNodefunc->evalf(r); } - node.cvTransform(Backward); - node.mwTransform(Compression); - node.setHasCoefs(); - node.calcNorms(); + node.cvTransform(Backward); // collocation values -> scaling coefficients + node.mwTransform(Compression); // scaling/wavelet compression on the node + node.setHasCoefs(); // mark that the node now owns valid coefs + node.calcNorms(); // update norms for refinement/thresholding } +/* -------------------------------------------------------------------------- + * Legacy (interpolating) variant + * + * The block below shows an older, somewhat faster interpolating approach + * that assumes an interpolating scaling basis. It is kept as reference; + * it performs quadrature using cached roots/weights and writes block + * coefficients directly before compressing. Enable with care as it + * assumes specific basis properties (Interpol). + * + * template + * void ProjectionCalculator::calcNode(MWNode &node) { ...
} + * -------------------------------------------------------------------------- */ + /* Old interpolating version, somewhat faster template void ProjectionCalculator::calcNode(MWNode &node) { @@ -106,6 +174,7 @@ void ProjectionCalculator::calcNode(MWNode &node) { } */ +/// Explicit template instantiations template class ProjectionCalculator<1, double>; template class ProjectionCalculator<2, double>; template class ProjectionCalculator<3, double>; diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp index 844f952d9..f855155b7 100644 --- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp +++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp @@ -23,6 +23,57 @@ * */ +/** + * @file TimeEvolution_CrossCorrelationCalculator.cpp + * @brief Compute node-local coefficients for the time-evolution cross–correlation + * contribution on a 2D multiwavelet node. + * + * @details + * This calculator assembles, for each 2D node, the coefficients produced by a + * cross–correlation kernel combined with a set of precomputed power integrals + * \( J_m \). Conceptually, for each child-translation \( i \in \{0,\dots,t\_dim-1\} \) + * and for each local polynomial pair \( (p,j) \) (with \(0\le p,j \le\) node order), + * we accumulate + * + * \f[ + * \mathrm{vec\_o}_{i,(p,j)} + * \;+=\; + * \sum_{k=0}^{K(i,p,j)} + * J_{2k+p+j}^{(l_b)} + * \; \cdot \; + * \mathrm{CC}[k](p,j) , + * \f] + * + * where: + * - \(k\) is the summation index (not the node order); it runs while \(2k+p+j\) is a valid index into the \(J\) table, which defines \(K(i,p,j)\), + * - \(l_b\) is the child offset along the 1D index difference (second minus first), + * - \(\mathrm{CC}[k](p,j)\) are entries of the cross–correlation matrices + * (one per \(k\)), and + * - \(J_{m}^{(l_b)}\) are power integrals looked up from + * `J_power_inetgarls[scale+1][l_b][m]` (note: member name “inetgarls” is kept as-is).
+ * + * The result vector `vec_o` of length `t_dim * kp1_d` (with `t_dim = 4` in 2D and + * `kp1_d = (k+1)^2`) is written into the node coefficient buffer. The node is then + * compressed (`mwTransform(Compression)`), marked as having coefficients, and its + * norms are updated. + * + * @note + * - Only the Legendre scaling basis is currently supported here; Interpol is rejected. + * - The member flag `imaginary` selects whether the imaginary or real parts of the + * \(J\)-integrals are used. + * - The code assumes the cross–correlation matrices have been pre-populated in + * `cross_correlation->Matrix[k]`, consistent with the node order. + * - No world-box rescaling is applied in this routine (values are directly assigned). + * + * @warning + * - The routine relies on external consistency: + * * `J_power_inetgarls[scale+1]` must exist for the node’s scale. + * * `J_power_inetgarls[...][l_b]` must cover all accessed indices `2*k+p+j`. + * * `cross_correlation->Matrix[k]` must be dimension-compatible with `(p,j)`. + * - If these invariants are violated, out-of-bounds access may occur upstream; + * the caller is responsible for preparing inputs correctly. + */ + #include "TimeEvolution_CrossCorrelationCalculator.h" #include "trees/FunctionTree.h" #include "trees/MWNode.h" @@ -33,12 +84,19 @@ using Eigen::VectorXd; namespace mrcpp { -/** @param[in] node: ... - * @details This will ... (work in progress) - * - * +/** + * @brief Assemble time-evolution cross–correlation coefficients on a 2D node, + * then compress to MW form. * + * @param[in,out] node The target multiwavelet node (D=2). * + * @details + * 1. Zero current coefficients. + * 2. Dispatch based on scaling basis type: + * - **Legendre**: compute through #applyCcc. + * - **Interpol**: rejected (not implemented for this calculator). + * 3. Compress (`mwTransform(Compression)`), mark coefficients present, and + * update node norms. 
*/ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); @@ -61,27 +119,44 @@ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } -/** @param[in] node: ... - * @details This will ... (work in progress) +/** + * @brief Core assembly routine for Legendre scaling basis. * + * @param[in,out] node The target 2D node. * + * @details + * Let `t_dim = node.getTDim()` (in 2D this is 4) and `kp1_d = (k+1)^2` with + * `k = node.getOrder()`. For each child index `i` we compute its child index + * difference `l_b = l[1] - l[0]` and accumulate * + * \f[ + * \mathrm{vec\_o}[i,(p,j)] + * \;+=\; + * \sum_{k=0}^{K} + * J_{2k+p+j}^{(l_b)} \cdot \mathrm{CC}[k](p,j), + * \f] * + * writing the final `vec_o` into the node coefficient buffer without further + * rescaling in this routine. If `imaginary == true`, the imaginary parts of + * the \(J\)-integrals are used; otherwise the real parts are used. + * + * @pre + * - `this->J_power_inetgarls[node.getScale() + 1]` is allocated and populated. + * - `cross_correlation->Matrix[k]` exists for all accessed `k` and is + * indexable at `(p,j)`, with `0 <= p,j <= node.getOrder()`. */ -// template void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { - // std::cout << node; - // The scale of J power integrals: - // int scale = node.getScale() + 1; //scale = n = (n - 1) + 1 - - int t_dim = node.getTDim(); // t_dim = 4 - int kp1_d = node.getKp1_d(); // kp1_d = (k + 1)^2 + // Node configuration + int t_dim = node.getTDim(); // e.g. 4 in 2D + int kp1_d = node.getKp1_d(); // (k + 1)^2 VectorXd vec_o = VectorXd::Zero(t_dim * kp1_d); const NodeIndex<2> &idx = node.getNodeIndex(); + // Access precomputed J-power integrals for the node scale (+1 by convention). 
auto &J_power_inetgarls = *this->J_power_inetgarls[node.getScale() + 1]; + // Loop over children and local basis pairs (p, j) for (int i = 0; i < t_dim; i++) { NodeIndex<2> l = idx.child(i); int l_b = l[1] - l[0]; @@ -89,25 +164,27 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { int vec_o_segment_index = 0; for (int p = 0; p <= node.getOrder(); p++) for (int j = 0; j <= node.getOrder(); j++) { - // std::min(M, N) could be used for breaking the following loop - // this->cross_correlation->Matrix.size() should be big enough a priori + // Accumulate up to the largest admissible 2k+p+j supported by J_power_inetgarls[l_b] for (int k = 0; 2 * k + p + j < J_power_inetgarls[l_b].size(); k++) { double J; if (this->imaginary) J = J_power_inetgarls[l_b][2 * k + p + j].imag(); else J = J_power_inetgarls[l_b][2 * k + p + j].real(); - vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += J * cross_correlation->Matrix[k](p, j); // by default eigen library reads a transpose matrix from a file + + // Note (carried over from the previous revision): by default the Eigen + // library reads a transposed matrix from a file. + vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += + J * cross_correlation->Matrix[k](p, j); } vec_o_segment_index++; } } + // Write assembled values into the node coefficient buffer (no additional scaling here).
double *coefs = node.getCoefs(); for (int i = 0; i < t_dim * kp1_d; i++) { - // auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0); coefs[i] = vec_o(i); - // std::cout<< "coefs[i] = " << coefs[i] << std::endl; } } diff --git a/src/treebuilders/TreeBuilder.cpp b/src/treebuilders/TreeBuilder.cpp index ba0e5d973..91d25e47d 100644 --- a/src/treebuilders/TreeBuilder.cpp +++ b/src/treebuilders/TreeBuilder.cpp @@ -23,6 +23,23 @@ * */ +/** + * @file TreeBuilder.cpp + * @brief Generic driver that orchestrates adaptive construction, refinement, + * and coefficient (re)calculation of multiwavelet trees. + * + * @details + * A TreeBuilder manages the high-level loop: + * 1) pick a work set of nodes (via a TreeCalculator-provided policy), + * 2) compute coefficients on those nodes (calculator), + * 3) estimate norms to drive thresholding, + * 4) ask the TreeAdaptor where to split next, + * 5) iterate until the work set is empty or a maximum iteration is reached. + * + * The builder never changes numerical kernels; it delegates all math to + * a TreeCalculator and all grid-refinement policy to a TreeAdaptor. + */ + #include "TreeBuilder.h" #include "TreeAdaptor.h" #include "TreeCalculator.h" @@ -35,41 +52,74 @@ namespace mrcpp { -template void TreeBuilder::build(MWTree &tree, TreeCalculator &calculator, TreeAdaptor &adaptor, int maxIter) const { +/** + * @brief Adaptive build of a tree using a calculator/adaptor pair. + * + * @param[in,out] tree Target tree to be populated/refined. + * @param[in,out] calculator Computes node coefficients & provides initial work set. + * @param[in,out] adaptor Decides which nodes to split next (refinement policy). + * @param[in] maxIter Maximum refinement iterations; negative => unbounded. + * + * @details + * Loop invariant: + * - `workVec` holds the nodes to be (re)computed at the current iteration. 
+ * - After computing, the builder updates an approximate squared norm + * (scaling + wavelet) to drive relative thresholding elsewhere. + * - The adaptor produces the next `workVec` by splitting according to + * its policy. If `maxIter >= 0` and `iter >= maxIter`, splitting is + * disabled and the loop terminates after coefficients are computed. + * + * @note + * The approximate norm written into `tree.squareNorm` is for thresholding and + * progress reporting only. A precise norm is expected to be recomputed later + * (e.g., after a bottom-up transform). + */ +template +void TreeBuilder::build(MWTree &tree, + TreeCalculator &calculator, + TreeAdaptor &adaptor, + int maxIter) const { Timer calc_t(false), split_t(false), norm_t(false); println(10, " == Building tree"); MWNodeVector *newVec = nullptr; MWNodeVector *workVec = calculator.getInitialWorkVector(tree); - double sNorm = 0.0; - double wNorm = 0.0; + double sNorm = 0.0; // accumulated scaling contribution (approx.) + double wNorm = 0.0; // accumulated wavelet contribution (approx.) 
int iter = 0; while (workVec->size() > 0) { printout(10, " -- #" << std::setw(3) << iter << ": Calculated "); printout(10, std::setw(6) << workVec->size() << " nodes "); + + // 1) Compute coefficients on current work set calc_t.resume(); calculator.calcNodeVector(*workVec); calc_t.stop(); + // 2) Update approximate norms used for thresholding/progress only norm_t.resume(); if (iter == 0) sNorm = calcScalingNorm(*workVec); wNorm += calcWaveletNorm(*workVec); - if (sNorm < 0.0 or wNorm < 0.0) { + if (sNorm < 0.0 || wNorm < 0.0) { + // Propagate "unknown" / invalid norm tree.squareNorm = -1.0; } else { - // approximate norm for thresholding only - // exact norm is recomputed after mwTransform + // Approximate norm (exact one will be recomputed later) tree.squareNorm = sNorm + wNorm; } println(10, std::setw(24) << tree.squareNorm); norm_t.stop(); + // 3) Decide and perform refinement for the next iteration split_t.resume(); newVec = new MWNodeVector; - if (iter >= maxIter and maxIter >= 0) workVec->clear(); + if (iter >= maxIter && maxIter >= 0) { + // Respect iteration cap: stop splitting + workVec->clear(); + } adaptor.splitNodeVector(*newVec, *workVec); split_t.stop(); @@ -77,22 +127,36 @@ template void TreeBuilder::build(MWTree &tree, T workVec = newVec; iter++; } + + // Invalidate cached end-node table because the grid changed tree.resetEndNodeTable(); delete workVec; print::separator(10, ' '); - print::time(10, "Time calc", calc_t); - print::time(10, "Time norm", norm_t); + print::time(10, "Time calc", calc_t); + print::time(10, "Time norm", norm_t); print::time(10, "Time split", split_t); } -template void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculator) const { +/** + * @brief Remove all coefficients from the tree (fixed grid), using the calculator + * to "clear" node data. + * + * @param[in,out] tree Target MW tree. + * @param[in,out] calculator Calculator invoked to clear coefficients for nodes. 
+ * + * @details + * - The grid topology is preserved. + * - `tree.squareNorm` is reset. + */ +template +void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Clearing tree"); Timer clean_t; MWNodeVector nodeVec; tree_utils::make_node_table(tree, nodeVec); - calculator.calcNodeVector(nodeVec); // clear all coefficients + calculator.calcNodeVector(nodeVec); // calculator is responsible for zeroing/clearing clean_t.stop(); tree.clearSquareNorm(); @@ -103,19 +167,41 @@ template void TreeBuilder::clear(MWTree &tree, T print::separator(10, ' '); } -template int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const { +/** + * @brief Split (refine) the current leaf nodes according to an adaptor policy. + * + * @param[in,out] tree Target tree to refine. + * @param[in,out] adaptor Adaptor that decides which nodes to split. + * @param[in] passCoefs If true, transfer parent coefficients to children + * (preserving function representation). + * + * @return Number of newly created child nodes (i.e., number of splits * children). + * + * @details + * - The end-node table is reset after refinement. + * - If `passCoefs == true` and a refined node remains a branch node, the parent + * distributes its coefficients to the children (e.g., via projection / exact transfer). 
+ */ +template +int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const { println(10, " == Refining tree"); Timer split_t; - MWNodeVector newVec; - MWNodeVector *workVec = tree.copyEndNodeTable(); + MWNodeVector newVec; // newly created nodes (unused beyond counting) + MWNodeVector *workVec = tree.copyEndNodeTable(); // current leaves + adaptor.splitNodeVector(newVec, *workVec); + if (passCoefs) { for (int i = 0; i < workVec->size(); i++) { MWNode &node = *(*workVec)[i]; - if (node.isBranchNode()) { node.giveChildrenCoefs(true); } + if (node.isBranchNode()) { + // Transfer coefficients from parent to children + node.giveChildrenCoefs(true); + } } } + delete workVec; tree.resetEndNodeTable(); split_t.stop(); @@ -130,7 +216,19 @@ template int TreeBuilder::split(MWTree &tree, Tr return newVec.size(); } -template void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculator) const { +/** + * @brief Recalculate coefficients on the calculator-provided work set + * without refinement. + * + * @param[in,out] tree Target tree. + * @param[in,out] calculator Calculator used to compute node coefficients. + * + * @details + * Computes on the initial work vector (as defined by the calculator) and then + * recomputes the exact squared norm of the tree. + */ +template +void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Calculating tree"); Timer calc_t; @@ -147,7 +245,14 @@ template void TreeBuilder::calc(MWTree &tree, Tr print::time(10, "Time calc", calc_t); } -template double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { +/** + * @brief Sum of scaling contributions (approximate) across a vector of nodes. + * + * @param[in] vec Node vector from the current iteration. + * @return Approximate sum of scaling norms for nodes with depth >= 0. 
+ */ +template +double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { double sNorm = 0.0; for (int i = 0; i < vec.size(); i++) { const MWNode &node = *vec[i]; @@ -156,7 +261,14 @@ template double TreeBuilder::calcScalingNorm(const MWN return sNorm; } -template double TreeBuilder::calcWaveletNorm(const MWNodeVector &vec) const { +/** + * @brief Sum of wavelet contributions (approximate) across a vector of nodes. + * + * @param[in] vec Node vector from the current iteration. + * @return Approximate sum of wavelet norms for nodes with depth >= 0. + */ +template +double TreeBuilder::calcWaveletNorm(const MWNodeVector &vec) const { double wNorm = 0.0; for (int i = 0; i < vec.size(); i++) { const MWNode &node = *vec[i]; diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp index 4ee28cff6..bb8feae7a 100644 --- a/src/treebuilders/add.cpp +++ b/src/treebuilders/add.cpp @@ -23,6 +23,46 @@ * */ +/** + * @file add.cpp + * @brief Adaptive summation of multiwavelet (MW) function trees. + * + * @details + * This module provides a family of `add` routines that assemble the linear + * combination of one or more MW functions into an output MW function on an + * adaptively refined grid. + * + * The summation is performed by the generic @ref TreeBuilder orchestrating: + * - an @ref AdditionCalculator that evaluates the local sum of input trees + * with their numerical coefficients (and optional complex conjugation), + * - a @ref WaveletAdaptor that refines the output grid where needed to meet + * the requested precision. + * + * The core algorithm (all overloads): + * - Compute MW coefficients of the sum on the **current** output grid. + * - Refine the grid according to the precision target. + * - Repeat until convergence or until a maximum number of refinement + * iterations is reached. + * - Finally transform the output to the MW domain and compute its squared norm. 
+ * + * Precision and iteration controls: + * - `prec < 0` or `maxIter = 0` disables refinement (single pass on + * the existing output grid). + * - `maxIter < 0` removes the iteration limit and refines until the + * precision criterion is satisfied. + * - `absPrec = true` interprets `prec` as an absolute tolerance, otherwise + * it is treated as a relative criterion. + * + * Requirements: + * - All input trees must share the same @ref MultiResolutionAnalysis as the + * output tree, otherwise the routine aborts. + * + * Notes: + * - The routine starts from whatever grid is already present in `out`. This + * grid is expected to be empty in terms of coefficients. + * - Generated nodes present in input trees are removed at the end (cleanup). + */ + #include #include @@ -37,60 +77,78 @@ namespace mrcpp { -/** @brief Addition of two MW function representations, adaptive grid - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] a: Numerical coefficient of function a - * @param[in] inp_a: Input function a - * @param[in] b: Numerical coefficient of function b - * @param[in] inp_b: Input function b - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision - * - * @details The output function will be computed as the sum of the two input - * functions (including the numerical coefficient), using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). 
+/** + * @brief Sum two MW functions (with scalar weights) into an output tree using adaptive refinement. * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (e.g., double or ComplexDouble). + * + * @param[in] prec Target build precision for the output function. + * @param[out] out Output function tree to build (its current grid is used as a starting point). + * @param[in] a Numerical coefficient multiplying `inp_a`. + * @param[in] inp_a First input function tree. + * @param[in] b Numerical coefficient multiplying `inp_b`. + * @param[in] inp_b Second input function tree. + * @param[in] maxIter Maximum number of refinement iterations. + * Use a negative value to allow unbounded refinement. + * Use zero to disable refinement (single-pass build). + * @param[in] absPrec If true, interpret `prec` as an absolute tolerance; + * otherwise interpret it as relative. + * @param[in] conjugate When `T` is complex, conjugate all input trees before summation. + * + * @details + * Builds `out ≈ a * inp_a (+) b * inp_b` to the requested precision on an adaptively + * refined grid. After the build, `out` is transformed to the MW domain and its squared + * norm is computed. The input trees are not modified except that any generated nodes + * created temporarily during the build are cleaned up. 
*/ -template void add(double prec, FunctionTree &out, T a, FunctionTree &inp_a, T b, FunctionTree &inp_b, int maxIter, bool absPrec, bool conjugate) { +template +void add(double prec, + FunctionTree &out, + T a, FunctionTree &inp_a, + T b, FunctionTree &inp_b, + int maxIter, + bool absPrec, + bool conjugate) { FunctionTreeVector tmp_vec; tmp_vec.push_back(std::make_tuple(a, &inp_a)); tmp_vec.push_back(std::make_tuple(b, &inp_b)); add(prec, out, tmp_vec, maxIter, absPrec, conjugate); } -/** @brief Addition of several MW function representations, adaptive grid +/** + * @brief Sum a vector of MW functions (with scalar weights) into an output tree using adaptive refinement. * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Vector of input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (e.g., double or ComplexDouble). * - * @details The output function will be computed as the sum of all input - * functions in the vector (including their numerical coefficients), using - * the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @param[in] prec Target build precision for the output function. + * @param[out] out Output function tree to build (its current grid is used as a starting point). + * @param[in] inp Vector of pairs (weight, pointer-to-tree) to be summed. + * @param[in] maxIter Maximum number of refinement iterations. 
+ * Use a negative value to allow unbounded refinement. + * Use zero (or pass a negative `prec`) to disable refinement (single-pass build). + * @param[in] absPrec If true, interpret `prec` as an absolute tolerance; + * otherwise interpret it as relative. + * @param[in] conjugate When `T` is complex, conjugate all input trees before summation. * + * @details + * Builds `out ≈ Σ_i w_i * f_i` to the requested precision on an adaptively refined grid. + * The routine: + * - verifies that all inputs share the same MRA as `out`, + * - constructs a @ref WaveletAdaptor with the precision policy, + * - uses an @ref AdditionCalculator to evaluate the local sums, + * - runs @ref TreeBuilder to refine and assemble, + * - finishes with MW transform and squared norm computation, + * - and finally deletes any generated nodes from inputs. */ -template void add(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool conjugate) { +template +void add(double prec, + FunctionTree &out, + FunctionTreeVector &inp, + int maxIter, + bool absPrec, + bool conjugate) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -119,12 +177,36 @@ template void add(double prec, FunctionTree &out, Func print::separator(10, ' '); } -template void add(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate) { +/** + * @brief Convenience overload: sum a list of unweighted trees (weights set to 1). + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (e.g., double or ComplexDouble). + * + * @param[in] prec Target build precision for the output function. + * @param[out] out Output function tree. + * @param[in] inp Vector of pointers to input trees (all weights taken as 1). + * @param[in] maxIter Maximum number of refinement iterations (see other overload). + * @param[in] absPrec Absolute-vs-relative precision flag.
+ * @param[in] conjugate Conjugate complex inputs before summation. + * + * @details + * Internally wraps the list into a @ref FunctionTreeVector with unit weights + * and forwards to the vector-based overload. + */ +template +void add(double prec, + FunctionTree &out, + std::vector *> &inp, + int maxIter, + bool absPrec, + bool conjugate) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); add(prec, out, inp_vec, maxIter, absPrec, conjugate); } +/* ------- Explicit template instantiations (double) ------- */ template void add<1, double>(double prec, FunctionTree<1, double> &out, double a, FunctionTree<1, double> &tree_a, double b, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool conjugate); template void @@ -140,6 +222,7 @@ template void add<1, double>(double prec, FunctionTree<1, double> &out, std::vec template void add<2, double>(double prec, FunctionTree<2, double> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); template void add<3, double>(double prec, FunctionTree<3, double> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); +/* ------- Explicit template instantiations (ComplexDouble) ------- */ template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ComplexDouble a, @@ -176,4 +259,4 @@ template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> template void add<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); template void add<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp index bfcbf164e..9e6e91777 100644 --- a/src/treebuilders/apply.cpp +++ b/src/treebuilders/apply.cpp @@ -23,6 +23,38 @@ * */ +/** + * @file apply.cpp 
+ * @brief Application pipelines for MW operators (convolution and derivative) to MW function trees. + * + * @details + * This module provides high-level procedures to **apply multiresolution operators** + * to MW representations of functions. Two broad operator families are supported: + * + * - **Convolution-like integral operators** (e.g., Poisson, Helmholtz, Heat, identity), + * implemented as separable kernels in the scaling basis via @ref mrcpp::ConvolutionOperator. + * Application is performed on an **adaptively refined** output grid to meet a target precision. + * + * - **Local or band-limited derivative operators** (e.g., ABGV, PH, BS) implemented via + * @ref mrcpp::DerivativeOperator. Application occurs on a **fixed grid** derived from the + * input and widened according to the operator bandwidth in the selected direction. + * + * The typical adaptive application pipeline for convolution operators is: + * - Pre-step: estimate operator bandwidths at each scale and set up an adaptive refinement policy. + * - Build-step: evaluate local operator actions on the current grid, refine where needed until + * the precision target is reached (or a maximum number of iterations is met). + * - Post-step: assemble and transform the output to the MW domain, compute norms, and clean any + * transient data generated on the inputs. + * + * Additional features: + * - **Near-/Far-field splits** on periodic domains by including/excluding contributions + * from the unit cell. + * - **Precision scaling** using auxiliary trees that modulate local tolerances based on + * maximum norms. + * - **Multi-component support** through a 4×4 metric that mixes input/output components + * for relativistic-like workflows. 
+ */ + #include "apply.h" #include "ConvolutionCalculator.h" #include "CopyAdaptor.h" @@ -42,30 +74,65 @@ namespace mrcpp { -template void apply_on_unit_cell(bool inside, double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec); - -/** @brief Application of MW integral convolution operator - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false - * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). - * +/** + * @brief Internal helper to apply a convolution operator while restricting contributions + * to inside or outside of the unit cell on periodic domains. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type (e.g., double or ComplexDouble). + * + * @param[in] inside If true, include only contributions from inside the unit cell; + * if false, include only contributions from outside the unit cell. + * @param[in] prec Target precision that drives adaptive refinement. + * @param[out] out Output function to be built. Should contain empty root nodes on entry. + * @param[in] oper Convolution operator to apply. + * @param[in] inp Input function. + * @param[in] maxIter Maximum number of refinement iterations. Negative means unbounded. 
+ * @param[in] absPrec If true, treat `prec` as an absolute tolerance; otherwise relative. + * + * @details + * Follows the standard adaptive pipeline for convolution operators, with the difference that + * the calculator is instructed to selectively include unit-cell contributions according to + * the `inside` flag. */ -template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { +template +void apply_on_unit_cell(bool inside, + double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + int maxIter, + bool absPrec); + +/** + * @brief Apply a convolution-like integral operator on a single-component function (adaptive). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] prec Target precision driving adaptive refinement. + * @param[out] out Output function tree. Must belong to the same MRA as `inp`. + * @param[in] oper Convolution operator to apply. + * @param[in] inp Input function tree. + * @param[in] maxIter Maximum refinement iterations (negative for unbounded, zero disables refinement). + * @param[in] absPrec If true, treat `prec` as absolute; otherwise relative. + * + * @details + * Pipeline: + * - Pre: compute operator bandwidths and create a @ref WaveletAdaptor with the given precision policy. + * - Build: @ref TreeBuilder iteratively refines and evaluates the operator action. + * - Post: transform to MW domain, compute squared norms, and clean generated structures. + * + * @note The output tree should initially contain only empty root nodes. + * @throws Aborts if `out` and `inp` belong to different MRAs. 
+ */ +template +void apply(double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + int maxIter, + bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); Timer pre_t; @@ -74,12 +141,13 @@ template void apply(double prec, FunctionTree &out, Co WaveletAdaptor adaptor(prec, maxScale, absPrec); ConvolutionCalculator calculator(prec, oper, inp); pre_t.stop(); + TreeBuilder builder; builder.build(out, calculator, adaptor, maxIter); Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); // add coarse scale contributions + out.mwTransform(TopDown, false); out.mwTransform(BottomUp); out.calcSquareNorm(); out.deleteGeneratedParents(); @@ -92,31 +160,32 @@ template void apply(double prec, FunctionTree &out, Co print::separator(10, ' '); } -/** @brief Application of MW integral convolution operator on Four component - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] metric: 4x4 array with coefficients that relates the in and out components - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false - * - * @details The output function will be computed using the general algorithm: - * - For each input component apply the operator - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - After application multiply by metric coefficient, and put in relevant output component - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). 
+/** + * @brief Apply a convolution operator to a 4-component function using a mixing metric. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. + * @param[out] out Output multi-component function (structure copied from `inp`). + * @param[in] oper Convolution operator to apply. + * @param[in] inp Input multi-component function. + * @param[in] metric 4×4 coefficient array mapping input to output components. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. * + * @details + * For each processed pair (`icomp`, `ocomp`), the operator is applied to input component `icomp` + * with output component `ocomp` as the build target (allocated on demand). The result is then rescaled by + * `metric[icomp][ocomp]` (real part only for real input) when that entry differs from one by more than MachinePrec. */ -template void apply(double prec, CompFunction &out, ConvolutionOperator &oper, const CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { +template +void apply(double prec, + CompFunction &out, + ConvolutionOperator &oper, + const CompFunction &inp, + const ComplexDouble (*metric)[4], + int maxIter, + bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { @@ -125,40 +194,43 @@ template void apply(double prec, CompFunction &out, ConvolutionOperat if (inp.isreal()) { if (out.CompD[ocomp] == nullptr) out.alloc_comp(ocomp); apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); + } } else { if (out.CompC[ocomp] == nullptr) out.alloc_comp(ocomp); apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if
(abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } } } -/** @brief Application of MW integral convolution operator +/** + * @brief Apply a convolution operator while selectively including or excluding unit-cell contributions. * - * @param[in] inside: Use points inside (true) or outside (false) the unitcell - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false - * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * + * @param[in] inside Select inside (true) or outside (false) contributions in the unit cell. + * @param[in] prec Target precision. + * @param[out] out Output tree. + * @param[in] oper Convolution operator. + * @param[in] inp Input tree. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. 
*/ -template void apply_on_unit_cell(bool inside, double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { +template +void apply_on_unit_cell(bool inside, + double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + int maxIter, + bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); Timer pre_t; @@ -174,7 +246,7 @@ template void apply_on_unit_cell(bool inside, double prec, F Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); // add coarse scale contributions + out.mwTransform(TopDown, false); out.mwTransform(BottomUp); out.calcSquareNorm(); out.deleteGeneratedParents(); @@ -187,37 +259,37 @@ template void apply_on_unit_cell(bool inside, double prec, F print::separator(10, ' '); } -/** @brief Application of MW integral convolution operator - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] precTrees: Precision trees - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false +/** + * @brief Apply a convolution operator with **locally scaled precision** from auxiliary trees. * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on _scaled_ `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * - * The precision will be scaled locally by the maxNorms of the precTrees input vector. 
- * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @param[in] prec Base precision target. + * @param[out] out Output function tree. + * @param[in] oper Convolution operator. + * @param[in] inp Input function tree. + * @param[in] precTrees Vector of trees whose max norms modulate the local precision. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. * + * @details + * The local precision at node index `idx` is scaled by `1 / max_norm(idx)`, where `max_norm` + * is taken across the supplied `precTrees`. This provides an error budget that adapts to + * local magnitudes of reference fields. */ -template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, FunctionTreeVector &precTrees, int maxIter, bool absPrec) { +template +void apply(double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + FunctionTreeVector &precTrees, + int maxIter, + bool absPrec) { Timer pre_t; oper.calcBandWidths(prec); int maxScale = out.getMRA().getMaxScale(); - // The local precision will be scaled by the maxNorm of the - // corresponding node(s) in the precTrees vector. for (int i = 0; i < precTrees.size(); i++) get_func(precTrees, i).makeMaxSquareNorms(); auto precFunc = [&precTrees](const NodeIndex &idx) -> double { auto maxNorm = (precTrees.size()) ? 0.0 : 1.0; @@ -239,7 +311,7 @@ template void apply(double prec, FunctionTree &out, Co Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); // add coarse scale contributions + out.mwTransform(TopDown, false); out.mwTransform(BottomUp); out.calcSquareNorm(); inp.deleteGenerated(); @@ -250,8 +322,30 @@ template void apply(double prec, FunctionTree &out, Co print::separator(10, ' '); } +/** + * @brief Multi-component variant of the precision-scaled convolution application. 
+ * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] prec Base precision target. + * @param[out] out Output multi-component function (structure copied from `inp`). + * @param[in] oper Convolution operator. + * @param[in] inp Input multi-component function. + * @param[in] precTrees Array (per input component) of precision trees used for scaling. + * @param[in] metric 4×4 mixing matrix. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. + */ template -void apply(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, FunctionTreeVector *precTrees, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { +void apply(double prec, + CompFunction &out, + ConvolutionOperator &oper, + CompFunction &inp, + FunctionTreeVector *precTrees, + const ComplexDouble (*metric)[4], + int maxIter, + bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { @@ -259,43 +353,64 @@ void apply(double prec, CompFunction &out, ConvolutionOperator &oper, Comp if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], precTrees[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompD[ocomp]->rescale(metric[icomp][ocomp]); + } } else { apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], precTrees[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } } } -/** @brief Application of MW integral convolution operator on a periodic cell, - excluding contributions inside the unit cell. 
- * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false - * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound +/** + * @brief Apply a convolution operator while excluding inside-cell contributions (far-field). * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * + * @param[in] prec Target precision. + * @param[out] out Output function. + * @param[in] oper Convolution operator. + * @param[in] inp Input function. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. */ -template void apply_far_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { +template +void apply_far_field(double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + int maxIter, + bool absPrec) { apply_on_unit_cell(false, prec, out, oper, inp, maxIter, absPrec); } -template void apply_far_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { +/** + * @brief Multi-component far-field application with mixing metric. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. 
+ * @param[out] out Output multi-component function. + * @param[in] oper Convolution operator. + * @param[in] inp Input multi-component function. + * @param[in] metric 4×4 mixing matrix. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. + */ +template +void apply_far_field(double prec, + CompFunction &out, + ConvolutionOperator &oper, + CompFunction &inp, + const ComplexDouble (*metric)[4], + int maxIter, + bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < 4; icomp++) { @@ -304,10 +419,14 @@ template void apply_far_field(double prec, CompFunction &out, Convolu if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply_on_unit_cell(false, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompD[ocomp]->rescale(metric[icomp][ocomp]); + } } else { apply_on_unit_cell(false, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } @@ -315,33 +434,50 @@ template void apply_far_field(double prec, CompFunction &out, Convolu } } -/** @brief Application of MW integral convolution operator on a periodic cell, - excluding contributions outside the unit cell. 
- * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false +/** + * @brief Apply a convolution operator while excluding outside-cell contributions (near-field). * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * + * @param[in] prec Target precision. + * @param[out] out Output function. + * @param[in] oper Convolution operator. + * @param[in] inp Input function. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. */ -template void apply_near_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { +template +void apply_near_field(double prec, + FunctionTree &out, + ConvolutionOperator &oper, + FunctionTree &inp, + int maxIter, + bool absPrec) { apply_on_unit_cell(true, prec, out, oper, inp, maxIter, absPrec); } -template void apply_near_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { +/** + * @brief Multi-component near-field application with mixing metric. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. 
+ * @param[out] out Output multi-component function. + * @param[in] oper Convolution operator. + * @param[in] inp Input multi-component function. + * @param[in] metric 4×4 mixing matrix. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute-vs-relative precision flag. + */ +template +void apply_near_field(double prec, + CompFunction &out, + ConvolutionOperator &oper, + CompFunction &inp, + const ComplexDouble (*metric)[4], + int maxIter, + bool absPrec) { for (int icomp = 0; icomp < 4; icomp++) { if (inp.Comp[icomp] != nullptr) { @@ -349,10 +485,14 @@ template void apply_near_field(double prec, CompFunction &out, Convol if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply_on_unit_cell(true, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompD[ocomp]->rescale(metric[icomp][ocomp]); + } } else { apply_on_unit_cell(true, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } @@ -360,41 +500,43 @@ template void apply_near_field(double prec, CompFunction &out, Convol } } -/** @brief Application of MW derivative operator - * - * @param[out] out: Output function to be built - * @param[in] oper: Derivative operator to apply - * @param[in] inp: Input function - * @param[in] dir: Direction of derivative +/** + * @brief Apply a **derivative operator** on a fixed grid in the given direction. * - * @details The output function will be computed on a FIXED grid that is - * predetermined by the type of derivative operator. 
For a strictly local - * operator (ABGV_00), the grid is an exact copy of the input function. For - * operators that involve also neighboring nodes (ABGV_55, PH, BS) the base grid - * will be WIDENED by one node in the direction of application (on each side). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * - * @note The output function should contain only empty root nodes at entry. + * @param[out] out Output function. Should contain only empty root nodes on entry. + * @param[in] oper Derivative operator (defines bandwidth and assembly policy). + * @param[in] inp Input function. + * @param[in] dir Direction of application (0 for x, 1 for y, 2 for z). * + * @details + * The output grid is constructed by copying the input grid and **widening** it by the + * operator bandwidth along the selected direction, if needed. Application then proceeds + * on this fixed grid without additional refinement. */ -template void apply(FunctionTree &out, DerivativeOperator &oper, FunctionTree &inp, int dir) { +template +void apply(FunctionTree &out, + DerivativeOperator &oper, + FunctionTree &inp, + int dir) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); TreeBuilder builder; int maxScale = out.getMRA().getMaxScale(); - int bw[D]; // Operator bandwidth in [x,y,z] + int bw[D]; for (int d = 0; d < D; d++) bw[d] = 0; - // Copy input tree plus bandwidth in operator direction Timer pre_t; - oper.calcBandWidths(1.0); // Fixed 0 or 1 for derivatives + oper.calcBandWidths(1.0); bw[dir] = oper.getMaxBandWidth(); CopyAdaptor pre_adaptor(inp, maxScale, bw); DefaultCalculator pre_calculator; builder.build(out, pre_calculator, pre_adaptor, -1); pre_t.stop(); - // Apply operator on fixed expanded grid - SplitAdaptor apply_adaptor(maxScale, false); // Splits no nodes + SplitAdaptor apply_adaptor(maxScale, false); DerivativeCalculator apply_calculator(dir, oper, inp); builder.build(out, apply_calculator, apply_adaptor, 0); if (out.isPeriodic()) 
out.rescale(std::pow(2.0, -oper.getOperatorRoot())); @@ -411,20 +553,38 @@ template void apply(FunctionTree &out, DerivativeOpera print::separator(10, ' '); } -template void apply(CompFunction &out, DerivativeOperator &oper, CompFunction &inp, int dir, const ComplexDouble (*metric)[4]) { - // TODO: sums and not only each components independently, when concrete examples with non diagonal metric are tested - - out = inp.paramCopy(true); // note that this will copy the factor of inp (inp.func_ptr->data.c1) +/** + * @brief Multi-component derivative application with mixing metric. + * + * @tparam D Spatial dimension. + * + * @param[out] out Output multi-component function. + * @param[in] oper Derivative operator. + * @param[in] inp Input multi-component function. + * @param[in] dir Direction of derivative. + * @param[in] metric 4×4 mixing matrix. + * + * @details + * Applies the derivative in `dir` to input component `icomp` for each `ocomp` whose metric entry is non-negligible, + * and scales the result by that entry (contributions are not yet summed over `icomp`). Handles real-to-complex promotion if necessary. + */ +template +void apply(CompFunction &out, + DerivativeOperator &oper, + CompFunction &inp, + int dir, + const ComplexDouble (*metric)[4]) { + out = inp.paramCopy(true); for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { for (int ocomp = 0; ocomp < 4; ocomp++) { if (std::norm(metric[icomp][ocomp]) > MachinePrec) { - if (inp.isreal() and (std::imag(metric[icomp][ocomp]) < MachinePrec or inp.Ncomp() == 1)) { + if (inp.isreal() && (std::imag(metric[icomp][ocomp]) < MachinePrec || inp.Ncomp() == 1)) { apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir); if (std::norm(metric[icomp][ocomp] - 1.0) > MachinePrec) { if (std::imag(metric[icomp][ocomp]) < MachinePrec) out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp])); else - out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; // To consider: multiply c1 in rescale?
+ out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; } out.func_ptr->isreal = 1; } else { @@ -436,26 +596,32 @@ template void apply(CompFunction &out, DerivativeOperator &oper, C } else { apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir); } - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + out.CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } } } -/** @brief Calculation of gradient vector of a function +/** + * @brief Compute the gradient vector of a scalar function using a derivative operator. * - * @param[in] oper: Derivative operator to apply - * @param[in] inp: Input function - * @returns FunctionTreeVector containing the gradient + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * - * @details The derivative operator is applied in each Cartesian direction to - * the input function and appended to the output vector. - * - * @note The length of the output vector will be the template dimension D. + * @param[in] oper Derivative operator to apply in each Cartesian direction. + * @param[in] inp Input scalar function. + * @return FunctionTreeVector containing D components of the gradient. * + * @details + * Applies the operator in each direction `d = 0..D-1` and returns the resulting + * component trees with unit weights. */ -template FunctionTreeVector gradient(DerivativeOperator &oper, FunctionTree &inp) { +template +FunctionTreeVector gradient(DerivativeOperator &oper, + FunctionTree &inp) { FunctionTreeVector out; for (int d = 0; d < D; d++) { auto *grad_d = new FunctionTree(inp.getMRA()); @@ -465,7 +631,21 @@ template FunctionTreeVector gradient(DerivativeOperato return out; } -std::vector *> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, const ComplexDouble (*metric)[4]) { +/** + * @brief Compute the gradient for 3D multi-component inputs with mixing metric. + * + * @param[in] oper Derivative operator. 
+ * @param[in] inp Input multi-component function. + * @param[in] metric 4×4 mixing matrix. + * @return Vector of component functions for each spatial direction. + * + * @details + * For each spatial direction, applies the derivative operator to each component and + * mixes according to `metric`. Handles both real and complex cases. + */ +std::vector *> gradient(DerivativeOperator<3> &oper, + CompFunction<3> &inp, + const ComplexDouble (*metric)[4]) { std::vector *> out; for (int d = 0; d < 3; d++) { @@ -479,13 +659,17 @@ std::vector *> gradient(DerivativeOperator<3> &oper, CompFunctio grad_d->func_ptr->iscomplex = 0; grad_d->CompD[ocomp] = new FunctionTree<3, double>(inp.CompD[0]->getMRA()); apply(*(grad_d->CompD[ocomp]), oper, *inp.CompD[icomp], d); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real()); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real()); + } } else { grad_d->func_ptr->isreal = 0; grad_d->func_ptr->iscomplex = 1; grad_d->CompC[ocomp] = new FunctionTree<3, ComplexDouble>(inp.CompC[0]->getMRA()); apply(*(grad_d->CompC[ocomp]), oper, *inp.CompC[icomp], d); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]); } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { + grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]); + } } } } @@ -495,23 +679,27 @@ std::vector *> gradient(DerivativeOperator<3> &oper, CompFunctio return out; } -/** @brief Calculation of divergence of a function vector +/** + * @brief Compute the divergence of a vector field using a derivative operator. * - * @param[out] out: Output function - * @param[in] oper: Derivative operator to apply - * @param[in] inp: Input function vector + * @tparam D Spatial dimension. + * @tparam T Coefficient type. 
* - * @details The derivative operator is applied in each Cartesian direction to - * the corresponding components of the input vector and added up to the final - * output. The grid of the output is fixed as the union of the component - * grids (including any derivative widening, see derivative apply). + * @param[out] out Output scalar function. + * @param[in] oper Derivative operator applied to each component. + * @param[in] inp Vector of D function components with coefficients. * - * @note - * - The length of the input vector must be the same as the template dimension D. - * - The output function should contain only empty root nodes at entry. + * @details + * Applies the derivative to each component along its matching direction and + * sums the results on the **union grid** of the widened component grids. * + * @note The length of `inp` must equal `D`. The output should contain only + * empty root nodes on entry. */ -template void divergence(FunctionTree &out, DerivativeOperator &oper, FunctionTreeVector &inp) { +template +void divergence(FunctionTree &out, + DerivativeOperator &oper, + FunctionTreeVector &inp) { if (inp.size() != D) MSG_ABORT("Dimension mismatch"); for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -525,22 +713,40 @@ template void divergence(FunctionTree &out, Derivative tmp_vec.push_back(std::make_tuple(coef_d, out_d)); } build_grid(out, tmp_vec); - add(-1.0, out, tmp_vec, 0); // Addition on union grid + add(-1.0, out, tmp_vec, 0); clear(tmp_vec, true); } -template void divergence(CompFunction &out, DerivativeOperator &oper, FunctionTreeVector *inp, const ComplexDouble (*metric)[4]) { +/** + * @brief Divergence for multi-component inputs with metric (not implemented). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. 
+ */ +template +void divergence(CompFunction &out, + DerivativeOperator &oper, + FunctionTreeVector *inp, + const ComplexDouble (*metric)[4]) { MSG_ABORT("not implemented"); } -template void divergence(FunctionTree &out, DerivativeOperator &oper, std::vector *> &inp) { +/** + * @brief Convenience overload: divergence from a list of unweighted component trees. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + */ +template +void divergence(FunctionTree &out, + DerivativeOperator &oper, + std::vector *> &inp) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); divergence(out, oper, inp_vec); } -template void divergence(CompFunction &out, DerivativeOperator &oper, std::vector *> *inp, const ComplexDouble (*metric)[4]) { - MSG_ABORT("not implemented"); -} + +/* ---------- Explicit template instantiations ---------- */ template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); @@ -548,27 +754,31 @@ template void apply<3, double>(double prec, FunctionTree<3, double> &out, Convol template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, const CompFunction<1> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, const CompFunction<2> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, const CompFunction<3> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); -template void -apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, 
double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec); -template void -apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec); -template void -apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec); + +template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec); +template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec); +template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec); + template void apply_far_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply_far_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); template void apply_far_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec); + template void apply_near_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply_near_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); template void 
apply_near_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec); + template void apply<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTree<1, double> &inp, int dir); template void apply<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, FunctionTree<2, double> &inp, int dir); template void apply<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTree<3, double> &inp, int dir); + template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, double> &inp); template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, double> &inp); template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, double> &inp); + template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, std::vector *> &inp); template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, std::vector *> &inp); template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, std::vector *> &inp); + template FunctionTreeVector<1, double> gradient<1>(DerivativeOperator<1> &oper, FunctionTree<1, double> &inp); template FunctionTreeVector<2, double> gradient<2>(DerivativeOperator<2> &oper, FunctionTree<2, double> &inp); template FunctionTreeVector<3, double> gradient<3>(DerivativeOperator<3> &oper, FunctionTree<3, double> &inp); @@ -598,22 +808,27 @@ template void apply<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec); + template void apply_far_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec); template void 
apply_far_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_far_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec); + template void apply_near_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_near_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_near_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec); + template void apply<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int dir); template void apply<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int dir); template void apply<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int dir); + template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, ComplexDouble> &inp); template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, ComplexDouble> &inp); template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, ComplexDouble> &inp); + template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, std::vector *> 
&inp); template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, std::vector *> &inp); template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, std::vector *> &inp); template void apply(CompFunction<3> &out, DerivativeOperator<3> &oper, CompFunction<3> &inp, int dir = -1, const ComplexDouble (*metric)[4]); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/complex_apply.cpp b/src/treebuilders/complex_apply.cpp index 5cf0e3b08..026152347 100644 --- a/src/treebuilders/complex_apply.cpp +++ b/src/treebuilders/complex_apply.cpp @@ -23,6 +23,52 @@ * */ +/** + * @file complex_apply.cpp + * @brief Complex-valued application of multiresolution convolution operators. + * + * @details + * This module provides a **complex** front-end to the real-valued adaptive + * application pipeline used throughout MRCPP. A complex operator + * \f$ \mathcal{O} = \mathcal{O}_\mathrm{R} + i\,\mathcal{O}_\mathrm{I} \f$ + * acting on a complex function + * \f$ f = f_\mathrm{R} + i\,f_\mathrm{I} \f$ + * is evaluated via the standard decomposition: + * \f[ + * \mathcal{O} f + * = (\mathcal{O}_\mathrm{R} f_\mathrm{R} - \mathcal{O}_\mathrm{I} f_\mathrm{I}) + * \;+\; + * i\,(\mathcal{O}_\mathrm{I} f_\mathrm{R} + \mathcal{O}_\mathrm{R} f_\mathrm{I}). + * \f] + * + * Internally, the routine delegates every real application + * \f$ \mathcal{O}_\bullet f_\bullet \f$ to the standard adaptive `apply` for + * real data structures (see `apply.h`), and then combines the four real + * results to produce the complex output. + * + * ### Precision model and adaptivity + * The same adaptive refinement loop is honored as in the real case: + * - **Relative precision** (default): refine where local wavelet details exceed + * a fraction of the local norm. 
+ * - **Absolute precision** (`absPrec = true`): refine until local details fall + * below a fixed absolute threshold. + * + * The `prec` parameter and `maxIter` semantics are identical to the real-valued + * `apply`: + * - `prec < 0` or `maxIter = 0` disables refinement, + * - `maxIter < 0` removes the iteration bound. + * + * ### Preconditions + * - All real and imaginary parts (operator and function) must share the same + * `MultiResolutionAnalysis`. + * - The output complex object should reference **empty** (uninitialized) trees + * at entry; the routine will construct their contents. + * + * @note This is a thin complex wrapper; all heavy lifting (bandwidth computation, + * adaptive splitting, transformations, norm updates) happens in the + * underlying real `apply`. + */ + #include "complex_apply.h" #include "ConvolutionCalculator.h" #include "CopyAdaptor.h" @@ -42,56 +88,63 @@ namespace mrcpp { -/** @brief Application of MW integral convolution operator (complex version) - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] oper: Convolution operator to apply - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 - * @param[in] absPrec: Build output tree based on absolute precision, default false - * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * The default is to work with relative precision - * (stop when the wavelet coefficients are below a given (small) fraction of - * function norm. - * Sometimes it is better to use absolute precision (e.g. 
a contribution in a sum) - * which means stop once wavelet coefficients are below a certain (absoute) value - * Rel prec ∣d∣<ϵ/∣f∣ - * Abs prec ∣d∣<ϵ - * The two ϵ are not necessarily the same. - * The first one is (in general) the overall precision of the calculation (not always...) - * The second one depends on the particular operation which you are performing. - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). - * \todo !!! Here should be given a method for greed cleaning !!! +/** + * @brief Apply a complex convolution operator to a complex function (adaptive). + * + * @tparam D Spatial dimension (1, 2, or 3). * + * @param[in] prec Target build precision for the adaptive application. + * @param[out] out Complex output function tree (real and imaginary parts filled). + * @param[in] oper Complex convolution operator (real and imaginary parts provided). + * @param[in] inp Complex input function tree (real and imaginary parts provided). + * @param[in] maxIter Maximum refinement iterations; `-1` means unbounded. + * @param[in] absPrec Use absolute (`true`) versus relative (`false`, default) precision. * + * @details + * The routine evaluates + * \f[ + * \Re(\mathcal{O}f) = \mathcal{O}_\mathrm{R} f_\mathrm{R} - \mathcal{O}_\mathrm{I} f_\mathrm{I},\quad + * \Im(\mathcal{O}f) = \mathcal{O}_\mathrm{I} f_\mathrm{R} + \mathcal{O}_\mathrm{R} f_\mathrm{I} + * \f] + * by two real `apply` calls per part, followed by linear combinations via `add`. + * Temporary real trees are allocated on the same MRA as the input. + * + * ### Implementation notes + * - The real building blocks `apply(prec, ...)` are identical to the scalar path + * and include: bandwidth precomputation, adaptive refinement, top-down coarse + * contributions, bottom-up transforms, and norm updates. 
+ * - Output parts are formed with `add(prec, ...)` to maintain consistent grid + * and transformation state. + * + * @warning The MRA of `inp.real`, `inp.imaginary`, `oper.real`, and + * `oper.imaginary` must match. No cross-MRA application is supported. */ -template void apply(double prec, ComplexObject> &out, ComplexObject> &oper, ComplexObject> &inp, int maxIter, bool absPrec) { +template +void apply(double prec, + ComplexObject> &out, + ComplexObject> &oper, + ComplexObject> &inp, + int maxIter, + bool absPrec) { FunctionTree temp1(inp.real->getMRA()); FunctionTree temp2(inp.real->getMRA()); - apply(prec, temp1, *oper.real, *inp.real, maxIter, absPrec); + // Real part: OR*FR - OI*FI + apply(prec, temp1, *oper.real, *inp.real, maxIter, absPrec); apply(prec, temp2, *oper.imaginary, *inp.imaginary, maxIter, absPrec); add(prec, *out.real, 1.0, temp1, -1.0, temp2); - // temp1.setZero(); - // temp2.setZero(); - - apply(prec, temp1, *oper.imaginary, *inp.real, maxIter, absPrec); - apply(prec, temp2, *oper.real, *inp.imaginary, maxIter, absPrec); + // Imag part: OI*FR + OR*FI + apply(prec, temp1, *oper.imaginary, *inp.real, maxIter, absPrec); + apply(prec, temp2, *oper.real, *inp.imaginary, maxIter, absPrec); add(prec, *out.imaginary, 1.0, temp1, 1.0, temp2); } -template void apply<1>(double prec, ComplexObject> &out, ComplexObject> &oper, ComplexObject> &inp, int maxIter, bool absPrec); +template void apply<1>(double prec, + ComplexObject> &out, + ComplexObject> &oper, + ComplexObject> &inp, + int maxIter, + bool absPrec); } // namespace mrcpp diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp index 0e7fb968b..11d94fc19 100644 --- a/src/treebuilders/grid.cpp +++ b/src/treebuilders/grid.cpp @@ -23,6 +23,41 @@ * */ +/** + * @file grid.cpp + * @brief Utilities for constructing, copying, clearing, and refining + * multiresolution grids and functions. 
+ * + * @details + * This module provides a unified set of routines for: + * + * - **Uniform grid construction** by splitting all leaves a fixed number of times. + * - **Analytic-driven/adaptive grid construction** using a + * #mrcpp::RepresentableFunction as a splitter oracle. + * - **Gaussian-expansion–driven grid construction** that places resolution + * according to Gaussian positions and exponents (supports periodic and + * non-periodic worlds). + * - **Copying grids** (structure only) and **copying functions** (coefficients) + * between trees with the same #mrcpp::MultiResolutionAnalysis. + * - **Clearing** coefficients on an existing grid without altering its topology. + * - **Refining** an existing grid either uniformly, by precision-driven + * wavelet criteria, by another reference tree, or by an analytic function. + * + * All routines operate on #mrcpp::FunctionTree objects (and component-wise on + * #mrcpp::CompFunction where relevant). Behind the scenes, they use + * #mrcpp::TreeBuilder with different adaptors: + * + * - #mrcpp::SplitAdaptor: unconditional splitting. + * - #mrcpp::WaveletAdaptor: split by wavelet-based precision criterion. + * - #mrcpp::AnalyticAdaptor: split by analytic visibility/zero checks. + * - #mrcpp::CopyAdaptor: split to match an existing tree structure. + * + * @note Unless otherwise stated, all "build_grid" functions **extend** the + * current grid of the output tree; they do not clear it first. Use + * #copy_grid when you want the output to match another grid exactly + * (it clears first). + */ + #include "grid.h" #include "AnalyticAdaptor.h" #include "CopyAdaptor.h" @@ -38,15 +73,20 @@ namespace mrcpp { -/** @brief Build empty grid by uniform refinement +/** + * @brief Build an **empty** grid by uniform refinement. * - * @param[in,out] out: Output tree to be built - * @param[in] scales: Number of refinement levels + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. 
+ * @param[out] out Output tree whose grid is refined. + * @param[in] scales Number of uniform refinement sweeps to apply. * - * @details This will split ALL leaf nodes in the tree the given number of times. + * @details + * Performs `scales` iterations of unconditional splitting on **all** current + * leaf nodes (using #mrcpp::SplitAdaptor). No coefficients are created; this + * only modifies the grid topology. * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called. + * @note Starts from the existing grid of @p out and extends it. */ template void build_grid(FunctionTree &out, int scales) { auto maxScale = out.getMRA().getMaxScale(); @@ -56,24 +96,22 @@ template void build_grid(FunctionTree &out, int scales for (auto n = 0; n < scales; n++) builder.build(out, calculator, adaptor, 1); } -/** @brief Build empty grid based on info from analytic function - * - * @param[out] out: Output tree to be built - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * - * @details The grid of the output function will be EXTENDED using the general - * algorithm: - * - Loop through current leaf nodes of the output tree - * - Refine node based on custom split check from the function - * - Repeat until convergence or `maxIter` is reached - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called. It requires that the functions - * `isVisibleAtScale()` and `isZeroOnInterval()` is implemented in the - * particular `RepresentableFunction`. - * +/** + * @brief Build an **empty** grid guided by an analytic function (adaptive). + * + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[out] out Output tree whose grid will be extended. + * @param[in] inp Analytic function used as a splitting oracle. 
+ * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). + * + * @details + * Uses #mrcpp::AnalyticAdaptor to ask the analytic function @p inp whether a + * node is visible at a given scale and whether it is identically zero on the + * node interval. Nodes are split until convergence or @p maxIter is reached. + * + * @note Requires @p inp to implement `isVisibleAtScale()` and + * `isZeroOnInterval()`. */ template void build_grid(FunctionTree &out, const RepresentableFunction &inp, int maxIter) { auto maxScale = out.getMRA().getMaxScale(); @@ -84,24 +122,24 @@ template void build_grid(FunctionTree &out, const Repr print::separator(10, ' '); } -/** @brief Build empty grid based on info from Gaussian expansion +/** + * @brief Build an **empty** grid guided by a Gaussian expansion (adaptive). * - * @param[out] out: Output tree to be built - * @param[in] inp: Input Gaussian expansion - * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @tparam D Spatial dimension. + * @param[out] out Output tree whose grid will be extended. + * @param[in] inp Gaussian expansion. + * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). * - * @details The grid of the output function will be EXTENDED using the general - * algorithm: - * - Loop through current leaf nodes of the output tree - * - Refine node based on custom split check from the function - * - Repeat until convergence or `maxIter` is reached - * - `maxIter < 0` means no bound + * @details + * For a non-periodic world: + * iterates over all Gaussians in @p inp and drives refinement with + * #mrcpp::AnalyticAdaptor using each Gaussian's position and exponent. * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called. It will loop through the Gaussians in the - * expansion and extend the grid based on the position and exponent of each - * term. Higher exponent means finer resolution. 
+ * For a periodic world: + * copies and reuses the same logic via temporary Gaussian objects so that + * periodic replication is handled consistently. * + * Higher exponents imply finer resolution near the Gaussian center. */ template void build_grid(FunctionTree &out, const GaussExp &inp, int maxIter) { if (!out.getMRA().getWorldBox().isPeriodic()) { @@ -114,6 +152,7 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i } } else { auto period = out.getMRA().getWorldBox().getScalingFactors(); + (void)period; // currently unused; kept to document intent for (auto i = 0; i < inp.size(); i++) { auto *gauss = inp.getFunc(i).copy(); build_grid(out, *gauss, maxIter); @@ -123,24 +162,20 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i print::separator(10, ' '); } -/** @brief Build empty grid based on another MW function representation - * - * @param[out] out: Output tree to be built - * @param[in] inp: Input tree - * @param[in] maxIter: Maximum number of refinement iterations in output tree +/** + * @brief Build an **empty** grid by taking the union with another MW tree. * - * @details The grid of the output function will be EXTENDED with all existing - * nodes in corresponding input function, using the general algorithm: - * - Loop through current leaf nodes of the output tree - * - Refine node if the corresponding node in the input has children - * - Repeat until all input nodes are covered or `maxIter` is reached - * - `maxIter < 0` means no bound + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[out] out Output tree to be extended. + * @param[in] inp Input tree whose structure drives refinement. + * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called. 
This means that all nodes on the input - * tree will also be in the final output tree (unless `maxIter` is reached, - * but NOT vice versa. + * @details + * Uses #mrcpp::CopyAdaptor to ensure that any node that exists (and has + * children) in @p inp will also exist in @p out after the call. * + * @warning @p out and @p inp must share the same #mrcpp::MultiResolutionAnalysis. */ template void build_grid(FunctionTree &out, FunctionTree &inp, int maxIter) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); @@ -152,24 +187,20 @@ template void build_grid(FunctionTree &out, FunctionTr print::separator(10, ' '); } -/** @brief Build empty grid based on several MW function representation +/** + * @brief Build an **empty** grid by taking the union of several MW trees. * - * @param[out] out: Output tree to be built - * @param[in] inp: Input tree vector - * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[out] out Output tree to be extended. + * @param[in] inp Vector of (coef, tree) pairs. + * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). * - * @details The grid of the output function will be EXTENDED with all existing - * nodes in all corresponding input functions, using the general algorithm: - * - Loop through current leaf nodes of the output tree - * - Refine node if the corresponding node in one of the inputs has children - * - Repeat until all input nodes are covered or `maxIter` is reached - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called. This means that the final output grid - * will contain (at least) the union of the nodes of all input trees (unless - * `maxIter` is reached). 
+ * @details + * Uses #mrcpp::CopyAdaptor to extend @p out so that all nodes present in any + * of the input trees are represented in the resulting grid (union). * + * @warning All trees must share the same #mrcpp::MultiResolutionAnalysis as @p out. */ template void build_grid(FunctionTree &out, FunctionTreeVector &inp, int maxIter) { for (auto i = 0; i < inp.size(); i++) @@ -183,24 +214,29 @@ template void build_grid(FunctionTree &out, FunctionTr print::separator(10, ' '); } +/** + * @brief Convenience overload: build a grid from a list of tree pointers. + */ template void build_grid(FunctionTree &out, std::vector *> &inp, int maxIter) { FunctionTreeVector inp_vec; for (auto *t : inp) inp_vec.push_back({1.0, t}); build_grid(out, inp_vec, maxIter); } -/** @brief Copy function from one tree onto the grid of another tree, fixed grid - * - * @param[out] out: Output function - * @param[in] inp: Input function +/** + * @brief Copy a function from one tree to the fixed grid of another. * - * @details The output function will be computed using the general algorithm: - * - Loop through current leaf nodes of the output tree - * - Copy MW coefs from the corresponding input node + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[out] out Output tree (grid must already exist). + * @param[in] inp Input tree (source of coefficients). * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called and will overwrite any existing coefs. + * @details + * Traverses the **current leaves** of @p out and copies the corresponding + * coefficients from @p inp where nodes align, using the addition kernel + * with fixed grid (no refinement). * + * @note Overwrites existing coefficients in @p out; does not modify its grid. 
*/ template void copy_func(FunctionTree &out, FunctionTree &inp) { FunctionTreeVector tmp_vec; @@ -208,15 +244,19 @@ template void copy_func(FunctionTree &out, FunctionTre add(-1.0, out, tmp_vec); } -/** @brief Build empty grid that is identical to another MW grid +/** + * @brief Make @p out's grid an exact copy of @p inp's grid (clears first). * - * @param[out] out: Output tree to be built - * @param[in] inp: Input tree + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[out] out Output tree to be rebuilt. + * @param[in] inp Input tree supplying the grid structure. * - * @note The difference from the corresponding `build_grid` function is that - * this will first clear the grid of the `out` function, while `build_grid` - * will _extend_ the existing grid. + * @details + * Clears @p out completely (removes all nodes) and then extends its grid to + * match @p inp using #build_grid(out, inp). * + * @warning @p out and @p inp must share the same #mrcpp::MultiResolutionAnalysis. */ template void copy_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") @@ -224,15 +264,17 @@ template void copy_grid(FunctionTree &out, FunctionTre build_grid(out, inp); } -/** @brief Build empty grid that is identical to another MW grid for every component +/** + * @brief Component-wise grid copy for composite functions (clears first). * - * @param[out] out: Output to be built - * @param[in] inp: Input - * - * @note The difference from the corresponding `build_grid` function is that - * this will first clear the grid of the `out` function, while `build_grid` - * will _extend_ the existing grid. + * @tparam D Spatial dimension. + * @param[out] out Destination composite function. + * @param[in] inp Source composite function. 
* + * @details + * Recreates @p out with the same number of components and data parameters as + * @p inp, then for each component copies the grid using the tree-based + * #build_grid overload. */ template void copy_grid(CompFunction &out, CompFunction &inp) { out.free(); @@ -244,14 +286,16 @@ template void copy_grid(CompFunction &out, CompFunction &inp) { } } -/** @brief Clear the MW coefficients of a function representation - * - * @param[in,out] out: Output function to be cleared +/** + * @brief Clear coefficients on an existing grid (topology unchanged). * - * @note This will only clear the MW coefs in the existing nodes, it will not - * change the grid of the function. Use `FunctionTree::clear()` to remove all - * grid refinement as well. + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[in,out] out Tree whose coefficients will be zeroed. * + * @details + * Uses #mrcpp::TreeBuilder::clear with #mrcpp::DefaultCalculator to reset + * coefficients while preserving node structure. */ template void clear_grid(FunctionTree &out) { TreeBuilder builder; @@ -259,16 +303,19 @@ template void clear_grid(FunctionTree &out) { builder.clear(out, calculator); } -/** @brief Refine the grid of a MW function representation - * - * @param[in,out] out: Output tree to be refined - * @param[in] scales: Number of refinement levels - * @returns The number of nodes that were split - * - * @details This will split ALL leaf nodes in the tree the given number of - * times, then it will compute scaling coefs of the new nodes, thus leaving - * the function representation unchanged, but on a larger grid. - * +/** + * @brief Uniformly refine a grid and **transfer scaling coefficients**. + * + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[in,out] out Tree to refine. + * @param[in] scales Number of refinement sweeps. + * @return Number of nodes that were split. 
+ * + * @details + * Splits all leaves `scales` times using #mrcpp::TreeBuilder::split with + * coefficient transfer to children, so the function representation remains + * unchanged while resolution increases. */ template int refine_grid(FunctionTree &out, int scales) { auto nSplit = 0; @@ -281,18 +328,20 @@ template int refine_grid(FunctionTree &out, int scales return nSplit; } -/** @brief Refine the grid of a MW function representation - * - * @param[in,out] out: Output tree to be refined - * @param[in] prec: Precision for initial split check - * @param[in] absPrec: Build output tree based on absolute precision - * @returns The number of nodes that were split - * - * @details This will first perform a split check on the existing leaf nodes in - * the tree based on the provided precision parameter, then it will compute - * scaling coefs of the new nodes, thus leaving the function representation - * unchanged, but (possibly) on a larger grid. - * +/** + * @brief Precision-driven refinement using wavelet criteria. + * + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[in,out] out Tree to refine. + * @param[in] prec Precision target for split checks. + * @param[in] absPrec If true, use absolute precision; otherwise relative. + * @return Number of nodes that were split. + * + * @details + * Uses #mrcpp::WaveletAdaptor to test split conditions based on wavelet + * coefficients against @p prec (absolute or relative). When splitting, scales + * are updated by transferring coefficients to the children. 
*/ template int refine_grid(FunctionTree &out, double prec, bool absPrec) { int maxScale = out.getMRA().getMaxScale(); @@ -302,17 +351,18 @@ template int refine_grid(FunctionTree &out, double pre return nSplit; } -/** @brief Refine the grid of a MW function representation - * - * @param[in,out] out: Output tree to be refined - * @param[in] inp: Input tree that defines the new grid - * @returns The number of nodes that were split - * - * @details This will first perform a split check on the existing leaf nodes - * in the output tree based on the structure of the input tree (same as - * build_grid), then it will compute scaling coefs of the new nodes, thus - * leaving the function representation unchanged, but on a larger grid. - * +/** + * @brief Refine a grid to include all structure present in a reference tree. + * + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[in,out] out Tree to refine (and receive coefficient transfer). + * @param[in] inp Reference tree that defines where @p out should split. + * @return Number of nodes that were split. + * + * @details + * Uses #mrcpp::CopyAdaptor to mirror structural refinement from @p inp into + * @p out and transfers coefficients to children where splits occur. */ template int refine_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") @@ -323,18 +373,19 @@ template int refine_grid(FunctionTree &out, FunctionTr return nSplit; } -/** @brief Refine the grid of a MW function representation - * - * @param[in,out] out: Output tree to be refined - * @param[in] inp: Input function - * - * @details This will first perform a split check on the existing leaf nodes - * in the output tree based on the structure of the input function (same as - * build_grid), then it will compute scaling coefs of the new nodes, thus - * leaving the function representation unchanged, but on a larger grid. 
- * It requires that the functions `isVisibleAtScale()` and `isZeroOnInterval()` - * is implemented in the particular `RepresentableFunction`. - * +/** + * @brief Analytic-driven refinement using a representable function. + * + * @tparam D Spatial dimension. + * @tparam T Scalar coefficient type. + * @param[in,out] out Tree to refine. + * @param[in] inp Analytic function to act as a split oracle. + * @return Number of nodes that were split. + * + * @details + * Uses #mrcpp::AnalyticAdaptor to request refinement where @p inp is visible + * at scale and not identically zero on the cell. Coefficients are transferred + * upon splitting so the represented function remains unchanged. */ template int refine_grid(FunctionTree &out, const RepresentableFunction &inp) { auto maxScale = out.getMRA().getMaxScale(); @@ -344,6 +395,8 @@ template int refine_grid(FunctionTree &out, const Repr return nSplit; } +// -------------------- explicit instantiations -------------------- + template void copy_grid(CompFunction<1> &out, CompFunction<1> &inp); template void copy_grid(CompFunction<2> &out, CompFunction<2> &inp); template void copy_grid(CompFunction<3> &out, CompFunction<3> &inp); diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp index b363bf806..02d4ad7a9 100644 --- a/src/treebuilders/map.cpp +++ b/src/treebuilders/map.cpp @@ -23,6 +23,53 @@ * */ +/** + * @file map.cpp + * @brief Adaptive mapping of multiresolution (MW) function trees through a user + * supplied scalar-to-scalar mapping. + * + * @details + * This module implements an adaptive **pointwise mapping** of an input + * #mrcpp::FunctionTree onto an output #mrcpp::FunctionTree by applying a user + * provided mapping function \f$f:\mathbb{R}\to\mathbb{R}\f$ to the function + * values represented on the MW grid. 
+ * + * The mapping is realized via the standard MRCPP build loop: + * - On the **current** output grid, coefficients are computed by evaluating + * the input function and applying the mapping function (handled by + * #mrcpp::MapCalculator). + * - A **wavelet-based split criterion** (via #mrcpp::WaveletAdaptor) refines + * the grid wherever the mapped function requires more resolution to meet + * the requested precision. + * - This **refine–recompute** cycle repeats until convergence or a maximum + * number of iterations is reached. + * + * ### Precision semantics + * - If `absPrec == false` (default), the adaptor uses **relative precision**: + * refinement stops when wavelet coefficients are small compared to the + * current function norm, roughly \f$|d| < \varepsilon\,\|f\|\f$. + * - If `absPrec == true`, the adaptor enforces an **absolute threshold**: + * \f$|d| < \varepsilon\f$. + * + * ### Responsibilities and caveats + * - MRCPP does **not** impose constraints on the mapping function; the user + * must ensure it is numerically safe (no division by zero, no overflow, etc.). + * - The mapping is **pointwise**: it does not solve PDEs or apply operators. + * For linear/nonlinear operators, consider specialized operator modules. + * + * ### Typical usage + * @code + * // Assume 'mra' is a configured MultiResolutionAnalysis + * FunctionTree<3,double> in(mra), out(mra); + * // ... build 'in' somehow (project analytic function, read from file, etc.) + * + * auto clamp_nonnegative = [](double x) { return x < 0.0 ? 
0.0 : x; }; + * map<3>(1e-6, out, in, clamp_nonnegative, -1, false); // maxIter = -1 (unbounded), absPrec = false (relative precision) + * @endcode + * + * @see mrcpp::MapCalculator, mrcpp::WaveletAdaptor, mrcpp::TreeBuilder + */ + #include "map.h" #include "MapCalculator.h" #include "MultiplicationCalculator.h" @@ -38,34 +85,44 @@ namespace mrcpp { -/** @brief map a MW function onto another representations, adaptive grid - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Input function - * @param[in] fmap: mapping function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision - * - * @details The output function tree will be computed by mapping the input tree values through the fmap function, - * using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * No assumption is made for how the mapping function looks. It is - * left to the end-user to guarantee that the mapping function does - * not lead to numerically unstable/inaccurate situations (e.g. divide - * by zero, overflow, etc...) - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). +/** + * @brief Adaptively map an input MW function through a scalar mapping function. + * + * @tparam D Spatial dimension (1, 2, or 3). + * + * @param[in] prec Target build precision (relative or absolute depending on @p absPrec). + * @param[out] out Output function tree to be constructed (should start empty). + * @param[in] inp Input function tree providing the source values. 
+ * @param[in] fmap Mapping function \f$f:\mathbb{R}\to\mathbb{R}\f$ to apply pointwise. + * @param[in] maxIter Maximum refinement iterations (negative = unbounded). + * @param[in] absPrec If true: interpret @p prec as absolute; otherwise relative. + * + * @details + * Pipeline: + * 1. Create a #mrcpp::MapCalculator that evaluates @p inp and applies @p fmap. + * 2. Drive refinement with a #mrcpp::WaveletAdaptor at the MRA max scale, + * honoring @p prec and @p absPrec. + * 3. Build the output via #mrcpp::TreeBuilder until convergence or @p maxIter. + * 4. Perform bottom-up MW transform and square-norm computation for diagnostics. + * 5. Clean temporary/generated artifacts on the input tree. + * + * @note + * - The algorithm **extends** whatever grid @p out currently has. For a fresh build, + * ensure @p out is empty (no coefficients). + * - The input and output trees must belong to a compatible MRA setup. * + * @warning + * The user is responsible for the numerical stability of @p fmap. + * Discontinuous or extremely steep mappings may require tighter precision or + * more iterations to resolve features adequately. 
*/ -template void map(double prec, FunctionTree &out, FunctionTree &inp, FMap fmap, int maxIter, bool absPrec) { +template +void map(double prec, + FunctionTree &out, + FunctionTree &inp, + FMap fmap, + int maxIter, + bool absPrec) { int maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -88,6 +145,7 @@ template void map(double prec, FunctionTree &out, FunctionTre print::separator(10, ' '); } +// explicit instantiations template void map<1>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &inp, FMap fmap, int maxIter, bool absPrec); template void map<2>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &inp, FMap fmap, int maxIter, bool absPrec); template void map<3>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &inp, FMap fmap, int maxIter, bool absPrec); diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp index 4e046126e..1b57b10d2 100644 --- a/src/treebuilders/multiply.cpp +++ b/src/treebuilders/multiply.cpp @@ -23,6 +23,39 @@ * */ +/** + * @file multiply.cpp + * @brief Adaptive algebra on multiresolution (MW) function trees: product, + * square, power, (componentwise) dot, and inner products. + * + * @details + * This module implements a family of adaptive build routines that produce + * a new #mrcpp::FunctionTree from algebraic combinations of one or more input + * trees. The build is driven by the multiresolution refinement loop + * (TreeBuilder + Adaptor + Calculator): + * + * - On the current output grid, local contributions are computed by a + * Calculator (e.g. MultiplicationCalculator, SquareCalculator, PowerCalculator). + * - A refinement Adaptor (WaveletAdaptor by default, or MultiplicationAdaptor + * when useMaxNorms is enabled) decides whether to split nodes based on + * requested precision. + * - The refine–recompute process repeats until the target precision is met + * or the iteration limit is reached. 
+ * + * Precision semantics: + * - Relative precision (absPrec = false): split while |d| is not small + * relative to the function norm. + * - Absolute precision (absPrec = true): split while |d| is above a fixed + * absolute threshold. + * + * Notes: + * - All routines assume the output tree starts with an empty grid (no coeffs). + * The grid is grown adaptively unless otherwise stated. + * - The input and output trees must belong to compatible MRAs. + * - Some routines can optionally use max-norm estimates from inputs to guide + * refinement (useMaxNorms). + */ + #include #include "MultiplicationAdaptor.h" @@ -44,30 +77,27 @@ namespace mrcpp { -/** @brief Multiplication of two MW function representations, adaptive grid - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] c: Numerical coefficient - * @param[in] inp_a: Input function a - * @param[in] inp_b: Input function b - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision - * @param[in] useMaxNorms: Build output tree based on norm estimates from input +/** + * @brief Adaptive product of two MW functions with an overall scalar factor. * - * @details The output function will be computed as the product of the two input - * functions (including the numerical coefficient), using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - conjugate is applied on inp_b + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (double or ComplexDouble). * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). 
+ * @param[in] prec Target build precision. + * @param[out] out Output function tree to construct. + * @param[in] c Scalar prefactor multiplying inp_a * inp_b. + * @param[in] inp_a First input tree. + * @param[in] inp_b Second input tree. + * @param[in] maxIter Max refinement iterations (-1 means unbounded). + * @param[in] absPrec If true: absolute precision; else relative. + * @param[in] useMaxNorms If true: use MultiplicationAdaptor with local + * max-norm estimates from inputs for split checks. + * @param[in] conjugate If true: apply complex conjugation to inp_b during multiplication. * + * @details + * Builds out = c * inp_a * (conjugate ? conj(inp_b) : inp_b) on an adaptively + * refined grid. If useMaxNorms is true, each input tree contributes local + * estimates (makeMaxSquareNorms) to scale the precision per node. */ template void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp_a, FunctionTree &inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { @@ -77,31 +107,28 @@ void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate); } -/** @brief Multiplication of several MW function representations, adaptive grid +/** + * @brief Adaptive product of several MW functions (with per-input scalars). * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Vector of input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision - * @param[in] useMaxNorms: Build output tree based on norm estimates from input + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type. 
* - * @details The output function will be computed as the product of all input - * functions in the vector (including their numerical coefficients), using - * the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - conjugate is applied on all the trees in inp, except the first - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @param[in] prec Target build precision. + * @param[out] out Output function tree to construct. + * @param[in] inp Vector of inputs (scalar, tree) pairs. + * @param[in] maxIter Max refinement iterations (-1 means unbounded). + * @param[in] absPrec If true: absolute precision; else relative. + * @param[in] useMaxNorms Use norm-based adaptor when true. + * @param[in] conjugate Conjugate all trees except the first (if complex). * + * @details + * Builds out = (Π_k a_k * f_k) where each (a_k, f_k) is the k-th pair. + * If conjugate is true, all factors except the first are conjugated in the + * complex case. When useMaxNorms is true, #mrcpp::MultiplicationAdaptor + * scales the split threshold by input-node max norms to improve targeting. 
*/ -template void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { +template +void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -135,34 +162,30 @@ template void multiply(double prec, FunctionTree &out, print::separator(10, ' '); } -template void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { +/** + * @brief Convenience overload: product of a list of trees (unit coefficients). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + */ +template +void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms, conjugate); } -/** @brief Out-of-place square of MW function representations, adaptive grid - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Input function to square - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision - * - * @details The output function will be computed as the square of the input - * function, using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound +/** + * @brief Adaptive, out-of-place square: out = (conjugate ? conj(inp) : inp)^2. 
* - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * + * @details + * Uses #mrcpp::SquareCalculator over a wavelet-driven adaptive refinement. */ -template void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter, bool absPrec, bool conjugate) { +template +void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter, bool absPrec, bool conjugate) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); int maxScale = out.getMRA().getMaxScale(); @@ -186,29 +209,16 @@ template void square(double prec, FunctionTree &out, F print::separator(10, ' '); } -/** @brief Out-of-place power of MW function representations, adaptive grid - * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Input function to square - * @param[in] p: Numerical power - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision +/** + * @brief Adaptive power: out = inp^p (real exponent p). * - * @details The output function will be computed as the input function raised - * to the given power, using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound - * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * + * @warning Conjugated inputs are not supported here. 
*/ -template void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter, bool absPrec) { +template +void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter, bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (inp.conjugate()) MSG_ABORT("Not implemented"); @@ -233,24 +243,26 @@ template void power(double prec, FunctionTree &out, Fu print::separator(10, ' '); } -/** @brief Dot product of two MW function vectors, adaptive grid +/** + * @brief Adaptive componentwise dot product of two function vectors. * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp_a: Input function vector - * @param[in] inp_b: Input function vector - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * - * @details The output function will be computed as the dot product of the two - * input vectors (including their numerical coefficients). The precision - * parameter is used only in the multiplication part, the final addition will - * be on the fixed union grid of the components. - * - * @note The length of the input vectors must be the same. + * @param[in] prec Target build precision for the per-component products. + * @param[out] out Output tree holding the sum over component products. + * @param[in] inp_a First vector of (scalar, tree) pairs. + * @param[in] inp_b Second vector of (scalar, tree) pairs. + * @param[in] maxIter Max refinement iterations per component product. + * @param[in] absPrec Absolute vs relative precision. * + * @details + * Computes out = Σ_d (a_d f_d) · (b_d g_d) by first forming per-component + * products on grids compatible with @p out, then summing these contributions + * on the fixed union grid (addition step uses a fixed grid, not adaptive). 
*/ -template void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter, bool absPrec) { +template +void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter, bool absPrec) { if (inp_a.size() != inp_b.size()) MSG_ABORT("Input length mismatch"); FunctionTreeVector tmp_vec; @@ -270,20 +282,21 @@ template void dot(double prec, FunctionTree &out, Func clear(tmp_vec, true); } -/** @returns Dot product of two MW function representations - * - * @param[in] bra: Bra side input function - * @param[in] ket: Ket side input function +/** + * @brief Inner product ⟨bra|ket⟩ on compressed MW trees. * - * @details The dot product is computed with the trees in compressed form, i.e. - * scaling coefs only on root nodes, wavelet coefs on all nodes. Since wavelet - * functions are orthonormal through ALL scales and the root scaling functions - * are orthonormal to all finer level wavelet functions, this becomes a rather - * efficient procedure as you only need to compute the dot product where the - * grids overlap. + * @tparam D Spatial dimension. + * @tparam T Coefficient type of bra. + * @tparam U Coefficient type of ket. + * @tparam V Return type (double or ComplexDouble). * + * @details + * Works directly on compressed representation: scaling coefficients on roots + * and wavelet coefficients on all nodes. Orthonormality across scales makes + * this efficient: only overlapping nodes contribute. 
*/ -template V dot(FunctionTree &bra, FunctionTree &ket) { +template +V dot(FunctionTree &bra, FunctionTree &ket) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible"); MWNodeVector nodeTable; TreeIterator it(bra); @@ -295,13 +308,7 @@ template V dot(FunctionTree &b int nNodes = nodeTable.size(); V result = 0.0; V locResult = 0.0; - // OMP is disabled in order to get EXACT results (to the very last digit), the - // order of summation makes the result different beyond the 14th digit or so. - // OMP does improve the performace, but its not worth it for the time being. - //#pragma omp parallel firstprivate(n_nodes, locResult) num_threads(mrcpp_get_num_threads()) - // shared(nodeTable,rhs,result) - // { - //#pragma omp for schedule(guided) + for (int n = 0; n < nNodes; n++) { const auto &braNode = static_cast &>(*nodeTable[n]); const MWNode *mwNode = ket.findNode(braNode.getNodeIndex()); @@ -311,25 +318,32 @@ template V dot(FunctionTree &b if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode); locResult += dot_wavelet(braNode, ketNode); } - //#pragma omp critical result += locResult; - return result; } -/** @brief abs-dot product of two MW function representations +/** + * @brief Absolute inner product proxy based on node norms. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. * - * @param[in] bra: Bra side input function - * @param[in] ket: Ket side input function + * @param[in] bra First input function. + * @param[in] ket Second input function. + * @param[in] exact If true, requires ket's grid to include bra's grid and + * uses absolute coefficients per node. If false, uses an + * approximate product of node norms and root-node norms. * - * If exact=true: the grid of ket MUST include the grid of bra. - * If exact=false: does not at any time read the coefficients individually. 
- * The product is done for the end nodes of the bra multiplied by the nodes from the - * ket with either the same idx, or using a lower scale and assuming uniform - * distribution within the node. - * If the product is zero, the functions are disjoints. + * @returns Value proportional to the absolute inner product. + * + * @details + * With exact = true, the routine converts to interpolating coefficients, + * takes absolute values, and accumulates exact contributions node by node. + * With exact = false, it avoids per-coefficient access and approximates the + * product via node norms; disjoint functions yield zero. */ -template double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact) { +template +double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Incompatible MRA"); double result = 0.0; @@ -342,14 +356,12 @@ template double node_norm_dot(FunctionTree &bra, Funct FunctionNode &node = bra.getEndFuncNode(n); const NodeIndex idx = node.getNodeIndex(); if (exact) { - // convert to interpolating coef, take abs, convert back FunctionNode *mwNode = static_cast *>(ket.findNode(idx)); if (mwNode == nullptr) MSG_ABORT("Trees must have same grid"); node.getAbsCoefs(valA); mwNode->getAbsCoefs(valB); for (int i = 0; i < ncoef; i++) result += std::norm(valA[i] * valB[i]); } else { - // approximate by product of node norms int rIdx = ket.getRootBox().getBoxIndex(idx); assert(rIdx >= 0); const MWNode &root = ket.getRootBox().getNode(rIdx); @@ -360,6 +372,8 @@ template double node_norm_dot(FunctionTree &bra, Funct return result; } +// ---- Explicit instantiations ------------------------------------------------ + template void multiply<1, double>(double prec, FunctionTree<1, double> &out, double c, FunctionTree<1, double> &tree_a, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate); template void diff --git a/src/treebuilders/project.cpp 
b/src/treebuilders/project.cpp index 7eea89416..e24750f95 100644 --- a/src/treebuilders/project.cpp +++ b/src/treebuilders/project.cpp @@ -23,6 +23,36 @@ * */ +/** + * @file project.cpp + * @brief Projection of analytic (scalar or vector) functions onto a + * multiwavelet (MW) basis on an adaptively refined grid. + * + * @details + * This module builds a MW representation of an analytic function by + * adaptively refining the grid and computing (scale-/wavelet-) coefficients + * until a user-prescribed tolerance is achieved. + * + * ### Algorithm (adaptive projection) + * 1. Start from the current grid in @p out (should be empty or root-only). + * 2. On the current leaves, compute MW coefficients using + * ProjectionCalculator (quadrature in the scaling basis). + * 3. Use WaveletAdaptor to decide where to refine: + * - **Relative precision** (default): stop when local wavelet norms + * drop below `prec * ||f||_node`. + * - **Absolute precision** (`absPrec = true`): stop when local wavelet + * norms drop below `prec`. + * 4. Repeat until convergence or `maxIter` is reached. + * 5. Perform final MW transforms (TopDown/BottomUp as needed) and compute + * the tree square-norm for bookkeeping. + * + * The projection accounts for non-unit world-box scaling through + * a per-dimension scaling factor passed to ProjectionCalculator. + * + * @note The functions here operate on templated dimension @p D (1,2,3) + * and coefficient type @p T (double or ComplexDouble). + */ + #include "project.h" #include "ProjectionCalculator.h" #include "TreeBuilder.h" @@ -36,58 +66,77 @@ namespace mrcpp { -/** @brief Project an analytic function onto the MW basis, adaptive grid +/** + * @brief Project a lambda/std::function onto the MW basis (convenience overload). 
* - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision + * Wraps the callable into an AnalyticFunction and delegates to the + * RepresentableFunction overload. * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound + * @tparam D Spatial dimension (1,2,3). + * @tparam T Coefficient type (double or ComplexDouble). * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @param[in] prec Target precision (relative by default, see @p absPrec). + * @param[out] out Output function tree to be built (should contain only empty roots). + * @param[in] func Callable \f$f:\mathbb{R}^D \to T\f$ returning values at coordinates. + * @param[in] maxIter Maximum refinement iterations (-1 = no bound). + * @param[in] absPrec Use absolute (true) or relative (false, default) thresholding. * + * @details + * This is syntactic sugar for quickly projecting a user-provided callable. + * The adaptive procedure, grid policy, and stopping criteria are identical + * to the main projection overload below. + * + * @note The current grid in @p out is honored and extended; it is not cleared. + * For a fresh build, ensure @p out has only root nodes and no coefficients. 
*/ -template void project(double prec, FunctionTree &out, std::function &r)> func, int maxIter, bool absPrec) { +template +void project(double prec, + FunctionTree &out, + std::function &r)> func, + int maxIter, + bool absPrec) { AnalyticFunction inp(func); - mrcpp::project(prec, out, inp, maxIter, absPrec); } -/** @brief Project an analytic function onto the MW basis, adaptive grid +/** + * @brief Project a RepresentableFunction onto the MW basis, adaptive grid. * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function to be built - * @param[in] inp: Input function - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision + * @tparam D Spatial dimension (1,2,3). + * @tparam T Coefficient type (double or ComplexDouble). * - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound + * @param[in] prec Target precision (relative by default, see @p absPrec). + * @param[out] out Output function tree to be built (should contain only empty roots). + * @param[in] inp Analytic/representable function to project. + * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). + * @param[in] absPrec Use absolute (true) or relative (false) thresholding. * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @details + * - Builds a WaveletAdaptor with precision policy (relative/absolute). + * - Creates a ProjectionCalculator configured with world-box scaling + * factors to ensure correct physical rescaling of integrals. 
+ * - Uses TreeBuilder to iterate: + * compute coefs → test refinement → split nodes → repeat. + * - Finalizes with a BottomUp MW transform and tree norm accumulation. * + * @par Precision semantics + * - **Relative** (`absPrec=false`): local wavelet norm compared to local function norm. + * - **Absolute** (`absPrec=true`): local wavelet norm compared to @p prec directly. + * + * @warning The output tree @p out must be compatible (same MRA/world box) + * with any other trees you later combine it with. */ -template void project(double prec, FunctionTree &out, RepresentableFunction &inp, int maxIter, bool absPrec) { +template +void project(double prec, + FunctionTree &out, + RepresentableFunction &inp, + int maxIter, + bool absPrec) { int maxScale = out.getMRA().getMaxScale(); const auto scaling_factor = out.getMRA().getWorldBox().getScalingFactors(); + TreeBuilder builder; WaveletAdaptor adaptor(prec, maxScale, absPrec); - ProjectionCalculator calculator(inp, scaling_factor); builder.build(out, calculator, adaptor, maxIter); @@ -101,29 +150,34 @@ template void project(double prec, FunctionTree &out, print::separator(10, ' '); } -/** @brief Project an analytic vector function onto the MW basis, adaptive grid +/** + * @brief Project a vector of analytic functions (component-wise), adaptive grid. * - * @param[in] prec: Build precision of output function - * @param[out] out: Output function vector to be built - * @param[in] inp: Input function vector - * @param[in] maxIter: Maximum number of refinement iterations in output tree - * @param[in] absPrec: Build output tree based on absolute precision + * @tparam D Spatial dimension (1,2,3). + * @tparam T Coefficient type (double or ComplexDouble). 
* - * @details The output function will be computed using the general algorithm: - * - Compute MW coefs on current grid - * - Refine grid where necessary based on `prec` - * - Repeat until convergence or `maxIter` is reached - * - `prec < 0` or `maxIter = 0` means NO refinement - * - `maxIter < 0` means no bound + * @param[in] prec Target precision (relative by default, see @p absPrec). + * @param[out] out Output vector of trees (size must match @p func). + * @param[in] func Vector of component callables \f$f_j:\mathbb{R}^D \to T\f$. + * @param[in] maxIter Maximum refinement iterations (-1 = unbounded). + * @param[in] absPrec Use absolute (true) or relative (false) thresholding. * - * @note This algorithm will start at whatever grid is present in the `out` - * tree when the function is called (this grid should however be EMPTY, e.i. - * no coefs). + * @details + * Projects each component independently with the same precision policy and + * refinement limits, storing the result in the corresponding entry of @p out. * + * @throws MSG_ABORT if @p out.size() != @p func.size(). 
*/ -template void project(double prec, FunctionTreeVector &out, std::vector &r)>> func, int maxIter, bool absPrec) { +template +void project(double prec, + FunctionTreeVector &out, + std::vector &r)>> func, + int maxIter, + bool absPrec) { if (out.size() != func.size()) MSG_ABORT("Size mismatch"); - for (auto j = 0; j < D; j++) mrcpp::project(prec, get_func(out, j), func[j], maxIter, absPrec); + for (auto j = 0; j < D; j++) { + mrcpp::project(prec, get_func(out, j), func[j], maxIter, absPrec); + } } template void project<1, double>(double prec, FunctionTree<1, double> &out, RepresentableFunction<1, double> &inp, int maxIter, bool absPrec); From b4505336883fd5c6395985109bebd78055051edb Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Wed, 29 Oct 2025 15:20:31 +0300 Subject: [PATCH 07/51] Update map.cpp --- src/treebuilders/map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp index 02d4ad7a9..8e76b907c 100644 --- a/src/treebuilders/map.cpp +++ b/src/treebuilders/map.cpp @@ -64,7 +64,7 @@ * // ... build 'in' somehow (project analytic function, read from file, etc.) * * auto clamp_nonnegative = [](double x) { return x < 0.0 ? 
0.0 : x; }; - * map<3>(1e-6, out, in, clamp_nonnegative, -1 /* maxIter (unbounded) */, false /* relative precision */); + * map<3>(1e-6, out, in, clamp_nonnegative, -1, false); // -1: unbounded maxIter, false: relative precision * @endcode * * @see mrcpp::MapCalculator, mrcpp::WaveletAdaptor, mrcpp::TreeBuilder From e9420da618620488cb3599a955bb3ea972da4c01 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Wed, 29 Oct 2025 17:42:11 +0300 Subject: [PATCH 08/51] Done doxygen .h files in trees folder --- src/trees/BandWidth.cpp | 11 +- src/trees/BandWidth.h | 77 +++- src/trees/BoundingBox.cpp | 222 ++---------- src/trees/BoundingBox.h | 234 ++++++++++-- src/trees/CornerOperatorTree.cpp | 24 +- src/trees/CornerOperatorTree.h | 78 +++- src/trees/FunctionNode.cpp | 106 +----- src/trees/FunctionNode.h | 162 ++++++++- src/trees/FunctionTree.cpp | 179 +-------- src/trees/FunctionTree.h | 259 +++++++++++-- src/trees/FunctionTreeVector.h | 129 +++++-- src/trees/HilbertPath.h | 87 ++++- src/trees/MWNode.cpp | 502 +------------------------- src/trees/MWNode.h | 212 ++++++++--- src/trees/MWTree.cpp | 232 +----------- src/trees/MWTree.h | 288 ++++++++++++--- src/trees/MultiResolutionAnalysis.cpp | 91 +---- src/trees/MultiResolutionAnalysis.h | 137 ++++++- src/trees/NodeAllocator.cpp | 67 +--- src/trees/NodeAllocator.h | 163 ++++++++- src/trees/NodeBox.h | 109 +++++- src/trees/NodeIndex.h | 132 ++++++- src/trees/OperatorNode.cpp | 30 +- src/trees/OperatorNode.h | 87 ++++- src/trees/OperatorTree.cpp | 50 +-- src/trees/OperatorTree.h | 116 +++++- src/trees/TreeIterator.h | 160 +++++++- 27 files changed, 2272 insertions(+), 1672 deletions(-) diff --git a/src/trees/BandWidth.cpp b/src/trees/BandWidth.cpp index a79814d2f..e536ab752 100644 --- a/src/trees/BandWidth.cpp +++ b/src/trees/BandWidth.cpp @@ -26,13 +26,16 @@ #include "BandWidth.h" #include "utils/Printer.h" +#include +#include + namespace mrcpp { BandWidth &BandWidth::operator=(const BandWidth &bw) = default; bool 
BandWidth::isEmpty(int depth) const { - if (depth > getDepth()) { return true; } - if (this->widths(depth, 4) < 0) { return true; } + if (depth > getDepth()) return true; + if (this->widths(depth, 4) < 0) return true; return false; } @@ -41,7 +44,7 @@ void BandWidth::setWidth(int depth, int index, int wd) { assert(index >= 0 and index < 4); assert(wd >= 0); this->widths(depth, index) = wd; - if (wd > this->widths(depth, 4)) { this->widths(depth, 4) = wd; } + if (wd > this->widths(depth, 4)) this->widths(depth, 4) = wd; } std::ostream &BandWidth::print(std::ostream &o) const { @@ -60,4 +63,4 @@ std::ostream &BandWidth::print(std::ostream &o) const { return o; } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/BandWidth.h b/src/trees/BandWidth.h index b4ee49e8d..fa520c26e 100644 --- a/src/trees/BandWidth.h +++ b/src/trees/BandWidth.h @@ -23,41 +23,110 @@ * */ -/* - * BandWidth.h +/** + * @file BandWidth.h + * @brief Lightweight storage for per-depth operator bandwidths. + * + * @details + * This class stores, for each tree depth, four component band widths plus + * a cached maximum among them. A negative width denotes “unset/empty”. + * + * - Rows correspond to depths in \f$\{0,\dots,\text{maxDepth}\}\f$. + * - Columns \f$0..3\f$ are per-component widths (e.g. for blocks T, C, B, A). + * - Column \f$4\f$ caches the **maximum** width at that depth. + * + * The class provides convenience accessors, mutation with automatic update of + * the per-depth maximum, and formatted printing. */ #pragma once #include #include +#include namespace mrcpp { +/** + * @class BandWidth + * @brief Container for band widths over depths and components. + */ class BandWidth final { public: + /** + * @brief Construct with storage for @p depth + 1 rows. + * @param depth Maximum depth to allocate (inclusive). + * + * All entries are initialized to -1 (empty). 
+ */ BandWidth(int depth = 0) : widths(depth + 1, 5) { this->clear(); } + + /** + * @brief Copy-construct from another instance. + */ BandWidth(const BandWidth &bw) : widths(bw.widths) {} + + /** + * @brief Copy-assign from another instance. + */ BandWidth &operator=(const BandWidth &bw); + /** + * @brief Set all widths (including cached maxima) to -1. + */ void clear() { this->widths.setConstant(-1); } + /** + * @brief Check whether the row for @p depth is effectively empty. + * @param depth Depth to test. + * @return True if @p depth is out of range or the cached max is < 0. + */ bool isEmpty(int depth) const; + + /** + * @brief Highest valid depth index stored. + * @return The maximum depth (rows - 1). + */ int getDepth() const { return this->widths.rows() - 1; } + + /** + * @brief Cached maximum width for a depth. + * @param depth Depth to query. + * @return Max width at @p depth, or -1 if @p depth is out of range. + */ int getMaxWidth(int depth) const { return (depth > getDepth()) ? -1 : this->widths(depth, 4); } + + /** + * @brief Component width accessor. + * @param depth Depth to query. + * @param index Component in {0,1,2,3}. + * @return Width for (@p depth, @p index), or -1 if @p depth is out of range. + */ int getWidth(int depth, int index) const { return (depth > getDepth()) ? -1 : this->widths(depth, index); } + + /** + * @brief Set component width and update the cached per-depth maximum. + * @param depth Depth to modify (0..getDepth()). + * @param index Component in {0,1,2,3}. + * @param wd Non-negative band width. + */ void setWidth(int depth, int index, int wd); + /** + * @brief Stream pretty-printer. + */ friend std::ostream &operator<<(std::ostream &o, const BandWidth &bw) { return bw.print(o); } private: - Eigen::MatrixXi widths; /// column 5 stores max width at depth + /// Matrix of widths; columns 0..3 = components, column 4 = cached max per depth. + Eigen::MatrixXi widths; + /// Implementation of formatted printing. 
std::ostream &print(std::ostream &o) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/BoundingBox.cpp b/src/trees/BoundingBox.cpp index ff86abf2e..8309e733d 100644 --- a/src/trees/BoundingBox.cpp +++ b/src/trees/BoundingBox.cpp @@ -32,19 +32,8 @@ namespace mrcpp { -/** @brief Constructor for BoundingBox object. - * - * @param[in] box: [lower, upper] bound in all dimensions. - * @returns New BoundingBox object. - * - * @details Creates a box with appropriate root scale and scaling - * factor to fit the given bounds, which applies to _all_ dimensions. - * Root scale is chosen such that the scaling factor becomes 1 < sfac < 2. - * - * Limitations: Box must be _either_ [0,L] _or_ [-L,L], with L a positive integer. - * This is the most general constructor, which will create a world with no periodic boundary conditions. - */ -template BoundingBox::BoundingBox(std::array box) { +template +BoundingBox::BoundingBox(std::array box) { if (box[1] < 1) { MSG_ERROR("Invalid upper bound: " << box[1]); box[1] = 1; @@ -79,25 +68,12 @@ template BoundingBox::BoundingBox(std::array box) { setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] n: Length scale, default 0. - * @param[in] l: Corner translation, default [0, 0, ...]. - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @param[in] pbc: Periodic boundary conditions, default false. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. The parameter n defines the length scale, which, together with sf, determines the unit length of each side of the boxes by \f$ [2^{-n}]^D \f$. - * The parameter l defines the corner translation of the lower corner of the box relative to the world origin. - * The parameter nb defines the number of boxes in each dimension. 
- * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. - * This constructor is used for work in periodic systems. - * - */ template -BoundingBox::BoundingBox(int n, const std::array &l, const std::array &nb, const std::array &sf, bool pbc) +BoundingBox::BoundingBox(int n, + const std::array &l, + const std::array &nb, + const std::array &sf, + bool pbc) : cornerIndex(n, l) { setPeriodic(pbc); setNBoxes(nb); @@ -105,21 +81,10 @@ BoundingBox::BoundingBox(int n, const std::array &l, const std::array setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] idx: index of the lower corner of the box. - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. - * The parameter idx defines the index of the lower corner of the box relative to the world origin. - * The parameter nb defines the number of boxes in each dimension. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * This constructor creates a world with no periodic boundary conditions. - */ template -BoundingBox::BoundingBox(const NodeIndex &idx, const std::array &nb, const std::array &sf) +BoundingBox::BoundingBox(const NodeIndex &idx, + const std::array &nb, + const std::array &sf) : cornerIndex(idx) { setPeriodic(false); setNBoxes(nb); @@ -127,16 +92,6 @@ BoundingBox::BoundingBox(const NodeIndex &idx, const std::array &n setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. 
- * @param[in] pbc: Periodic boundary conditions, default true. - * - * @details Creates a box with given parameters. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. - * This construtor is used for work in periodic systems. - */ template BoundingBox::BoundingBox(const std::array &sf, bool pbc) : cornerIndex() { @@ -146,17 +101,6 @@ BoundingBox::BoundingBox(const std::array &sf, bool pbc) setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @param[in] pbc: Periodic boundary conditions, default true. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes specific dimensions periodic. - * This is used for work in periodic systems. - */ template BoundingBox::BoundingBox(const std::array &sf, std::array pbc) : cornerIndex() { @@ -166,14 +110,6 @@ BoundingBox::BoundingBox(const std::array &sf, std::array setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] box: Other BoundingBox object. - * @returns New BoundingBox object. - * - * @details Creates a box identical to the input box paramter. - * This constructor uses all parameters from the other BoundingBox to create a new one. - */ template BoundingBox::BoundingBox(const BoundingBox &box) : cornerIndex(box.cornerIndex) { @@ -183,14 +119,8 @@ BoundingBox::BoundingBox(const BoundingBox &box) setDerivedParameters(); } -/** @brief Assignment operator overload for BoundingBox object. 
- * - * @returns New BoundingBox object. - * @param[in] box: Other BoundingBox object. - * - * @details Allocates all parameters in this BoundingBox to be that of the other BoundingBox. - */ -template BoundingBox &BoundingBox::operator=(const BoundingBox &box) { +template +BoundingBox &BoundingBox::operator=(const BoundingBox &box) { if (&box != this) { this->cornerIndex = box.cornerIndex; this->periodic = box.periodic; @@ -201,14 +131,8 @@ template BoundingBox &BoundingBox::operator=(const BoundingBox return *this; } -/** @brief Sets the number of boxes in each dimension. - * - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * - * @details For each dimentions D it sets the number of boxes in that dimension in the nBoxes array and the total amount of boxes in the world in the totBoxes variable. - * This just sets counters for the number of boxes in each dimension. - */ -template void BoundingBox::setNBoxes(const std::array &nb) { +template +void BoundingBox::setNBoxes(const std::array &nb) { this->totBoxes = 1; for (int d = 0; d < D; d++) { this->nBoxes[d] = (nb[d] > 0) ? nb[d] : 1; @@ -216,17 +140,8 @@ template void BoundingBox::setNBoxes(const std::array &nb) { } } -/** @brief Computes and sets all derived parameters. - * - * @details For all parameters that have been initialized in the constructor, - * this function will compute the necessary derived parameters in each dimension. - * The unit length is set to \a sfac \f$ \cdot 2^{-n} \f$ where \a sfac is the scaling factor (default 1.0) and n is the length scale. - * The unit length is the base unit which is used for the size and positioning of the boxes around origin. - * The boxLength is the total length of the box in each dimension, which is the unit length times the number of boxes in that dimension. - * The lowerBound is computed from the index of the lower corner of the box and the unit length. - * The upperBound is computed to be the lower corner plus the total length in that dimension. 
- */ -template void BoundingBox::setDerivedParameters() { +template +void BoundingBox::setDerivedParameters() { assert(this->totBoxes > 0); const NodeIndex &cIdx = this->cornerIndex; for (int d = 0; d < D; d++) { @@ -238,13 +153,8 @@ template void BoundingBox::setDerivedParameters() { } } -/** @brief Sets the number of boxes in each dimension. - * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * - * @details This checks that the sf variable has sane values before assigning it to the member variable scalingFactor. - */ -template void BoundingBox::setScalingFactors(const std::array &sf) { +template +void BoundingBox::setScalingFactors(const std::array &sf) { assert(this->totBoxes > 0); for (auto &x : sf) if (x <= 0.0 and sf != std::array{}) MSG_ABORT("Non-positive scaling factor: " << x); @@ -252,37 +162,18 @@ template void BoundingBox::setScalingFactors(const std::array{}) scalingFactor.fill(1.0); } -/** @brief Sets which dimensions are periodic. - * - * @param[in] pbc: Boolean which is used to set all dimension to either periodic or not - * - * @details this fills in the periodic array with the values from the input. - */ -template void BoundingBox::setPeriodic(bool pbc) { +template +void BoundingBox::setPeriodic(bool pbc) { this->periodic.fill(pbc); } -/** @brief Sets which dimensions are periodic. - * - * @param[in] pbs: D-dimensional array holding boolean values for each dimension. - * - * @details This fills in the periodic array with the values from the input array. - */ -template void BoundingBox::setPeriodic(std::array pbc) { +template +void BoundingBox::setPeriodic(std::array pbc) { this->periodic = pbc; } -/** @brief Fetches a NodeIndex object from a given box index. - * - * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. - * @returns The NodeIndex object of the index given as it is in the Multiresolutoin analysis. 
- * - * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, - * each of which will be part of a specific node in the tree. These cells are divided adaptivelly. This function returns the NodeIndex - * object of the cell at the lower back corner of the box object indexed by bIdx. - * Specialized for D=1 below - */ -template NodeIndex BoundingBox::getNodeIndex(int bIdx) const { +template +NodeIndex BoundingBox::getNodeIndex(int bIdx) const { assert(bIdx >= 0 and bIdx <= this->totBoxes); std::array l; for (int d = D - 1; d >= 0; d--) { @@ -300,14 +191,8 @@ template NodeIndex BoundingBox::getNodeIndex(int bIdx) const { return NodeIndex(getScale(), l); } -/** @brief Fetches the index of a box from a given coordinate. - * - * @param[in] r: D-dimensional array representaing a coordinate in the simulation box - * @returns The index value of the boxes in the position given as it is in the generated world. - * - * @details Specialized for D=1 below - */ -template int BoundingBox::getBoxIndex(Coord r) const { +template +int BoundingBox::getBoxIndex(Coord r) const { if (this->isPeriodic()) { periodic::coord_manipulation(r, this->getPeriodic()); } @@ -334,16 +219,8 @@ template int BoundingBox::getBoxIndex(Coord r) const { return bIdx; } -/** @brief Fetches the index of a box from a given NodeIndex. - * - * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. - * @returns The index value of the boxes in which the NodeIndex object is mapping to. - * - * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. - * Each node will get its own index value, but will still be part of one of the original boxes of the world. 
- * Specialized for D=1 below - */ -template int BoundingBox::getBoxIndex(NodeIndex nIdx) const { +template +int BoundingBox::getBoxIndex(NodeIndex nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation(nIdx, this->getPeriodic()); }; int n = nIdx.getScale(); @@ -366,14 +243,8 @@ template int BoundingBox::getBoxIndex(NodeIndex nIdx) const { return bIdx; } -/** @brief Prints information about the BoundinBox object. - * - * @param[in] o: Output stream variable which will be used to print the information - * @returns The output stream variable. - * - * @details A function which prints information about the BoundingBox object. - */ -template std::ostream &BoundingBox::print(std::ostream &o) const { +template +std::ostream &BoundingBox::print(std::ostream &o) const { int oldprec = Printer::setPrecision(5); o << std::fixed; if (isPeriodic()) { o << " The World is Periodic" << std::endl; } @@ -401,28 +272,16 @@ template std::ostream &BoundingBox::print(std::ostream &o) const { return o; } -/** @brief Fetches a NodeIndex object from a given box index, specialiced for 1-D. - * - * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. - * @returns The NodeIndex object of the index given as it is in the Multiresolutoin analysis. - * - * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, - * each of which will be part of a specific node in the tree. These cells are divided adaptivelly. This function returns the NodeIndex - * object of the cell at the lower back corner of the box object indexed by bIdx. - */ -template <> NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { +template <> +NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { const NodeIndex<1> &cIdx = this->cornerIndex; int n = cIdx.getScale(); int l = bIdx + cIdx[0]; return NodeIndex<1>(n, {l}); } -/** @brief Fetches the index of a box from a given coordinate, specialized for 1D. 
- * - * @param[in] r: 1-dimensional array representaing a coordinate in the simulation box - * @returns The index value of the boxes in the position given as it is in the generated world. - */ -template <> int BoundingBox<1>::getBoxIndex(Coord<1> r) const { +template <> +int BoundingBox<1>::getBoxIndex(Coord<1> r) const { if (this->isPeriodic()) { periodic::coord_manipulation<1>(r, this->getPeriodic()); } @@ -435,15 +294,8 @@ template <> int BoundingBox<1>::getBoxIndex(Coord<1> r) const { return static_cast(iint); } -/** @brief Fetches the index of a box from a given NodeIndex specialized for 1-D. - * - * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. - * @returns The index value of the boxes in which the NodeIndex object is mapping to. - * - * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. - * Each node will get its own index value, but will still be part of one of the original boxes of the world. - */ -template <> int BoundingBox<1>::getBoxIndex(NodeIndex<1> nIdx) const { +template <> +int BoundingBox<1>::getBoxIndex(NodeIndex<1> nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation<1>(nIdx, this->getPeriodic()); }; int n = nIdx.getScale(); @@ -466,4 +318,4 @@ template class BoundingBox<1>; template class BoundingBox<2>; template class BoundingBox<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/BoundingBox.h b/src/trees/BoundingBox.h index 28ad9c052..768a42c31 100644 --- a/src/trees/BoundingBox.h +++ b/src/trees/BoundingBox.h @@ -27,6 +27,7 @@ #include #include +#include #include "NodeIndex.h" #include "utils/details.h" @@ -35,79 +36,241 @@ namespace mrcpp { -/** @class BoundingBox +/** + * @file BoundingBox.h + * @brief Declaration of the @ref BoundingBox domain descriptor. 
* - * @brief Class defining the computational domain + * @details + * The bounding box defines the computational “world” for multiresolution + * trees. In \(D\) dimensions it is described by: + * - a **corner index** (scale and integer translation), + * - a **count of boxes** per dimension (all on the same scale), + * - a **scaling factor** per dimension (physical unit lengths), + * - and optional **periodic boundary conditions** per dimension. * - * @details The computational domain is made up of a collection of D-dimensional - * boxes on a particular length scale \f$ n \f$. The size of each box is then - * \f$ [2^{-n}]^D \f$, i.e. higher scale means smaller boxes, and the scale - * may be negative. The number of boxes can be different in each dimension - * \f$ [n_x, n_y, \dots] \f$, but they must all be on the same scale (size). - * Box translations relative to the world origin _must_ be an integer - * multiple of the given scale size \f$ 2^{-n} \f$. + * From these fundamental parameters, derived quantities such as unit + * lengths, total box lengths and physical bounds are computed. */ +/** + * @class BoundingBox + * @tparam D Spatial dimension (1, 2, or 3). + * @brief Defines the \(D\)-dimensional computational domain (“world”). + * + * @details + * The world is a Cartesian grid of equally-sized boxes at a given scale + * \(n\). Each box has edge length \(2^{-n}\) in grid units, optionally + * multiplied by a per-dimension scaling factor to reflect physical units. + * The lower-back corner of the world is given by an integer translation + * at the same scale. Periodicity can be enabled per dimension. + */ template class BoundingBox { public: + /** + * @brief Construct a non-periodic world from symmetric or half-open bounds. + * + * @param box Two integers \{lower, upper\}. Supported forms are + * \{0, L\} or \{-L, L\} with \(L>0\). + * + * @details + * Chooses a root scale so that the per-dimension scaling factor + * satisfies \(1 < \text{sfac} < 2\). 
The same bounds apply to all + * dimensions. Periodicity is disabled. + */ explicit BoundingBox(std::array box); - explicit BoundingBox(int n = 0, const std::array &l = {}, const std::array &nb = {}, const std::array &sf = {}, bool pbc = false); - explicit BoundingBox(const NodeIndex &idx, const std::array &nb = {}, const std::array &sf = {}); + + /** + * @brief Fully-specified constructor (all dimensions share the same scale). + * + * @param n Root scale (can be negative). + * @param l Integer translation (corner index) per dimension. + * @param nb Number of boxes per dimension (non-zero positives will be used; zeros mean 1). + * @param sf Scaling factor per dimension (non-positive entries are rejected). + * @param pbc If true, all dimensions are periodic. + * + * @details + * This is the most general constructor for rectangular worlds at a single + * multiresolution scale. Periodicity is global (all-or-nothing). + */ + explicit BoundingBox(int n = 0, + const std::array &l = {}, + const std::array &nb = {}, + const std::array &sf = {}, + bool pbc = false); + + /** + * @brief Construct from a corner @ref NodeIndex and per-dimension sizes. + * + * @param idx Corner node index (scale and integer translation). + * @param nb Number of boxes per dimension. + * @param sf Scaling factor per dimension. + * + * @details + * Periodicity is disabled. Useful when the corner is already known + * in multiresolution units. + */ + explicit BoundingBox(const NodeIndex &idx, + const std::array &nb = {}, + const std::array &sf = {}); + + /** + * @brief Construct periodic (all dimensions) world from scaling factors. + * + * @param sf Scaling factor per dimension. + * @param pbc If true, enables periodicity for all dimensions (default true). + */ explicit BoundingBox(const std::array &sf, bool pbc = true); + + /** + * @brief Construct world with per-dimension periodicity flags. + * + * @param sf Scaling factor per dimension. + * @param pbc Periodicity flags per dimension. 
+ */ BoundingBox(const std::array &sf, std::array pbc); + + /** + * @brief Copy constructor. + */ BoundingBox(const BoundingBox &box); + + /** + * @brief Copy assignment. + */ BoundingBox &operator=(const BoundingBox &box); + + /// Defaulted virtual destructor. virtual ~BoundingBox() = default; + /** + * @name Equality + * @brief Compare corner and per-dimension box counts. + * @{ + */ inline bool operator==(const BoundingBox &box) const; inline bool operator!=(const BoundingBox &box) const; + /// @} + /** + * @brief Convert a world-box index to a @ref NodeIndex at the root scale. + * @param bIdx Linear index of the box within the world. + * @return Corner node index for that box. + * + * @note Specializations provide efficient versions for \f$D=1\f$. + */ NodeIndex getNodeIndex(int bIdx) const; + /** + * @brief Map a physical coordinate to the enclosing world-box index. + * @param r Physical coordinate (scaled by @ref getScalingFactors()). + * @return Linear index of the box, or -1 if outside and non-periodic. + */ int getBoxIndex(Coord r) const; + + /** + * @brief Map a @ref NodeIndex to the enclosing world-box index. + * @param nIdx Node index (possibly at a finer scale). + * @return Linear index of the box, or -1 if outside or at coarser scale.
+ */ int getBoxIndex(NodeIndex nIdx) const; - int size() const { return this->totBoxes; } - int size(int d) const { return this->nBoxes[d]; } - int getScale() const { return this->cornerIndex.getScale(); } - double getScalingFactor(int d) const { return this->scalingFactor[d]; } - double getUnitLength(int d) const { return this->unitLengths[d]; } - double getBoxLength(int d) const { return this->boxLengths[d]; } - double getLowerBound(int d) const { return this->lowerBounds[d]; } - double getUpperBound(int d) const { return this->upperBounds[d]; } - bool isPeriodic() const { return details::are_any(this->periodic, true); } - const std::array &getPeriodic() const { return this->periodic; } + /// @name Size and scale queries + /// @{ + int size() const { return this->totBoxes; } ///< Total number of boxes. + int size(int d) const { return this->nBoxes[d]; } ///< Number of boxes along dimension @p d. + int getScale() const { return this->cornerIndex.getScale(); } ///< Root scale \f$n\f$. + /// @} + + /// @name Geometry (per-dimension) + /// @{ + double getScalingFactor(int d) const { return this->scalingFactor[d]; } ///< Physical scaling factor. + double getUnitLength(int d) const { return this->unitLengths[d]; } ///< Unit length \f$= \text{sfac}\cdot 2^{-n}\f$. + double getBoxLength(int d) const { return this->boxLengths[d]; } ///< Total world length along @p d. + double getLowerBound(int d) const { return this->lowerBounds[d]; } ///< Physical lower bound. + double getUpperBound(int d) const { return this->upperBounds[d]; } ///< Physical upper bound. + /// @} + + /// @name Periodicity + /// @{ + bool isPeriodic() const { return details::are_any(this->periodic, true); } ///< Any dimension periodic? + const std::array &getPeriodic() const { return this->periodic; } ///< Per-dimension flags.
+ /// @} + + /// @name Bulk getters + /// @{ const Coord &getUnitLengths() const { return this->unitLengths; } const Coord &getBoxLengths() const { return this->boxLengths; } const Coord &getLowerBounds() const { return this->lowerBounds; } const Coord &getUpperBounds() const { return this->upperBounds; } const NodeIndex &getCornerIndex() const { return this->cornerIndex; } const std::array &getScalingFactors() const { return this->scalingFactor; } + /// @} + + /** + * @brief Pretty-printer (human-readable). + */ friend std::ostream &operator<<(std::ostream &o, const BoundingBox &box) { return box.print(o); } protected: - // Fundamental parameters - NodeIndex cornerIndex; ///< Index defining the lower corner of the box - std::array nBoxes{}; ///< Number of boxes in each dim, last entry total - std::array scalingFactor{}; - std::array periodic{}; ///< Sets which dimension has Periodic boundary conditions. - - // Derived parameters - int totBoxes{1}; - Coord unitLengths; ///< 1/2^initialScale - Coord boxLengths; ///< Total length (unitLength times nBoxes) - Coord lowerBounds; ///< Box lower bound (not real) - Coord upperBounds; ///< Box upper bound (not real) + // ---------------- Fundamental parameters ---------------- + + NodeIndex cornerIndex; ///< Lower-corner node (scale + integer translation). + std::array nBoxes{}; ///< Number of boxes per dimension. + std::array scalingFactor{}; ///< Physical scaling factors per dimension. + std::array periodic{}; ///< Periodicity flags per dimension. + // ---------------- Derived parameters ---------------- + + int totBoxes{1}; ///< Product of @ref nBoxes. + Coord unitLengths; ///< Per-dimension unit length (\f$ \text{sfac}\cdot 2^{-n} \f$). + Coord boxLengths; ///< Total world length per dimension. + Coord lowerBounds; ///< Physical lower bounds. + Coord upperBounds; ///< Physical upper bounds. + + /** + * @brief Set number of boxes per dimension. + * @param nb If an entry is zero, it is treated as one.
+ */ void setNBoxes(const std::array &nb = {}); + + /** + * @brief Compute all derived parameters from fundamentals. + * + * @details + * Uses @ref cornerIndex, @ref nBoxes and @ref scalingFactor to fill + * unit lengths, box lengths and physical bounds. + */ void setDerivedParameters(); + + /** + * @brief Set scaling factors per dimension, validating positivity. + * @param sf Per-dimension scaling factors. Empty value means all ones. + */ void setScalingFactors(const std::array &sf); + + /** + * @brief Set periodicity per dimension. + * @param periodic Flags per dimension. + */ void setPeriodic(std::array periodic); + + /** + * @brief Set global periodicity (all-or-nothing). + * @param periodic If true, all dimensions are periodic. + */ void setPeriodic(bool periodic); + /** + * @brief Print a formatted summary to stream @p o. + */ std::ostream &print(std::ostream &o) const; }; +// ---------------- Inline comparisons ---------------- + +/** + * @brief Equality: same corner index and per-dimension box counts. + */ template bool BoundingBox::operator==(const BoundingBox &box) const { if (getCornerIndex() != box.getCornerIndex()) return false; for (int d = 0; d < D; d++) { @@ -116,6 +279,9 @@ template bool BoundingBox::operator==(const BoundingBox &box) cons return true; } +/** + * @brief Inequality: differs in corner index or in any per-dimension box count. 
+ */ template bool BoundingBox::operator!=(const BoundingBox &box) const { if (getCornerIndex() != box.getCornerIndex()) return true; for (int d = 0; d < D; d++) { @@ -124,4 +290,4 @@ template bool BoundingBox::operator!=(const BoundingBox &box) cons return false; } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/CornerOperatorTree.cpp b/src/trees/CornerOperatorTree.cpp index 6de235dd3..245666823 100644 --- a/src/trees/CornerOperatorTree.cpp +++ b/src/trees/CornerOperatorTree.cpp @@ -32,15 +32,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief Calculates band widths of the non-standard form matrices. - * - * @param[in] prec: Precision used for thresholding - * - * @details It is starting from \f$ l = 2^n \f$ and updating the band width value each time we encounter - * considerable value while keeping decreasing down to \f$ l = 0 \f$, that stands for the distance to the diagonal. - * This procedure is repeated for each matrix \f$ A, B \f$ and \f$ C \f$. - * - */ void CornerOperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -49,7 +40,7 @@ void CornerOperatorTree::calcBandWidth(double prec) { getMaxTranslations(max_transl); if (prec < 0.0) prec = this->normPrec; - double thrs = std::max(MachinePrec, prec / 10.0); // should be enough due to oscillating behaviour of corner matrix elements (it's affected by polynomial order) + double thrs = std::max(MachinePrec, prec / 10.0); for (int depth = 0; depth < this->getDepth(); depth++) { int l = (1 << depth) - 1; @@ -71,17 +62,8 @@ void CornerOperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } -/** @brief Checks if the distance to diagonal is lesser than the operator band width. 
- * - * @param[in] oTransl: distance to diagonal - * @param[in] o_depth: scaling order - * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. - * - * @returns True if \b oTransl is outside of the corner band (close to diagonal) and False otherwise. - * - */ bool CornerOperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { - return abs(oTransl) < this->bandWidth->getWidth(o_depth, idx); + return std::abs(oTransl) < this->bandWidth->getWidth(o_depth, idx); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/CornerOperatorTree.h b/src/trees/CornerOperatorTree.h index 0ac2ad5bd..8da4f5a11 100644 --- a/src/trees/CornerOperatorTree.h +++ b/src/trees/CornerOperatorTree.h @@ -29,23 +29,89 @@ namespace mrcpp { -/** @class CornerOperatorTree +/** + * @file CornerOperatorTree.h + * @brief Declaration of CornerOperatorTree, a specialization of OperatorTree + * for "corner" non-standard form operators. * - * @brief Special case of OperatorTree class + * @details + * Many MRCPP operators are represented in non-standard form and decompose + * into the four corner submatrices T, A, B, C. This helper class provides: + * - computation of per-depth band widths for those corner blocks, and + * - a fast band screen used during operator application. * - * @details Tree structure of operators having corner matrices - * \f$ A, B, C \f$ in the non-standard form. + * The band width information is stored in a BandWidth object owned by + * the base class OperatorTree. * + * @par Example + * @code + * CornerOperatorTree cot(mra, 10); // maxDepth = 10 + * cot.calcBandWidth(1e-8); // build band widths with a threshold + * bool within = cot.isOutsideBand(3, 4, 1); // oTransl=3, o_depth=4, idx=1 + * @endcode + */ + +/** + * @class CornerOperatorTree + * @brief Operator tree for non-standard form corner matrices. 
+ * + * @details + * This final class only adds band-handling logic on top of @ref OperatorTree. + * Construction and storage are inherited from the base class; the only + * public operations exposed here are: + * - @ref calcBandWidth to build/update the band widths, and + * - @ref isOutsideBand for a quick test against the stored band. */ class CornerOperatorTree final : public OperatorTree { public: - using OperatorTree::OperatorTree; // Import the single valid constructor from OperatorTree + /// Inherit the valid constructor(s) from OperatorTree. + using OperatorTree::OperatorTree; + CornerOperatorTree(const CornerOperatorTree &tree) = delete; CornerOperatorTree &operator=(const CornerOperatorTree &tree) = delete; ~CornerOperatorTree() override = default; + /** + * @brief Compute per-depth band widths for the corner matrices. + * + * @param prec Threshold used when scanning matrix entries. + * If negative, the implementation falls back to the + * tree’s internal default (e.g. @c normPrec ). + * + * @details + * For each depth and for each corner component \f$\{T,A,B,C\}\f$, + * the routine scans along increasing distance from the diagonal and + * records the largest translation \f$\ell\f$ for which the component + * norm still exceeds the threshold. The resulting widths are stored + * in the underlying @ref BandWidth structure. + * + * @note Calling this will (re)allocate and overwrite the stored band + * widths for the whole tree. + */ void calcBandWidth(double prec = -1.0) override; + + /** + * @brief Test an offset against the stored band width. + * + * @param oTransl Integer offset (translation) from the diagonal. + * @param o_depth Operator depth at which to query the band. + * @param idx Corner component selector in \f$\{0,1,2,3\}\f$ + * corresponding to \f$\{T,A,B,C\}\f$. + * + * @return + * **true** if \f$|oTransl| < \mathrm{width}(o\_depth, idx)\f$, + * **false** otherwise. 
+ * + * @details + * Despite the historical name, this method returns @b true when the + * offset lies @em inside the retained band (i.e., strictly smaller + * than the stored width). Callers typically use it as a quick screen + * to decide whether a sparse block needs to be applied. + * + * @warning This assumes @ref calcBandWidth has been called at least + * once; otherwise widths may be unset or conservative. + */ bool isOutsideBand(int oTransl, int o_depth, int idx) override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp index ff23fb394..231c95c19 100644 --- a/src/trees/FunctionNode.cpp +++ b/src/trees/FunctionNode.cpp @@ -42,14 +42,14 @@ using namespace Eigen; namespace mrcpp { -/** Function evaluation. - * Evaluate all polynomials defined on the node. */ template T FunctionNode::evalf(Coord r) { if (not this->hasCoefs()) MSG_ERROR("Evaluating node without coefs"); // The 1.0 appearing in the if tests comes from the period is always 1.0 // from the point of view of this function. - if (this->getMWTree().getRootBox().isPeriodic()) { periodic::coord_manipulation(r, this->getMWTree().getRootBox().getPeriodic()); } + if (this->getMWTree().getRootBox().isPeriodic()) { + periodic::coord_manipulation(r, this->getMWTree().getRootBox().getPeriodic()); + } this->threadSafeGenChildren(); int cIdx = this->getChildIndex(r); @@ -87,11 +87,6 @@ template T FunctionNode::evalScaling(const Coord &r return two_n * result; } -/** Function integration. - * - * Wrapper for function integration, that requires different methods depending - * on scaling type. Integrates the function represented on the node on the - * full support of the node. */ template T FunctionNode::integrate() const { if (not this->hasCoefs()) { return 0.0; } switch (this->getScalingType()) { @@ -106,26 +101,12 @@ template T FunctionNode::integrate() const { } } -/** Function integration, Legendre basis. 
- * - * Integrates the function represented on the node on the full support of the - * node. The Legendre basis is particularly easy to integrate, as the work is - * already done when calculating its coefficients. The coefficients of the - * node is defined as the projection integral - * s_i = int f(x)phi_i(x)dx - * and since the first Legendre function is the constant 1, the first - * coefficient is simply the integral of f(x). */ template T FunctionNode::integrateLegendre() const { double n = (D * this->getScale()) / 2.0; double two_n = std::pow(2.0, -n); return two_n * this->getCoefs()[0]; } -/** Function integration, Interpolating basis. - * - * Integrates the function represented on the node on the full support of the - * node. A bit more involved than in the Legendre basis, as is requires some - * coupling of quadrature weights. */ template T FunctionNode::integrateInterpolating() const { int qOrder = this->getKp1(); getQuadratureCache(qc); @@ -139,7 +120,6 @@ template T FunctionNode::integrateInterpolating() cons Eigen::Matrix coefs; this->getCoefs(coefs); for (int p = 0; p < D; p++) { - int n = 0; for (int i = 0; i < kp1_p[D - p - 1]; i++) { for (int j = 0; j < qOrder; j++) { @@ -157,11 +137,6 @@ template T FunctionNode::integrateInterpolating() cons return two_n * sum; } -/** Function integration, Interpolating basis. - * - * Integrates the function represented on the node on the full support of the - * node. A bit more involved than in the Legendre basis, as is requires some - * coupling of quadrature weights. 
*/ template T FunctionNode::integrateValues() const { int qOrder = this->getKp1(); getQuadratureCache(qc); @@ -234,12 +209,6 @@ template void FunctionNode::getValues(Matrix void FunctionNode::getAbsCoefs(T *absCoefs) { T *coefsTmp = this->coefs; for (int i = 0; i < this->n_coefs; i++) absCoefs[i] = coefsTmp[i]; // copy @@ -381,9 +350,6 @@ template void FunctionNode::dealloc() { } } -/** Update the coefficients of the node by a mw transform of the scaling - * coefficients of the children. Option to overwrite or add up existing - * coefficients. Specialized for D=3 below. */ template void FunctionNode::reCompress() { MWNode::reCompress(); } @@ -408,14 +374,6 @@ template <> void FunctionNode<3>::reCompress() { } } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -433,14 +391,6 @@ template double dot_scaling(const FunctionNode &bra, const Fu #endif } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. 
- */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -467,14 +417,6 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -493,14 +435,6 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -519,14 +453,6 @@ template ComplexDouble dot_scaling(const FunctionNode &bra, c return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. 
- * NB: will take conjugate of bra in case of complex values. - */ template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -547,14 +473,6 @@ template double dot_wavelet(const FunctionNode &bra, const Fu #endif } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -583,14 +501,6 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -611,14 +521,6 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. 
Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -675,4 +577,4 @@ template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, co template ComplexDouble dot_scaling(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket); template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h index d1bfaaa31..d4ed4d645 100644 --- a/src/trees/FunctionNode.h +++ b/src/trees/FunctionNode.h @@ -32,63 +32,221 @@ namespace mrcpp { +/** + * @file FunctionNode.h + * @brief Leaf/branch node type that stores function coefficients on a + * multiresolution tree. + * + * @details + * A FunctionNode is a concrete MWNode specialized for function representations. + * It holds scaling and wavelet coefficients, provides allocation and refinement + * helpers, and exposes utilities for evaluation, coefficient access and + * basic per-node operations such as integration and local dot products. + * + * Template parameters: + * - D: spatial dimension (1, 2 or 3) + * - T: scalar type (double or ComplexDouble) + */ + +/** + * @class FunctionNode + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @brief Node of a FunctionTree that stores coefficients and implements + * function-specific operations. + * + * @note Construction is managed by FunctionTree and NodeAllocator. Users do not + * construct FunctionNode directly. + */ template class FunctionNode final : public MWNode { public: + /** @name Typed accessors */ + ///@{ + + /** @brief Return the owning FunctionTree (non-const). 
*/ FunctionTree &getFuncTree() { return static_cast &>(*this->tree); } + + /** @brief Return the parent node cast to FunctionNode (non-const). */ FunctionNode &getFuncParent() { return static_cast &>(*this->parent); } + + /** @brief Return the i-th child cast to FunctionNode (non-const). */ FunctionNode &getFuncChild(int i) { return static_cast &>(*this->children[i]); } + /** @brief Return the owning FunctionTree (const). */ const FunctionTree &getFuncTree() const { return static_cast &>(*this->tree); } + + /** @brief Return the parent node cast to FunctionNode (const). */ const FunctionNode &getFuncParent() const { return static_cast &>(*this->parent); } + + /** @brief Return the i-th child cast to FunctionNode (const). */ const FunctionNode &getFuncChild(int i) const { return static_cast &>(*this->children[i]); } + ///@} + + /** @name Tree-structure overrides */ + ///@{ + + /** + * @brief Create children of this node. + * @param coefs If true, initialize children by transferring coefficients + * from this node as appropriate for the basis. + * + * @details Allocates child nodes through the node allocator and updates + * the internal topology. When coefs is true, scaling/wavelet blocks are + * propagated so that the represented function is unchanged by the split. + */ void createChildren(bool coefs) override; + + /** + * @brief Generate (allocate) children if absent. + * @details Convenience wrapper that creates children without coefficient + * transfer. Intended for topology building when coefficients are filled + * later by a calculator. + */ void genChildren() override; + + /** + * @brief Ensure a parent exists and is allocated. + * @details Creates the parent node if missing and links this node into the + * parent children array. + */ void genParent() override; + + /** + * @brief Delete children of this node. + * @details Deallocates child nodes and updates internal state. Coefficients + * in this node remain untouched. 
+ */ void deleteChildren() override; + ///@} + + /** + * @brief Integrate the node contribution over its spatial support. + * @return The integral of the locally represented function on this node. + * + * @details Uses the current scaling basis to compute the exact contribution + * from scaling and wavelet parts confined to this node. For orthonormal + * wavelets the integral often reduces to the scaling block. + */ T integrate() const; + /** @name Value and coefficient access */ + ///@{ + + /** + * @brief Set nodal values from a vector. + * @param vec Column vector of size getNCoefs(). + * + * @details The vector is interpreted in the node's value layout used by + * the scaling basis. Typical use is for interpolating bases, where values + * correspond to quadrature or interpolation points. Internally, node + * coefficients are updated accordingly. + */ void setValues(const Eigen::Matrix &vec); + + /** + * @brief Extract nodal values into a vector. + * @param[out] vec Column vector resized to getNCoefs(). + * + * @details The returned values correspond to the basis-specific value + * layout for this node (e.g. interpolation/expanded points). + */ void getValues(Eigen::Matrix &vec); + + /** + * @brief Write absolute values of coefficients into a raw buffer. + * @param[out] absCoefs Pointer to memory of length getNCoefs(). + * + * @details Fills absCoefs[i] = abs(coef[i]). For complex T this is the + * magnitude; for real T this is std::abs. The ordering matches the node's + * internal coefficient layout. 
+ */ void getAbsCoefs(T *absCoefs); + ///@} + friend class FunctionTree; friend class NodeAllocator; protected: + /** @name Constructors and assignment (managed by the tree) */ + ///@{ + FunctionNode() : MWNode() {} - FunctionNode(MWTree *tree, int rIdx) + + explicit FunctionNode(MWTree *tree, int rIdx) : MWNode(tree, rIdx) {} + FunctionNode(MWNode *parent, int cIdx) : MWNode(parent, cIdx) {} + FunctionNode(MWTree *tree, const NodeIndex &idx) : MWNode(tree, idx) {} + FunctionNode(const FunctionNode &node) = delete; FunctionNode &operator=(const FunctionNode &node) = delete; ~FunctionNode() = default; + ///@} + + /** @brief Evaluate the reconstructed function at r (using this node only). */ T evalf(Coord r); + + /** @brief Evaluate the scaling part at r. */ T evalScaling(const Coord &r) const; + /** @brief Deallocate node-owned memory and reset local state. */ void dealloc() override; + + /** @brief Recompress local coefficients after updates. */ void reCompress() override; + /** @brief Integration helper for Legendre scaling basis. */ T integrateLegendre() const; + + /** @brief Integration helper for interpolating scaling basis. */ T integrateInterpolating() const; + + /** @brief Integration helper when values representation is active. */ T integrateValues() const; }; + +/** @name Per-node local dot-product helpers (double) */ +///@{ + +/** + * @brief Dot product of scaling parts on matching nodes (double). + * @return Sum over matching scaling blocks within the two nodes. + */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket); + +/** + * @brief Dot product of wavelet parts on matching nodes (double). + * @return Sum over matching wavelet blocks within the two nodes. 
+ */ template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); +///@} + +/** @name Per-node local dot-product helpers (complex-complex) */ +///@{ + template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); +///@} + +/** @name Per-node local dot-product helpers (complex-real and real-complex) */ +///@{ + template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -} // namespace mrcpp +///@} + +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp index 72ca68e39..8b8553fd7 100644 --- a/src/trees/FunctionTree.cpp +++ b/src/trees/FunctionTree.cpp @@ -42,15 +42,6 @@ using namespace Eigen; namespace mrcpp { -/** @returns New FunctionTree object - * - * @param[in] mra: Which MRA the function is defined - * @param[in] sh_mem: Pointer to MPI shared memory block - * - * @details Constructs an uninitialized tree, containing only empty root nodes. - * If a shared memory pointer is provided the tree will be allocated in this - * shared memory window, otherwise it will be local to each MPI process. - */ template FunctionTree::FunctionTree(const MultiResolutionAnalysis &mra, SharedMemory *sh_mem, const std::string &name) : MWTree(mra, name) @@ -107,11 +98,6 @@ template FunctionTree::~FunctionTree() { if (this->getNNodes() > 0) this->deleteRootNodes(); } -/** @brief Read a previously stored tree assuming text/ASCII format, - * in a representation using MADNESS conventions for n, l and index order. - * @param[in] file: File name - * @note This tree must have the exact same MRA the one that was saved(?) 
- */ template void FunctionTree::loadTreeTXT(const std::string &file) { std::ifstream in(file); int NDIM, k; @@ -285,10 +271,6 @@ template void FunctionTree::loadTreeTXT(const std::str this->calcSquareNorm(); } -/** @brief Write the tree to disk in text/ASCII format in a representation - * using MADNESS conventions for n, l and index order. - * @param[in] file: File name - */ template void FunctionTree::saveTreeTXT(const std::string &fname) { int nRoots = this->getRootBox().size(); MWNode **roots = this->getRootBox().getNodes(); @@ -332,9 +314,9 @@ template void FunctionTree::saveTreeTXT(const std::str std::array l; NodeIndex idx = this->endNodeTable[count]->getNodeIndex(); MWNode *node = &(this->getNode(idx, false)); - T *coefs = node->getCoefs(); - for (int i = 0; i < ncoefs * Tdim; i++) values[i] = coefs[i]; - node->attachCoefs(values); + T *coefs = node->getCoefs(); + for (int i = 0; i < ncoefs * Tdim; i++) values[i] = coefs[i]; + node->attachCoefs(values); int n = idx.getScale(); node->mwTransform(Reconstruction); node->cvTransform(Forward); @@ -353,13 +335,11 @@ template void FunctionTree::saveTreeTXT(const std::str for (int i = 0; i < ncoefs; i++) out << values[cix * ncoefs + mapMRC[i]] << " "; out << std::endl; } - node->attachCoefs(coefs); // put back original coeff - } + node->attachCoefs(coefs); // put back original coeff + } out.close(); } -/** @brief Write the tree structure to disk, for later use - * @param[in] file: File name, will get ".tree" extension - */ + template void FunctionTree::saveTree(const std::string &file) { Timer t1; @@ -376,17 +356,13 @@ template void FunctionTree::saveTree(const std::string f.write((char *)&nChunks, sizeof(int)); // Write tree data, chunk by chunk for (int iChunk = 0; iChunk < nChunks; iChunk++) { - f.write((char *)allocator.getNodeChunk(iChunk), allocator.getNodeChunkSize()); - f.write((char *)allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize()); + f.write((char *)allocator.getNodeChunk(iChunk), 
allocator.getNodeChunkSize()); + f.write((char *)allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize()); } f.close(); print::time(10, "Time write", t1); } -/** @brief Read a previously stored tree structure from disk - * @param[in] file: File name, will get ".tree" extension - * @note This tree must have the exact same MRA the one that was saved - */ template void FunctionTree::loadTree(const std::string &file) { Timer t1; @@ -419,7 +395,6 @@ template void FunctionTree::loadTree(const std::string print::time(10, "Time rewrite pointers", t2); } -/** @returns Integral of the function over the entire computational domain */ template T FunctionTree::integrate() const { T result = 0.0; @@ -438,7 +413,6 @@ template T FunctionTree::integrate() const { return jacobian * result; } -/** @returns Integral of a representable function over the grid given by the tree */ template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M &f) { // traverse tree, and treat end nodes only std::vector *> stack; // node from this @@ -473,20 +447,6 @@ template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunct return jacobian * result; } -/** @returns Function value in a point, out of bounds returns zero - * - * @param[in] r: Cartesian coordinate - * - * @note This will only evaluate the _scaling_ part of the - * leaf nodes in the tree, which means that the function - * values will not be fully accurate. - * This is done to allow a fast and const function evaluation - * that can be done in OMP parallel. 
If you want to include - * also the _final_ wavelet part you can call the corresponding - * evalf_precise function, _or_ you can manually extend - * the MW grid by one level before evaluating, using - * `mrcpp::refine_grid(tree, 1)` - */ template T FunctionTree::evalf(const Coord &r) const { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -511,16 +471,6 @@ template T FunctionTree::evalf(const Coord &r) cons return coef * result; } -/** @returns Function value in a point, out of bounds returns zero - * - * @param[in] r: Cartesian coordinate - * - * @note This will evaluate the _true_ value (scaling + wavelet) of the - * leaf nodes in the tree. This requires an on-the-fly MW transform - * on the node which makes this function slow and non-const. If you - * need fast evaluation, use refine_grid(tree, 1) first, and then - * evalf. - */ template T FunctionTree::evalf_precise(const Coord &r) { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -546,12 +496,6 @@ template T FunctionTree::evalf_precise(const Coord return coef * result; } -/** @brief In-place square of MW function representations, fixed grid - * - * @details The leaf node point values of the function will be in-place - * squared, no grid refinement. - * - */ template void FunctionTree::square() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -575,14 +519,6 @@ template void FunctionTree::square() { this->calcSquareNorm(); } -/** @brief In-place power of MW function representations, fixed grid - * - * @param[in] p: Numerical power - * - * @details The leaf node point values of the function will be in-place raised - * to the given power, no grid refinement. 
- * - */ template void FunctionTree::power(double p) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -606,14 +542,6 @@ template void FunctionTree::power(double p) { this->calcSquareNorm(); } -/** @brief In-place multiplication by a scalar, fixed grid - * - * @param[in] c: Scalar coefficient - * - * @details The leaf node point values of the function will be - * in-place multiplied by the given coefficient, no grid refinement. - * - */ template void FunctionTree::rescale(T c) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads()) @@ -633,7 +561,6 @@ template void FunctionTree::rescale(T c) { this->calcSquareNorm(); } -/** @brief In-place rescaling by a function norm \f$ ||f||^{-1} \f$, fixed grid */ template void FunctionTree::normalize() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); double sq_norm = this->getSquareNorm(); @@ -641,15 +568,6 @@ template void FunctionTree::normalize() { this->rescale(1.0 / std::sqrt(sq_norm)); } -/** @brief In-place addition with MW function representations, fixed grid - * - * @param[in] c: Numerical coefficient of input function - * @param[in] inp: Input function to add - * - * @details The input function will be added in-place on the current grid of - * the function, i.e. no further grid refinement. - * - */ template void FunctionTree::add(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -670,15 +588,7 @@ template void FunctionTree::add(T c, FunctionTreecalcSquareNorm(); inp.deleteGenerated(); } -/** @brief In-place addition with MW function representations, fixed grid - * - * @param[in] c: Numerical coefficient of input function - * @param[in] inp: Input function to add - * - * @details The input function will be added to the union of the current grid of - * and input the function grid. 
- * - */ + template void FunctionTree::add_inplace(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -701,15 +611,6 @@ template void FunctionTree::add_inplace(T c, FunctionT inp.deleteGenerated(); } -/** @brief In-place addition of absolute values of MW function representations - * - * @param[in] c Numerical coefficient of input function - * @param[in] inp Input function to add - * - * The absolute value of input function will be added in-place on the current grid of the output - * function, i.e. no further grid refinement. - * - */ template void FunctionTree::absadd(T c, FunctionTree &inp) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads()) @@ -736,15 +637,6 @@ template void FunctionTree::absadd(T c, FunctionTree void FunctionTree::multiply(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -772,14 +664,6 @@ template void FunctionTree::multiply(T c, FunctionTree inp.deleteGenerated(); } -/** @brief In-place mapping with a predefined function f(x), fixed grid - * - * @param[in] fmap: mapping function - * - * @details The input function will be mapped in-place on the current grid - * of the function, i.e. no further grid refinement. 
- * - */ template void FunctionTree::map(FMap fmap) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); { @@ -839,20 +723,6 @@ template std::ostream &FunctionTree::print(std::ostrea return MWTree::print(o); } -/** @brief Reduce the precision of the tree by deleting nodes - * - * @param prec: New precision criterion - * @param splitFac: Splitting factor: 1, 2 or 3 - * @param absPrec: Use absolute precision - * - * @details This will run the tree building algorithm in "reverse", starting - * from the leaf nodes, and perform split checks on each node based on the given - * precision and the local wavelet norm. - * - * @note The splitting factor appears in the threshold for the wavelet norm as - * \f$ ||w|| < 2^{-sn/2} ||f|| \epsilon \f$. In principal, `s` should be equal - * to the dimension; in practice, it is set to `s=1`. - */ template int FunctionTree::crop(double prec, double splitFac, bool absPrec) { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -864,10 +734,6 @@ template int FunctionTree::crop(double prec, double sp return nChunks; } -/** Traverse tree using BFS and returns an array with the address of the coefs. - * Also returns an array with the corresponding indices defined as the - * values of serialIx in refTree, and an array with the indices of the parent. - * Set index -1 for nodes that are not present in refTree */ template void FunctionTree::makeCoeffVector(std::vector &coefs, std::vector &indices, @@ -918,10 +784,6 @@ void FunctionTree::makeCoeffVector(std::vector &coefs, } } -/** Traverse tree using DFS and reconstruct it using node info from the - * reference tree and a list of coefficients. - * It is the reference tree (refTree) which is traversed, but one does not descend - * into children if the norm of the tree is smaller than absPrec. 
*/ template void FunctionTree::makeTreefromCoeff(MWTree &refTree, std::vector coefpVec, std::map &ix2coef, double absPrec, const std::string &mode) { std::vector *> stack; std::map *> ix2node; // gives the nodes in this tree for a given ix @@ -998,9 +860,6 @@ template void FunctionTree::makeTreefromCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -1039,7 +898,6 @@ template void FunctionTree::appendTreeNoCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -1131,17 +989,11 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() { return this->NodeIndex2serialIx.size(); } -/** @brief Deep copy of tree - * - * @details Exact copy without any binding between old and new tree - */ template void FunctionTree::deep_copy(FunctionTree *out) { copy_grid(*out, *this); copy_func(*out, *this); } -/** @brief New tree with only real part - */ template FunctionTree *FunctionTree::Real() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); @@ -1149,7 +1001,7 @@ template FunctionTree *FunctionTree::Real() //#pragma omp parallel num_threads(mrcpp_get_num_threads()) { int nNodes = this->getNEndNodes(); - //#pragma omp for schedule(guided) + //#pragma omp for schedule(guided) for (int n = 0; n < nNodes; n++) { MWNode &inp_node = *this->endNodeTable[n]; MWNode &out_node = out->getNode(inp_node.getNodeIndex(), true); @@ -1165,15 +1017,13 @@ template FunctionTree *FunctionTree::Real() return out; } -/** @brief New tree with only imaginary part - */ template FunctionTree *FunctionTree::Imag() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); //#pragma omp parallel num_threads(mrcpp_get_num_threads()) { int nNodes = this->getNEndNodes(); - //#pragma omp for schedule(guided) + 
//#pragma omp for schedule(guided) for (int n = 0; n < nNodes; n++) { MWNode &inp_node = *this->endNodeTable[n]; MWNode &out_node = out->getNode(inp_node.getNodeIndex(), true); @@ -1188,11 +1038,6 @@ template FunctionTree *FunctionTree::Imag() return out; } -/* - * From real to complex tree. Copy everything, and convert double to ComplexDouble for the coefficents. - * Should use a deep_copy if generalized in the future. - */ - template <> void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&outTree) { delete outTree; double ref = 0.0; @@ -1362,4 +1207,4 @@ template class FunctionTree<1, ComplexDouble>; template class FunctionTree<2, ComplexDouble>; template class FunctionTree<3, ComplexDouble>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h index 9d976d6be..0b4da8789 100644 --- a/src/trees/FunctionTree.h +++ b/src/trees/FunctionTree.h @@ -33,77 +33,240 @@ namespace mrcpp { -/** @class FunctionTree - * - * @brief Function representation in MW basis +/** + * @file FunctionTree.h + * @brief Declaration of the FunctionTree class template. * * @details - * Constructing a full grown FunctionTree involves a number of steps, - * including setting up a memory allocator, constructing root nodes according - * to the given MRA, building an adaptive tree structure and computing MW - * coefficients. The FunctionTree constructor does only half of these steps: - * It takes an MRA argument, which defines the computational domain and scaling - * basis (these are fixed parameters that cannot be changed after construction). - * The tree is initialized with a memory allocator and a set of root nodes, but - * it does not compute any coefficients and the function is initially - * *undefined*. 
An undefined FunctionTree will have a well defined tree - * structure (at the very least the root nodes of the given MRA, but possibly - * with additional refinement) and its MW coefficient will be allocated but - * uninitialized, and its square norm will be negative (minus one). + * A FunctionTree represents a scalar field on a multiresolution (MW) grid. + * It owns the MW-node topology, coefficient storage, and basic utilities + * for evaluation, integration, and in–place algebra on the represented + * function. Construction initializes the tree structure (root nodes and + * allocator) according to a given MultiResolutionAnalysis (MRA), but does + * not compute coefficients; initially the function is undefined and the + * tree's square norm is negative to signal this state. */ -template class FunctionTree final : public MWTree, public RepresentableFunction { +/** + * @class FunctionTree + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Scalar type (double or ComplexDouble). + * @brief Function representation in the MW basis with adaptive topology. + * + * @details + * The class derives from MWTree (topology and node management) and + * RepresentableFunction (evaluation interface). Typical workflows build + * or refine the tree via calculators/adaptors and then apply algebraic + * transforms in place. + */ +template +class FunctionTree final : public MWTree, public RepresentableFunction { public: + /** + * @brief Construct a tree bound to an MRA with a user label. + * @param mra Multi-resolution analysis (domain and basis). + * @param name Optional textual name of the function. + * + * @note Coefficients are not computed by the constructor. 
+ */ FunctionTree(const MultiResolutionAnalysis &mra, const std::string &name) : FunctionTree(mra, nullptr, name) {} - FunctionTree(const MultiResolutionAnalysis &mra, SharedMemory *sh_mem = nullptr, const std::string &name = "nn"); + + /** + * @brief Construct a tree bound to an MRA with optional shared memory and name. + * @param mra Multi-resolution analysis (domain and basis). + * @param sh_mem Optional shared-memory arena for coefficient storage. + * @param name Optional textual name of the function. + * + * @details Root nodes and allocators are created. The function is + * undefined until coefficients are computed by a builder/calculator. + */ + FunctionTree(const MultiResolutionAnalysis &mra, + SharedMemory *sh_mem = nullptr, + const std::string &name = "nn"); + + /// Deleted copy semantics (trees are heavy objects). FunctionTree(const FunctionTree &tree) = delete; FunctionTree &operator=(const FunctionTree &tree) = delete; + + /// Virtual destructor. ~FunctionTree() override; + /** + * @brief Integrate the represented function over the world domain. + * @return Integral value. + */ T integrate() const; + + /** + * @brief Integrate only end nodes against a provided analytic function. + * @param f RepresentableFunction used as integrand partner. + * @return Integral value as double. + * + * @details Useful for quadrature-like post-processing on the current grid. + */ double integrateEndNodes(RepresentableFunction_M &f); + + /** + * @brief Evaluate the true value (scaling + wavelet part of the leaf nodes) at a given coordinate. + * @param r Cartesian coordinate. + * @return Function value; zero if r is out of bounds. + * + * @details Requires an on-the-fly MW transform on the node, which makes this slow and non-const; for fast evaluation use refine_grid(tree, 1) and then evalf. + */ T evalf_precise(const Coord &r); + + /** + * @brief Evaluate only the scaling part of the leaf nodes at a given coordinate (fast, const, not fully accurate). + * @param r Cartesian coordinate. + * @return Function value; zero if r is out of bounds. + */ T evalf(const Coord &r) const override; + /** + * @brief Number of generated (non-root) nodes currently alive.
+ * @return Count of nodes managed by the generated-node allocator. + */ int getNGenNodes() const { return getGenNodeAllocator().getNNodes(); } + /** + * @brief Collect values on end nodes into a dense vector. + * @param[out] data Column vector sized to the total number of end-node values. + */ void getEndValues(Eigen::Matrix &data); + + /** + * @brief Set end-node values from a dense vector. + * @param[in] data Column vector holding values; its size must match. + */ void setEndValues(Eigen::Matrix &data); + /** + * @brief Persist the tree (binary). + * @param file Output file name; it will get a ".tree" extension. + */ void saveTree(const std::string &file); + + /** + * @brief Persist the tree (text). + * @param file Output filename. + */ void saveTreeTXT(const std::string &file); + + /** + * @brief Load the tree (binary); this tree must have the exact same MRA as the one that was saved. + * @param file Input file name; it will get a ".tree" extension. + */ void loadTree(const std::string &file); + + /** + * @brief Load the tree (text). + * @param file Input filename. + */ void loadTreeTXT(const std::string &file); - // In place operations + /** @name In-place algebra on the represented function */ + ///@{ + + /// Square the function pointwise. void square(); + + /// Raise the function to power p pointwise. void power(double p); + + /// Multiply the function by a scalar c. void rescale(T c); + + /// Normalize the function to unit norm (when meaningful). void normalize(); + + /// Compute this := this + c * inp on the current grid of this tree (no further grid refinement). void add(T c, FunctionTree &inp); + + /// Compute this := this + c * inp on the union of the current grid and the grid of inp. void add_inplace(T c, FunctionTree &inp); + + /// Compute this := this + c * |inp| (absolute values). void absadd(T c, FunctionTree &inp); + + /// Compute this := this * (c * inp) pointwise. void multiply(T c, FunctionTree &inp); + + /// Apply a scalar-to-scalar map pointwise. void map(FMap fmap); + ///@} + + /** + * @brief Number of memory chunks reserved for nodes. + * @return Total chunk count.
+ */ int getNChunks() { return this->getNodeAllocator().getNChunks(); } + + /** + * @brief Number of memory chunks currently in use. + * @return Used chunk count. + */ int getNChunksUsed() { return this->getNodeAllocator().getNChunksUsed(); } + /** + * @brief Reduce the precision of the tree by deleting nodes (no refinement is performed). + * @param prec New precision criterion. + * @param splitFac Splitting factor (1, 2 or 3) entering the wavelet-norm threshold. + * @param absPrec If true, use absolute precision. + * @return The nChunks value computed by the implementation (presumably the node-chunk count after cropping; verify). + */ int crop(double prec, double splitFac = 1.0, bool absPrec = true); + /** @name Typed access to nodes */ + ///@{ + + /// Get i-th end node cast to FunctionNode (non-const). FunctionNode &getEndFuncNode(int i) { return static_cast &>(this->getEndMWNode(i)); } + + /// Get i-th root node cast to FunctionNode (non-const). FunctionNode &getRootFuncNode(int i) { return static_cast &>(this->rootBox.getNode(i)); } + /// Allocator for generated nodes (non-const). NodeAllocator &getGenNodeAllocator() { return *this->genNodeAllocator_p; } + + /// Allocator for generated nodes (const). const NodeAllocator &getGenNodeAllocator() const { return *this->genNodeAllocator_p; } + /// Get i-th end node cast to FunctionNode (const). const FunctionNode &getEndFuncNode(int i) const { return static_cast &>(this->getEndMWNode(i)); } + + /// Get i-th root node cast to FunctionNode (const). const FunctionNode &getRootFuncNode(int i) const { return static_cast &>(this->rootBox.getNode(i)); } + ///@} + + /** + * @brief Delete nodes that were generated during the last build/refine step. + * @details Restores the tree to the pre-generation state without touching + * persisted nodes and data. + */ void deleteGenerated(); + + /** + * @brief Delete generated nodes and their generated parents if they became empty. + */ void deleteGeneratedParents(); + /** + * @brief Build a flat view of the coefficient storage. + * + * @param[out] coefs Pointers to coefficient blocks per node.
+ * @param[out] indices Node indices mapped to a compact integer id. + * @param[out] parent_indices Parent ids matching indices. + * @param[out] scalefac Per-node scale factors (e.g. for normalization). + * @param[out] max_index Maximum assigned compact id. + * @param[in] refTree Reference tree defining traversal order. + * @param[in] refNodes Optional explicit node list to follow. + * + * @details Intended for exporting the tree into custom linear algebra + * back-ends or checkpoint formats. + */ void makeCoeffVector(std::vector &coefs, std::vector &indices, std::vector &parent_indices, @@ -111,25 +274,77 @@ template class FunctionTree final : public MWTree, pub int &max_index, MWTree &refTree, std::vector *> *refNodes = nullptr); - void makeTreefromCoeff(MWTree &refTree, std::vector coefpVec, std::map &ix2coef, double absPrec, const std::string &mode = "adaptive"); + + /** + * @brief Reconstruct a tree topology from a coefficient vector. + * @param refTree Reference topology to follow. + * @param coefpVec Pointers to coefficient blocks. + * @param ix2coef Mapping from node compact id to coefpVec index. + * @param absPrec Threshold for adaptive creation. + * @param mode Creation mode: "adaptive" or fixed variants. + */ + void makeTreefromCoeff(MWTree &refTree, + std::vector coefpVec, + std::map &ix2coef, + double absPrec, + const std::string &mode = "adaptive"); + + /** + * @brief Append topology from another tree (no coefficients copied). + * @param inTree Input tree. + */ void appendTreeNoCoeff(MWTree &inTree); + + /// @overload void appendTreeNoCoeff(MWTree &inTree); + + /** + * @brief Copy topology and coefficients from a real-valued tree. + * @param inTree Source tree. + */ void CopyTree(FunctionTree &inTree); - // tools for use of local (nodes are stored in Bank) representation - int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients + + /** + * @brief Move all node coefficients to a bank and remove them from nodes. 
+ * @return Number of nodes affected. + */ + int saveNodesAndRmCoeff(); + + /** + * @brief Deep-copy entire tree into out (topology and data). + * @param out Destination tree pointer (must be non-null and compatible). + */ void deep_copy(FunctionTree *out); + + /** + * @brief Extract real part into a newly allocated real tree. + * @return Pointer to a new FunctionTree of type double. + */ FunctionTree *Real(); + + /** + * @brief Extract imaginary part into a newly allocated real tree. + * @return Pointer to a new FunctionTree of type double. + */ FunctionTree *Imag(); + + /** @name Real/complex conversion helpers */ + ///@{ void CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&out); void CopyTreeToComplex(FunctionTree<2, ComplexDouble> *&out); void CopyTreeToComplex(FunctionTree<1, ComplexDouble> *&out); void CopyTreeToReal(FunctionTree<3, double> *&out); // for testing + ///@} protected: + /// Allocator for generated nodes. std::unique_ptr> genNodeAllocator_p{nullptr}; + + /// Print a short, human-readable description of the tree. std::ostream &print(std::ostream &o) const override; + /// Allocate and initialize root nodes according to the MRA. void allocRootNodes(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/FunctionTreeVector.h b/src/trees/FunctionTreeVector.h index 142113e1f..332867d46 100644 --- a/src/trees/FunctionTreeVector.h +++ b/src/trees/FunctionTreeVector.h @@ -32,14 +32,56 @@ namespace mrcpp { -template using CoefsFunctionTree = std::tuple *>; -template using FunctionTreeVector = std::vector>; +/** + * @file FunctionTreeVector.h + * @brief Lightweight helpers for working with collections of FunctionTree objects. + * + * @details + * Many high-level algorithms (addition, multiplication, dot products, etc.) + * operate on *lists* of trees paired with a numeric coefficient. 
This header + * provides two aliases for such lists and a few utility functions to manage + * them without introducing additional container classes. + */ -/** @brief Remove all entries in the vector - * @param[in] fs: Vector to clear - * @param[in] dealloc: Option to free FunctionTree pointer before clearing +/** + * @brief Alias for a weighted FunctionTree pointer. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Scalar type of the trees (defaults to double). + * + * @details + * The tuple layout is: + * - element 0: numeric coefficient of type @p T, + * - element 1: pointer to a @c FunctionTree. + * + * Ownership of the pointer is not implied by the alias; see @ref clear(). */ -template void clear(FunctionTreeVector &fs, bool dealloc = false) { +template +using CoefsFunctionTree = std::tuple *>; + +/** + * @brief Alias for a vector of weighted FunctionTree pointers. + * + * @tparam D Spatial dimension. + * @tparam T Scalar type (defaults to double). + */ +template +using FunctionTreeVector = std::vector>; + +/** + * @brief Remove all entries from a FunctionTreeVector. + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in,out] fs Vector to clear. + * @param[in] dealloc If true, delete each stored FunctionTree pointer + * before clearing the vector. + * + * @note When @p dealloc is false (the default), this function does not + * take ownership of the pointers and will not delete them. + */ +template +void clear(FunctionTreeVector &fs, bool dealloc = false) { if (dealloc) { for (auto &t : fs) { auto f = std::get<1>(t); @@ -50,10 +92,16 @@ template void clear(FunctionTreeVector &fs, bool deall fs.clear(); } -/** @returns Total number of nodes of all trees in the vector - * @param[in] fs: Vector to fetch from +/** + * @brief Compute the total number of nodes across all trees in the vector. + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in] fs Vector to inspect. 
+ * @return Sum of @c getNNodes() over all non-null trees. */ -template int get_n_nodes(const FunctionTreeVector &fs) { +template +int get_n_nodes(const FunctionTreeVector &fs) { int nNodes = 0; for (const auto &t : fs) { auto f = std::get<1>(t); @@ -62,10 +110,16 @@ template int get_n_nodes(const FunctionTreeVector &fs) return nNodes; } -/** @returns Total size of all trees in the vector, in kB - * @param[in] fs: Vector to fetch from +/** + * @brief Compute the total size of all trees in the vector (in kilobytes). + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in] fs Vector to inspect. + * @return Sum of @c getSizeNodes() over all non-null trees. */ -template int get_size_nodes(const FunctionTreeVector &fs) { +template +int get_size_nodes(const FunctionTreeVector &fs) { int sNodes = 0; for (const auto &t : fs) { auto f = std::get<1>(t); @@ -74,27 +128,52 @@ template int get_size_nodes(const FunctionTreeVector & return sNodes; } -/** @returns Numerical coefficient at given position in vector - * @param[in] fs: Vector to fetch from - * @param[in] i: Position in vector +/** + * @brief Access the numeric coefficient at a given position. + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in] fs Vector to access. + * @param[in] i Zero-based position. + * @return The coefficient stored at position @p i. + * + * @pre @p i must be a valid index in @p fs. */ -template T get_coef(const FunctionTreeVector &fs, int i) { +template +T get_coef(const FunctionTreeVector &fs, int i) { return std::get<0>(fs[i]); } -/** @returns FunctionTree at given position in vector - * @param[in] fs: Vector to fetch from - * @param[in] i: Position in vector +/** + * @brief Access the FunctionTree at a given position (non-const). + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in] fs Vector to access. + * @param[in] i Zero-based position. + * @return Reference to the @c FunctionTree at position @p i. 
+ * + * @pre The pointer stored at position @p i must be non-null. */ -template FunctionTree &get_func(FunctionTreeVector &fs, int i) { +template +FunctionTree &get_func(FunctionTreeVector &fs, int i) { return *(std::get<1>(fs[i])); } -/** @returns FunctionTree at given position in vector - * @param[in] fs: Vector to fetch from - * @param[in] i: Position in vector +/** + * @brief Access the FunctionTree at a given position (const). + * + * @tparam D Spatial dimension. + * @tparam T Scalar type. + * @param[in] fs Vector to access. + * @param[in] i Zero-based position. + * @return Const reference to the @c FunctionTree at position @p i. + * + * @pre The pointer stored at position @p i must be non-null. */ -template const FunctionTree &get_func(const FunctionTreeVector &fs, int i) { +template +const FunctionTree &get_func(const FunctionTreeVector &fs, int i) { return *(std::get<1>(fs[i])); } -} // namespace mrcpp + +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/HilbertPath.h b/src/trees/HilbertPath.h index 519e7ac73..007a7edcd 100644 --- a/src/trees/HilbertPath.h +++ b/src/trees/HilbertPath.h @@ -27,31 +27,108 @@ namespace mrcpp { -template class HilbertPath final { +/** + * @file HilbertPath.h + * @brief Lookup-based helper to traverse octree/quadtree children in Hilbert order. + * + * @details + * A Hilbert curve traversal depends on an **orientation state** that changes + * from parent to child. This lightweight class stores the current state and + * provides constant-time conversions between: + * - the **Hilbert child index** \f$h \in \{0,\dots,2^D-1\}\f$ for the + * current state, and + * - the corresponding **Z-order (Morton) index** \f$z\f$, + * as well as the **next orientation state** after descending to child \f$h\f$. + * + * The mappings are implemented via static lookup tables (declared here, defined + * in the corresponding translation unit). 
The template parameter @p D is the + * spatial dimension; typical values are 2 (quadtree) or 3 (octree). + */ + +/** + * @class HilbertPath + * @tparam D Spatial dimension (e.g., 2 for quadtree, 3 for octree). + * + * @brief Encapsulates the current Hilbert orientation state and child mappings. + * + * @details + * Each node visit in a Hilbert traversal has an associated **state** that + * determines how the children are ordered. Given the current state: + * - @ref getZIndex maps a Hilbert child index to the corresponding Morton + * (Z-order) child index; + * - @ref getHIndex performs the inverse mapping (Morton to Hilbert); and + * - @ref getChildPath returns the orientation state to use after descending + * to a specific Hilbert child. + */ +template +class HilbertPath final { public: + /** @brief Default constructor; initializes to the canonical root state. */ HilbertPath() = default; + + /** @brief Copy constructor. */ HilbertPath(const HilbertPath &p) : path(p.path) {} + + /** + * @brief Construct a child-state from a parent-state and a child index. + * + * @param[in] p Parent @ref HilbertPath state. + * @param[in] cIdx Child index expressed in **Morton (Z-order)** for this parent. + * + * @details + * The provided @p cIdx is first converted to the corresponding **Hilbert** + * index for the parent state, then the next orientation state is selected + * via the transition table. + */ HilbertPath(const HilbertPath &p, int cIdx) { int hIdx = p.getHIndex(cIdx); this->path = p.getChildPath(hIdx); } + + /** @brief Assignment operator. */ HilbertPath &operator=(const HilbertPath &p) { this->path = p.path; return *this; } + /** @brief Return the current orientation state identifier. */ short int getPath() const { return this->path; } + + /** + * @brief Transition: state after descending to Hilbert child @p hIdx. + * @param[in] hIdx Child index in **Hilbert** order for the current state. + * @return Orientation state identifier for the child. 
+ */ short int getChildPath(int hIdx) const { return this->pTable[this->path][hIdx]; } + /** + * @brief Map Hilbert child index to Morton (Z-order) child index. + * @param[in] hIdx Child index in **Hilbert** order for the current state. + * @return Corresponding **Morton** child index. + */ int getZIndex(int hIdx) const { return this->zTable[this->path][hIdx]; } + + /** + * @brief Map Morton (Z-order) child index to Hilbert child index. + * @param[in] zIdx Child index in **Morton** order for the current state. + * @return Corresponding **Hilbert** child index. + */ int getHIndex(int zIdx) const { return this->hTable[this->path][zIdx]; } private: + /// Current Hilbert orientation state (table row selector). short int path{0}; - static const short int pTable[][8]; - static const int zTable[][8]; - static const int hTable[][8]; + + /** + * @name Lookup tables (declared here, defined in the .cpp) + * Each table has 2^D columns (up to 8 for D=3) and one row per state. + * @{ + */ + static const short int pTable[][8]; ///< Next-state table: state × h -> state' + static const int zTable[][8]; ///< Mapping: state × h -> z + static const int hTable[][8]; ///< Mapping: state × z -> h + /** @} */ }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp index 2d521b468..bc3a48330 100644 --- a/src/trees/MWNode.cpp +++ b/src/trees/MWNode.cpp @@ -23,10 +23,6 @@ * */ -/** - * Simple n-dimensional node - */ - #include "MWNode.h" #include "MWTree.h" #include "NodeAllocator.h" @@ -41,10 +37,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief MWNode default constructor. - * - * @details Should be used only by NodeAllocator to obtain - * virtual table pointers for the derived classes. */ template MWNode::MWNode() : tree(nullptr) @@ -59,13 +51,6 @@ MWNode::MWNode() MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. 
- * - * @param[in] tree: the MWTree the root node belongs to - * @param[in] idx: the NodeIndex defining scale and translation of the node - * - * @details Constructor for an empty node, given the corresponding MWTree and NodeIndex - */ template MWNode::MWNode(MWTree *tree, const NodeIndex &idx) : tree(tree) @@ -79,14 +64,6 @@ MWNode::MWNode(MWTree *tree, const NodeIndex &idx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. - * - * @param[in] tree: the MWTree the root node belongs to - * @param[in] rIdx: the integer specifying the corresponding root node - * - * @details Constructor for root nodes. It requires the corresponding - * MWTree and an integer to fetch the right NodeIndex - */ template MWNode::MWNode(MWTree *tree, int rIdx) : tree(tree) @@ -100,14 +77,6 @@ MWNode::MWNode(MWTree *tree, int rIdx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. - * - * @param[in] parent: parent node - * @param[in] cIdx: child index of the current node - * - * @details Constructor for leaf nodes. It requires the corresponding - * parent and an integer to identify the correct child. - */ template MWNode::MWNode(MWNode *parent, int cIdx) : tree(parent->tree) @@ -121,15 +90,6 @@ MWNode::MWNode(MWNode *parent, int cIdx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode copy constructor. - * - * @param[in] node: the original node - * @param[in] allocCoef: if true MW coefficients are allocated and copied from the original node - * - * @details Creates loose nodes and optionally copy coefs. The node - * does not "belong" to the tree: it cannot be accessed by traversing - * the tree. - */ template MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) : tree(node.tree) @@ -159,31 +119,15 @@ MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode destructor. 
- * - * @details Recursive deallocation of a node and all its decendants - */ template MWNode::~MWNode() { if (this->isLooseNode()) this->freeCoefs(); MRCPP_DESTROY_OMP_LOCK(); } -/** @brief Dummy deallocation of MWNode coefficients. - * - * @details This is just to make sure this method never really gets - * called (derived classes must implement their own version). This was - * to avoid having pure virtual methods in the base class. - */ template void MWNode::dealloc() { NOT_REACHED_ABORT; } -/** @brief Allocate the coefs vector. - * - * @details This is only used by loose nodes, because the loose nodes - * are not treated by the NodeAllocator class. - * - */ template void MWNode::allocCoefs(int n_blocks, int block_size) { if (this->n_coefs != 0) MSG_ABORT("n_coefs should be zero"); if (this->isAllocated()) MSG_ABORT("Coefs already allocated"); @@ -196,12 +140,6 @@ template void MWNode::allocCoefs(int n_blocks, int blo this->setIsAllocated(); } -/** @brief Deallocate the coefs vector. - * - * @details This is only used by loose nodes, because the loose nodes - * are not treated by the NodeAllocator class. 
- * - */ template void MWNode::freeCoefs() { if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!"); @@ -214,8 +152,6 @@ template void MWNode::freeCoefs() { this->clearIsAllocated(); } -/** @brief Printout of node coefficients - */ template void MWNode::printCoefs() const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); println(0, "\nMW coefs"); @@ -226,8 +162,6 @@ template void MWNode::printCoefs() const { } } -/** @brief wraps the MW coefficients into an eigen vector object - */ template void MWNode::getCoefs(Eigen::Matrix &c) const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); if (not this->hasCoefs()) MSG_ABORT("Node has no coefs"); @@ -236,9 +170,6 @@ template void MWNode::getCoefs(Eigen::Matrix::Map(this->coefs, this->n_coefs); } -/** @brief sets all MW coefficients and the norms to zero - * - */ template void MWNode::zeroCoefs() { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated " << *this); @@ -247,68 +178,26 @@ template void MWNode::zeroCoefs() { this->setHasCoefs(); } -/** @brief Attach a set of coefs to this node. Only used locally (the tree is not aware of this). - */ template void MWNode::attachCoefs(T *coefs) { this->coefs = coefs; this->setHasCoefs(); } -/** @brief assigns values to a block of coefficients - * - * @param[in] c: the input coefficients - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. 
- */ template void MWNode::setCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = c[i]; } } -/** @brief adds values to a block of coefficients - * - * @param[in] c: the input coefficients - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. - */ template void MWNode::addCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] += c[i]; } } -/** @brief sets values of a block of coefficients to zero - * - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. - */ template void MWNode::zeroCoefBlock(int block, int block_size) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = 0.0; } } -/** @brief forward MW transform from this node to its children - * - * @param[in] overwrite: if true the coefficients of the children are - * overwritten. If false the values are summed to the already present - * ones. - * - * @details it performs forward MW transform inserting the result - * directly in the right place for each child node. The children must - * already be present and its memory allocated for this to work - * properly. 
- */ template void MWNode::giveChildrenCoefs(bool overwrite) { assert(this->isBranchNode()); if (not this->isAllocated()) MSG_ABORT("Not allocated!"); @@ -318,7 +207,6 @@ template void MWNode::giveChildrenCoefs(bool overwrite for (int i = 0; i < getTDim(); i++) getMWChild(i).zeroCoefs(); } - // coeff of child should be have been allocated already here int stride = getMWChild(0).getNCoefs(); T *inp = getCoefs(); T *out = getMWChild(0).getCoefs(); @@ -330,21 +218,10 @@ template void MWNode::giveChildrenCoefs(bool overwrite for (int i = 0; i < getTDim(); i++) { getMWChild(i).setHasCoefs(); - getMWChild(i).calcNorms(); // should need to compute only scaling norms + getMWChild(i).calcNorms(); } } -/** @brief forward MW transform to compute scaling coefficients of a single child - * - * @param[in] cIdx: child index - * @param[in] overwrite: if true the coefficients of the children are - * overwritten. If false the values are summed to the already present - * ones. - * - * @details it performs forward MW transform in place on a loose - * node. The scaling coefficients of the selected child are then - * copied/summed in the correct child node. 
- */ template void MWNode::giveChildCoefs(int cIdx, bool overwrite) { MWNode node_i = *this; @@ -365,12 +242,6 @@ template void MWNode::giveChildCoefs(int cIdx, bool ov child.calcNorms(); } -/** Takes a MWParent and generates coefficients, reverse operation from - * giveChildrenCoefs */ -/** @brief backward MW transform to compute scaling/wavelet coefficients of a parent - * - * \warning This routine is only used in connection with Periodic Boundary Conditions - */ template void MWNode::giveParentCoefs(bool overwrite) { MWNode node = *this; MWNode &parent = getMWParent(); @@ -387,12 +258,6 @@ template void MWNode::giveParentCoefs(bool overwrite) parent.calcNorms(); } -/** @brief Copy scaling coefficients from children to parent - * - * @details Takes the scaling coefficients of the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. - */ template void MWNode::copyCoefsFromChildren() { int kp1_d = this->getKp1_d(); int nChildren = this->getTDim(); @@ -403,14 +268,6 @@ template void MWNode::copyCoefsFromChildren() { } } -/** @brief Generates scaling coefficients of children - * - * @details If the node is a leafNode, it takes the scaling&wavelet - * coefficients of the parent and it generates the scaling - * coefficients for the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. 
- */ template void MWNode::threadSafeGenChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -421,14 +278,6 @@ template void MWNode::threadSafeGenChildren() { MRCPP_UNSET_OMP_LOCK(); } -/** @brief Creates scaling coefficients of children - * - * @details If the node is a leafNode, it takes the scaling&wavelet - * coefficients of the parent and it generates the scaling - * coefficients for the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. The new node is permanently added to the tree. - */ template void MWNode::threadSafeCreateChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -439,16 +288,6 @@ template void MWNode::threadSafeCreateChildren() { MRCPP_UNSET_OMP_LOCK(); } -/** @brief Coefficient-Value transform - * - * @details This routine transforms the scaling coefficients of the node to the - * function values in the corresponding quadrature roots (of its children). - * - * @param[in] operation: forward (coef->value) or backward (value->coef). - * - * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1) - * representation, instead of s/d (scaling and wavelet). 
- */ template void MWNode::cvTransform(int operation, bool firstchild) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -477,9 +316,9 @@ template void MWNode::cvTransform(int operation, bool const auto scaling_factor = this->getMWTree().getMRA().getWorldBox().getScalingFactors(); double sf_prod = 1.0; for (const auto &s : scaling_factor) sf_prod *= s; - if (sf_prod <= MachineZero) sf_prod = 1.0; // When there is no scaling factor + if (sf_prod <= MachineZero) sf_prod = 1.0; - int np1 = getScale() + 1; // we're working on scaling coefs on next scale + int np1 = getScale() + 1; double two_fac = std::pow(2.0, D * np1) / sf_prod; if (operation == Backward) { two_fac = std::sqrt(1.0 / two_fac); @@ -492,72 +331,7 @@ template void MWNode::cvTransform(int operation, bool for (int i = 0; i < nCoefs; i++) { this->coefs[i] *= two_fac; } } } -/* Old interpolating version, somewhat faster -template -void MWNode::cvTransform(int operation) { - const ScalingBasis &sf = this->getMWTree().getMRA().getScalingBasis(); - if (sf.getScalingType() != Interpol) { - NOT_IMPLEMENTED_ABORT; - } - - int quadratureOrder = sf.getQuadratureOrder(); - getQuadratureCache(qc); - - double two_scale = std::pow(2.0, this->getScale() + 1); - VectorXd modWeights = qc.getWeights(quadratureOrder); - if (operation == Forward) { - modWeights = modWeights.array().inverse(); - modWeights *= two_scale; - modWeights = modWeights.array().sqrt(); - } else if (operation == Backward) { - modWeights *= 1.0/two_scale; - modWeights = modWeights.array().sqrt(); - } else { - MSG_ABORT("Invalid operation"); - } - - int kp1 = this->getKp1(); - int kp1_d = this->getKp1_d(); - int kp1_p[D]; - for (int d = 0; d < D; d++) { - kp1_p[d] = math_utils::ipow(kp1, d); - } - - for (int m = 0; m < this->getTDim(); m++) { - for (int p = 0; p < D; p++) { - int n = 0; - for (int i = 0; i < kp1_p[D - p - 1]; i++) { - for (int j = 0; j < kp1; j++) { - for (int k = 0; k < kp1_p[p]; k++) { - this->coefs[m * kp1_d + 
n] *= modWeights[j]; - n++; - } - } - } - } - } -} -*/ -/** @brief Multiwavelet transform - * - * @details Application of the filters on one node to pass from a 0/1 (scaling - * on child 0 and 1) representation to an s/d (scaling and - * wavelet) representation. Bit manipulation is used in order to - * determine the correct filters and whether to apply them or just - * pass to the next couple of indexes. The starting coefficients are - * preserved until the application is terminated, then they are - * overwritten. With minor modifications this code can also be used - * for the inverse mw transform (just use the transpose filters) or - * for the application of an operator (using A, B, C and T parts of an - * operator instead of G1, G0, H1, H0). This is the version where the - * three directions are operated one after the other. Although this - * is formally faster than the other algorithm, the separation of the - * three dimensions prevent the possibility to use the norm of the - * operator in order to discard a priori negligible contributions. - * - * * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1). - */ template void MWNode::mwTransform(int operation) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -575,9 +349,6 @@ template void MWNode::mwTransform(int operation) { for (int gt = 0; gt < this->getTDim(); gt++) { T *out = out_vec + gt * kp1_d; for (int ft = 0; ft < this->getTDim(); ft++) { - /* Operate in direction i only if the bits along other - * directions are identical. The bit of the direction we - * operate on determines the appropriate filter/operator */ if ((gt | mask) == (ft | mask)) { T *in = in_vec + ft * kp1_d; int fIdx = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -597,19 +368,16 @@ template void MWNode::mwTransform(int operation) { } } -/** @brief Set all norms to Undefined. 
*/ template void MWNode::clearNorms() { this->squareNorm = -1.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = -1.0; } } -/** @brief Set all norms to zero. */ template void MWNode::zeroNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = 0.0; } } -/** @brief Calculate and store square norm and component norms, if allocated. */ template void MWNode::calcNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { @@ -619,7 +387,6 @@ template void MWNode::calcNorms() { } } -/** @brief Calculate and return the squared scaling norm. */ template double MWNode::getScalingNorm() const { double sNorm = this->getComponentNorm(0); if (sNorm >= 0.0) { @@ -629,7 +396,6 @@ template double MWNode::getScalingNorm() const { } } -/** @brief Calculate and return the squared wavelet norm. */ template double MWNode::getWaveletNorm() const { double wNorm = 0.0; for (int i = 1; i < this->getTDim(); i++) { @@ -643,7 +409,6 @@ template double MWNode::getWaveletNorm() const { return wNorm; } -/** @brief Calculate the norm of one component (NOT the squared norm!). */ template double MWNode::calcComponentNorm(int i) const { if (this->isGenNode() and i != 0) return 0.0; assert(this->isAllocated()); @@ -654,13 +419,10 @@ template double MWNode::calcComponentNorm(int i) const int start = i * size; double sq_norm = 0.0; - for (int i = start; i < start + size; i++) { sq_norm += std::norm(c[i]); } + for (int i2 = start; i2 < start + size; i2++) { sq_norm += std::norm(c[i2]); } return std::sqrt(sq_norm); } -/** @brief Update the coefficients of the node by a mw transform of the scaling - * coefficients of the children. 
- */ template void MWNode::reCompress() { if (this->isGenNode()) NOT_IMPLEMENTED_ABORT; if (this->isBranchNode()) { @@ -672,12 +434,6 @@ template void MWNode::reCompress() { } } -/** @brief Recurse down until an EndNode is found, and then crop children below the given precision threshold - * - * @param[in] prec: precision required - * @param[in] splitFac: factor used in the split check (larger factor means tighter threshold for finer nodes) - * @param[in] absPrec: flag to switch from relative (false) to absolute (true) precision. - */ template bool MWNode::crop(double prec, double splitFac, bool absPrec) { if (this->isEndNode()) { return true; @@ -707,11 +463,6 @@ template void MWNode::genParent() { NOT_REACHED_ABORT; } -/** @brief Recursive deallocation of children and all their descendants. - * - * @details - * Leaves node as LeafNode and children[] as null pointer. - */ template void MWNode::deleteChildren() { if (this->isLeafNode()) return; for (int cIdx = 0; cIdx < getTDim(); cIdx++) { @@ -726,7 +477,6 @@ template void MWNode::deleteChildren() { this->setIsLeafNode(); } -/** @brief Recursive deallocation of parent and all their forefathers. */ template void MWNode::deleteParent() { if (this->parent == nullptr) return; MWNode &parent = getMWParent(); @@ -736,7 +486,6 @@ template void MWNode::deleteParent() { this->parent = nullptr; } -/** @brief Deallocation of all generated nodes . 
*/ template void MWNode::deleteGenerated() { if (this->isBranchNode()) { if (this->isEndNode()) { @@ -747,7 +496,6 @@ template void MWNode::deleteGenerated() { } } -/** @brief returns the coordinates of the centre of the node */ template Coord MWNode::getCenter() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -757,7 +505,6 @@ template Coord MWNode::getCenter() const { return r; } -/** @brief returns the upper bounds of the D-interval defining the node */ template Coord MWNode::getUpperBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -767,7 +514,6 @@ template Coord MWNode::getUpperBounds() const { return ub; } -/** @brief returns the lower bounds of the D-interval defining the node */ template Coord MWNode::getLowerBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -777,14 +523,6 @@ template Coord MWNode::getLowerBounds() const { return lb; } -/** @brief Routine to find the path along the tree. - * - * @param[in] nIdx: the sought after node through its NodeIndex - * - * @details Given the translation indices at the final scale, computes the child m - * to be followed at the current scale in oder to get to the requested - * node at the final scale. The result is the index of the child needed. - * The index is obtained by bit manipulation of of the translation indices. */ template int MWNode::getChildIndex(const NodeIndex &nIdx) const { assert(isAncestor(nIdx)); int cIdx = 0; @@ -799,12 +537,6 @@ template int MWNode::getChildIndex(const NodeIndex return cIdx; } -/** @brief Routine to find the path along the tree. 
- * - * @param[in] r: the sought after node through the coordinates of a point in space - * - * @detailsGiven a point in space, determines which child should be followed - * to get to the corresponding terminal node. */ template int MWNode::getChildIndex(const Coord &r) const { assert(hasCoord(r)); int cIdx = 0; @@ -818,18 +550,6 @@ template int MWNode::getChildIndex(const Coord &r) return cIdx; } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: quadrature points in a \f$ d \times (k+1) \f$ matrix form. - * - * @details The original quadrature points are fetched and then - * dilated and translated. For each cartesian direction \f$ \alpha = - * x,y,z... \f$ the set of quadrature points becomes \f$ x^\alpha_i = - * 2^{-n} (x_i + l^\alpha \f$. By taking all possible - * \f$(k+1)^d\f$ combinations, they will then define a d-dimensional - * grid of quadrature points. - * - */ template void MWNode::getPrimitiveQuadPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, kp1); @@ -842,19 +562,6 @@ template void MWNode::getPrimitiveQuadPts(MatrixXd &pt for (int d = 0; d < D; d++) pts.row(d) = sFac * (roots.array() + static_cast(l[d])); } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: quadrature points in a \f$ d \times (k+1) \f$ matrix form. - * - * @details The original quadrature points are fetched and then - * dilated and translated to match the quadrature points in the - * children of the given node. For each cartesian direction \f$ \alpha = x,y,z... \f$ - * the set of quadrature points becomes \f$ x^\alpha_i = 2^{-n-1} (x_i + 2 l^\alpha + t^\alpha) \f$, where \f$ t^\alpha = - * 0,1 \f$. By taking all possible \f$(k+1)^d\combinations \f$, they will - * then define a d-dimensional grid of quadrature points for the child - * nodes. 
- * - */ template void MWNode::getPrimitiveChildPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, 2 * kp1); @@ -870,16 +577,6 @@ template void MWNode::getPrimitiveChildPts(MatrixXd &p } } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: expanded quadrature points in a \f$ d \times - * (k+1)^d \f$ matrix form. - * - * @details The primitive quadrature points are used to obtain a - * tensor-product representation collecting all \f$ (k+1)^d \f$ - * vectors of quadrature points. - * - */ template void MWNode::getExpandedQuadPts(Eigen::MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveQuadPts(prim_pts); @@ -894,16 +591,6 @@ template void MWNode::getExpandedQuadPts(Eigen::Matrix if (D >= 4) NOT_IMPLEMENTED_ABORT; } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: expanded quadrature points in a \f$ d \times - * 2^d(k+1)^d \f$ matrix form. - * - * @details The primitive quadrature points of the children are used to obtain a - * tensor-product representation collecting all \f$ 2^d (k+1)^d \f$ - * vectors of quadrature points. - * - */ template void MWNode::getExpandedChildPts(MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveChildPts(prim_pts); @@ -928,23 +615,13 @@ template void MWNode::getExpandedChildPts(MatrixXd &pt } } -/** @brief Const version of node retriever that NEVER generates. - * - * @param[in] idx: the requested NodeIndex - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine returns the appropriate Node, or a NULL pointer if - * the node does not exist, or if it is a GenNode. Recursion starts at at this - * node and ASSUMES the requested node is in fact decending from this node. 
- */ template const MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) const { - if (getScale() == idx.getScale()) { // we're done + if (getScale() == idx.getScale()) { assert(getNodeIndex() == idx); return this; } assert(this->isAncestor(idx)); - if (this->isEndNode()) { // don't return GenNodes + if (this->isEndNode()) { return nullptr; } int cIdx = getChildIndex(idx); @@ -952,23 +629,13 @@ template const MWNode *MWNode::retrieveNodeNoGen return this->children[cIdx]->retrieveNodeNoGen(idx); } -/** @brief Node retriever that NEVER generates. - * - * @param[in] idx: the requested NodeIndex - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine returns the appropriate Node, or a NULL pointer if - * the node does not exist, or if it is a GenNode. Recursion starts at at this - * node and ASSUMES the requested node is in fact decending from this node. - */ template MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) { - if (getScale() == idx.getScale()) { // we're done + if (getScale() == idx.getScale()) { assert(getNodeIndex() == idx); return this; } assert(this->isAncestor(idx)); - if (this->isEndNode()) { // don't return GenNodes + if (this->isEndNode()) { return nullptr; } int cIdx = getChildIndex(idx); @@ -976,18 +643,6 @@ template MWNode *MWNode::retrieveNodeNoGen(const return this->children[cIdx]->retrieveNodeNoGen(idx); } -/** @brief Node retriever that returns requested Node or EndNode (const version). - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. 
- */ template const MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) const { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -995,18 +650,6 @@ template const MWNode *MWNode::retrieveNodeOrEnd return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } -/** @brief Node retriever that returns requested Node or EndNode. - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. - */ template MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -1014,68 +657,30 @@ template MWNode *MWNode::retrieveNodeOrEndNode(c return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } -/** @brief Node retriever that returns requested Node or EndNode (const version). - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. 
- */ template const MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) const { - if (getScale() == idx.getScale()) { // we're done + if (getScale() == idx.getScale()) { assert(getNodeIndex() == idx); return this; } assert(isAncestor(idx)); - // We should in principle lock before read, but it makes things slower, - // and the EndNode status does not change (normally ;) if (isEndNode()) { return this; } int cIdx = getChildIndex(idx); assert(children[cIdx] != nullptr); return this->children[cIdx]->retrieveNodeOrEndNode(idx); } -/** @brief Node retriever that returns requested Node or EndNode. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. - */ template MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) { - if (getScale() == idx.getScale()) { // we're done + if (getScale() == idx.getScale()) { assert(getNodeIndex() == idx); return this; } assert(isAncestor(idx)); - // We should in principle lock before read, but it makes things slower, - // and the EndNode status does not change (normally ;) if (isEndNode()) { return this; } int cIdx = getChildIndex(idx); assert(children[cIdx] != nullptr); return this->children[cIdx]->retrieveNodeOrEndNode(idx); } -/** @brief Node retriever that ALWAYS returns the requested node. - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine always returns the appropriate node, and will generate nodes - * that does not exist. 
Recursion starts at this node and ASSUMES the - * requested node is in fact decending from this node. - */ template MWNode *MWNode::retrieveNode(const Coord &r, int depth) { if (depth < 0) MSG_ABORT("Invalid argument"); @@ -1087,26 +692,10 @@ template MWNode *MWNode::retrieveNode(const Coor return this->children[cIdx]->retrieveNode(r, depth); } -/** @brief Node retriever that ALWAYS returns the requested node, possibly without coefs. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. This - * routine always returns the appropriate node, and will generate nodes that - * does not exist. Recursion starts at this node and ASSUMES the requested - * node is in fact descending from this node. - * If create = true, the nodes are permanently added to the tree. - */ template MWNode *MWNode::retrieveNode(const NodeIndex &idx, bool create) { - if (getScale() == idx.getScale()) { // we're done + if (getScale() == idx.getScale()) { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; - // has to fetch coeff in Bank. NOT USED YET - // int ncoefs = (1 << D) * this->getKp1_d(); - // coefs = new double[ncoefs]; // TODO must be cleaned at some stage - // coefs = new double[ncoefs]; // TODO must be cleaned at some stage - // tree->getNodeCoeff(idx, coefs); } assert(getNodeIndex() == idx); return this; @@ -1123,18 +712,6 @@ template MWNode *MWNode::retrieveNode(const Node return this->children[cIdx]->retrieveNode(idx, create); } -/** Node retriever that ALWAYS returns the requested node. - * - * WARNING: This routine is NOT thread safe! Must be used within omp critical. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. This - * routine always returns the appropriate node, and will generate nodes that - * does not exist. 
Recursion starts at this node and ASSUMES the requested - * node is in fact related to this node. - */ template MWNode *MWNode::retrieveParent(const NodeIndex &idx) { if (getScale() < idx.getScale()) MSG_ABORT("Scale error") if (getScale() == idx.getScale()) return this; @@ -1145,22 +722,13 @@ template MWNode *MWNode::retrieveParent(const No return this->parent->retrieveParent(idx); } -/** @brief Gives the norm (absolute value) of the node at the given NodeIndex. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find the node with a given NodeIndex. When an EndNode is - * found, do not generate any new node, but rather give the value of the norm - * assuming the function is uniformly distributed within the node. - */ template double MWNode::getNodeNorm(const NodeIndex &idx) const { - if (this->getScale() == idx.getScale()) { // we're done + if (this->getScale() == idx.getScale()) { assert(getNodeIndex() == idx); return std::sqrt(this->squareNorm); } assert(isAncestor(idx)); - if (this->isEndNode()) { // we infer norm at lower scales + if (this->isEndNode()) { return std::sqrt(this->squareNorm * std::pow(2.0, -D * (idx.getScale() - getScale()))); } int cIdx = getChildIndex(idx); @@ -1168,46 +736,21 @@ template double MWNode::getNodeNorm(const NodeIndex return this->children[cIdx]->getNodeNorm(idx); } -/** @brief Test if a given coordinate is within the boundaries of the node. 
- * - * @param[in] r: point coordinates - */ template bool MWNode::hasCoord(const Coord &r) const { double sFac = std::pow(2.0, -getScale()); const NodeIndex &l = getNodeIndex(); - // println(1, "[" << r[0] << "," << r[1] << "," << r[2] << "]"); - // println(1, "[" << l[0] << "," << l[1] << "," << l[2] << "]"); - // println(1, *this); for (int d = 0; d < D; d++) { if (r[d] < sFac * l[d] or r[d] > sFac * (l[d] + 1)) { - // println(1, "false"); return false; } } - // println(1, "true"); return true; } -/** Testing if nodes are compatible wrt NodeIndex and Tree (order, rootScale, - * relPrec, etc). */ template bool MWNode::isCompatible(const MWNode &node) { NOT_IMPLEMENTED_ABORT; - // if (nodeIndex != node.nodeIndex) { - // println(0, "nodeIndex mismatch" << std::endl); - // return false; - // } - // if (not this->tree->checkCompatible(*node.tree)) { - // println(0, "tree type mismatch" << std::endl); - // return false; - // } - // return true; -} - -/** @brief Test if the node is decending from a given NodeIndex, that is, if they have - * overlapping support. - * - * @param[in] idx: the NodeIndex of the requested node - */ +} + template bool MWNode::isAncestor(const NodeIndex &idx) const { int relScale = idx.getScale() - getScale(); if (relScale < 0) return false; @@ -1223,10 +766,6 @@ template bool MWNode::isDecendant(const NodeIndex & NOT_IMPLEMENTED_ABORT; } -/** @brief printout ofm the node content. - * - * @param[in] o: the output stream - */ template std::ostream &MWNode::print(std::ostream &o) const { std::string flags = " "; o << getNodeIndex(); @@ -1252,12 +791,6 @@ template std::ostream &MWNode::print(std::ostream &o) return o; } -/** @brief recursively set maxSquaredNorm and maxWSquareNorm of parent and descendants - * - * @details - * normalization is such that a constant function gives constant value, - * i.e. 
*not* same normalization as a squareNorm - */ template void MWNode::setMaxSquareNorm() { auto n = this->getScale(); this->maxWSquareNorm = calcScaledWSquareNorm(); @@ -1272,8 +805,7 @@ template void MWNode::setMaxSquareNorm() { } } } -/** @brief recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1 - */ + template void MWNode::resetMaxSquareNorm() { auto n = this->getScale(); this->maxSquareNorm = -1.0; @@ -1293,4 +825,4 @@ template class MWNode<1, ComplexDouble>; template class MWNode<2, ComplexDouble>; template class MWNode<3, ComplexDouble>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h index f86313846..de8558322 100644 --- a/src/trees/MWNode.h +++ b/src/trees/MWNode.h @@ -37,26 +37,54 @@ namespace mrcpp { -/** @class MWNode +/** + * @file MWNode.h + * @brief Base node for multiresolution (multiwavelet) trees. * - * @brief Base class for Multiwavelet nodes + * @details + * A node stores scaling/wavelet coefficients for one cell at scale `n` and + * translation `l` in `D` spatial dimensions. It also keeps structural + * information (parent/children, Hilbert path, status flags) and provides + * utilities to: * - * @details A MWNode will contain the scaling and wavelet coefficients - * to represent functions or operators within a Multiwavelet - * framework. The nodes are in multidimensional. The dimensionality is - * set thoucgh the template parameter D=1,2,3. In addition to the - * coefficients the node contains metadata such as the scale, the - * translation index, the norm, pointers to parent node and child - * nodes, pointer to the corresponding MWTree etc... See member and - * data descriptions for details. 
+ * - allocate/attach coefficient buffers, + * - compute and cache norms (total, per-component, maximum scaled norms), + * - perform CV/MW transforms on the node, + * - navigate and generate parts of the tree (parents/children), + * - fetch geometry (bounds, center) and quadrature/child evaluation points. * + * This class is templated on spatial dimension `D` (1, 2, or 3) and on the + * scalar type `T` (e.g., `double` or `ComplexDouble`). */ -template class MWNode { + +/** + * @class MWNode + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Scalar type of coefficients (e.g., double, ComplexDouble). + * + * @brief Base class for multiwavelet tree nodes. + * + * @note + * Nodes are created and managed by @ref MWTree and specialized trees + * (e.g., @ref FunctionTree). Most users should not instantiate nodes + * directly; instead, operate at the tree level. + */ +template +class MWNode { public: + /** + * @brief Copy-construct a node. + * @param node Source node. + * @param allocCoef If true, allocate a new coefficient buffer. + * @param SetCoef If true and @p allocCoef is true, copy coefficients. 
+ */ MWNode(const MWNode &node, bool allocCoef = true, bool SetCoef = true); + MWNode &operator=(const MWNode &node) = delete; virtual ~MWNode(); + /// @name Basis/order and topology queries + ///@{ int getKp1() const { return getMWTree().getKp1(); } int getKp1_d() const { return getMWTree().getKp1_d(); } int getOrder() const { return getMWTree().getOrder(); } @@ -70,16 +98,26 @@ template class MWNode { const NodeIndex &getNodeIndex() const { return this->nodeIndex; } const HilbertPath &getHilbertPath() const { return this->hilbertPath; } + ///@} + /// @name Geometry + ///@{ Coord getCenter() const; Coord getUpperBounds() const; Coord getLowerBounds() const; bool hasCoord(const Coord &r) const; + ///@} + + /// @name Structural relations + ///@{ bool isCompatible(const MWNode &node); bool isAncestor(const NodeIndex &idx) const; bool isDecendant(const NodeIndex &idx) const; + ///@} + /// @name Norms + ///@{ double getSquareNorm() const { return this->squareNorm; } double getMaxSquareNorm() const { return (maxSquareNorm > 0.0) ? maxSquareNorm : calcScaledSquareNorm(); } double getMaxWSquareNorm() const { return (maxWSquareNorm > 0.0) ? 
maxWSquareNorm : calcScaledWSquareNorm(); } @@ -87,19 +125,28 @@ template class MWNode { double getScalingNorm() const; virtual double getWaveletNorm() const; double getComponentNorm(int i) const { return this->componentNorms[i]; } + ///@} + /// @name Coefficients access + ///@{ int getNCoefs() const { return this->n_coefs; } void getCoefs(Eigen::Matrix &c) const; void printCoefs() const; T *getCoefs() { return this->coefs; } const T *getCoefs() const { return this->coefs; } + ///@} + /// @name Evaluation points (quadrature / children) + ///@{ void getPrimitiveQuadPts(Eigen::MatrixXd &pts) const; void getPrimitiveChildPts(Eigen::MatrixXd &pts) const; void getExpandedQuadPts(Eigen::MatrixXd &pts) const; void getExpandedChildPts(Eigen::MatrixXd &pts) const; + ///@} + /// @name Tree navigation (typed accessors) + ///@{ MWTree &getMWTree() { return static_cast &>(*this->tree); } MWNode &getMWParent() { return static_cast &>(*this->parent); } MWNode &getMWChild(int i) { return static_cast &>(*this->children[i]); } @@ -107,28 +154,48 @@ template class MWNode { const MWTree &getMWTree() const { return static_cast &>(*this->tree); } const MWNode &getMWParent() const { return static_cast &>(*this->parent); } const MWNode &getMWChild(int i) const { return static_cast &>(*this->children[i]); } + ///@} + /// @name Coefficients editing (block-wise) + ///@{ void zeroCoefs(); void setCoefBlock(int block, int block_size, const T *c); void addCoefBlock(int block, int block_size, const T *c); void zeroCoefBlock(int block, int block_size); void attachCoefs(T *coefs); + ///@} + /// @name Norm bookkeeping + ///@{ void calcNorms(); void zeroNorms(); void clearNorms(); + ///@} + /// @name Topology modification + ///@{ virtual void createChildren(bool coefs); virtual void genChildren(); virtual void genParent(); virtual void deleteChildren(); virtual void deleteParent(); + ///@} + /// @name Local transforms + ///@{ virtual void cvTransform(int kind, bool firstchild = false); virtual void 
mwTransform(int kind); + ///@} + /** + * @brief Norm of the node at the given index. + * @param idx Target index (same scale as this node or finer; must be a descendant of this node). + * @return Norm at @p idx; if an end node is reached first, the norm is inferred assuming the function is uniformly distributed within that end node. + */ double getNodeNorm(const NodeIndex &idx) const; + /// @name Status flags + ///@{ bool hasParent() const { return (parent != nullptr) ? true : false; } bool hasCoefs() const { return (this->status & FlagHasCoefs); } bool isEndNode() const { return (this->status & FlagEndNode); } @@ -153,9 +220,11 @@ template class MWNode { void clearIsGenNode() { CLEAR_BITS(status, FlagGenNode); } void clearIsRootNode() { CLEAR_BITS(status, FlagRootNode); } void clearIsAllocated() { CLEAR_BITS(status, FlagAllocated); } + ///@} friend std::ostream &operator<<(std::ostream &o, const MWNode &nd) { return nd.print(o); } + // Friend classes that are allowed to operate on internals. friend class TreeBuilder; friend class MultiplicationCalculator; friend class NodeAllocator; @@ -165,98 +234,149 @@ template class MWNode { friend class FunctionNode; friend class OperatorNode; friend class DerivativeCalculator; - bool isComplex = false; // TODO put as one of the flags - friend class FunctionTree; // required if a ComplexDouble tree access a double node from another tree! + bool isComplex = false; ///< Helper flag for mixed-real/complex workflows. + friend class FunctionTree; ///< Allows complex trees to access real nodes when needed. friend class FunctionTree; - int childSerialIx{-1}; ///< index of first child in serial Tree, or -1 for leafnodes/endnodes + int childSerialIx{-1}; ///< Index of first child in a serialized view, or -1 for leaves.
protected: - MWTree *tree{nullptr}; ///< Tree the node belongs to - MWNode *parent{nullptr}; ///< Parent node - MWNode *children[1 << D]; ///< 2^D children - - double squareNorm{-1.0}; ///< Squared norm of all 2^D (k+1)^D coefficients - double componentNorms[1 << D]; ///< Squared norms of the separeted 2^D components - double maxSquareNorm{-1.0}; ///< Largest squared norm among itself and descendants. - double maxWSquareNorm{-1.0}; ///< Largest wavelet squared norm among itself and descendants. - ///< NB: must be set before used. - T *coefs{nullptr}; ///< the 2^D (k+1)^D MW coefficients - ///< For example, in case of a one dimensional function \f$ f \f$ - ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$, - ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$ - ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$. - ///< Here \f$ n, l \f$ are unique for every node. - int n_coefs{0}; - - int serialIx{-1}; ///< index in serial Tree - int parentSerialIx{-1}; ///< index of parent in serial Tree, or -1 for roots - - NodeIndex nodeIndex; ///< Scale and translation of the node - HilbertPath hilbertPath; ///< To be documented - + // -------- Ownership and hierarchy -------- + MWTree *tree{nullptr}; ///< Tree the node belongs to. + MWNode *parent{nullptr}; ///< Parent node (nullptr for roots). + MWNode *children[1 << D]; ///< Array of 2^D children (valid if branch node). + + // -------- Norms (cached) -------- + double squareNorm{-1.0}; ///< Squared norm of all 2^D (k+1)^D coefficients. + double componentNorms[1 << D]; ///< Squared norms of the 2^D components. + double maxSquareNorm{-1.0}; ///< Maximum scaled squared norm among node and descendants. + double maxWSquareNorm{-1.0}; ///< Maximum scaled wavelet squared norm among node and descendants. + + // -------- Coefficients -------- + T *coefs{nullptr}; ///< Buffer of size 2^D (k+1)^D with MW coefficients. + int n_coefs{0}; ///< Number of coefficients in @ref coefs. 
+ + // -------- Serialization helpers -------- + int serialIx{-1}; ///< Index in a serialized traversal. + int parentSerialIx{-1}; ///< Serialized index of parent, or -1 for roots. + + // -------- Indexing and space-filling path -------- + NodeIndex nodeIndex; ///< Scale and translation of this node. + HilbertPath hilbertPath; ///< Current Hilbert path state for child ordering. + + // -------- Construction helpers -------- MWNode(); MWNode(MWTree *tree, int rIdx); MWNode(MWTree *tree, const NodeIndex &idx); MWNode(MWNode *parent, int cIdx); + /// Free coefficient buffer and reset counters. virtual void dealloc(); + /// Crop node based on precision; may trigger refinement. bool crop(double prec, double splitFac, bool absPrec); + /// Initialize thread lock (when OpenMP is enabled). void initNodeLock() { MRCPP_INIT_OMP_LOCK(); } + + /// Allocate coefficient buffer as `n_blocks * block_size`. virtual void allocCoefs(int n_blocks, int block_size); + + /// Release coefficient buffer. virtual void freeCoefs(); + /// Update cached maxima from descendants. void setMaxSquareNorm(); + + /// Invalidate cached maxima for this branch. void resetMaxSquareNorm(); + + /// Scaled total norm \f$ 2^{D n}\|c\|^2 \f$ (lazy). double calcScaledSquareNorm() const { return std::pow(2.0, D * getScale()) * getSquareNorm(); } + + /// Scaled wavelet norm \f$ 2^{D n}\|d\|^2 \f$ (lazy). double calcScaledWSquareNorm() const { return std::pow(2.0, D * getScale()) * getWaveletNorm(); } + + /// Component-wise norm computation hook. virtual double calcComponentNorm(int i) const; + /// Recompress local representation after edits. virtual void reCompress(); + + /// Push coefficients from parent to all children. virtual void giveChildrenCoefs(bool overwrite = true); + + /// Push coefficients from parent to a specific child. virtual void giveChildCoefs(int cIdx, bool overwrite = true); + + /// Pull coefficients from children to parent. 
virtual void giveParentCoefs(bool overwrite = true); + + /// Rebuild local buffer from children (inverse of giveChildrenCoefs). virtual void copyCoefsFromChildren(); + /// Child index for a target node index (same scale or finer). int getChildIndex(const NodeIndex &nIdx) const; + + /// Child index for a spatial coordinate. int getChildIndex(const Coord &r) const; + /// Whether two nodes lie in different branches (fast check). bool diffBranch(const MWNode &rhs) const; + /// Retrieve node owning coordinate @p r at given depth (may create). MWNode *retrieveNode(const Coord &r, int depth); + + /// Retrieve node at index @p idx (may create). MWNode *retrieveNode(const NodeIndex &idx, bool create = false); + + /// Retrieve parent node for index @p idx (may create ancestors). MWNode *retrieveParent(const NodeIndex &idx); + /// Lookup without generation (const). const MWNode *retrieveNodeNoGen(const NodeIndex &idx) const; + + /// Lookup without generation (mutable). MWNode *retrieveNodeNoGen(const NodeIndex &idx); + /// Find node or end node by coordinate (const). const MWNode *retrieveNodeOrEndNode(const Coord &r, int depth) const; + + /// Find node or end node by coordinate (mutable). MWNode *retrieveNodeOrEndNode(const Coord &r, int depth); + /// Find node or end node by index (const). const MWNode *retrieveNodeOrEndNode(const NodeIndex &idx) const; + + /// Find node or end node by index (mutable). MWNode *retrieveNodeOrEndNode(const NodeIndex &idx); + /// Thread-safe child creation. void threadSafeCreateChildren(); + + /// Thread-safe generation of children. void threadSafeGenChildren(); + + /// Remove nodes generated during adaptive build. void deleteGenerated(); + /// Printable diagnostics for a node. 
virtual std::ostream &print(std::ostream &o) const; + // --- Bit flags describing node state (see status member) --- static const unsigned char FlagBranchNode = B8(00000001); - static const unsigned char FlagGenNode = B8(00000010); - static const unsigned char FlagHasCoefs = B8(00000100); - static const unsigned char FlagAllocated = B8(00001000); - static const unsigned char FlagEndNode = B8(00010000); - static const unsigned char FlagRootNode = B8(00100000); - static const unsigned char FlagLooseNode = B8(01000000); + static const unsigned char FlagGenNode = B8(00000010); + static const unsigned char FlagHasCoefs = B8(00000100); + static const unsigned char FlagAllocated = B8(00001000); + static const unsigned char FlagEndNode = B8(00010000); + static const unsigned char FlagRootNode = B8(00100000); + static const unsigned char FlagLooseNode = B8(01000000); private: - unsigned char status{0}; + unsigned char status{0}; ///< Bit mask of @ref FlagBranchNode, @ref FlagGenNode, etc. #ifdef MRCPP_HAS_OMP - omp_lock_t omp_lock; + omp_lock_t omp_lock; ///< Per-node lock for thread-safe edits (OpenMP). #endif }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp index 6a646d33f..67e761bca 100644 --- a/src/trees/MWTree.cpp +++ b/src/trees/MWTree.cpp @@ -40,39 +40,23 @@ using namespace Eigen; namespace mrcpp { -/** @brief MWTree constructor. - * - * @param[in] mra: the multiresolution analysis object - * @param[in] n: the name of the tree (only for printing purposes) - * - * @details Creates an empty tree object, containing only the set of - * root nodes. The information for the root node configuration to use - * is in the mra object which is passed to the constructor. 
- */ template MWTree::MWTree(const MultiResolutionAnalysis &mra, const std::string &n) : MRA(mra) - , order(mra.getOrder()) /// polynomial order - , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) /// nr of scaling coefficients \f$ (k+1)^D \f$ + , order(mra.getOrder()) + , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) , name(n) , squareNorm(-1.0) , rootBox(mra.getWorldBox()) { this->nodesAtDepth.push_back(0); } -/** @brief MWTree destructor. */ template MWTree::~MWTree() { this->endNodeTable.clear(); if (this->nodesAtDepth.size() != 1) MSG_ERROR("Nodes at depth != 1 -> " << this->nodesAtDepth.size()); if (this->nodesAtDepth[0] != 0) MSG_ERROR("Nodes at depth 0 != 0 -> " << this->nodesAtDepth[0]); } -/** @brief Deletes all the nodes in the tree - * - * @details This method will recursively delete all the nodes, - * including the root nodes. Derived classes will call this method - * when the object is deleted. - */ template void MWTree::deleteRootNodes() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -82,14 +66,6 @@ template void MWTree::deleteRootNodes() { } } -/** @brief Remove all nodes in the tree - * - * @details Leaves the tree in the same state as after construction, - * i.e. undefined tree structure containing only root nodes without - * coefficients. The assigned memory, including branch and leaf - * nodes, (nodeChunks in NodeAllocator) is NOT released, but is - * immediately available to the new function. - */ template void MWTree::clear() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -101,11 +77,6 @@ template void MWTree::clear() { this->clearSquareNorm(); } -/** @brief Calculate the squared norm \f$ ||f||^2_{\ldots} \f$ of a function represented as a tree. - * - * @details The norm is calculated using endNodes only. 
The specific - * type of norm which is computed will depend on the derived class - */ template void MWTree::calcSquareNorm(bool deep) { double treeNorm = 0.0; for (int n = 0; n < this->getNEndNodes(); n++) { @@ -117,29 +88,6 @@ template void MWTree::calcSquareNorm(bool deep) { this->squareNorm = treeNorm; } -/** @brief Full Multiwavelet transform of the tree in either directions - * - * @param[in] type: TopDown (from roots to leaves) or BottomUp (from - * leaves to roots) which specifies the direction of the MW transform - * @param[in] overwrite: if true, the result will overwrite - * preexisting coefficients. - * - * @details It performs a Multiwavlet transform of the whole tree. The - * input parameters will specify the direction (upwards or downwards) - * and whether the result is added to the coefficients or it - * overwrites them. See the documentation for the #mwTransformUp - * and #mwTransformDown for details. - * \f[ - * \pmatrix{ - * s_{nl}\\ - * d_{nl} - * } - * \rightleftarrows \pmatrix{ - * s_{n+1,2l}\\ - * s_{n+1,2l+1} - * } - * \f] - */ template void MWTree::mwTransform(int type, bool overwrite) { switch (type) { case TopDown: @@ -154,15 +102,6 @@ template void MWTree::mwTransform(int type, bool overw } } -/** @brief Regenerates all s/d-coeffs by backtransformation - * - * @details It starts at the bottom of the tree (scaling coefficients - * of the leaf nodes) and it generates the scaling and wavelet - * coefficients of the parent node. It then proceeds recursively all the - * way up to the root nodes. This is generally used after a function - * projection to purify the coefficients obtained by quadrature at - * coarser scales which are therefore not precise enough. 
- */ template void MWTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -180,17 +119,6 @@ template void MWTree::mwTransformUp() { } } -/** @brief Regenerates all scaling coeffs by MW transformation of existing s/w-coeffs - * on coarser scales - * - * @param[in] overwrite: if true the preexisting coefficients are overwritten - * - * @details The transformation starts at the rootNodes and proceeds - * recursively all the way to the leaf nodes. The existing scaling - * coefficeints will either be overwritten or added to. The latter - * operation is generally used after the operator application. - * - */ template void MWTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -215,12 +143,6 @@ template void MWTree::mwTransformDown(bool overwrite) } } -/** @brief Set the MW coefficients to zero, keeping the same tree structure - * - * @details Keeps the node structure of the tree, even though the zero - * function is representable at depth zero. One should then use \ref cropTree to remove - * unnecessary nodes. - */ template void MWTree::setZero() { TreeIterator it(*this); while (it.next()) { @@ -230,13 +152,6 @@ template void MWTree::setZero() { this->squareNorm = 0.0; } -/** @brief Increments node counter by one for non-GenNodes. - * - * @details TO BE DOCUMENTED - * \warning: This routine is not thread - * safe, and must NEVER be called outside a critical region in parallel. - * It's way. way too expensive to lock the tree, so don't even think - * about it. */ template void MWTree::incrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -254,14 +169,6 @@ template void MWTree::incrementNodeCount(int scale) { } } -/** @brief Decrements node counter by one for non-GenNodes. - * - * @details TO BE DOCUMENTED - * \warning: This routine is not thread - * safe, and must NEVER be called outside a critical region in parallel. - * It's way. 
way too expensive to lock the tree, so don't even think - * about it. - */ template void MWTree::decrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -277,10 +184,6 @@ template void MWTree::decrementNodeCount(int scale) { } } -/** @returns Total number of nodes in the tree, at given depth (not in use) - * - * @param[in] depth: Tree depth (0 depth is the coarsest scale) to count. - */ template int MWTree::getNNodesAtDepth(int depth) const { int N = 0; if (depth < 0) { @@ -291,19 +194,11 @@ template int MWTree::getNNodesAtDepth(int depth) const return N; } -/** @returns Size of all MW coefs in the tree, in kB */ template int MWTree::getSizeNodes() const { auto nCoefs = 1ll * getNNodes() * getTDim() * getKp1_d(); return sizeof(T) * nCoefs / 1024; } -/** @brief Finds and returns the node pointer with the given \ref NodeIndex, const version. - * - * @details Recursive routine to find and return the node with a given - * NodeIndex. This routine returns the appropriate Node, or a NULL - * pointer if the node does not exist, or if it is a - * GenNode. Recursion starts at the appropriate rootNode. - */ template const MWNode *MWTree::findNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -313,13 +208,6 @@ template const MWNode *MWTree::findNode(NodeInde return root.retrieveNodeNoGen(idx); } -/** @brief Finds and returns the node pointer with the given \ref NodeIndex. - * - * @details Recursive routine to find and return the node with a given - * NodeIndex. This routine returns the appropriate Node, or a NULL - * pointer if the node does not exist, or if it is a - * GenNode. Recursion starts at the appropriate rootNode. 
- */ template MWNode *MWTree::findNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -329,14 +217,6 @@ template MWNode *MWTree::findNode(NodeIndex i return root.retrieveNodeNoGen(idx); } -/** @brief Finds and returns the node reference with the given NodeIndex. - * - * @details This routine ALWAYS returns the node you ask for. If the - * node does not exist, it will be generated by MW - * transform. Recursion starts at the appropriate rootNode and descends - * from this. - * The nodes are permanently added to the tree if create = true - */ template MWNode &MWTree::getNode(NodeIndex idx, bool create) { if (getRootBox().isPeriodic()) periodic::index_manipulation(idx, getRootBox().getPeriodic()); @@ -351,14 +231,6 @@ template MWNode &MWTree::getNode(NodeIndex id return *out; } -/** @brief Finds and returns the node with the given NodeIndex. - * - * @details This routine returns the Node you ask for, or the EndNode - * on the path to the requested node, if the requested one is deeper - * than the leaf node ancestor. It will never create or return - * GenNodes. Recursion starts at the appropriate rootNode and decends - * from this. - */ template MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } MWNode &root = getRootBox().getNode(idx); @@ -366,13 +238,6 @@ template MWNode &MWTree::getNodeOrEndNode(NodeIn return *root.retrieveNodeOrEndNode(idx); } -/** @brief Finds and returns the node reference with the given NodeIndex. Const version. - * - * @details This routine ALWAYS returns the node you ask for. If the - * node does not exist, it will be generated by MW - * transform. Recursion starts at the appropriate rootNode and decends - * from this. 
- */ template const MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } const MWNode &root = getRootBox().getNode(idx); @@ -380,15 +245,6 @@ template const MWNode &MWTree::getNodeOrEndNode( return *root.retrieveNodeOrEndNode(idx); } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. - * - * @param[in] depth: requested node depth from root scale. - * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine ALWAYS returns the node you ask for, and will - * generate nodes that do not exist. Recursion starts at the - * appropriate rootNode and decends from this. - */ template MWNode &MWTree::getNode(Coord r, int depth) { MWNode &root = getRootBox().getNode(r); if (depth >= 0) { @@ -398,44 +254,18 @@ template MWNode &MWTree::getNode(Coord r, int } } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. - * - * @param[in] depth: requested node depth from root scale. - * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine returns the Node you ask for, or the EndNode on - * the path to the requested node, and will never create or return GenNodes. - * Recursion starts at the appropriate rootNode and decends from this. - */ template MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) { - if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } - MWNode &root = getRootBox().getNode(r); return *root.retrieveNodeOrEndNode(r, depth); } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. Const version - * - * @param[in] depth: requested node depth from root scale. 
- * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine returns the Node you ask for, or the EndNode on - * the path to the requested node, and will never create or return GenNodes. - * Recursion starts at the appropriate rootNode and decends from this. - */ template const MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) const { - if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } const MWNode &root = getRootBox().getNode(r); return *root.retrieveNodeOrEndNode(r, depth); } -/** @brief Returns the list of all EndNodes - * - * @details copies the list of all EndNode pointers into a new vector - * and returns it. - */ template MWNodeVector *MWTree::copyEndNodeTable() { auto *nVec = new MWNodeVector; for (int n = 0; n < getNEndNodes(); n++) { @@ -445,12 +275,6 @@ template MWNodeVector *MWTree::copyEndNodeTable( return nVec; } -/** @brief Recreate the endNodeTable - * - * @details the endNodeTable is first deleted and then rebuilt from - * scratch. It makes use of the TreeIterator to traverse the tree. - * - */ template void MWTree::resetEndNodeTable() { clearEndNodeTable(); TreeIterator it(*this, TopDown, Hilbert); @@ -467,55 +291,16 @@ template int MWTree::countBranchNodes(int depth) { template int MWTree::countLeafNodes(int depth) { NOT_IMPLEMENTED_ABORT; - // int nNodes = 0; - // TreeIterator it(*this); - // while (it.next()) { - // MWNode &node = it.getNode(); - // if (node.getDepth() == depth or depth < 0) { - // if (node.isLeafNode()) { - // nNodes++; - // } - // } - // } - // return nNodes; } -/* Traverse tree and count nodes belonging to this rank. 
*/ template int MWTree::countNodes(int depth) { NOT_IMPLEMENTED_ABORT; - // TreeIterator it(*this); - // int count = 0; - // while (it.next()) { - // MWNode &node = it.getNode(); - // if (node.isGenNode()) { - // continue; - // } - // if (not node.isForeign()) { - // count++; - // } - // } - // return count; } -/* Traverse tree and count nodes with allocated coefficients. */ template int MWTree::countAllocNodes(int depth) { NOT_IMPLEMENTED_ABORT; - // TreeIterator it(*this); - // int count = 0; - // while (it.next()) { - // MWNode &node = it.getNode(); - // if (node.isGenNode()) { - // continue; - // } - // if (node.hasCoefs()) { - // count++; - // } - // } - // return count; } -/** @brief Prints a summary of the tree structure on the output file - */ template std::ostream &MWTree::print(std::ostream &o) const { o << " square norm: " << this->squareNorm << std::endl; o << " root scale: " << this->getRootScale() << std::endl; @@ -528,25 +313,14 @@ template std::ostream &MWTree::print(std::ostream &o) return o; } -/** @brief sets values for maxSquareNorm in all nodes - * - * @details it defines the upper bound of the squared norm \f$ - * ||f||^2_{\ldots} \f$ in this node or its descendents - */ template void MWTree::makeMaxSquareNorms() { NodeBox &rBox = this->getRootBox(); MWNode **roots = rBox.getNodes(); for (int rIdx = 0; rIdx < rBox.size(); rIdx++) { - // recursively set value of children and descendants roots[rIdx]->setMaxSquareNorm(); } } -/** @brief gives serialIx of a node from its NodeIndex - * - * @details gives a unique integer for each nodes corresponding to the position - * of the node in the serialized representation - */ template int MWTree::getIx(NodeIndex nIdx) { if (this->isLocal == false) MSG_ERROR("getIx only implemented in local representation"); if (NodeIndex2serialIx.count(nIdx) == 0) @@ -571,4 +345,4 @@ template class MWTree<1, ComplexDouble>; template class MWTree<2, ComplexDouble>; template class MWTree<3, ComplexDouble>; -} // namespace 
mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h index b0261aca6..8e96d47e1 100644 --- a/src/trees/MWTree.h +++ b/src/trees/MWTree.h @@ -23,6 +23,23 @@ * */ +/** + * @file MWTree.h + * @brief Base template for multiwavelet (MW) tree data structures. + * + * @details + * An MW tree stores a hierarchical collection of @ref MWNode "MWNode" + * objects arranged as a 2^D-ary tree over a @ref MultiResolutionAnalysis + * (computational domain + basis). It provides: + * - ownership and construction of the root nodes (via @ref NodeBox), + * - navigation and on-demand generation of nodes, + * - bookkeeping of per-depth node counts, + * - utilities for MW transforms, norms, and end-node tables, and + * - access to a @ref NodeAllocator for memory management. + * + * This class is a base for both **function** and **operator** trees. + */ + #pragma once #include @@ -40,114 +57,282 @@ namespace mrcpp { class BankAccount; -/** @class MWTree +/** + * @class MWTree + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient scalar type (e.g. double, ComplexDouble). * - * @brief Base class for Multiwavelet tree structures, such as FunctionTree and OperatorTree + * @brief Base class for MW tree structures (e.g., FunctionTree, OperatorTree). * - * @details The MWTree class is the base class for all tree structures - * needed for Multiwavelet calculations. The MWTree is a D-dimensional - * tree structure of MWNodes. The tree starts from a set of root nodes - * at a common given scale, defining the world box. The most common - * settings are either a single root node or \f$ 2^D \f$ root - * nodes. Other configurations are however allowed. For example, in 3D - * one could have a total of 12 root nodes (a 2x2x3 set of root - * nodes). Once the tree structure is generated, each node will have a - * parent node (except for the root nodes) and \f$ 2^D \f$ child nodes - * (except for leaf nodes). 
Most of the methods deal with traversing - * the tree structure in different ways to fetch specific nodes. Some - * of them will return a node present in the tree; some other methods - * will generate the required node on the fly using the MW transform; - * some methods will return an empty pointer if the node is not - * present. See specific methods for details. + * @details + * A tree is defined over a @ref MultiResolutionAnalysis (MRA). The set of root + * nodes is determined by the MRA world box. Each node has up to 2^D children. + * Some accessors only *find* existing nodes, while others may *generate* nodes + * on the fly (using the MW transform). * + * ### Node retrieval semantics + * - @ref findNode returns a pointer or `nullptr` if the node is missing. + * - @ref getNode returns a reference and generates missing nodes on demand; + * @ref getNodeOrEndNode returns an existing node (or end node) and never creates nodes. + * + * ### Norms + * @ref calcSquareNorm sums the contributions of the end nodes and stores the + * result; see MWTree.cpp for the effect of the `deep` flag. */ template class MWTree { public: + /** + * @brief Construct an empty tree bound to an MRA. + * @param mra Multi-resolution analysis (domain + basis). + * @param n A short name for logging/printing. + * + * @post Root nodes are created according to the MRA world box. + * No coefficients are computed; the tree is “undefined”. + */ MWTree(const MultiResolutionAnalysis &mra, const std::string &n); + + /// Non-copyable. MWTree(const MWTree &tree) = delete; + /// Non-assignable. MWTree &operator=(const MWTree &tree) = delete; + + /// Virtual destructor. virtual ~MWTree(); + /** + * @brief Set all existing node coefficients to zero (structure unchanged). + * @note Does not refine/coarsen the tree, only zeroes values. + */ void setZero(); + + /** + * @brief Remove all non-root nodes, leaving the tree as it was after construction. + * @post Only root nodes without coefficients remain; the squared norm is cleared.
+ */ void clear(); - /** @returns Squared L2 norm of the function */ + /** @name Norms */ + ///@{ + /// @return Global squared L2 norm of the representation (negative if undefined). double getSquareNorm() const { return this->squareNorm; } + + /** + * @brief Recompute the global squared L2 norm. + * @param deep If `true`, may traverse deeper to ensure accuracy. + */ void calcSquareNorm(bool deep = false); + + /// @brief Mark the norm as undefined (sets it to -1). void clearSquareNorm() { this->squareNorm = -1.0; } + ///@} + /** @name Basis/structure parameters */ + ///@{ + /// @return Polynomial order k. int getOrder() const { return this->order; } + /// @return k+1. int getKp1() const { return this->order + 1; } + /// @return (k+1)^D. int getKp1_d() const { return this->kp1_d; } + /// @return Spatial dimension D. int getDim() const { return D; } + /// @return 2^D (number of children per internal node). int getTDim() const { return (1 << D); } - /** @returns the total number of nodes in the tree */ + /// @return Total number of nodes currently allocated in the tree. int getNNodes() const { return getNodeAllocator().getNNodes(); } + /// @return Number of records kept for negative-depth counts. int getNNegScales() const { return this->nodesAtNegativeDepth.size(); } + /// @return Root scale (MRA world scale). int getRootScale() const { return this->rootBox.getScale(); } + /// @return Number of depth levels for which counters are stored. int getDepth() const { return this->nodesAtDepth.size(); } + /// @return Number of nodes counted at a given depth. int getNNodesAtDepth(int i) const; + /// @return Approximate memory footprint of nodes (kB). int getSizeNodes() const; + ///@} - /** @returns */ + /** @name MRA / root access */ + ///@{ + /// @return Mutable root-node container. NodeBox &getRootBox() { return this->rootBox; } + /// @return Const root-node container. const NodeBox &getRootBox() const { return this->rootBox; } + /// @return MRA bound to this tree. 
const MultiResolutionAnalysis &getMRA() const { return this->MRA; } - + ///@} + + /** + * @brief Perform a multiresolution transform. + * @param type Transform kind (implementation-defined selector). + * @param overwrite If `true`, may reuse buffers for speed. + * @note Typical directions are “top-down” and “bottom-up”; see implementation. + */ void mwTransform(int type, bool overwrite = true); + /** @name Naming */ + ///@{ + /// Set a short descriptive name (used in logs). void setName(const std::string &n) { this->name = n; } + /// Get the current name. const std::string &getName() const { return this->name; } + ///@} + /** @name Root-index helpers */ + ///@{ + /// @return Root-box index containing coordinate @p r, or -1 if out-of-bounds (non-periodic). int getRootIndex(Coord r) const { return this->rootBox.getBoxIndex(r); } + /// @return Root-box index containing node @p nIdx, or -1 if out-of-bounds (non-periodic). int getRootIndex(NodeIndex nIdx) const { return this->rootBox.getBoxIndex(nIdx); } - + ///@} + + /** @name Node lookup / retrieval */ + ///@{ + /** + * @brief Find an existing node. + * @param nIdx Target node index. + * @return Pointer to the node if present, otherwise `nullptr`. + * @warning Does not create nodes. + */ MWNode *findNode(NodeIndex nIdx); + + /// Const overload of @ref findNode. const MWNode *findNode(NodeIndex nIdx) const; + /** + * @brief Get a node by index, optionally creating it. + * @param nIdx Target node index. + * @param create If `true`, missing nodes may be generated on demand. + * @return Reference to the node. + */ MWNode &getNode(NodeIndex nIdx, bool create = false); + + /** + * @brief Get a node or the “closest” end node containing it. + * @param nIdx Target node index. + * @return Reference to an existing node; may be an end node if exact match is absent. + * @note Never creates new nodes. + */ MWNode &getNodeOrEndNode(NodeIndex nIdx); + + /// Const overload of @ref getNodeOrEndNode(NodeIndex). 
const MWNode &getNodeOrEndNode(NodeIndex nIdx) const; + /** + * @brief Get a node by spatial coordinate. + * @param r Spatial coordinate. + * @param depth Desired depth; if negative, use current deepest. + * @return Reference to the node; may create if required by the implementation. + */ MWNode &getNode(Coord r, int depth = -1); + + /** + * @brief Get a node or containing end node by coordinate. + * @param r Spatial coordinate. + * @param depth Desired depth; if negative, use current deepest. + * @return Reference to an existing node; may be an end node. + */ MWNode &getNodeOrEndNode(Coord r, int depth = -1); + + /// Const overload of @ref getNodeOrEndNode(Coord,int). const MWNode &getNodeOrEndNode(Coord r, int depth = -1) const; + ///@} + /** @name End-node table */ + ///@{ + /// @return Number of nodes currently listed as “end nodes”. int getNEndNodes() const { return this->endNodeTable.size(); } + /// @return Number of root nodes. int getNRootNodes() const { return this->rootBox.size(); } + + /// @return Mutable reference to i-th end node. MWNode &getEndMWNode(int i) { return *this->endNodeTable[i]; } + /// @return Mutable reference to i-th root node. MWNode &getRootMWNode(int i) { return this->rootBox.getNode(i); } + + /// @return Const reference to i-th end node. const MWNode &getEndMWNode(int i) const { return *this->endNodeTable[i]; } + /// @return Const reference to i-th root node. const MWNode &getRootMWNode(int i) const { return this->rootBox.getNode(i); } + ///@} + /// @return `true` if the underlying world box has any periodic directions. bool isPeriodic() const { return this->MRA.getWorldBox().isPeriodic(); } + /** + * @brief Copy the current end-node table. + * @return New heap-allocated vector; caller takes ownership. + */ MWNodeVector *copyEndNodeTable(); + + /// @return Direct pointer to the internal end-node table. 
MWNodeVector *getEndNodeTable() { return &this->endNodeTable; } + /** @name Tree maintenance */ + ///@{ + /// Delete all root nodes and reset root structures. void deleteRootNodes(); + /// Rebuild the end-node table by traversing the tree. void resetEndNodeTable(); + /// Clear the end-node table without traversing. void clearEndNodeTable() { this->endNodeTable.clear(); } + ///@} + /** @name Node statistics (current tree) */ + ///@{ + /// Count branch (non-leaf) nodes; if depth < 0, count all depths. int countBranchNodes(int depth = -1); + /// Count leaf nodes; if depth < 0, count all depths. int countLeafNodes(int depth = -1); + /// Count allocated nodes; if depth < 0, count all depths. int countAllocNodes(int depth = -1); + /// Count nodes; if depth < 0, count all depths. int countNodes(int depth = -1); - bool isLocal = false; // to know whether the tree coeffcients are stored in the Bank - int getIx(NodeIndex nIdx); // gives serialIx of a stored node from its NodeIndex if isLocal - - void makeMaxSquareNorms(); // sets values for maxSquareNorm and maxWSquareNorm in all nodes - + ///@} + + /// If `true`, coefficients are stored externally (Bank); used by serialization tools. + bool isLocal = false; + + /** + * @brief Map a node index to its serial index (when stored locally). + * @param nIdx Node index. + * @return Serial index, or a negative value if not present. + */ + int getIx(NodeIndex nIdx); + + /** + * @brief Precompute per-node maxima used by some adaptive algorithms. + * @details Fills `maxSquareNorm` and `maxWSquareNorm` for all nodes. + */ + void makeMaxSquareNorms(); + + /** @name Allocator access */ + ///@{ + /// @return Mutable reference to the node allocator. NodeAllocator &getNodeAllocator() { return *this->nodeAllocator_p; } + /// @return Const reference to the node allocator. 
const NodeAllocator &getNodeAllocator() const { return *this->nodeAllocator_p; } - MWNodeVector endNodeTable; ///< Final projected nodes + ///@} + + /// Vector of final projected nodes (end nodes). + MWNodeVector endNodeTable; - void getNodeCoeff(NodeIndex nIdx, T *data); // fetch coefficient from a specific node stored in Bank + /** + * @brief Fetch coefficients of a specific node (when using a Bank). + * @param nIdx Node index. + * @param data Destination buffer of size (k+1)^D * 2^D. + */ + void getNodeCoeff(NodeIndex nIdx, T *data); + + /// @return Whether the tree is marked as conjugated (used by some ops). bool conjugate() const { return this->conj; } + /// Set or clear the conjugation flag. void setConjugate(bool conjug) { this->conj = conjug; } + /// Print tree summary to a stream. friend std::ostream &operator<<(std::ostream &o, const MWTree &tree) { return tree.print(o); } + // Friends that require access to internals friend class MWNode; friend class FunctionNode; friend class OperatorNode; @@ -155,35 +340,52 @@ template class MWTree { friend class NodeAllocator; protected: - // Parameters that are set in construction and should never change - const MultiResolutionAnalysis MRA; + /** @name Immutable construction-time state */ + ///@{ + const MultiResolutionAnalysis MRA; ///< Domain and basis. - // Constant parameters that are derived internally - const int order; - const int kp1_d; + const int order; ///< Polynomial order k. + const int kp1_d; ///< (k+1)^D. + ///@} - std::map, int> NodeIndex2serialIx; // to store nodes serialIx + /// Map node index -> serial index (used by local/banked storage). + std::map, int> NodeIndex2serialIx; - // Parameters that are dynamic and can be set by user - std::string name; + /** @name User-settable metadata */ + ///@{ + std::string name; ///< Short name for diagnostics. + ///@} + /// Node memory allocator. 
std::unique_ptr> nodeAllocator_p{nullptr}; - // Tree data - double squareNorm; - NodeBox rootBox; ///< The actual container of nodes - std::vector nodesAtDepth; ///< Node counter - std::vector nodesAtNegativeDepth; ///< Node counter + /** @name Tree data & counters */ + ///@{ + double squareNorm; ///< Global squared L2 norm (-1 if undefined). + NodeBox rootBox; ///< Container of root nodes. + std::vector nodesAtDepth; ///< Per-depth node counts (depth >= 0). + std::vector nodesAtNegativeDepth; ///< For negative-depth bookkeeping. + ///@} + /** @name MW transforms (internals) */ + ///@{ virtual void mwTransformDown(bool overwrite); virtual void mwTransformUp(); + ///@} + /// Increment per-depth counters for a node at the given scale. void incrementNodeCount(int scale); + /// Decrement per-depth counters for a node at the given scale. void decrementNodeCount(int scale); + /// Optional external storage of coefficients. BankAccount *NodesCoeff = nullptr; + + /// Conjugation flag for algorithms that need it. bool conj{false}; + /// Print a formatted summary (override in derived classes if needed). 
virtual std::ostream &print(std::ostream &o) const; }; -} // namespace mrcpp + +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp index 43b39c32d..e68dd8a36 100644 --- a/src/trees/MultiResolutionAnalysis.cpp +++ b/src/trees/MultiResolutionAnalysis.cpp @@ -32,22 +32,6 @@ namespace mrcpp { -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) - * - * @param[in] bb: 2-element integer array [Lower, Upper] defining the bounds for a BoundingBox object representing the computational domain - * @param[in] order: Maximum polynomial order of the multiwavelet basis, - * immediately used in the constructor of an InterPolatingBasis object which becomes an attribute of the MRA - * @param[in] maxDepth: Exponent of the node refinement in base 2, relative to root scale. - * In other words, it is the maximum amount of refinement that we allow in a node, in other to avoid overflow of values. - * - * @details Constructor of the MultiResolutionAnalysis class from scratch, without requiring any pre-existing complex structure. - * The constructor calls the InterpolatingBasis basis constructor to generate the MultiWavelets basis of functions, - * then the BoundingBox constructor to create the computational domain. The constructor then checks if the generated node depth, or - * node refinement is beyond the root scale or the maximum depth allowed, in which case it will abort the process. - * Otherwise, the process goes on to setup the filters with the class' setupFilter method. 
- */ template MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int order, int depth) : maxDepth(depth) @@ -58,18 +42,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int o setupFilter(); } -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) from a pre-existing BoundingBox object - * - * @param[in] bb: BoundingBox object representing the computational domain - * @param[in] order: (integer) Maximum polynomial order of the multiwavelet basis, - * immediately used in the constructor of an InterPolatingBasis object which becomes an attribute of the MRA - * @param[in] maxDepth: (integer) Exponent of the node refinement in base 2, relative to root scale. - * In other words, it is the maximum amount of refinement that we allow in a node, in other to avoid overflow of values. - * - * @details Constructor of the MultiResolutionAnalysis class from a BoundingBox object. For more details see the first constructor. - */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, int order, int depth) : maxDepth(depth) @@ -80,14 +52,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, in setupFilter(); } -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Copy constructor for a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) - * - * @param[in] mra: Pre-existing MRA object - * - * @details Copy a MultiResolutionAnalysis object without modifying the original. For more details see the first constructor. 
- */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysis &mra) : maxDepth(mra.maxDepth) @@ -98,17 +62,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysi setupFilter(); } -/** @returns New MultiResolutionAnalysis object - * - * @brief Constructor for a MultiResolutionAnalysis object from a pre-existing BoundingBox (computational domain) and a ScalingBasis (Multiwavelet basis) objects - * - * @param[in] bb: Computational domain as a BoundingBox object, taken by constant reference - * @param[in] sb: Polynomial basis (MW) as a ScalingBasis object - * @param[in] depth: Maximum allowed resolution depth, relative to root scale - * - * @details Creates a MRA object from pre-existing BoundingBox and ScalingBasis objects. These objects are taken as reference. For more details about the constructor itself, see the first - * constructor. - */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, const ScalingBasis &sb, int depth) : maxDepth(depth) @@ -119,16 +72,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, co setupFilter(); } -/** @returns Whether the two MRA objects are equal. - * - * @brief Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis, computational domain and maximum depth, and false otherwise - * - * @param[in] mra: MRA object, taken by constant reference - * - * @details Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis - * object) and maximum depth (integer), and false otherwise. Computations on different MRA cannot be combined, this operator can be used to make sure that the multiple MRAs are compatible. For more - * information about the meaning of equality for BoundingBox and ScalingBasis objets, see their respective classes. 
- */ template bool MultiResolutionAnalysis::operator==(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return false; if (this->world != mra.world) return false; @@ -136,16 +79,6 @@ template bool MultiResolutionAnalysis::operator==(const MultiResoluti return true; } -/** @returns Whether the two MRA objects are not equal. - * - * @brief Inequality operator for the MultiResolutionAnalysis class, returns false if both MRAs have the same polynomial basis, computational domain and maximum depth, and true otherwise - * - * @param[in] mra: MRA object, taken by constant reference - * - * @details Inequality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis - * object) and maximum depth (integer), and false otherwise. Opposite of the == operator. For more information about the meaning of equality for BoundingBox and ScalingBasis objets, see their - * respective classes. - */ template bool MultiResolutionAnalysis::operator!=(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return true; if (this->world != mra.world) @@ -157,14 +90,6 @@ template bool MultiResolutionAnalysis::operator!=(const MultiResoluti return false; } -/** - * - * @brief Displays the MRA's attributes in the outstream defined in the Printer class - * - * @details This function displays the attributes of the MRA in the using the Printer class. - * By default, the Printer class writes all information in the output file, not the terminal. - * - */ template void MultiResolutionAnalysis::print() const { print::separator(0, ' '); print::header(0, "MultiResolution Analysis"); @@ -174,15 +99,6 @@ template void MultiResolutionAnalysis::print() const { print::separator(0, '=', 2); } -/** - * - * @brief Initializes the MW filters for the given MW basis. 
- * - * @details By calling the get() function for the appropriate MW basis, the global - * FilterCache Singleton object is initialized. Any subsequent reference to this - * particular filter will point to the same unique global object. - * - */ template void MultiResolutionAnalysis::setupFilter() { getLegendreFilterCache(lfilters); getInterpolatingFilterCache(ifilters); @@ -200,11 +116,6 @@ template void MultiResolutionAnalysis::setupFilter() { } } -/** @returns Maximum possible distance between two points in the MRA domain - * - * @brief Computes the difference between the lower and upper bounds of the computational domain - * - */ template double MultiResolutionAnalysis::calcMaxDistance() const { const Coord &lb = getWorldBox().getLowerBounds(); const Coord &ub = getWorldBox().getUpperBounds(); @@ -215,4 +126,4 @@ template class MultiResolutionAnalysis<1>; template class MultiResolutionAnalysis<2>; template class MultiResolutionAnalysis<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/MultiResolutionAnalysis.h b/src/trees/MultiResolutionAnalysis.h index 00135d822..0282ab464 100644 --- a/src/trees/MultiResolutionAnalysis.h +++ b/src/trees/MultiResolutionAnalysis.h @@ -33,52 +33,159 @@ namespace mrcpp { -/** @class MultiResolutionAnalysis +/** + * @file MultiResolutionAnalysis.h + * @brief Declaration of the MultiResolutionAnalysis class template. * - * @brief Class collecting computational domain and MW basis + * @details + * A **MultiResolutionAnalysis (MRA)** bundles the information that must be + * shared by compatible functions and operators: + * - the computational domain (see @ref BoundingBox), + * - the multiresolution scaling basis (see @ref ScalingBasis), and + * - a maximum refinement depth. * - * @details In order to combine different functions and operators in - * mathematical operations, they need to be compatible. 
That is, they must - * be defined on the same computational domain and constructed using the same - * polynomial basis (order and type). This information constitutes an MRA, - * which needs to be defined and passed as argument to all function and - * operator constructors, and only functions and operators with compatible - * MRAs can be combined in subsequent calculations. + * Two objects (functions/operators) can only be combined if their MRAs are + * equal, i.e. identical domain, basis order/type, and depth. + * + * @par Example + * @code{.cpp} + * using MRA3 = mrcpp::MultiResolutionAnalysis<3>; + * + * // Domain: [-4, 4]^3 with automatically chosen root scale + * mrcpp::BoundingBox<3> world({-4, 4}); + * + * // Build a 3D MRA with Legendre order=7 and maxDepth=12 + * mrcpp::ScalingBasis basis(Legendre, 7); // order = 7 + * MRA3 mra(world, basis, 12); // depth = 12 + * + * // Query information + * int order = mra.getOrder(); + * int maxScale = mra.getMaxScale(); + * auto &box = mra.getWorldBox(); + * auto &sbasis = mra.getScalingBasis(); + * @endcode */ -template class MultiResolutionAnalysis final { +/** + * @class MultiResolutionAnalysis + * @tparam D Spatial dimension (1, 2, or 3). + * + * @brief Collects the computational domain and multiresolution basis. + * + * @details + * The MRA fixes: + * - the **world box** (domain tiling and scaling), + * - the **scaling basis** (type and polynomial order), and + * - the **maximum depth** of refinement relative to the world’s root scale. + * + * The combination of these parameters determines the finest admissible scale + * via @ref getMaxScale. + */ +template +class MultiResolutionAnalysis final { public: + /** + * @brief Construct from a symmetric domain and a basis order. + * + * @param[in] bb Domain bounds as either [0,L] or [-L,L] (L>0). + * @param[in] order Polynomial order of the scaling basis. + * @param[in] depth Maximum refinement depth (relative to root scale).
+ * + * @details + * The scaling basis type is chosen by MRCPP defaults for the given @p order. + * The root scale is inferred from @p bb to keep the per-dimension scaling + * factor in (1, 2). + */ MultiResolutionAnalysis(std::array bb, int order, int depth = MaxDepth); + + /** + * @brief Construct from a preconfigured @ref BoundingBox and basis order. + * + * @param[in] bb Computational domain (possibly periodic). + * @param[in] order Polynomial order of the scaling basis. + * @param[in] depth Maximum refinement depth. + */ MultiResolutionAnalysis(const BoundingBox &bb, int order, int depth = MaxDepth); + + /** + * @brief Construct from a @ref BoundingBox and a fully specified @ref ScalingBasis. + * + * @param[in] bb Computational domain. + * @param[in] sb Scaling basis (type and order). + * @param[in] depth Maximum refinement depth. + */ MultiResolutionAnalysis(const BoundingBox &bb, const ScalingBasis &sb, int depth = MaxDepth); + + /** @brief Copy constructor. */ MultiResolutionAnalysis(const MultiResolutionAnalysis &mra); + + /** @brief Deleted assignment (MRAs are intended to be immutable after construction). */ MultiResolutionAnalysis &operator=(const MultiResolutionAnalysis &mra) = delete; + /** @brief Return polynomial order of the scaling basis. */ int getOrder() const { return this->basis.getScalingOrder(); } + + /** @brief Maximum refinement depth relative to the world’s root scale. */ int getMaxDepth() const { return this->maxDepth; } + + /** + * @brief Absolute finest scale index. + * + * @details + * This is the sum of the world root scale and @ref getMaxDepth, i.e. + * the maximum scale the MRA allows trees to reach. + */ int getMaxScale() const { return this->world.getScale() + this->maxDepth; } + /** @brief Low-level filter associated with the current basis. */ const MWFilter &getFilter() const { return *this->filter; } + + /** @brief The scaling basis specification (type and order). 
*/ const ScalingBasis &getScalingBasis() const { return this->basis; } + + /** @brief The computational domain (world box). */ const BoundingBox &getWorldBox() const { return this->world; } + /** + * @brief Convenience: compute a minimal length scale from a tolerance. + * + * @param[in] epsilon Target tolerance. + * @return \f$\sqrt{\epsilon\,2^{-\mathrm{maxScale}}}\f$, with maxScale taken from @ref getMaxScale. + */ double calcMinDistance(double epsilon) const { return std::sqrt(epsilon * std::pow(2.0, -getMaxScale())); } + + /** + * @brief Convenience: compute the maximal distance within the domain. + * + * @details Derived from the lower and upper bounds of the world box; + * see the corresponding source file for the exact formula. + */ double calcMaxDistance() const; + /** @brief Root (coarsest) scale index of the world. */ int getRootScale() const { return this->world.getScale(); } + /** + * @brief Equality: same world, same basis (type & order), same depth. + * + * @note Two MRAs must compare equal to allow mixing functions/operators. + */ bool operator==(const MultiResolutionAnalysis &mra) const; + + /** @brief Inequality. */ bool operator!=(const MultiResolutionAnalysis &mra) const; + /** @brief Print a human-readable summary through the Printer utilities. */ void print() const; protected: - const int maxDepth; - const ScalingBasis basis; - const BoundingBox world; - MWFilter *filter; + const int maxDepth; ///< Maximum refinement depth permitted by this MRA. + const ScalingBasis basis; ///< Scaling basis (type and polynomial order). + const BoundingBox world; ///< Computational domain description. + MWFilter *filter; ///< Low-level filter derived from @ref basis. + /** @brief Internal helper to instantiate @ref filter based on @ref basis.
*/ void setupFilter(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp index f4f72061b..8c01ed388 100644 --- a/src/trees/NodeAllocator.cpp +++ b/src/trees/NodeAllocator.cpp @@ -44,7 +44,6 @@ NodeAllocator::NodeAllocator(FunctionTree *tree, SharedMemory *me , maxNodesPerChunk(nodesPerChunk) , tree_p(tree) , shmem_p(mem) { - // reserve space for chunk pointers to avoid excessive reallocation this->nodeChunks.reserve(100); this->coefChunks.reserve(100); @@ -61,7 +60,6 @@ NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory *mem, i , maxNodesPerChunk(nodesPerChunk) , tree_p(tree) , shmem_p(mem) { - // reserve space for chunk pointers to avoid excessive reallocation this->nodeChunks.reserve(100); this->coefChunks.reserve(100); @@ -78,7 +76,7 @@ template NodeAllocator::NodeAllocator(OperatorTree *tr template NodeAllocator::~NodeAllocator() { for (auto &chunk : this->nodeChunks) delete[](char *) chunk; - if (not isShared()) // if the data is shared, it must be freed by MPI_Win_free + if (not isShared()) for (auto &chunk : this->coefChunks) delete[] chunk; this->stackStatus.clear(); MRCPP_DESTROY_OMP_LOCK(); @@ -100,46 +98,39 @@ template T *NodeAllocator::getCoef_p(int sIdx) { template MWNode *NodeAllocator::getNodeNoLock(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr; - int chunk = sIdx / this->maxNodesPerChunk; // which chunk - int cIdx = sIdx % this->maxNodesPerChunk; // position in chunk + int chunk = sIdx / this->maxNodesPerChunk; + int cIdx = sIdx % this->maxNodesPerChunk; return this->nodeChunks[chunk] + cIdx; } template T *NodeAllocator::getCoefNoLock(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr; - int chunk = sIdx / this->maxNodesPerChunk; // which chunk - int idx = sIdx % this->maxNodesPerChunk; // position in chunk + int chunk = sIdx / this->maxNodesPerChunk; + int idx = sIdx % 
this->maxNodesPerChunk; return this->coefChunks[chunk] + idx * this->coefsPerNode; } template int NodeAllocator::alloc(int nNodes, bool coefs) { MRCPP_SET_OMP_LOCK(); if (nNodes <= 0 or nNodes > this->maxNodesPerChunk) MSG_ABORT("Cannot allocate " << nNodes << " nodes"); - // move topstack to start of next chunk if current chunk is too small int cIdx = this->topStack % (this->maxNodesPerChunk); bool chunkOverflow = ((cIdx + nNodes) > this->maxNodesPerChunk); if (chunkOverflow) this->topStack = this->maxNodesPerChunk * ((this->topStack + nNodes - 1) / this->maxNodesPerChunk); - // append chunk if necessary int chunk = this->topStack / this->maxNodesPerChunk; bool needNewChunk = (chunk >= this->nodeChunks.size()); if (needNewChunk) appendChunk(coefs); - // return value is index of first new node auto sIdx = this->topStack; - // we require that the index for first child is a multiple of 2**D - // so that we can find the sibling rank using rank=sIdx%(2**D) if (sIdx % nNodes != 0) MSG_WARN("Warning: recommended number of siblings is 2**D"); - // fill stack status auto &status = this->stackStatus; for (int i = sIdx; i < sIdx + nNodes; i++) { if (status[i] != 0) MSG_ERROR(" NodeStackStatus: not available [" << i << "] : " << status[i]); status[i] = 1; } - // advance stack pointers this->nNodes += nNodes; this->topStack += nNodes; this->last_p = getNodeNoLock(sIdx) + nNodes; @@ -153,13 +144,12 @@ template void NodeAllocator::dealloc(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx); auto *node_p = getNodeNoLock(sIdx); node_p->~MWNode(); - this->stackStatus[sIdx] = 0; // mark as available - if (sIdx == this->topStack - 1) { // top of stack + this->stackStatus[sIdx] = 0; + if (sIdx == this->topStack - 1) { while (this->stackStatus[this->topStack - 1] == 0) { this->topStack--; if (this->topStack < 1) break; } - // has to redefine last_p this->last_p = getNodeNoLock(this->topStack); } this->nNodes--; @@ -180,7 +170,6 @@ 
template void NodeAllocator::init(int nChunks, bool co if (nChunks <= 0) MSG_ABORT("Invalid number of chunks: " << nChunks); for (int i = getNChunks(); i < nChunks; i++) appendChunk(coefs); - // reinitialize stacks int nodeCount = this->nodeChunks.size() * this->maxNodesPerChunk; this->stackStatus.resize(nodeCount); std::fill(this->stackStatus.begin(), this->stackStatus.end(), 0); @@ -188,14 +177,11 @@ template void NodeAllocator::init(int nChunks, bool co } template void NodeAllocator::appendChunk(bool coefs) { - // make coeff chunk if (coefs) { T *c_chunk = nullptr; if (this->isShared()) { - // for coefficients, take from the shared memory block c_chunk = this->shmem_p->sh_end_ptr; this->shmem_p->sh_end_ptr += (this->coefsPerNode * this->maxNodesPerChunk); - // may increase size dynamically in the future if (this->shmem_p->sh_max_ptr < this->shmem_p->sh_end_ptr) MSG_ABORT("Shared block too small"); } else { c_chunk = new T[getCoefChunkSize() / sizeof(T)]; @@ -203,7 +189,6 @@ template void NodeAllocator::appendChunk(bool coefs) { this->coefChunks.push_back(c_chunk); } - // make node chunk auto n_chunk = (MWNode *)new char[getNodeChunkSize()]; for (int i = 0; i < this->maxNodesPerChunk; i++) { n_chunk[i].serialIx = -1; @@ -212,35 +197,32 @@ template void NodeAllocator::appendChunk(bool coefs) { } this->nodeChunks.push_back(n_chunk); - // append to stackStatus int oldsize = this->stackStatus.size(); int newsize = oldsize + this->maxNodesPerChunk; this->stackStatus.resize(newsize); std::fill(this->stackStatus.begin() + oldsize, this->stackStatus.end(), 0); } -/** Fill all holes in the chunks with occupied nodes, then remove all empty chunks */ template int NodeAllocator::compress() { MRCPP_SET_OMP_LOCK(); int nNodes = (1 << D); if (this->maxNodesPerChunk * this->nodeChunks.size() <= getTree().getNNodes() + this->maxNodesPerChunk + nNodes - 1) { MRCPP_UNSET_OMP_LOCK(); - return 0; // nothing to compress + return 0; } int posocc = 0; - int posavail = 
getTree().getRootBox().size(); // start after root nodes + int posavail = getTree().getRootBox().size(); while (true) { posavail = findNextAvailable(posavail, nNodes); - if (posavail >= this->topStack) break; // treated all nodes + if (posavail >= this->topStack) break; posocc = findNextOccupied(posavail); - if (posocc >= this->topStack) break; // treated all nodes + if (posocc >= this->topStack) break; moveNodes(nNodes, posocc, posavail); } - // find the last used node posocc = this->topStack - 1; while (this->stackStatus[posocc] == 0 and posocc > 0) posocc--; this->topStack = posocc + 1; @@ -254,21 +236,18 @@ template int NodeAllocator::compress() { } template int NodeAllocator::deleteUnusedChunks() { - // number of occupied chunks int nChunksTotal = getNChunks(); int nChunksUsed = getNChunksUsed(); - if (nChunksTotal == nChunksUsed) return 0; // no unused chunks + if (nChunksTotal == nChunksUsed) return 0; assert(nChunksTotal >= nChunksUsed); for (int i = nChunksUsed; i < nChunksTotal; i++) delete[](char *)(this->nodeChunks[i]); if (isShared()) { - // shared coefficients cannot be fully deallocated, only pointer is moved. getMemory().sh_end_ptr -= (nChunksTotal - nChunksUsed) * this->coefsPerNode * this->maxNodesPerChunk; } else { for (int i = nChunksUsed; i < nChunksTotal; i++) delete[] this->coefChunks[i]; } - // shrink the stacks this->nodeChunks.resize(nChunksUsed); this->coefChunks.resize(nChunksUsed); this->stackStatus.resize(nChunksUsed * this->maxNodesPerChunk); @@ -284,34 +263,27 @@ template void NodeAllocator::moveNodes(int nNodes, int assert(srcNode != nullptr); assert(dstNode != nullptr); - // check that all siblings are consecutive. Should never be root node. 
for (int i = 0; i < nNodes; i++) assert(this->stackStatus[dstIdx + i] == 0); - for (int i = 1; i < nNodes; i++) assert((srcNode + i)->parent->serialIx == srcNode->parent->serialIx); // siblings + for (int i = 1; i < nNodes; i++) assert((srcNode + i)->parent->serialIx == srcNode->parent->serialIx); - // just copy everything "as is" for (int i = 0; i < nNodes * this->sizeOfNode; i++) ((char *)dstNode)[i] = ((char *)srcNode)[i]; - // coefs have new adresses T *coefs_p = getCoefNoLock(dstIdx); - if (coefs_p == nullptr) NOT_IMPLEMENTED_ABORT; // Nodes without coefs not handled atm + if (coefs_p == nullptr) NOT_IMPLEMENTED_ABORT; for (int i = 0; i < nNodes; i++) (dstNode + i)->coefs = coefs_p + i * getNCoefs(); - // copy coefs to new adress if (not isShared()) { for (int i = 0; i < nNodes * this->coefsPerNode; i++) dstNode->coefs[i] = srcNode->coefs[i]; } else { - if (getMemory().rank == 0) // only master copy the data. careful with sync + if (getMemory().rank == 0) for (int i = 0; i < nNodes * this->coefsPerNode; i++) dstNode->coefs[i] = srcNode->coefs[i]; } - // update node for (int i = 0; i < nNodes; i++) (dstNode + i)->serialIx = dstIdx + i; - // update parent dstNode->parent->childSerialIx = dstIdx; for (int i = 0; i < nNodes; i++) dstNode->parent->children[i] = dstNode + i; - // update children for (int i = 0; i < nNodes; i++) { for (int j = 0; j < (dstNode + i)->getNChildren(); j++) { (dstNode + i)->children[j]->parentSerialIx = dstIdx + i; @@ -319,16 +291,13 @@ template void NodeAllocator::moveNodes(int nNodes, int } } - // mark moved nodes as occupied for (int i = 0; i < nNodes; i++) this->stackStatus[dstIdx + i] = 1; dstIdx += nNodes; - // delete "old" nodes for (int i = 0; i < nNodes; i++) this->stackStatus[srcIdx + i] = 0; for (int i = 0; i < nNodes; i++) (srcNode + i)->serialIx = -1; } -// Last positions on a chunk cannot be used if there is no place for nNodes siblings on the same chunk template int NodeAllocator::findNextAvailable(int sIdx, int nNodes) 
const { assert(sIdx >= 0); assert(sIdx < this->stackStatus.size()); @@ -362,7 +331,6 @@ template int NodeAllocator::findNextOccupied(int sIdx) return sIdx; } -/** Traverse tree and redefine pointer, counter and tables. */ template void NodeAllocator::reassemble() { MRCPP_SET_OMP_LOCK(); this->nNodes = 0; @@ -394,7 +362,6 @@ template void NodeAllocator::reassemble() { if (node_p->isEndNode()) getTree().squareNorm += node_p->getSquareNorm(); if (node_p->isEndNode()) getTree().endNodeTable.push_back(node_p); - // normally (intel) the virtual table does not change, but we overwrite anyway *(char **)(node_p) = this->cvptr; node_p->initNodeLock(); @@ -411,7 +378,7 @@ template void NodeAllocator::reassemble() { stack.push(child_p); child_p++; } - this->stackStatus[sIdx] = 1; // occupied + this->stackStatus[sIdx] = 1; } this->last_p = getNodeNoLock(this->topStack); assert(this->last_p != nullptr); @@ -448,4 +415,4 @@ template class NodeAllocator<1, ComplexDouble>; template class NodeAllocator<2, ComplexDouble>; template class NodeAllocator<3, ComplexDouble>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/NodeAllocator.h b/src/trees/NodeAllocator.h index 7e33b7e21..433e6a3c6 100644 --- a/src/trees/NodeAllocator.h +++ b/src/trees/NodeAllocator.h @@ -24,11 +24,19 @@ */ /** + * @file NodeAllocator.h + * @brief Chunked allocator for MWNode objects and their coefficient storage. * - * \date July, 2016 - * \author Peter Wind \n - * CTCC, University of Tromsø + * @details + * The allocator handles: + * - contiguous chunk allocation for **nodes** and **coefficients**, + * - a simple stack-like free list for fast allocation/deallocation, + * - optional backing via a shared memory block (@ref SharedMemory), + * - utility routines for compaction (@ref compress) and reassembly + * after structural edits, and + * - query helpers for chunk sizes and usage. * + * It is used by both @ref FunctionTree and @ref OperatorTree. 
*/ #pragma once @@ -40,71 +48,188 @@ namespace mrcpp { +/** + * @class NodeAllocator + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Scalar coefficient type (e.g., double, ComplexDouble). + * + * @brief Chunked memory manager for @ref MWNode objects and their coefficients. + * + * @details + * Nodes and their coefficient arrays are organized in **chunks** to reduce + * allocation overhead and improve spatial locality. Indices into this storage + * are referred to as *serial indices* (`sIdx`), which are stable within a + * given tree instance until compaction or reassembly occurs. + * + * ### Thread-safety + * When MRCPP is built with OpenMP support (`MRCPP_HAS_OMP`), critical regions + * in the allocator use locks to avoid races during allocation and pointer + * retrieval. Callers are still responsible for higher-level synchronization + * of tree edits. + */ template class NodeAllocator final { public: + /** + * @brief Construct an allocator bound to an operator tree. + * @param tree Owning @ref OperatorTree instance. + * @param mem Optional shared-memory provider for coefficients (may be `nullptr`). + * @param coefsPerNode Number of coefficients per node. + * @param nodesPerChunk Maximum number of nodes per chunk. + */ NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + + /** + * @brief Construct an allocator bound to a function tree. + * @param tree Owning @ref FunctionTree instance. + * @param mem Optional shared-memory provider for coefficients (may be `nullptr`). + * @param coefsPerNode Number of coefficients per node. + * @param nodesPerChunk Maximum number of nodes per chunk. + */ NodeAllocator(FunctionTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + + /// Non-copyable. NodeAllocator(const NodeAllocator &tree) = delete; + /// Non-assignable. NodeAllocator &operator=(const NodeAllocator &tree) = delete; + + /// Destructor; releases all owned chunks (nodes and coefficients). 
~NodeAllocator(); + /** + * @brief Allocate a consecutive block of nodes. + * @param nNodes Number of nodes to allocate. + * @param coefs If `true`, also ensure coefficient storage is available. + * @return Serial index (`sIdx`) of the first newly allocated node. + * + * @note May grow the underlying chunk arrays if space is exhausted. + */ int alloc(int nNodes, bool coefs = true); + + /** + * @brief Deallocate a node at serial index. + * @param sIdx Serial index of the node to free. + * + * @warning Does not shrink chunks; it only marks the slot as free. + */ void dealloc(int sIdx); + + /** + * @brief Deallocate coefficient arrays for all nodes. + * @note Node objects remain allocated; only their coefficient buffers are freed. + */ void deallocAllCoeff(); + /** + * @brief Pre-allocate a number of chunks. + * @param nChunks Number of chunks to append. + * @param coefs If `true`, allocate coefficient chunks as well. + * + * @details Useful to avoid repeated growth when the final size is known. + */ void init(int nChunks, bool coefs = true); + /** + * @brief Compact allocated nodes to reduce fragmentation. + * @return Number of nodes moved during compaction. + * + * @details After compaction, serial indices may change internally; users + * should refresh any external mappings that depend on `sIdx`. + */ int compress(); + + /** + * @brief Traverse the tree and rebuild node pointers, counters and tables. + * @details Recomputes the node count, the end-node table and the slot occupancy, + * and re-installs the vtable pointer and lock of every visited node. + */ void reassemble(); + + /** + * @brief Drop trailing unused chunks to release memory. + * @return Number of chunks deleted. + */ int deleteUnusedChunks(); + /** @name Introspection */ + ///@{ + /// @return Number of nodes currently in use (allocated and not freed). int getNNodes() const { return this->nNodes; } + /// @return Number of coefficients per node. int getNCoefs() const { return this->coefsPerNode; } + /// @return Total number of allocated node chunks. 
int getNChunks() const { return this->nodeChunks.size(); } + /// @return Number of chunks currently used by active nodes. int getNChunksUsed() const { return (this->topStack + this->maxNodesPerChunk - 1) / this->maxNodesPerChunk; } + /// @return Size in bytes of one node chunk (nodes only). int getNodeChunkSize() const { return this->maxNodesPerChunk * this->sizeOfNode; } + /// @return Size in bytes of one coefficient chunk. int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); } + /// @return Maximum number of nodes that fit in a single chunk. int getMaxNodesPerChunk() const { return this->maxNodesPerChunk; } + ///@} + /** + * @brief Get pointer to the coefficient array for a node. + * @param sIdx Serial index of the node. + * @return Pointer to `T[coefsPerNode]` or `nullptr` if unavailable. + */ T *getCoef_p(int sIdx); + + /** + * @brief Get pointer to a node object by serial index. + * @param sIdx Serial index of the node. + * @return Pointer to the @ref MWNode instance. + */ MWNode *getNode_p(int sIdx); + /// @return Pointer to the i-th coefficient chunk (contiguous block). T *getCoefChunk(int i) { return this->coefChunks[i]; } + /// @return Pointer to the i-th node chunk (contiguous block). MWNode *getNodeChunk(int i) { return this->nodeChunks[i]; } + /// Print allocator status (chunks, usage, sizes) to stdout. void print() const; protected: - int nNodes{0}; // number of nodes actually in use - int topStack{0}; // index of last node on stack - int sizeOfNode{0}; // sizeof(NodeType) - int coefsPerNode{0}; // number of coef for one node - int maxNodesPerChunk{0}; // max number of nodes per allocation - - std::vector stackStatus{}; - std::vector coefChunks{}; - std::vector *> nodeChunks{}; - - char *cvptr{nullptr}; // pointer to virtual table - MWNode *last_p{nullptr}; // pointer just after the last active node, i.e. 
where to put next node - MWTree *tree_p{nullptr}; // pointer to external object - SharedMemory *shmem_p{nullptr}; // pointer to external object - + int nNodes{0}; ///< Number of nodes actually in use. + int topStack{0}; ///< Index of the next free slot (stack top). + int sizeOfNode{0}; ///< `sizeof(NodeType)` used in chunks. + int coefsPerNode{0}; ///< Number of coefficients per node. + int maxNodesPerChunk{0}; ///< Capacity (in nodes) of each chunk. + + std::vector stackStatus{}; ///< Slot state (occupied/free). + std::vector coefChunks{}; ///< Coefficient chunk base pointers. + std::vector *> nodeChunks{};///< Node chunk base pointers. + + char *cvptr{nullptr}; ///< Vtable cookie to initialize node objects. + MWNode *last_p{nullptr}; ///< Pointer just past the last active node. + MWTree *tree_p{nullptr}; ///< Back-pointer to owning tree. + SharedMemory *shmem_p{nullptr}; ///< Optional shared-memory backend. + + /// @return Whether coefficients are backed by @ref SharedMemory. bool isShared() const { return (this->shmem_p != nullptr); } + /// @return Owning tree (non-const). MWTree &getTree() { return *this->tree_p; } + /// @return Shared-memory provider (non-const). SharedMemory &getMemory() { return *this->shmem_p; } + /// Internal: get coefficient pointer w/o locking (caller must synchronize). T *getCoefNoLock(int sIdx); + /// Internal: get node pointer w/o locking (caller must synchronize). MWNode *getNodeNoLock(int sIdx); + /// Move a block of nodes within chunks (used by @ref compress). void moveNodes(int nNodes, int srcIdx, int dstIdx); + /// Append a new chunk; if `coefs` is true, also append a coefficient chunk. void appendChunk(bool coefs); + /// Find next contiguous range of free slots starting at or after `sIdx`. int findNextAvailable(int sIdx, int nNodes) const; + /// Find next occupied slot at or after `sIdx`. 
int findNextOccupied(int sIdx) const; #ifdef MRCPP_HAS_OMP - omp_lock_t omp_lock; + omp_lock_t omp_lock; ///< OpenMP lock for critical sections. #endif }; diff --git a/src/trees/NodeBox.h b/src/trees/NodeBox.h index 7a7fc086e..732cee06b 100644 --- a/src/trees/NodeBox.h +++ b/src/trees/NodeBox.h @@ -23,6 +23,19 @@ * */ +/** + * @file NodeBox.h + * @brief Container that associates a regular grid of boxes with pointers to MW nodes. + * + * @details + * A NodeBox is a thin wrapper around @ref BoundingBox that, in addition to the + * geometric information (bounds, scale, periodicity), keeps a dense array of + * pointers to @ref MWNode objects—one slot per box at the underlying scale. + * It is used by @ref MWTree to store and access the set of **root nodes** + * at the world scale, and by other components whenever a compact mapping + * from box indices to nodes is required. + */ + #pragma once #include "BoundingBox.h" @@ -30,33 +43,125 @@ namespace mrcpp { +/** + * @class NodeBox + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Scalar type of the associated @ref MWNode (e.g., double, ComplexDouble). + * + * @brief Bounding box with node-pointer storage. + * + * @details + * The class allocates and owns a contiguous array of pointers, one per box + * defined by the base @ref BoundingBox. Pointers are not owned by NodeBox + * (ownership stays with the corresponding @ref MWTree allocator); NodeBox only + * stores and clears them. The counter @ref nOccupied tracks how many slots + * are non-null. + */ template class NodeBox final : public BoundingBox { public: + /** + * @brief Construct a NodeBox from a lower-corner index and number of boxes. + * @param idx Lower-corner @ref NodeIndex at the world scale. + * @param nb Number of boxes per dimension (defaults to all ones). + * + * @details The geometric information is taken from @p idx and @p nb. + * Internal pointer storage is allocated and initialized to `nullptr`. 
+ */ NodeBox(const NodeIndex &idx, const std::array &nb = {}); + + /** + * @brief Copy-construct from another NodeBox. + * @param box Source NodeBox. + * + * @details Copies the underlying @ref BoundingBox state and recreates + * pointer storage; node pointers themselves are copied (shallow). + */ NodeBox(const NodeBox &box); + + /** + * @brief Construct from a plain @ref BoundingBox. + * @param box Geometric box to take as base. + * + * @details Creates an equivalent NodeBox and allocates empty pointer storage. + */ NodeBox(const BoundingBox &box); + + /// Non-assignable (pointer storage is managed per-instance). NodeBox &operator=(const NodeBox &box) = delete; + + /// Destructor; releases the internal pointer array (not the nodes). ~NodeBox() override; + /** + * @brief Store a node pointer in slot @p idx. + * @param idx Linear box index in `[0, size())`. + * @param node Address of the node pointer to store (double pointer). + * + * @details The stored value is `*node`. If it was previously `nullptr` + * and the new value is non-null, @ref nOccupied is incremented. If it was + * non-null and is reset to `nullptr`, @ref nOccupied is decremented. + */ void setNode(int idx, MWNode **node); + + /** + * @brief Clear the node pointer in slot @p idx (set it to `nullptr`). + * @param idx Linear box index. + */ void clearNode(int idx) { this->nodes[idx] = nullptr; } + /** + * @name Node access (mutable) + * @{ + */ + + /** + * @brief Get the node stored at the box corresponding to @p idx. + * @param idx Node index at the world scale. + * @return Reference to the node. + * @pre The slot must contain a non-null pointer. + */ MWNode &getNode(NodeIndex idx); + + /** + * @brief Get the node stored at the box containing coordinate @p r. + * @param r A point in world coordinates. + * @return Reference to the node. + * @pre The slot must contain a non-null pointer. + */ MWNode &getNode(Coord r); + + /** + * @brief Get the node stored at linear index @p i. 
+ * @param i Linear box index (default 0). + * @return Reference to the node. + * @pre The slot must contain a non-null pointer. + */ MWNode &getNode(int i = 0); + ///@} + /** + * @name Node access (const) + * @{ + */ const MWNode &getNode(NodeIndex idx) const; const MWNode &getNode(Coord r) const; const MWNode &getNode(int i = 0) const; + ///@} + /// @return Number of slots with non-null pointers. int getNOccupied() const { return this->nOccupied; } + + /// @return Raw pointer to the internal node-pointer array (size == size()). MWNode **getNodes() { return this->nodes; } protected: - int nOccupied; ///< Number of non-zero pointers in box - MWNode **nodes; ///< Container of nodes + int nOccupied; ///< Number of non-null entries in @ref nodes. + MWNode **nodes; ///< Dense array of node pointers (size equals number of boxes). + /// Allocate and zero-initialize the @ref nodes array. void allocNodePointers(); + + /// Clear all stored pointers (does not delete nodes). void deleteNodes(); }; diff --git a/src/trees/NodeIndex.h b/src/trees/NodeIndex.h index f73ded001..f5236fa25 100644 --- a/src/trees/NodeIndex.h +++ b/src/trees/NodeIndex.h @@ -23,46 +23,96 @@ * */ -/* - * \breif Simple storage class for scale and translation indexes. - * The usefulness of the class becomes evident when examining - * the parallel algorithms for projection & friends. +/** + * @file NodeIndex.h + * @brief Compact storage for multiresolution node indices (scale and translation). + * + * @details + * A NodeIndex encodes the position of a node in a multiresolution tree by: + * - an integer **scale** \f$N\f$ (node size \f$\propto 2^{-N}\f$), and + * - an integer **translation** vector \f$\mathbf{L}\in\mathbb{Z}^D\f$. + * + * The class provides helpers to obtain the parent/child indices, comparisons + * (including a strict weak ordering for associative containers), and utilities + * to test ancestry/sibling relations (see free functions @ref related and + * @ref siblings below). 
*/ #pragma once +#include #include #include namespace mrcpp { +/** + * @class NodeIndex + * @tparam D Spatial dimension (1, 2 or 3). + * @brief Scale–translation pair identifying a node in a MW tree. + * + * @details + * The scale is stored as a short integer; the translation is a D-dimensional + * integer vector. The translation follows the standard dyadic refinement: + * children are obtained by doubling each component and adding the child-bit + * extracted from the child index. + */ template class NodeIndex final { public: - // regular constructors + /** + * @name Constructors + * @{ + */ + + /** + * @brief Construct from scale and translation. + * @param n Scale \f$N\f$. + * @param l Translation vector \f$\mathbf{L}\f$ (defaults to all zeros). + */ NodeIndex(int n = 0, const std::array &l = {}) : N(static_cast(n)) , L(l) {} - // relative constructors + /** + * @brief Index of the parent node (one level coarser). + * @return Parent index \f$(N-1, \lfloor L/2 \rfloor)\f$ with correct rounding for negatives. + */ NodeIndex parent() const { std::array l; for (int d = 0; d < D; d++) l[d] = (this->L[d] < 0) ? (this->L[d] - 1) / 2 : this->L[d] / 2; return NodeIndex(this->N - 1, l); } + + /** + * @brief Index of a child node (one level finer). + * @param cIdx Child linear index in \f$[0, 2^D)\f$; bit @c d selects the offset in dimension @c d. + * @return Child index \f$(N+1, 2L + b)\f$ with @c b given by the bits of @p cIdx. + */ NodeIndex child(int cIdx) const { std::array l; for (int d = 0; d < D; d++) l[d] = (2 * this->L[d]) + ((cIdx >> d) & 1); return NodeIndex(this->N + 1, l); } + /// @} - // comparisons + /** + * @name Comparisons + * @{ + */ + /// Inequality. bool operator!=(const NodeIndex &idx) const { return not(*this == idx); } + + /// Equality (same scale and same translation vector). 
bool operator==(const NodeIndex &idx) const { bool out = (this->N == idx.N); for (int d = 0; d < D; d++) out &= (this->L[d] == idx.L[d]); return out; } - // defines an order of the nodes (allows to use std::map) + + /** + * @brief Strict weak ordering (by scale, then lexicographically by translation). + * @details Enables usage as key in @c std::map / @c std::set. + */ bool operator<(const NodeIndex &idy) const { const NodeIndex &idx = *this; if (idx.N != idy.N) return idx.N < idy.N; @@ -70,20 +120,49 @@ template class NodeIndex final { if (idx.L[1] != idy.L[1] or D < 3) return idx.L[1] < idy.L[1]; return idx.L[2] < idy.L[2]; } + /// @} - // setters + /** + * @name Setters + * @{ + */ + /// Set the scale. void setScale(int n) { this->N = static_cast(n); } + + /// Set the translation vector. void setTranslation(const std::array &l) { this->L = l; } + /// @} - // value getters + /** + * @name Getters (values) + * @{ + */ + /// @return The scale \f$N\f$. int getScale() const { return this->N; } + + /// @return Component @p d of the translation vector. int getTranslation(int d) const { return this->L[d]; } + + /// @return Full translation vector. std::array getTranslation() const { return this->L; } + /// @} - // reference getters + /** + * @name Getters (references) + * @{ + */ + /// Mutable access to translation component @p d. int &operator[](int d) { return this->L[d]; } + + /// Const access to translation component @p d. const int &operator[](int d) const { return this->L[d]; } + /// @} + /** + * @brief Print as "[ N | L0, L1, ... ]". + * @param o Output stream. + * @return The stream @p o. + */ std::ostream &print(std::ostream &o) const { o << "[ " << std::setw(3) << this->N << " | "; for (int d = 0; d < D - 1; d++) o << std::setw(4) << this->L[d] << ", "; @@ -92,16 +171,29 @@ template class NodeIndex final { } private: - short int N{0}; ///< Length scale index 2^N - std::array L{}; ///< Translation index [x,y,z,...] 
+ short int N{0}; ///< Length-scale index \f$N\f$ (node size \f$\propto 2^{-N}\f$). + std::array L{}; ///< Translation vector \f$\mathbf{L}\f$. }; -/** @brief ostream printer */ +/** + * @brief Stream inserter for @ref NodeIndex. + * @relates NodeIndex + */ template std::ostream &operator<<(std::ostream &o, const NodeIndex &idx) { return idx.print(o); } -/** @brief Check whether indices are directly related (not sibling) */ +/** + * @brief Test if two indices are on the same branch (ancestor/descendant relation). + * @tparam D Dimension. + * @param a First index. + * @param b Second index. + * @return @c true if the coarser index equals the finer index truncated to the coarser scale. + * + * @details + * Let @c sr be the shallower (coarser) of @p a and @p b, and @c jr the deeper (finer). + * They are related if \f$\mathbf{L}_{\text{sr}} = \lfloor \mathbf{L}_{\text{jr}} / 2^{N_{\text{jr}}-N_{\text{sr}}}\rfloor\f$. + */ template bool related(const NodeIndex &a, const NodeIndex &b) { const auto &sr = (a.getScale() < b.getScale()) ? a : b; const auto &jr = (a.getScale() >= b.getScale()) ? a : b; @@ -112,9 +204,15 @@ template bool related(const NodeIndex &a, const NodeIndex &b) { return related; } -/** @brief Check whether indices are siblings, i.e. same parent */ +/** + * @brief Test if two indices are siblings (share the same parent). + * @tparam D Dimension. + * @param a First index. + * @param b Second index. + * @return @c true if @p a.parent() == @p b.parent(). 
+ */ template bool siblings(const NodeIndex &a, const NodeIndex &b) { return (a.parent() == b.parent()); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/OperatorNode.cpp b/src/trees/OperatorNode.cpp index 37f576eac..74679d0bd 100644 --- a/src/trees/OperatorNode.cpp +++ b/src/trees/OperatorNode.cpp @@ -42,17 +42,6 @@ void OperatorNode::dealloc() { this->tree->getNodeAllocator().dealloc(sIdx); } -/** - * @brief Calculate one specific component norm of the OperatorNode (TODO: needs to be specified more). - * - * @param[in] i: TODO: deens to be specified - * - * @details OperatorNorms are defined as matrix 2-norms that are expensive to calculate. - * Thus we calculate some cheaper upper bounds for this norm for thresholding. - * First a simple vector norm, then a product of the 1- and infinity-norm. - * (TODO: needs to be more presiced). - * - */ double OperatorNode::calcComponentNorm(int i) const { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); @@ -64,7 +53,7 @@ double OperatorNode::calcComponentNorm(int i) const { int kp1 = this->getKp1(); int kp1_d = this->getKp1_d(); const VectorXd &comp_vec = coef_vec.segment(i * kp1_d, kp1_d); - const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1); // one can use MatrixXd OperatorNode::getComponent(int i) + const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1); double norm = 0.0; double vecNorm = comp_vec.norm(); @@ -79,20 +68,6 @@ double OperatorNode::calcComponentNorm(int i) const { return norm; } -/** @brief Matrix elements of the non-standard form. - * - * @param[in] i: Index enumerating the matrix type in the non-standard form. - * @returns A submatrix of \f$ (k + 1) \times (k + 1) \f$-size from the non-standard form. - * - * @details OperatorNode is uniquely associted with a scale \f$ n \f$ and translation - * \f$ l = -2^n + 1, \ldots, 2^n = 1 \f$. 
- * The non-standard form \f$ T_n, B_n, C_n, A_n \f$ defines matrices - * \f$ \sigma_l^n, \beta_l^n, \gamma_l^n, \alpha_l^n \f$ for a given pair \f$ (n, l) \f$. - * One of these matrices is returned by the method according to the choice of the index parameter - * \f$ i = 0, 1, 2, 3 \f$, respectively. - * For example, \f$ \alpha_l^n = \text{getComponent}(3) \f$. - * - */ MatrixXd OperatorNode::getComponent(int i) { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); @@ -120,7 +95,6 @@ void OperatorNode::createChildren(bool coefs) { this->childSerialIx = sIdx; for (int cIdx = 0; cIdx < nChildren; cIdx++) { - // construct into allocator memory new (child_p) OperatorNode(this, cIdx); this->children[cIdx] = child_p; @@ -155,4 +129,4 @@ void OperatorNode::deleteChildren() { this->setIsEndNode(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/OperatorNode.h b/src/trees/OperatorNode.h index f7d313d9d..406574ee8 100644 --- a/src/trees/OperatorNode.h +++ b/src/trees/OperatorNode.h @@ -23,44 +23,129 @@ * */ +/** + * @file OperatorNode.h + * @brief Node type for operator trees (2D non-standard form blocks). + * + * @details + * This header declares @ref mrcpp::OperatorNode, a concrete node type used by + * @ref mrcpp::OperatorTree. It specializes @ref mrcpp::MWNode with spatial + * dimension @c D=2 to store and manage the coefficients of non-standard form + * operator blocks (typically the corner blocks \f$T, A, B, C\f$). + * + * The class offers typed accessors to the owning @ref mrcpp::OperatorTree and + * to parent/children nodes, and overrides a few hooks related to allocation, + * child generation, and norm computation that are specific to operator nodes. + */ + #pragma once +#include // for Eigen::MatrixXd + #include "MWNode.h" #include "OperatorTree.h" namespace mrcpp { +/** + * @class OperatorNode + * @brief Leaf/branch node used inside @ref OperatorTree (fixed to 2D). 
+ * + * @details + * An OperatorNode stores the \f$2^D(k+1)^D\f$ multiwavelet coefficients for an + * operator block at a given scale/translation and exposes: + * - typed getters for the owning @ref OperatorTree and relatives, + * - overrides for child creation/deletion and coefficient management, + * - an overridden component-norm calculation suitable for operator blocks, and + * - a helper to extract a single component block as an Eigen matrix. + * + * @note The spatial dimension is fixed to @c D=2 for operator trees. + */ class OperatorNode final : public MWNode<2> { public: + /** @name Typed accessors to tree/relatives */ + ///@{ + /// @return Owning operator tree (non-const). OperatorTree &getOperTree() { return static_cast(*this->tree); } + /// @return Parent node as @ref OperatorNode (non-const). OperatorNode &getOperParent() { return static_cast(*this->parent); } + /// @return Child @p i as @ref OperatorNode (non-const). OperatorNode &getOperChild(int i) { return static_cast(*this->children[i]); } + /// @return Owning operator tree (const). const OperatorTree &getOperTree() const { return static_cast(*this->tree); } + /// @return Parent node as @ref OperatorNode (const). const OperatorNode &getOperParent() const { return static_cast(*this->parent); } + /// @return Child @p i as @ref OperatorNode (const). const OperatorNode &getOperChild(int i) const { return static_cast(*this->children[i]); } + ///@} + /** + * @brief Create children nodes. + * @param coefs If @c true, also allocate coefficient storage for each child. + * @details Overrides @ref MWNode::createChildren to honor operator-specific + * allocation and bookkeeping. + */ void createChildren(bool coefs) override; + + /** + * @brief Generate children on demand (without necessarily allocating coefs). + * @details Overrides @ref MWNode::genChildren to implement the operator-tree + * generation policy. 
+ */ void genChildren() override; + + /** + * @brief Delete all children nodes (and their coefficient storage). + * @details Overrides @ref MWNode::deleteChildren with operator-specific cleanup. + */ void deleteChildren() override; friend class OperatorTree; friend class NodeAllocator<2>; protected: + /** @name Construction and assignment */ + ///@{ + /// Default constructor (used by allocators). OperatorNode() : MWNode<2>(){}; + /// Root-node constructor (called by the owning tree). OperatorNode(MWTree<2> *tree, int rIdx) : MWNode<2>(tree, rIdx){}; + /// Child-node constructor (called when splitting a parent). OperatorNode(MWNode<2> *parent, int cIdx) : MWNode<2>(parent, cIdx){}; + /// Non-copyable. OperatorNode(const OperatorNode &node) = delete; + /// Non-assignable. OperatorNode &operator=(const OperatorNode &node) = delete; + /// Virtual destructor. ~OperatorNode() = default; + ///@} + /** + * @brief Release coefficient storage (if owned) and reset node state. + * @details Overrides @ref MWNode::dealloc to ensure operator-node invariants. + */ void dealloc() override; + + /** + * @brief Estimate the norm of a specific component (scaling/wavelet block). + * @param i Component index in \f$[0, 2^D)\f$. + * @return Inexpensive estimate (upper bound) of the component's matrix 2-norm; not a squared norm. + * @details Overrides @ref MWNode::calcComponentNorm to match the operator + * interpretation of components (e.g., corner blocks in non-standard form). + */ double calcComponentNorm(int i) const override; + + /** + * @brief Extract a component block as a dense matrix. + * @param i Component index in \f$[0, 2^D)\f$. + * @return A matrix view/copy of the component coefficients (size \f$(k+1)\times(k+1)\f$). + * @note Primarily intended for diagnostics and I/O. 
+ */ Eigen::MatrixXd getComponent(int i); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/OperatorTree.cpp b/src/trees/OperatorTree.cpp index 890f2677c..f91f13a47 100644 --- a/src/trees/OperatorTree.cpp +++ b/src/trees/OperatorTree.cpp @@ -98,14 +98,6 @@ void OperatorTree::clearBandWidth() { this->bandWidth = nullptr; } -/** @brief Calculates band widths of the non-standard form matrices. - * - * @param[in] prec: Precision used for thresholding - * - * @details It is starting from \f$ l = 0 \f$ and updating the band width value each time we encounter - * considerable value while keeping increasing \f$ l \f$, that stands for the distance to the diagonal. - * - */ void OperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -133,32 +125,10 @@ void OperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } -/** @brief Checks if the distance to diagonal is bigger than the operator band width. - * - * @param[in] oTransl: distance to diagonal - * @param[in] o_depth: scaling order - * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. - * - * @returns True if \b oTransl is outside of the band and False otherwise. - * - */ bool OperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { return abs(oTransl) > this->bandWidth->getWidth(o_depth, idx); } -/** @brief Cleans up end nodes. - * - * @param[in] trust_scale: there is no cleaning down below \b trust_scale (it speeds up operator building). - * - * @details Traverses the tree and rewrites end nodes having branch node twins, - * i. e. identical with respect to scale and translation. - * This method is very handy, when an adaptive operator construction - * can make a significunt noise at low scaling depth. 
- * Its need comes from the fact that mwTransform up cannot override - * rubbish that can potentially stick to end nodes at a particular level, - * and as a result spread further up to the root with mwTransform. - * - */ void OperatorTree::removeRoughScaleNoise(int trust_scale) { MWNode<2> *p_rubbish; // possibly inexact end node MWNode<2> *p_counterpart; // exact branch node @@ -191,12 +161,6 @@ void OperatorTree::getMaxTranslations(VectorXi &maxTransl) { } } -/** Make 1D lists, adressable from [-l, l] scale by scale, of operator node - * pointers for fast operator retrieval. This method is not thread safe, - * since it projects missing operator nodes on the fly. Hence, it must NEVER - * be called within a parallel region, or all hell will break loose. This is - * not really a problem, but you have been warned. - */ void OperatorTree::setupOperNodeCache() { int nScales = this->nodesAtDepth.size(); int rootScale = this->getRootScale(); @@ -245,12 +209,6 @@ void OperatorTree::clearOperNodeCache() { } } -/** Regenerate all s/d-coeffs by backtransformation, starting at the bottom and - * thus purifying all coefficients. Option to overwrite or add up existing - * coefficients of BranchNodes (can be used after operator application). - * Reimplementation of MWTree::mwTransform() without OMP, as calculation - * of OperatorNorm is done using random vectors, which is non-deterministic - * in parallel. FunctionTrees should be fine. */ void OperatorTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -264,12 +222,6 @@ void OperatorTree::mwTransformUp() { } } -/** Regenerate all scaling coeffs by MW transformation of existing s/w-coeffs - * on coarser scales, starting at the rootNodes. Option to overwrite or add up - * existing scaling coefficients (can be used after operator application). 
- * Reimplementation of MWTree::mwTransform() without OMP, as calculation - * of OperatorNorm is done using random vectors, which is non-deterministic - * in parallel. FunctionTrees should be fine. */ void OperatorTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -287,4 +239,4 @@ std::ostream &OperatorTree::print(std::ostream &o) const { return MWTree<2>::print(o); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/OperatorTree.h b/src/trees/OperatorTree.h index 83be4789a..792bcde2a 100644 --- a/src/trees/OperatorTree.h +++ b/src/trees/OperatorTree.h @@ -23,55 +23,157 @@ * */ +/** + * @file OperatorTree.h + * @brief Declaration of the multiwavelet operator tree (2D non-standard form). + * + * @details + * An @ref mrcpp::OperatorTree stores a bivariate (D=2) operator in + * multiwavelet (MW) **non-standard form**, i.e. split into corner blocks + * \f$T, A, B, C\f$ at each scale. It provides: + * - adaptive storage and traversal via the base @ref mrcpp::MWTree, + * - optional **band-width screening** of corner blocks through @ref BandWidth, + * - cached direct access to operator nodes to avoid repeated tree lookups, and + * - MW up/down transforms specialized for operator data. + * + * Only trees built from **compatible** MRAs (same domain, order, and depth) + * should be combined in further computations. + */ + #pragma once +#include // for Eigen::VectorXi + #include "MWTree.h" #include "NodeAllocator.h" namespace mrcpp { +// Forward declarations to avoid including the full headers here. +class BandWidth; +class OperatorNode; + +/** + * @class OperatorTree + * @brief Base class for 2D operator trees in non-standard form. + * + * @details + * The tree is organized like any MW tree (roots/branches/leaves) but stores + * operator coefficients. 
A per-depth **band width** (distance from the main + * diagonal in translation space) can be estimated to prune negligible corner + * blocks during application. + */ class OperatorTree : public MWTree<2> { public: + /** + * @brief Construct an operator tree. + * @param mra Multi-resolution analysis (domain + basis) shared by the tree. + * @param np “Norm precision” used when estimating/screening norms. + * @param name Optional diagnostic name. + */ OperatorTree(const MultiResolutionAnalysis<2> &mra, double np, const std::string &name = "nn"); + OperatorTree(const OperatorTree &tree) = delete; OperatorTree &operator=(const OperatorTree &tree) = delete; + + /// Virtual destructor. virtual ~OperatorTree() override; + /// @return The precision value used for norm-based screening. double getNormPrecision() const { return this->normPrec; } + /** + * @brief Release any existing @ref BandWidth object and set the pointer to null. + * @details Call this if the operator has changed and band widths must be recomputed. + */ void clearBandWidth(); + + /** + * @brief Estimate per-depth band widths for the corner matrices. + * @param prec Threshold used when deciding if a component is significant. + * If negative, the internal @ref getNormPrecision() is used. + * @details Populates the internally owned @ref BandWidth structure. + */ virtual void calcBandWidth(double prec = -1.0); + + /** + * @brief Quick band-screening predicate. + * @param oTransl Distance from the diagonal in translation space (|l\_bra−l\_ket|). + * @param o_depth Depth/scale index where the test is performed. + * @param idx Corner block selector: 0 = T, 1 = C, 2 = B, 3 = A (convention as used internally). + * @return @c true if @p oTransl is **outside** the currently stored band at @p o_depth for block @p idx. + * @note Requires a previously computed @ref BandWidth (see @ref calcBandWidth()). 
+ */ virtual bool isOutsideBand(int oTransl, int o_depth, int idx); + + /** + * @brief Dampen/remove rough-scale numerical noise in the operator. + * @param trust_scale Scales finer (greater or equal to this) are trusted and preserved. + * @details Rewrites end nodes that have branch-node twins (identical scale and translation); useful when adaptive operator construction leaves noise at low scaling depth. + */ void removeRoughScaleNoise(int trust_scale = 10); + /** + * @brief Build cache tables for direct @ref OperatorNode access. + * @details Populates @ref nodePtrStore and @ref nodePtrAccess to avoid repeated lookups. Not thread safe: missing operator nodes are projected on the fly, so never call this inside a parallel region. + */ void setupOperNodeCache(); + + /// @brief Clear the operator-node caches built by @ref setupOperNodeCache(). void clearOperNodeCache(); + /// @return Mutable reference to the stored @ref BandWidth (must exist). BandWidth &getBandWidth() { return *this->bandWidth; } + /// @return Const reference to the stored @ref BandWidth (must exist). const BandWidth &getBandWidth() const { return *this->bandWidth; } + /** + * @brief Fast accessor to a node by (scale, diagonal distance). + * @param n Scale (depth measured from the root scale). + * @param l Distance to the diagonal (translation difference); l=0 hits the diagonal. + * @return Reference to the requested @ref OperatorNode. + * @warning Valid only after calling @ref setupOperNodeCache(). + */ OperatorNode &getNode(int n, int l) { return *nodePtrAccess[n][l]; - } ///< TODO: It has to be specified more. - ///< \b l is distance to the diagonal. + } + /// Const overload of @ref getNode(int,int). const OperatorNode &getNode(int n, int l) const { return *nodePtrAccess[n][l]; } + /** + * @brief Downward MW transform specialized for operator data. + * @param overwrite If @c true, existing scaling coefficients are overwritten; otherwise the regenerated coefficients are added to them. + */ void mwTransformDown(bool overwrite) override; + + /// @brief Upward MW transform specialized for operator data (serial, no OpenMP). void mwTransformUp() override; + // Bring MWTree overloads into scope.
using MWTree<2>::getNode; using MWTree<2>::findNode; protected: - const double normPrec; - BandWidth *bandWidth; - OperatorNode ***nodePtrStore; ///< Avoids tree lookups - OperatorNode ***nodePtrAccess; ///< Center (l=0) of node list + const double normPrec; ///< Default precision used in norm-based heuristics. + BandWidth *bandWidth; ///< Optional per-depth band-width description (owned). + /// @name Operator-node cache (built by @ref setupOperNodeCache()). + ///@{ + OperatorNode ***nodePtrStore; ///< Storage for contiguous (n,l) -> node pointers. + OperatorNode ***nodePtrAccess; ///< Centered view so that index l=0 addresses the diagonal. + ///@} + + /// @brief Allocate all root nodes required by the current world box. void allocRootNodes(); + + /** + * @brief Compute the maximum translation index at each depth. + * @param[out] maxTransl Vector whose @c d-th entry stores the maximum |l| at that depth. + */ void getMaxTranslations(Eigen::VectorXi &maxTransl); + /// @brief Human-readable dump of tree statistics. std::ostream &print(std::ostream &o) const override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/trees/TreeIterator.h b/src/trees/TreeIterator.h index 82ea49eb9..b9c913570 100644 --- a/src/trees/TreeIterator.h +++ b/src/trees/TreeIterator.h @@ -23,6 +23,35 @@ * */ +/** + * @file TreeIterator.h + * @brief Iteration helpers for traversing multiwavelet trees. + * + * @details + * This header provides a generic depth-aware iterator over @ref MWTree nodes. + * It supports different **traversal directions** and **node-ordering schemes**, + * selected via constants defined in @c MRCPP/constants.h: + * - Traversal mode: @c TopDown or @c BottomUp + * - Iterator type: @c Lebesgue (Z-order) or @c Hilbert (space-filling) + * + * The iterator yields @ref MWNode instances from one or more root nodes, + * honoring a user-provided maximum depth and whether *generated* (non-end) + * nodes should be returned. 
+ * + * @par Example + * @code{.cpp} + * using namespace mrcpp; + * TreeIterator<3,double> it(tree, TopDown, Lebesgue); + * it.setReturnGenNodes(true); // include generated/branch nodes + * it.setMaxDepth(5); // restrict to depth <= 5 + * + * while (it.next()) { + * MWNode<3,double> &nd = it.getNode(); + * // ... inspect nd, read coefficients/norms, etc. + * } + * @endcode + */ + #pragma once #include "MRCPP/constants.h" @@ -30,36 +59,117 @@ namespace mrcpp { +/** + * @class TreeIterator + * @brief Stateful iterator for traversing an @ref MWTree. + * + * @tparam D Spatial dimensionality (1, 2, or 3). + * @tparam T Coefficient type (e.g., @c double or @c ComplexDouble). + * + * @details + * The iterator walks the tree starting from each root node, producing nodes + * according to: + * - a **traversal direction** (@c TopDown or @c BottomUp), and + * - an **ordering scheme** within siblings (@c Lebesgue or @c Hilbert). + * + * The behavior can be refined with: + * - @ref setReturnGenNodes() to toggle inclusion of generated (non-leaf) nodes, + * - @ref setMaxDepth() to limit the traversal depth, + * - @ref setTraverse() / @ref setIterator() to change policies at runtime. + * + * The iteration state is represented by a small internal linked stack of + * @ref IteratorNode frames. + */ template class TreeIterator { public: + /** + * @brief Construct a detached iterator (no tree bound yet). + * @param traverse Traversal mode (e.g., @c TopDown or @c BottomUp). + * @param iterator Node-ordering mode (e.g., @c Lebesgue or @c Hilbert). + * + * @note Call @ref init() before the first @ref next() if you use this ctor. + */ TreeIterator(int traverse = TopDown, int iterator = Lebesgue); + + /** + * @brief Construct an iterator bound to a tree. + * @param tree Tree to traverse. + * @param traverse Traversal mode (e.g., @c TopDown or @c BottomUp). + * @param iterator Node-ordering mode (e.g., @c Lebesgue or @c Hilbert). 
+ */ TreeIterator(MWTree &tree, int traverse = TopDown, int iterator = Lebesgue); + + /// @brief Destructor (releases internal traversal state). virtual ~TreeIterator(); + /** + * @brief Include/exclude generated (non-end) nodes in the iteration stream. + * @param i If @c true, generated nodes are returned by @ref next(). + * If @c false, only end (leaf) nodes are produced. + */ void setReturnGenNodes(bool i = true) { this->returnGenNodes = i; } + + /** + * @brief Set maximum depth measured from the root scale. + * @param depth Non-negative maximum depth; if negative, no limit is applied. + */ void setMaxDepth(int depth) { this->maxDepth = depth; } + + /** + * @brief Change traversal mode at runtime. + * @param traverse @c TopDown or @c BottomUp (see @c MRCPP/constants.h). + * @warning Changing mode invalidates in-flight assumptions; call before @ref init(). + */ void setTraverse(int traverse); + + /** + * @brief Change sibling-ordering policy at runtime. + * @param iterator @c Lebesgue or @c Hilbert (see @c MRCPP/constants.h). + * @warning Changing mode invalidates in-flight assumptions; call before @ref init(). + */ void setIterator(int iterator); + /** + * @brief Bind the iterator to a tree and reset traversal state. + * @param tree Tree to traverse. + */ void init(MWTree &tree); + + /** + * @brief Advance to the next node according to the current policy. + * @return @c true if a node is available (use @ref getNode()), @c false when finished. + */ bool next(); + + /** + * @brief Move the cursor to the parent of the current node (if any). + * @return @c true if the parent exists and becomes current, otherwise @c false. + */ bool nextParent(); + + /** + * @brief Access the current node. + * @return Reference to the node yielded by the last successful @ref next() / @ref nextParent(). 
+ */ MWNode &getNode() { return *this->state->node; } friend class IteratorNode; protected: - int root; - int nRoots; - int mode; - int type; - int maxDepth; - bool returnGenNodes{true}; - IteratorNode *state; - IteratorNode *initialState; + int root{0}; ///< Index of the current root box. + int nRoots{0}; ///< Number of root boxes in the tree. + int mode{TopDown}; ///< Traversal mode (@c TopDown or @c BottomUp). + int type{Lebesgue}; ///< Iterator type (@c Lebesgue or @c Hilbert). + int maxDepth{-1}; ///< Max depth limit; negative means unlimited. + bool returnGenNodes{true}; ///< If @c true, also return generated (non-leaf) nodes. + IteratorNode *state{nullptr}; ///< Current traversal frame. + IteratorNode *initialState{nullptr}; ///< Initial frame for the current root. + /// @brief Map logical child order [0..2^D) to physical child index based on @ref type. int getChildIndex(int i) const; + /// @name Traversal helpers + ///@{ bool tryParent(); bool tryChild(int i); bool tryNode(); @@ -68,18 +178,40 @@ template class TreeIterator { void removeState(); bool checkDepth(const MWNode &node) const; bool checkGenerated(const MWNode &node) const; + ///@} }; +/** + * @class IteratorNode + * @brief Lightweight frame holding traversal state for one MW node. + * + * @tparam D Spatial dimensionality (1, 2, or 3). + * @tparam T Coefficient type (e.g., @c double or @c ComplexDouble). + * + * @details + * The iterator maintains a small linked list (stack) of these frames while + * walking the tree. Each frame keeps: + * - a pointer to the node, + * - a link to the previous frame, + * - completion flags for the current node, its parent, and its children. + */ template class IteratorNode final { public: - MWNode *node; - IteratorNode *next; - bool doneNode; - bool doneParent; - bool doneChild[1 << D]; + MWNode *node; ///< Current node. + IteratorNode *next; ///< Previous frame in the stack. + bool doneNode; ///< Whether the node itself has been yielded. 
+ bool doneParent; ///< Whether the parent transition has been attempted. + bool doneChild[1 << D]; ///< Whether each child has been attempted. + /** + * @brief Construct a traversal frame. + * @param nd Pointer to the MW node represented by this frame. + * @param nx Link to the previous frame (can be @c nullptr). + */ IteratorNode(MWNode *nd, IteratorNode *nx = nullptr); + + /// @brief Recursively delete the linked frames that follow this one. ~IteratorNode() { delete this->next; } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From 5d86c59ab5049e816a89acb2f8fa17d7369f87f3 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Wed, 29 Oct 2025 17:52:38 +0300 Subject: [PATCH 09/51] Update MultiResolutionAnalysis.h --- src/trees/MultiResolutionAnalysis.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/trees/MultiResolutionAnalysis.h b/src/trees/MultiResolutionAnalysis.h index 0282ab464..891a3fd6e 100644 --- a/src/trees/MultiResolutionAnalysis.h +++ b/src/trees/MultiResolutionAnalysis.h @@ -38,7 +38,7 @@ namespace mrcpp { * @brief Declaration of the MultiResolutionAnalysis class template. 
* * @details - * A **MultiResolutionAnalysis (MRA)** bundles the information that must be + * A MultiResolutionAnalysis (MRA) bundles the information that must be * shared by compatible functions and operators: * - the computational domain (see @ref BoundingBox), * - the multiresolution scaling basis (see @ref ScalingBasis), and @@ -52,17 +52,17 @@ namespace mrcpp { * using MRA3 = mrcpp::MultiResolutionAnalysis<3>; * * // Domain: [-4, 4]^3 with automatically chosen root scale - * mrcpp::BoundingBox<3> world({-4, 4}); + * mrcpp::BoundingBox<3> world({-4.0, 4.0}); * - * // Build a 3D MRA with Legendre order=7 and maxDepth=12 - * mrcpp::ScalingBasis basis(Legendre, /*order=*/7); - * MRA3 mra(world, basis, /*depth=*/12); + * // Build a 3D MRA with Legendre, order = 7, depth = 12 + * mrcpp::ScalingBasis basis(Legendre, 7); + * MRA3 mra(world, basis, 12); * * // Query information - * int order = mra.getOrder(); - * int maxScale = mra.getMaxScale(); - * auto &box = mra.getWorldBox(); - * auto &sbasis = mra.getScalingBasis(); + * int order = mra.getOrder(); + * int maxScale = mra.getMaxScale(); + * auto &box = mra.getWorldBox(); + * auto &sbasis = mra.getScalingBasis(); * @endcode */ From b075a8beb037855e20209461b175967dbc5f6291 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 08:17:24 +0300 Subject: [PATCH 10/51] Done doxygen documentation in .h files within utils folder --- src/utils/Bank.h | 494 +++++++++++++++-- src/utils/CompFunction.cpp | 990 +++++++++++------------------------ src/utils/CompFunction.h | 570 +++++++++++++++++--- src/utils/Plotter.cpp | 131 +---- src/utils/Plotter.h | 245 ++++++++- src/utils/Printer.cpp | 78 +-- src/utils/Printer.h | 358 +++++++++---- src/utils/Timer.cpp | 13 - src/utils/Timer.h | 126 ++++- src/utils/details.cpp | 26 +- src/utils/details.h | 109 +++- src/utils/math_utils.cpp | 85 +-- src/utils/math_utils.h | 230 +++++++- src/utils/mpi_utils.cpp | 41 -- src/utils/mpi_utils.h | 199 ++++++- 
src/utils/omp_utils.cpp | 1 - src/utils/omp_utils.h | 106 +++- src/utils/parallel.cpp | 199 ++----- src/utils/parallel.h | 248 ++++++++- src/utils/periodic_utils.cpp | 2 - src/utils/periodic_utils.h | 94 +++- src/utils/tree_utils.cpp | 54 +- src/utils/tree_utils.h | 143 ++++- 23 files changed, 2989 insertions(+), 1553 deletions(-) diff --git a/src/utils/Bank.h b/src/utils/Bank.h index 69719c530..57ce028b3 100644 --- a/src/utils/Bank.h +++ b/src/utils/Bank.h @@ -1,4 +1,55 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. + * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see <https://www.gnu.org/licenses/>. + * + * For information on the complete list of contributors to MRCPP, see: + * <https://mrcpp.readthedocs.io/> + */ + #pragma once +/** + * @file + * @brief Distributed “Bank” service for sharing functions and raw data across MPI ranks. + * + * This header declares a minimal runtime that lets multiple MPI ranks exchange: + * - Multiresolution functions (`CompFunction<3>`) and + * - Raw numeric buffers (`double` / `ComplexDouble`) + * + * The service is organized as a central **Bank** that maintains per-client + * accounts.
Each rank interacts with the Bank through a lightweight RAII + * client, **BankAccount**. A simple **TaskManager** piggybacks on the same + * infrastructure to distribute integer-indexed tasks and collect “ready” + * notifications. + * + * @par High-level design + * - The **Bank** lives on one or more designated MPI ranks (see `mpi::is_bank` + * in the runtime). Non-bank ranks act as clients. + * - Clients open an **account** and then `put_*` or `get_*` by integer IDs or + * by `NodeIndex<3>` keys. The Bank tracks sizes in kB for accounting. + * - The **TaskManager** provides a tiny work-queue: clients request the next + * task, mark tasks as ready, and optionally consume ready items. + * + * @note The concrete message-passing, blocking semantics, and memory + * ownership rules are implemented in the Bank source (MPI-based). + * This header documents intent and call contracts at a high level. + */ #include "CompFunction.h" #include "parallel.h" @@ -8,128 +59,473 @@ namespace mrcpp { using namespace mpi; +/** + * @brief A deposited item stored by the Bank. + * + * A deposit can represent either a multiresolution function (`orb`) or a + * raw data buffer (`data`). Exactly one of them is expected to be active + * for a given deposit. + */ struct deposit { - CompFunction<3> *orb; - double *data; // for pure data arrays - bool hasdata; - int datasize; + /** Pointer to a deposited function (3D component function). */ + CompFunction<3> *orb = nullptr; + /** Pointer to a deposited plain data buffer (contiguous). */ + double *data = nullptr; // for pure data arrays + /** True if this deposit contains a raw data buffer in @ref data. */ + bool hasdata = false; + /** Size (number of elements) for @ref data when @ref hasdata is true. */ + int datasize = 0; + /** Application-defined identifier used to name and retrieve this deposit. 
*/ int id = -1; // to identify what is deposited - int source; // mpi rank from the source of the data + /** MPI rank that originally deposited the item. */ + int source = 0; // mpi rank from the source of the data }; +/** + * @brief Queue bookkeeping for task-ready notifications. + * + * Associates a queue identifier with a list of client ranks that registered + * interest or contributed ready items. + */ struct queue_struct { - int id; + /** Queue identifier (application-defined). */ + int id = 0; + /** Ranks that have entries or are waiting on this queue. */ std::vector clients; }; +/** + * @brief Command codes exchanged between clients and the Bank/TaskManager. + * + * These enumerators are used as operation selectors in MPI messages. Listed + * values are stable and intentionally explicit to simplify debugging. + */ enum { - // (the values are used to interpret error messages) - CLOSE_BANK, // 0 - CLEAR_BANK, // 1 - NEW_ACCOUNT, // 2 - CLOSE_ACCOUNT, // 3 - GET_ORBITAL, // 4 - GET_FUNCTION_AND_WAIT, // 5 - GET_FUNCTION_AND_DELETE, // 6 - SAVE_ORBITAL, // 7 - GET_FUNCTION, // 8 - SAVE_FUNCTION, // 9 - GET_DATA, // 10 - SAVE_DATA, // 11 - SAVE_NODEDATA, // 12 - GET_NODEDATA, // 13 - GET_NODEBLOCK, // 14 - GET_ORBBLOCK, // 15 - CLEAR_BLOCKS, // 16 - GET_MAXTOTDATA, // 17 - GET_TOTDATA, // 18 - INIT_TASKS, // 19 - GET_NEXTTASK, // 20 - PUT_READYTASK, // 21 - DEL_READYTASK, // 22 - GET_READYTASK, // 23 - GET_READYTASK_DEL, // 24 + CLOSE_BANK, ///< 0 — Shut down the Bank service. + CLEAR_BANK, ///< 1 — Remove all accounts and deposits. + NEW_ACCOUNT, ///< 2 — Open a new client account. + CLOSE_ACCOUNT, ///< 3 — Close (delete) an existing account. + GET_ORBITAL, ///< 4 — Retrieve an orbital (internal legacy op). + GET_FUNCTION_AND_WAIT, ///< 5 — Blocking fetch of a function until available. + GET_FUNCTION_AND_DELETE, ///< 6 — Fetch and erase a function. + SAVE_ORBITAL, ///< 7 — Store an orbital (internal legacy op). 
+ GET_FUNCTION, ///< 8 — Non-blocking fetch of a function if available. + SAVE_FUNCTION, ///< 9 — Store a function. + GET_DATA, ///< 10 — Fetch a raw data buffer. + SAVE_DATA, ///< 11 — Store a raw data buffer. + SAVE_NODEDATA, ///< 12 — Store node-scoped raw data. + GET_NODEDATA, ///< 13 — Fetch node-scoped raw data. + GET_NODEBLOCK, ///< 14 — Fetch a contiguous block for a node id across IDs. + GET_ORBBLOCK, ///< 15 — Fetch a contiguous block for an orbital id across nodes. + CLEAR_BLOCKS, ///< 16 — Clear block caches/aggregations. + GET_MAXTOTDATA, ///< 17 — Query max total stored size (kB). + GET_TOTDATA, ///< 18 — Query per-account total sizes (kB). + INIT_TASKS, ///< 19 — Initialize TaskManager with N tasks. + GET_NEXTTASK, ///< 20 — Acquire the next task index. + PUT_READYTASK, ///< 21 — Mark (i,j) as ready. + DEL_READYTASK, ///< 22 — Remove a ready marker (i,j). + GET_READYTASK, ///< 23 — Get ready list for i (keep). + GET_READYTASK_DEL, ///< 24 — Get ready list for i and consume. }; +/** + * @brief Central repository for distributed function/data sharing and task queues. + * + * The Bank owns per-client accounts, holds deposits, and tracks memory usage + * in kB. Only designated Bank ranks instantiate and `open()` the service; + * clients interact via @ref BankAccount and @ref TaskManager on worker ranks. + * + * @par Thread-safety + * Bank methods are orchestrated via MPI; within a single rank, methods are not + * inherently thread-safe unless otherwise guarded at the call site. + */ class Bank { public: + /** @brief Construct an unopened Bank instance on a Bank rank. */ Bank() = default; + + /** @brief Destructor. Ensures resources are released if not already closed. */ ~Bank(); + + /** + * @brief Start the Bank service (receive loop, state init). + * + * Must be called on the Bank rank(s). After `open()`, the service listens + * for client commands and manages account state. 
+ */ void open(); + + /** + * @brief Stop the Bank service and release all resources. + * + * Closes all accounts and clears all deposits. + */ void close(); + + /** + * @brief Maximum total footprint observed so far, in kB. + * @return Peak cumulative size (across all accounts) since `open()`. + */ int get_maxtotalsize(); + + /** + * @brief Current total sizes per account, in kB. + * @return A vector of sizes aligned with internal account ordering. + */ std::vector get_totalsize(); private: friend class BankAccount; friend class TaskManager; - // used by BankAccount + // ---- Account control (called by clients through Bank's command loop) ---- + + /** + * @brief Create a new account for client rank @p iclient. + * @param iclient Rank creating the account (logical owner). + * @param comm Communicator the client uses to reach the Bank. + * @return Integer account identifier (>0 on success). + */ int openAccount(int iclient, MPI_Comm comm); - int clearAccount(int account, int iclient, MPI_Comm comm); // closes and open fresh account - void closeAccount(int account_id); // remove the account - // used by TaskManager; + /** + * @brief Clear and reinitialize an existing account. + * + * Equivalent to closing and reopening the account, preserving the account id. + * + * @param account Account id to clear. + * @param iclient Requesting client rank. + * @param comm Client communicator. + * @return 0 on success, negative on error. + */ + int clearAccount(int account, int iclient, MPI_Comm comm); + + /** + * @brief Permanently remove an account and all of its deposits. + * @param account_id Account identifier. + */ + void closeAccount(int account_id); + + // ---- Task manager control (internal) ---- + + /** + * @brief Initialize task bookkeeping for @p ntasks items. + * @param ntasks Number of tasks available (0..ntasks-1). + * @param iclient Requesting rank. + * @param comm Client communicator. + * @return Account id of the task manager context. 
+ */ int openTaskManager(int ntasks, int iclient, MPI_Comm comm); + + /** + * @brief Close and remove a TaskManager context. + * @param account_id Associated account id. + */ void closeTaskManager(int account_id); - // used internally by Bank; + // ---- Internal utilities ---- + + /** @brief Remove all accounts and deposits (global reset). */ void clear_bank(); - void remove_account(int account); // remove the content and the account - long long totcurrentsize = 0ll; // number of kB used by all accounts - std::vector accounts; // open bank accounts - std::map *> get_deposits; // gives deposits of an account + /** + * @brief Remove a single account and its content. + * @param account Account id to erase. + */ + void remove_account(int account); + + // ---- Accounting & indices ---- + long long totcurrentsize = 0ll; ///< Sum of all account sizes (kB). + std::vector accounts; ///< Active account ids. + + /** Map: account id → vector of deposits. */ + std::map *> get_deposits; + + /** Map: account id → (item id → index in deposits vector). */ std::map *> get_id2ix; + + /** Map: account id → (queue id → index in queue vector). */ std::map *> get_id2qu; - std::map *> get_queue; // gives deposits of an account - std::map> *> get_readytasks; // used by task manager - std::map currentsize; // total deposited data size (without containers) - long long maxsize = 0; // max total deposited data size (without containers) + + /** Map: account id → queue collection (task-ready queues). */ + std::map *> get_queue; + + /** Map: account id → (i → vector of j ready items). */ + std::map> *> get_readytasks; + + /** Map: account id → current size in kB (without container overhead). */ + std::map currentsize; + + /** Peak total size (kB) observed since last reset. */ + long long maxsize = 0; }; +/** + * @brief RAII client-side view of a Bank account. + * + * A `BankAccount` encapsulates a live account and offers typed methods to + * deposit and retrieve functions or raw buffers. 
Most methods are thin + * request wrappers; the Bank performs the actual storage. + * + * @note By default, rank and communicator are taken from the MPI worker + * context (`mpi::wrk_rank`, `mpi::comm_wrk`). + * + * @par Ownership & lifetime + * Returned raw pointers (e.g., from `get_orbblock`) typically reference + * storage owned by the Bank. Callers should copy data if it must outlive + * subsequent Bank interactions. See the implementation for exact details. + */ class BankAccount { public: + /** + * @brief Open a new account for @p iclient on communicator @p comm. + * @param iclient Client rank that owns the account (default: @ref mpi::wrk_rank). + * @param comm Communicator used to contact the Bank (default: @ref mpi::comm_wrk). + */ BankAccount(int iclient = wrk_rank, MPI_Comm comm = comm_wrk); + + /** @brief Close the account and release any client-side resources. */ ~BankAccount(); + + /** @brief Bank-assigned account identifier (≥0 when open). */ int account_id = -1; + + /** + * @brief Clear and reinitialize this account. + * @param i Client rank issuing the request. + * @param comm Client communicator. + */ void clear(int i = wrk_rank, MPI_Comm comm = comm_wrk); - // int put_orb(int id, ComplexFunction &orb); - // int get_orb(int id, ComplexFunction &orb, int wait = 0); + + // --- Function storage/retrieval --- + + /** + * @brief Fetch a function by @p id and delete it on the Bank. + * @param id Application-level identifier of the function. + * @param orb Output destination; resized/assigned by the Bank. + * @return 0 on success, negative on error. + */ int get_func_del(int id, CompFunction<3> &orb); + + /** + * @brief Deposit a function under identifier @p id. + * @param id Application-level identifier. + * @param func Function object to store (copied/serialized by Bank). + * @return 0 on success, negative on error. + */ int put_func(int id, CompFunction<3> &func); + + /** + * @brief Fetch a function by @p id. + * @param id Application-level identifier. 
+ * @param func Output destination; resized/assigned by the Bank. + * @param wait If nonzero, block until available; otherwise return immediately if missing. + * @return 0 on success; negative on error; positive (e.g. 1) if not found and @p wait==0. + */ int get_func(int id, CompFunction<3> &func, int wait = 0); + + // --- Raw data buffers by plain id --- + + /** + * @brief Deposit a real-valued buffer. + * @param id Application-level identifier. + * @param size Number of elements in @p data. + * @param data Pointer to contiguous buffer (copied by the Bank). + * @return 0 on success, negative on error. + */ int put_data(int id, int size, double *data); + + /** + * @brief Deposit a complex-valued buffer. + * @copydetails put_data(int,int,double*) + */ int put_data(int id, int size, ComplexDouble *data); + + /** + * @brief Retrieve a real-valued buffer by @p id. + * @param id Identifier previously used with @ref put_data. + * @param size Expected number of elements; used for validation. + * @param data Destination buffer; must have room for @p size elements. + * @return 0 on success; negative on error; positive if not found. + */ int get_data(int id, int size, double *data); + + /** + * @brief Retrieve a complex-valued buffer by @p id. + * @copydetails get_data(int,int,double*) + */ int get_data(int id, int size, ComplexDouble *data); + + // --- Raw data scoped by node index (spatial addressing) --- + + /** + * @brief Deposit real-valued data associated with a node index. + * @param nIdx Spatial node key. + * @param size Number of elements. + * @param data Buffer pointer (copied by the Bank). + * @return 0 on success, negative on error. + */ int put_data(NodeIndex<3> nIdx, int size, double *data); + + /** + * @brief Deposit complex-valued data for a node index. + * @copydetails put_data(NodeIndex<3>,int,double*) + */ int put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data); + + /** + * @brief Retrieve real-valued data for a node index. + * @param nIdx Node key. 
+ * @param size Expected number of elements. + * @param data Output buffer. + * @return 0 on success; negative on error; positive if not found. + */ int get_data(NodeIndex<3> nIdx, int size, double *data); + + /** + * @brief Retrieve complex-valued data for a node index. + * @copydetails get_data(NodeIndex<3>,int,double*) + */ int get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data); + + // --- Node-scoped data grouped under an object id (e.g., orbital id) --- + + /** + * @brief Deposit real-valued data for a specific node @p nodeid within object @p id. + * @param id Object (e.g., orbital) identifier. + * @param nodeid Node identifier within the object. + * @param size Number of elements. + * @param data Buffer pointer (copied by the Bank). + * @return 0 on success, negative on error. + */ int put_nodedata(int id, int nodeid, int size, double *data); + + /** + * @brief Deposit complex-valued data for a node within object @p id. + * @copydetails put_nodedata(int,int,int,double*) + */ int put_nodedata(int id, int nodeid, int size, ComplexDouble *data); + + /** + * @brief Retrieve real-valued data for (@p id, @p nodeid). + * @param id Object identifier. + * @param nodeid Node identifier. + * @param size Expected element count. + * @param data Output buffer. + * @param idVec (Out) List of object ids actually present in the block, if aggregated. + * @return 0 on success; negative on error; positive if not found. + */ int get_nodedata(int id, int nodeid, int size, double *data, std::vector &idVec); + + /** + * @brief Retrieve complex-valued data for (@p id, @p nodeid). + * @copydetails get_nodedata(int,int,int,double*,std::vector&) + */ int get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector &idVec); + + // --- Block retrieval helpers --- + + /** + * @brief Retrieve a contiguous block of all real node data for @p nodeid across ids. + * @param nodeid Node identifier to gather. 
+ * @param data (Out) Pointer to contiguous storage; copy data before next call. + * @param idVec (Out) List of ids participating in the block. + * @return Number of elements in @p data on success; negative on error. + */ int get_nodeblock(int nodeid, double *data, std::vector &idVec); + + /** + * @brief Retrieve a contiguous block of all complex node data for @p nodeid across ids. + * @copydetails get_nodeblock(int,double*,std::vector&) + */ int get_nodeblock(int nodeid, ComplexDouble *data, std::vector &idVec); + + /** + * @brief Retrieve all real-valued node data for an orbital id into a single contiguous block. + * @param orbid Orbital (object) id. + * @param data (Out) Pointer reference to contiguous storage. + * @param nodeidVec (Out) Node ids represented in the block. + * @param bankstart Starting index/offset within the Bank’s internal storage. + * @return Number of elements in the returned block; negative on error. + * + * @note Copy out the data if it must persist beyond this call or subsequent Bank calls. + */ int get_orbblock(int orbid, double *&data, std::vector &nodeidVec, int bankstart); + + /** + * @brief Retrieve all complex-valued node data for an orbital id into a contiguous block. + * @copydetails get_orbblock(int,double*&,std::vector&,int) + */ int get_orbblock(int orbid, ComplexDouble *&data, std::vector &nodeidVec, int bankstart); }; +/** + * @brief Minimal distributed task queue associated with a Bank account. + * + * The TaskManager assigns task indices in [0, @ref n_tasks). Clients can: + * - Request the next task (`next_task()`), + * - Mark specific items as ready (`put_readytask(i,j)` / `del_readytask(i,j)`), + * - Retrieve ready lists (`get_readytask(i, del)`). + * + * The actual synchronization and distribution are performed by the Bank. + */ class TaskManager { public: + /** + * @brief Construct and initialize a task context with @p ntasks tasks. + * @param ntasks Total number of tasks available (0..ntasks-1). 
+ * @param iclient Client rank that opens the context. + * @param comm Communicator for Bank interaction. + */ TaskManager(int ntasks, int iclient = wrk_rank, MPI_Comm comm = comm_wrk); + + /** @brief Destructor; closes the TaskManager context. */ ~TaskManager(); + + /** + * @brief Obtain the next task index to process. + * @return Task index in [0, @ref n_tasks), or negative if none available. + */ int next_task(); + + /** + * @brief Mark item (@p i, @p j) as ready. + * @param i Primary key (e.g., task group/channel). + * @param j Secondary key (e.g., item id). + */ void put_readytask(int i, int j); + + /** + * @brief Remove ready marker (@p i, @p j). + * @param i Primary key. + * @param j Secondary key. + */ void del_readytask(int i, int j); + + /** + * @brief Retrieve the ready list for key @p i. + * @param i Primary key (queue id). + * @param del If nonzero, consume (erase) the ready list; otherwise keep it. + * @return Vector of secondary keys (j values) that are ready. + */ std::vector get_readytask(int i, int del); + + /** @brief Bank account id associated with this task context. */ int account_id = -1; - int task = 0; // used in serial case only - int n_tasks = 0; // used in serial case only + + /** @name Serial fallbacks + * These are used if the runtime is not using MPI distribution. + * @{ */ + int task = 0; ///< Current task pointer (serial mode only). + int n_tasks = 0; ///< Total tasks (serial mode only). + /** @} */ }; +/** + * @brief Fixed size of control messages exchanged with the Bank. + * + * @details This constant is used by the MPI layer to size control payloads. 
+ */ int const message_size = 7; } // namespace mrcpp diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp index bdec0f954..ec80ce67b 100644 --- a/src/utils/CompFunction.cpp +++ b/src/utils/CompFunction.cpp @@ -1,3 +1,28 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. + * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see . + * + * For information on the complete list of contributors to MRCPP, see: + * + */ + #include "CompFunction.h" #include "Bank.h" #include "Printer.h" @@ -9,17 +34,6 @@ #include "trees/FunctionNode.h" #include -/* Some rules for CompFunction: - * NComp is the number of components. If Ncomp>0, the corresponding trees must exist (can be only empty roots). - * The other trees should be set to nullptr. - * The trees and data can be shared among several CompFunction; this is managed automatically by "std::make_shared" - * Normally the CompFunction must be eiher real or complex (or none if noe is defined anyway). - * Though it is allowed in some cases to have both and the code should preferably allow this. 
(It is used temporary - * when we need a Complex type, but the trees are real: the tree is then copied as a complex tree in the same CompFunction). - * TreePtr (aka func_ptr) is the part potentially shared with others with "std::make_shared". It contains the pointers to the trees. - * The static data (number of components, real/complex, conjugaison, integers used for spin etc.) are store in func_ptr.data. - */ - namespace mrcpp { template MultiResolutionAnalysis *defaultCompMRA = nullptr; // Global MRA @@ -41,9 +55,6 @@ template CompFunction::CompFunction() { for (int i = 0; i < 4; i++) CompC[i] = nullptr; } -/* - * Empty functions (no components defined) - */ template CompFunction::CompFunction(int n1) { func_ptr = std::make_shared>(false); CompD = func_ptr->real; @@ -59,9 +70,6 @@ template CompFunction::CompFunction(int n1) { func_ptr->data.shared = false; } -/* - * Empty functions (no components defined) - */ template CompFunction::CompFunction(int n1, bool share) { func_ptr = std::make_shared>(share); CompD = func_ptr->real; @@ -77,9 +85,6 @@ template CompFunction::CompFunction(int n1, bool share) { func_ptr->data.shared = share; } -/* - * Empty functions (trees defined but zero) - */ template CompFunction::CompFunction(const CompFunctionData &indata, bool alloc) { func_ptr = std::make_shared>(indata.shared); func_ptr->data = indata; @@ -91,22 +96,12 @@ template CompFunction::CompFunction(const CompFunctionData &indata this->free(); } -/** @brief Copy constructor - * - * Shallow copy: meta data is copied along with the component pointers, - * NO transfer of ownership. - */ template CompFunction::CompFunction(const CompFunction &compfunc) { func_ptr = compfunc.func_ptr; CompD = func_ptr->real; CompC = func_ptr->cplx; } -/** @brief Copy constructor - * - * Shallow copy: meta data is copied along with the component pointers, - * NO transfer of ownership. 
- */ template CompFunction &CompFunction::operator=(const CompFunction &compfunc) { if (this != &compfunc) { func_ptr = compfunc.func_ptr; @@ -117,13 +112,8 @@ template CompFunction &CompFunction::operator=(const CompFunction< } template -/** @brief Parameter copy - * - * Returns a copy without defined trees. - */ CompFunction CompFunction::paramCopy(bool alloc) const { CompFunction out(func_ptr->data, alloc); - // we do not copy tree sizes: for (int i = 0; i < 4; i++) out.func_ptr->data.Nchunks[i] = 0; return out; } @@ -204,10 +194,6 @@ template double CompFunction::getSquareNorm() const { return norm; } -// Allocate empty trees. The tree must be defined as real or complex already. -// Allocates all ialloc trees, with indices 0,...ialloc-1 -// nalloc is the number of components allocated. ialloc=1 allocates one tree. -// deletes all old trees if found. template void CompFunction::alloc(int nalloc, bool zero) { if (defaultCompMRA == nullptr) MSG_ABORT("Default MRA not yet defined"); if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex"); @@ -227,7 +213,6 @@ template void CompFunction::alloc(int nalloc, bool zero) { func_ptr->Ncomp = std::max(Ncomp(), i + 1); } for (int i = nalloc; i < Ncomp(); i++) { - // delete possible remaining components delete CompD[i]; delete CompC[i]; CompD[i] = nullptr; @@ -235,10 +220,6 @@ template void CompFunction::alloc(int nalloc, bool zero) { } } -// Allocate one empty trees for one specific component. -// The tree must be defined as real or complex already. -// ialloc is index allocated. ialloc=0 allocates the tree with index zero. -// deletes old tree if found. 
template void CompFunction::alloc_comp(int ialloc) { if (defaultCompMRA == nullptr) MSG_ABORT("Default MRA not yet defined"); if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex"); @@ -271,7 +252,7 @@ template void CompFunction::free() { } template int CompFunction::getSizeNodes() const { - int size_mb = 0; // Memory size in kB + int size_mb = 0; for (int i = 0; i < Ncomp(); i++) { if (isreal() and CompD[i] != nullptr) size_mb += CompD[i]->getSizeNodes(); if (iscomplex() and CompC[i] != nullptr) size_mb += CompC[i]->getSizeNodes(); @@ -288,11 +269,6 @@ template int CompFunction::getNNodes() const { return nNodes; } -/** @brief Soft complex conjugate - * - * Will use complex conjugate in operations (add, multiply etc.) - * Does change the state (conj flag), but does not actively change all coefficients. - */ template void CompFunction::dagger() { func_ptr->data.conj = not(func_ptr->data.conj); for (int i = 0; i < Ncomp(); i++) { @@ -305,7 +281,7 @@ template FunctionTree &CompFunction::real(int i) { if (CompD[i] == nullptr) alloc_comp(i); return *CompD[i]; } -template // NB: should return CompC in the future +template FunctionTree &CompFunction::imag(int i) { MSG_ABORT("Must choose real or complex"); if (!iscomplex()) MSG_ABORT("not complex function"); @@ -322,7 +298,7 @@ template const FunctionTree &CompFunction::real(int i) con if (!isreal()) MSG_ABORT("not real function"); return *CompD[i]; } -template // NB: should use complex or real +template const FunctionTree &CompFunction::imag(int i) const { MSG_ABORT("Must choose real or complex"); if (!iscomplex()) MSG_ABORT("not complex function"); @@ -333,10 +309,8 @@ template const FunctionTree &CompFunction::complex( return *CompC[i]; } -/* for backwards compatibility */ template void CompFunction::setReal(FunctionTree *tree, int i) { func_ptr->isreal = 1; - // if (CompD[i] != nullptr) delete CompD[i]; CompD[i] = tree; if (tree != nullptr) { func_ptr->Ncomp = 
std::max(Ncomp(), i + 1); @@ -347,7 +321,6 @@ template void CompFunction::setReal(FunctionTree *tree, in template void CompFunction::setCplx(FunctionTree *tree, int i) { func_ptr->iscomplex = 1; - // if (CompC[i] != nullptr) delete CompC[i]; CompC[i] = tree; if (tree != nullptr) { func_ptr->Ncomp = std::max(Ncomp(), i + 1); @@ -356,11 +329,6 @@ template void CompFunction::setCplx(FunctionTree *t } } -/** @brief In place addition. - * - * Output is extended to union grid. - * - */ template void CompFunction::add(ComplexDouble c, CompFunction inp) { if (Ncomp() < inp.Ncomp()) { @@ -397,7 +365,6 @@ template int CompFunction::crop(double prec) { return nChunksremoved; } -/** @brief In place multiply with scalar. Fully in-place.*/ template void CompFunction::rescale(ComplexDouble c) { bool need_to_rescale = not(isShared()) or mpi::share_master(); if (need_to_rescale) { @@ -405,7 +372,7 @@ template void CompFunction::rescale(ComplexDouble c) { if (iscomplex()) { CompC[i]->rescale(c); } else { - if (abs(c.imag()) > MachineZero) { // works only only for NComp==1) + if (abs(c.imag()) > MachineZero) { CompD[i]->CopyTreeToComplex(CompC[i]); delete CompD[i]; CompD[i] = nullptr; @@ -428,10 +395,6 @@ template class CompFunction<1>; template class CompFunction<2>; template class CompFunction<3>; -/** @brief Deep copy that changes type from real to complex - * - * Deep copy: makes an exact copy with type complex from a real input - */ template void CopyToComplex(CompFunction &out, const CompFunction &inp) { out.func_ptr->data = inp.func_ptr->data; out.defcomplex(); @@ -447,11 +410,6 @@ template void CopyToComplex(CompFunction &out, const CompFunction } } - -/** @brief Deep copy - * - * Deep copy: meta data is copied along with the content of each component. 
- */ template void deep_copy(CompFunction *out, const CompFunction &inp) { out->func_ptr->data = inp.func_ptr->data; out->alloc(inp.Ncomp()); @@ -465,10 +423,6 @@ template void deep_copy(CompFunction *out, const CompFunction &inp } } -/** @brief Deep copy - * - * Deep copy: meta func_ptr->data is copied along with the content of each component. - */ template void deep_copy(CompFunction &out, const CompFunction &inp) { out.func_ptr->data = inp.func_ptr->data; out.alloc(inp.Ncomp()); @@ -482,33 +436,24 @@ template void deep_copy(CompFunction &out, const CompFunction &inp } } -/** @brief out = a*inp_a + b*inp_b - * - * Recast into linear_combination. - * - */ template void add(CompFunction &out, ComplexDouble a, CompFunction inp_a, ComplexDouble b, CompFunction inp_b, double prec, bool conjugate) { std::vector coefs(2); coefs[0] = a; coefs[1] = b; - std::vector> funcs; // NB: not a CompFunctionVector, because not run in parallel! + std::vector> funcs; funcs.push_back(inp_a); funcs.push_back(inp_b); linear_combination(out, coefs, funcs, prec, conjugate); } -/** @brief out = c_0*inp_0 + c_1*inp_1 + ... 
+ c_N*inp_N - * - * OMP parallel, but not MPI parallel - */ template void linear_combination(CompFunction &out, const std::vector &c, std::vector> &inp, double prec, bool conjugate) { double thrs = MachineZero; bool need_to_add = not(out.isShared()) or mpi::share_master(); bool share = out.isShared(); out.func_ptr->data = inp[0].func_ptr->data; - out.func_ptr->data.shared = share; // we don' inherit the shareness + out.func_ptr->data.shared = share; bool iscomplex = false; for (int i = 0; i < inp.size(); i++) if (inp[i].iscomplex() or c[i].imag() > MachineZero) iscomplex = true; @@ -519,7 +464,7 @@ template void linear_combination(CompFunction &out, const std::vector out.alloc(out.Ncomp()); for (int comp = 0; comp < inp[0].Ncomp(); comp++) { if (not iscomplex) { - FunctionTreeVector fvec; // one component vector + FunctionTreeVector fvec; for (int i = 0; i < inp.size(); i++) { if (std::norm(c[i]) < thrs) continue; if (inp[i].getNNodes() == 0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue; @@ -538,7 +483,7 @@ template void linear_combination(CompFunction &out, const std::vector } } } else { - FunctionTreeVector fvec; // one component vector + FunctionTreeVector fvec; for (int i = 0; i < inp.size(); i++) { if (inp[i].isreal()) { inp[i].CompD[comp]->CopyTreeToComplex(inp[i].CompC[comp]); @@ -568,15 +513,9 @@ template void linear_combination(CompFunction &out, const std::vector } } -/** @brief out = conj(inp) * inp - * - * Note that output is always real - * - */ template void make_density(CompFunction &out, CompFunction inp, double prec) { multiply(prec, out, 1.0, inp, inp, -1, false, false, true); if (out.iscomplex()) { - // copy onto real components for (int i = 0; i < out.Ncomp(); i++) { out.CompD[i] = out.CompC[i]->Real(); delete out.CompC[i]; @@ -586,18 +525,10 @@ template void make_density(CompFunction &out, CompFunction inp, do } } - -/** @brief out = inp_a * inp_b - * - */ template void multiply(CompFunction &out, CompFunction inp_a, CompFunction 
inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate) { multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms, conjugate); } -/** @brief out = inp_a * inp_b - * Takes conjugate of inp_a if conjugate=true - * In case of mixed real/complex inputs, the real functions are converted into complex functions. - */ template void multiply(double prec, CompFunction &out, double coef, CompFunction inp_a, CompFunction inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { if (inp_b.func_ptr->conj) MSG_ABORT("Not implemented"); if (inp_a.func_ptr->conj) conjugate = (not conjugate); @@ -606,29 +537,26 @@ template void multiply(double prec, CompFunction &out, double coef, C if (out.Ncomp() == 0) out_allocated = false; bool share = out.isShared(); out.func_ptr->data = inp_a.func_ptr->data; - out.func_ptr->data.shared = share; // we don't inherit the shareness - out.func_ptr->conj = false; // we don't inherit conjugaison + out.func_ptr->data.shared = share; + out.func_ptr->conj = false; if (inp_a.getNNodes() == 0 or inp_b.getNNodes() == 0) { if (!out_allocated) out.alloc(out.Ncomp()); return; } for (int comp = 0; comp < inp_a.Ncomp(); comp++) { - out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; // we could put this is coef if everything is real? 
+ out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; if (inp_a.isreal() and inp_b.isreal()) { if (need_to_multiply) { if (!out_allocated) out.alloc(out.Ncomp()); if (prec < 0.0) { - // Union grid build_grid(*out.CompD[comp], *inp_a.CompD[comp]); build_grid(*out.CompD[comp], *inp_b.CompD[comp]); mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate); } else { - // Adaptive grid mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate); } } } else { - // if one of the input is real, we simply make a new complex copy of it bool inp_aisReal = inp_a.isreal(); bool inp_bisReal = inp_b.isreal(); if (inp_aisReal) { @@ -644,7 +572,6 @@ template void multiply(double prec, CompFunction &out, double coef, C ComplexDouble coef = 1.0; if (need_to_multiply) { if (prec < 0.0) { - // Union grid out.func_ptr->iscomplex = 1; out.func_ptr->isreal = 0; delete out.CompD[comp]; @@ -653,9 +580,8 @@ template void multiply(double prec, CompFunction &out, double coef, C build_grid(*out.CompC[comp], *inp_a.CompC[comp]); build_grid(*out.CompC[comp], *inp_b.CompC[comp]); mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate); - } else { // note that this assumes Ncomp=1 - // Adaptive grid - if (out.CompD[comp] != nullptr) { // NB: func_ptr has alreadybeen overwritten! 
+ } else { + if (out.CompD[comp] != nullptr) { if (out.CompD[comp]->getNNodes() > 0) { out.CompD[comp]->CopyTreeToComplex(out.CompC[comp]); out.func_ptr->iscomplex = 1; @@ -675,7 +601,6 @@ template void multiply(double prec, CompFunction &out, double coef, C mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate); } } - // restore original tree if (inp_aisReal) { delete inp_a.CompC[comp]; inp_a.CompC[comp] = nullptr; @@ -693,41 +618,28 @@ template void multiply(double prec, CompFunction &out, double coef, C mpi::share_function(out, 0, 9911, mpi::comm_share); } -/** @brief out = inp_a * f - * - * Only one component is multiplied - */ template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented"); if (inp_a.isreal() != 1) MSG_ABORT("Not implemented"); if (conjugate) MSG_ABORT("Not implemented"); - CompFunctionVector CompVec; // Should use vector? + CompFunctionVector CompVec; CompVec.push_back(inp_a); CompFunctionVector CompVecOut; CompVecOut = multiply(CompVec, f, prec, nullptr, nrefine, true); out = CompVecOut[0]; - // multiply(out, *inp_a.CompD[0], f, prec, nrefine, conjugate); } -/** @brief out = inp_a * f - * - * Only one component is multiplied - */ template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { MSG_ABORT("Not implemented"); if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented"); if (inp_a.iscomplex() != 1) MSG_ABORT("Not implemented"); if (conjugate) MSG_ABORT("Not implemented"); - CompFunctionVector CompVec; // Should use vector? 
+ CompFunctionVector CompVec; CompVec.push_back(inp_a); CompFunctionVector CompVecOut; - // CompVecOut = multiply(CompVec, f, prec, nrefine, true); out = CompVecOut[0]; } -/** @brief out = inp_a * f - * - */ template void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { CompFunction func_a; func_a.func_ptr->isreal = 1; @@ -746,12 +658,6 @@ template void multiply(CompFunction &out, FunctionTree = int bra^\dag(r) * ket(r) dr. - * - * Sum of component dots. - * Notice that the ComplexDouble dot(CompFunction bra, CompFunction ket) { if (bra.func_ptr->conj or ket.func_ptr->conj) MSG_ABORT("Not implemented"); ComplexDouble dotprodtot = 0.0; @@ -773,10 +679,6 @@ template ComplexDouble dot(CompFunction bra, CompFunction ket) { return dotprodtot; } -/** @brief Compute = int |bra^\dag(r)| * |ket(r)| dr. - * - * sum of components - */ template double node_norm_dot(CompFunction bra, CompFunction ket) { double dotprodtot = 0.0; for (int comp = 0; comp < bra.Ncomp(); comp++) { @@ -790,7 +692,7 @@ template double node_norm_dot(CompFunction bra, CompFunction ket) } else { dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]); } - dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); // for fully complex values this does not really give the norm + dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); dotprodtot += dotprod; } return dotprodtot; @@ -851,8 +753,6 @@ template void project(CompFunction &out, RepresentableFunction>(N) { for (int i = 0; i < N; i++) (*this)[i].func_ptr->rank = i; @@ -862,21 +762,9 @@ void CompFunctionVector::distribute() { for (int i = 0; i < this->size(); i++) (*this)[i].func_ptr->rank = i; } -/** @brief Make a linear combination of functions - * - * Uses "local" representation: treats one node at a time. 
- * For each node, all functions are transformed simultaneously - * by a dense matrix multiplication. - * Phi input functions, Psi output functions - * Phi and Psi are complex. - */ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) { - // The principle of this routine is that nodes for all orbitals are rotated one by one using matrix multiplication. - // The routine does avoid when possible to move data, but uses pointers and indices manipulation. - // MPI version does not use OMP yet, Serial version uses OMP - // size of input is N, size of output is M - bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch + bool serial = mpi::wrk_size == 1; int N = Phi.size(); int M = Psi.size(); for (int i = 0; i < M; i++) { @@ -890,18 +778,16 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix"); if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix"); - // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff) FunctionTree<3> refTree(*Phi.vecMRA); mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); std::vector scalefac_ref; - std::vector coeffVec_ref; // not used! 
- std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes + std::vector coeffVec_ref; + std::vector indexVec_ref; + std::vector parindexVec_ref; int max_ix; - // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree); int max_n = indexVec_ref.size(); @@ -915,25 +801,18 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe Psi[i].func_ptr->data.iscomplex = 1; } - // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank + BankAccount nodesPhi; + BankAccount nodesRotated; - BankAccount nodesPhi; // to put the original nodes - BankAccount nodesRotated; // to put the rotated nodes - - // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); // serialIx of the nodes - std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the - // orbital given the node index in the reference tree + std::vector> indexVec(N); + std::map> node2orbVec; + std::vector> orb2node(N); if (serial) { - // make list of all coefficients (coeffVec), and their reference indices (indexVec) - std::vector parindexVec; // serialIx of the parent nodes + std::vector parindexVec; std::vector scalefac; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid Phi[j].complex().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -941,208 +820,167 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe node2orbVec[ix].push_back(j); } } - } else { // 
MPI case - // send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(Phi, refTree, nodesPhi); - mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. + mpi::barrier(mpi::comm_wrk); } - // 4) rotate all the nodes - IntMatrix split_serial; // in the serial case all split are stored in one array - std::vector> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case - std::vector> ix2coef(M); // to find the index in for example rotCoeffVec[] corresponding to a serialIx - int csize; // size of the current coefficients (different for roots and branches) - std::vector rotatedCoeffVec; // just to ensure that the data from rotatedCoeff is not deleted, since we point to it. - // j indices are for unrotated orbitals, i indices are for rotated orbitals + IntMatrix split_serial; + std::vector> coeffpVec(M); + std::vector> ix2coef(M); + int csize; + std::vector rotatedCoeffVec; if (serial) { - std::map ix2coef_ref; // to find the index n corresponding to a serialIx - split_serial.resize(M, max_n); // not use in the MPI case + std::map ix2coef_ref; + split_serial.resize(M, max_n); for (int n = 0; n < max_n; n++) { - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + int node_ix = indexVec_ref[n]; ix2coef_ref[node_ix] = n; for (int i = 0; i < M; i++) split_serial(i, n) = 1; } - std::vector nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits) - // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok. - // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding - // n is finished. 
+ std::vector nodeReady(max_n, 0); #pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { int csize; - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n - std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); + int node_ix = indexVec_ref[n]; + std::vector orbjVec; if (node2orbVec[node_ix].size() <= 0) continue; csize = sizecoeffW; - if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff + if (parindexVec_ref[n] < 0) csize = sizecoeff; - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int shift = sizecoeff - sizecoeffW; if (parindexVec_ref[n] < 0) shift = 0; ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVec[node_ix]) { int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbjVec.push_back(j); } - // 4b) make a list of rotated orbitals needed for this node - // OMP must wait until parent is ready while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) { #pragma omp flush }; std::vector orbiVec; - for (int i = 0; i < M; i++) { // loop over all rotated orbitals - if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets + for (int i = 0; i < M; i++) { + if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; orbiVec.push_back(i); } - // 4c) rotate this node - ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices - for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals + ComplexMatrix Un(orbjVec.size(), orbiVec.size()); + for (int i 
= 0; i < orbiVec.size(); i++) { for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); } } ComplexMatrix rotatedCoeff(csize, orbiVec.size()); - // HERE IT HAPPENS! - // TODO: conjugaison - rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + rotatedCoeff.noalias() = coeffBlock * Un; - // 4d) store and make rotated node pointers - // for now we allocate in buffer, in future could be directly allocated in the final trees double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; - // make all norms: for (int i = 0; i < orbiVec.size(); i++) { - // check if parent must be split if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) { - // mark this node for this orbital for later split #pragma omp critical { ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size(); - coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers + coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); } - // check norms for split - double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here + double wnorm = 0.0; int kstart = 0; - if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots + if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; for (int k = kstart; k < csize; k++) wnorm += std::real(rotatedCoeff(k, i) * std::conj(rotatedCoeff(k, i))); if (thres < wnorm or prec < 0) split_serial(orbiVec[i], n) = 1; else split_serial(orbiVec[i], n) = 0; } else { - ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used - split_serial(orbiVec[i], n) = 0; // do not split if parent does not need to be split + ix2coef[orbiVec[i]][node_ix] = max_n + 1; + split_serial(orbiVec[i], n) = 0; } } nodeReady[n] = 1; #pragma omp critical - { - // this ensures that rotatedCoeff is not deleted, when getting out of scope - rotatedCoeffVec.push_back(std::move(rotatedCoeff)); - } + { rotatedCoeffVec.push_back(std::move(rotatedCoeff)); 
} } - } else { // MPI case + } else { - // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits. - std::vector split(M, -1.0); // which orbitals need splitting (at a given node). For now double for compatibilty with bank - std::vector needsplit(M, 1.0); // which orbitals need splitting + std::vector split(M, -1.0); + std::vector needsplit(M, 1.0); BankAccount nodeSplits; - mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. + mpi::barrier(mpi::comm_wrk); ComplexMatrix coeffBlock(sizecoeff, N); - max_ix++; // largest node index + 1. to store rotated orbitals with different id + max_ix++; TaskManager tasks(max_n); for (int nn = 0; nn < max_n; nn++) { int n = tasks.next_task(); if (n < 0) break; double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; - // 4a) make list of orbitals that should split the parent node, i.e. include this node int parentid = parindexVec_ref[n]; if (parentid == -1) { - // root node, split if output needed for (int i = 0; i < M; i++) { split[i] = 1.0; } csize = sizecoeff; } else { - // note that it will wait until data is available nodeSplits.get_data(parentid, M, split.data()); csize = sizecoeffW; } std::vector orbiVec; std::vector orbjVec; - for (int i = 0; i < M; i++) { // loop over rotated orbitals - if (split[i] < 0.0) continue; // parent node has too small wavelets + for (int i = 0; i < M; i++) { + if (split[i] < 0.0) continue; orbiVec.push_back(i); } - // 4b) rotate this node - ComplexMatrix coeffBlock(csize, N); // largest possible used size + ComplexMatrix coeffBlock(csize, N); nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); - // chunk of U, with reorganized indices and separate blocks for real and imag: ComplexMatrix Un(orbjVec.size(), orbiVec.size()); ComplexMatrix 
rotatedCoeff(csize, orbiVec.size()); - for (int i = 0; i < orbiVec.size(); i++) { // loop over included rotated real and imag part of orbitals - for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts - Un(j, i) = U(orbjVec[j], orbiVec[i]); - } + for (int i = 0; i < orbiVec.size(); i++) { + for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); } } - // HERE IT HAPPENS - // TODO conjugaison - rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + rotatedCoeff.noalias() = coeffBlock * Un; - // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while - // in cache). - for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals - needsplit[orbiVec[i]] = -1.0; // default, do not split - // check if this node/orbital needs further refinement + for (int i = 0; i < orbiVec.size(); i++) { + needsplit[orbiVec[i]] = -1.0; double wnorm = 0.0; - int kwstart = csize - sizecoeffW; // do not include scaling + int kwstart = csize - sizecoeffW; for (int k = kwstart; k < csize; k++) wnorm += std::real(rotatedCoeff.col(i)[k] * std::conj(rotatedCoeff.col(i)[k])); if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0; nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data()); } nodeSplits.put_data(indexVec_ref[n], M, needsplit.data()); } - mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready + mpi::barrier(mpi::comm_wrk); } - // 5) reconstruct trees using rotated nodes. - - // only serial case can use OMP, because MPI cannot be used by threads if (serial) { - // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. 
(the main - // operation is writing the coefficient into the tree) - #pragma omp parallel for schedule(static) for (int j = 0; j < M; j++) { if (coeffpVec[j].size() == 0) continue; - Psi[j].alloc(1); // All data is stored in coeffpVec[j] + Psi[j].alloc(1); Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec); } - } else { // MPI case + } else { for (int j = 0; j < M; j++) { if (not mpi::my_func(j)) continue; - // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) - std::vector coeffpVec; // - std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx + std::vector coeffpVec; + std::map ix2coef; int ix = 0; - std::vector pointerstodelete; // list of temporary arrays to clean up + std::vector pointerstodelete; for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - ComplexDouble *dataVec; // will be allocated by bank + ComplexDouble *dataVec; nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); // unrotated nodes have been deleted - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once + assert(nodeidVec[n] - max_ix >= 0); + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); ix2coef[nodeidVec[n] - max_ix] = ix++; csize = sizecoeffW; if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff; - coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers + coeffpVec.push_back(&dataVec[shift]); shift += csize; } } @@ -1156,14 +994,6 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe } } -/** @brief Make a linear combination of functions - * - * Uses "local" representation: treats one node at a time. - * For each node, all functions are transformed simultaneously - * by a dense matrix multiplication. 
- * Phi input functions, Psi output functions - * - */ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) { if (Phi[0].iscomplex()) { @@ -1171,27 +1001,21 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector return; } - // The principle of this routine is that nodes are rotated one by one using matrix multiplication. - // The routine does avoid when possible to move data, but uses pointers and indices manipulation. - // MPI version does not use OMP yet, Serial version uses OMP - // size of input is N, size of output is M int N = Phi.size(); int M = Psi.size(); if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix"); if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix"); - // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff) FunctionTree<3> refTree(*Phi.vecMRA); mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); std::vector scalefac_ref; - std::vector coeffVec_ref; // not used! - std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes + std::vector coeffVec_ref; + std::vector indexVec_ref; + std::vector parindexVec_ref; int max_ix; - // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree); int max_n = indexVec_ref.size(); for (int i = 0; i < M; i++) { @@ -1199,27 +1023,20 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector Psi[i].func_ptr->data.iscomplex = 0; } - // 3) In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank + bool serial = mpi::wrk_size == 1; + BankAccount nodesPhi; + BankAccount nodesRotated; - bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch - BankAccount nodesPhi; // to put the original nodes - BankAccount nodesRotated; // to put the rotated nodes - - // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); // serialIx of the nodes - std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the - // orbital given the node index in the reference tree + std::vector> indexVec(N); + std::map> node2orbVec; + std::vector> orb2node(N); if (serial) { - // make list of all coefficients (coeffVec), and their reference indices (indexVec) - std::vector parindexVec; // serialIx of the parent nodes + std::vector parindexVec; std::vector scalefac; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -1227,181 +1044,142 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector node2orbVec[ix].push_back(j); } } - } else { // MPI case - // send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(Phi, refTree, nodesPhi); - mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. 
+ mpi::barrier(mpi::comm_wrk); } - // 4) rotate all the nodes - IntMatrix split_serial; // in the serial case all split are stored in one array - std::vector> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case - std::vector> ix2coef(M); // to find the index in for example rotCoeffVec[] corresponding to a serialIx - int csize; // size of the current coefficients (different for roots and branches) - std::vector rotatedCoeffVec; // just to ensure that the data from rotatedCoeff is not deleted, since we point to it. - // j indices are for unrotated orbitals, i indices are for rotated orbitals + IntMatrix split_serial; + std::vector> coeffpVec(M); + std::vector> ix2coef(M); + int csize; + std::vector rotatedCoeffVec; if (serial) { - std::map ix2coef_ref; // to find the index n corresponding to a serialIx - split_serial.resize(M, max_n); // not use in the MPI case + std::map ix2coef_ref; + split_serial.resize(M, max_n); for (int n = 0; n < max_n; n++) { - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + int node_ix = indexVec_ref[n]; ix2coef_ref[node_ix] = n; for (int i = 0; i < M; i++) split_serial(i, n) = 1; } - std::vector nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits) - - // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok. - // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding - // n is finished. 
+ std::vector nodeReady(max_n, 0); #pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { int csize; - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n - std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); + int node_ix = indexVec_ref[n]; + std::vector orbjVec; if (node2orbVec[node_ix].size() <= 0) continue; csize = sizecoeffW; - if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff + if (parindexVec_ref[n] < 0) csize = sizecoeff; - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int shift = sizecoeff - sizecoeffW; if (parindexVec_ref[n] < 0) shift = 0; DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVec[node_ix]) { int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbjVec.push_back(j); } - // 4b) make a list of rotated orbitals needed for this node - // OMP must wait until parent is ready while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) { #pragma omp flush }; std::vector orbiVec; - for (int i = 0; i < M; i++) { // loop over all rotated orbitals - if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets + for (int i = 0; i < M; i++) { + if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; orbiVec.push_back(i); } - // 4c) rotate this node - DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices - for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals + DoubleMatrix Un(orbjVec.size(), orbiVec.size()); + for (int i = 
0; i < orbiVec.size(); i++) { for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); } } DoubleMatrix rotatedCoeff(csize, orbiVec.size()); - // HERE IT HAPPENS! - rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + rotatedCoeff.noalias() = coeffBlock * Un; - // 4d) store and make rotated node pointers - // for now we allocate in buffer, in future could be directly allocated in the final trees double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; - // make all norms: for (int i = 0; i < orbiVec.size(); i++) { - // check if parent must be split if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) { - // mark this node for this orbital for later split #pragma omp critical { ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size(); - coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers + coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); } - // check norms for split - double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here + double wnorm = 0.0; int kstart = 0; - if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots + if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i); if (thres < wnorm or prec < 0) split_serial(orbiVec[i], n) = 1; else split_serial(orbiVec[i], n) = 0; } else { - ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used - split_serial(orbiVec[i], n) = 0; // do not split if parent does not need to be split + ix2coef[orbiVec[i]][node_ix] = max_n + 1; + split_serial(orbiVec[i], n) = 0; } } nodeReady[n] = 1; #pragma omp critical - { - // this ensures that rotatedCoeff is not deleted, when getting out of scope - rotatedCoeffVec.push_back(std::move(rotatedCoeff)); - } + { rotatedCoeffVec.push_back(std::move(rotatedCoeff)); } } - } else { // MPI case + } else 
{ - // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits. - std::vector split(M, -1.0); // which orbitals need splitting (at a given node). For now double for compatibilty with bank - std::vector needsplit(M, 1.0); // which orbitals need splitting + std::vector split(M, -1.0); + std::vector needsplit(M, 1.0); BankAccount nodeSplits; - mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. + mpi::barrier(mpi::comm_wrk); DoubleMatrix coeffBlock(sizecoeff, N); - max_ix++; // largest node index + 1. to store rotated orbitals with different id + max_ix++; TaskManager tasks(max_n); for (int nn = 0; nn < max_n; nn++) { int n = tasks.next_task(); if (n < 0) break; double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; - // 4a) make list of orbitals that should split the parent node, i.e. include this node int parentid = parindexVec_ref[n]; if (parentid == -1) { - // root node, split if output needed for (int i = 0; i < M; i++) { split[i] = 1.0; } csize = sizecoeff; } else { - // note that it will wait until data is available nodeSplits.get_data(parentid, M, split.data()); csize = sizecoeffW; } std::vector orbiVec; std::vector orbjVec; - for (int i = 0; i < M; i++) { // loop over rotated orbitals - if (split[i] < 0.0) continue; // parent node has too small wavelets + for (int i = 0; i < M; i++) { + if (split[i] < 0.0) continue; orbiVec.push_back(i); } - // 4b) rotate this node - DoubleMatrix coeffBlock(csize, N); // largest possible used size + DoubleMatrix coeffBlock(csize, N); nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); - // chunk of U, with reorganized indices and separate blocks for real and imag: DoubleMatrix Un(orbjVec.size(), orbiVec.size()); DoubleMatrix rotatedCoeff(csize, orbiVec.size()); - for 
(int i = 0; i < orbiVec.size(); i++) { // loop over included rotated real and imag part of orbitals - for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts - Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); - } + for (int i = 0; i < orbiVec.size(); i++) { + for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); } } - // HERE IT HAPPENS - rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + rotatedCoeff.noalias() = coeffBlock * Un; - // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while - // in cache). - for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals - needsplit[orbiVec[i]] = -1.0; // default, do not split - // check if this node/orbital needs further refinement + for (int i = 0; i < orbiVec.size(); i++) { + needsplit[orbiVec[i]] = -1.0; double wnorm = 0.0; - int kwstart = csize - sizecoeffW; // do not include scaling + int kwstart = csize - sizecoeffW; for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k]; if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0; nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data()); } nodeSplits.put_data(indexVec_ref[n], M, needsplit.data()); } - mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready + mpi::barrier(mpi::comm_wrk); } - // 5) reconstruct trees using rotated nodes. - - // only serial case can use OMP, because MPI cannot be used by threads if (serial) { - // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. 
(the main - // operation is writing the coefficient into the tree) - #pragma omp parallel for schedule(static) for (int j = 0; j < M; j++) { if (coeffpVec[j].size() == 0) continue; @@ -1410,28 +1188,27 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec); } - } else { // MPI case + } else { for (int j = 0; j < M; j++) { if (not mpi::my_func(j)) continue; - // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) - std::vector coeffpVec; // - std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx + std::vector coeffpVec; + std::map ix2coef; int ix = 0; - std::vector pointerstodelete; // list of temporary arrays to clean up + std::vector pointerstodelete; for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - double *dataVec; // will be allocated by bank + double *dataVec; nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); // unrotated nodes have been deleted - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once + assert(nodeidVec[n] - max_ix >= 0); + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); ix2coef[nodeidVec[n] - max_ix] = ix++; csize = sizecoeffW; if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff; - coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers + coeffpVec.push_back(&dataVec[shift]); shift += csize; } } @@ -1449,9 +1226,6 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec) { return; } -/** @brief Save all nodes in bank; identify them using serialIx from refTree - * shift is a shift applied in the id - */ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) { int sizecoeff = (1 << 
refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); @@ -1459,40 +1233,34 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount & std::vector coeffVec; std::vector coeffVec_cplx; std::vector scalefac; - std::vector indexVec; // SerialIx of the node in refOrb - std::vector parindexVec; // SerialIx of the parent node + std::vector indexVec; + std::vector parindexVec; int N = Phi.size(); int max_ix; for (int j = 0; j < N; j++) { if (not mpi::my_func(j)) continue; - // make vector with all coef address and their index in the union grid if (Phi[j].isreal()) { Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree); int max_n = indexVec.size(); - // send node coefs from Phi[j] to bank - // except for the root nodes, only wavelets are sent for (int i = 0; i < max_n; i++) { - if (indexVec[i] < 0) continue; // nodes that are not in refOrb + if (indexVec[i] < 0) continue; int csize = sizecoeffW; if (parindexVec[i] < 0) csize = sizecoeff; - if (sizes > 0) { // fixed size + if (sizes > 0) { account.put_nodedata(j, indexVec[i], sizes, coeffVec[i]); } else { account.put_nodedata(j, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize])); } } } - // Complex components if (Phi[j].iscomplex()) { Phi[j].complex().makeCoeffVector(coeffVec_cplx, indexVec, parindexVec, scalefac, max_ix, refTree); int max_n = indexVec.size(); - // send node coefs from Phi[j] to bank for (int i = 0; i < max_n; i++) { - if (indexVec[i] < 0) continue; // nodes that are not in refOrb - // NB: the identifier (indexVec[i]) must be shifted for not colliding with the nodes from the real part + if (indexVec[i] < 0) continue; int csize = sizecoeffW; if (parindexVec[i] < 0) csize = sizecoeff; - if (sizes > 0) { // fixed size + if (sizes > 0) { account.put_nodedata(j, indexVec[i], sizes, coeffVec_cplx[i]); } else { account.put_nodedata(j, indexVec[i], csize, &(coeffVec_cplx[i][sizecoeff - csize])); 
@@ -1502,21 +1270,10 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount & } } -/** @brief Multiply all orbitals with a function - * - * @param Phi: orbitals to multiply - * @param f : function to multiply - * - * Computes the product of each orbital with a function - * in parallel using a local representation. - * Input trees are extended by one scale at most. - */ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) { int N = Phi.size(); const int D = 3; - bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch - // 1a) extend grid where f is large (around nuclei) - // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree? + bool serial = mpi::wrk_size == 1; for (int i = 0; i < N; i++) { if (!mpi::my_func(i)) continue; @@ -1524,12 +1281,9 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f while (Phi[i].isreal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++; if (Phi[i].iscomplex()) MSG_ABORT("Not yet implemented"); irefine = 0; - // while (Phi[i].iscomplex() and irefine < nrefine and refine_grid(Phi[i].complex(), f) > 0) irefine++; } - // 1b) make union tree without coefficients FunctionTree refTree(*Phi.vecMRA); - // refine_grid(refTree, f); //to test mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int kp1 = refTree.getKp1(); @@ -1546,40 +1300,33 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f CompFunctionVector out(N); for (int i = 0; i < N; i++) { out[0] = Phi[i].paramCopy(); } if (not PsihasReIm[0] and not PsihasReIm[1]) { - return out; // do nothing + return out; } std::vector scalefac_ref; - std::vector coeffVec_ref; // not used! 
- std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes - std::vector *> refNodes; // pointers to nodes + std::vector coeffVec_ref; + std::vector indexVec_ref; + std::vector parindexVec_ref; + std::vector *> refNodes; int max_ix; - // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes); int max_n = indexVec_ref.size(); - std::map ix2n; // for a given serialIx, give index in vectors + std::map ix2n; for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn; - // 2a) send own nodes to bank, identifying them through the serialIx of refTree - BankAccount nodesPhi; // to put the original nodes - BankAccount nodesMultiplied; // to put the multiplied nodes + BankAccount nodesPhi; + BankAccount nodesMultiplied; - // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); // serialIx of the nodes - std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the - // orbital given the node index in the reference tree + std::vector> indexVec(N); + std::map> node2orbVec; + std::vector> orb2node(N); if (serial) { - // make list of all coefficients (coeffVec), and their reference indices (indexVec) - std::vector parindexVec; // serialIx of the parent nodes + std::vector parindexVec; std::vector scalefac; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid if (Phi[j].hasReal()) { Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -1589,7 +1336,6 @@ CompFunctionVector multiply(CompFunctionVector 
&Phi, RepresentableFunction<3> &f } if (Phi[j].hasImag()) { Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j + N]) { orb2node[j + N][ix] = orb_node_ix++; @@ -1600,19 +1346,16 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f } } else { save_nodes(Phi, refTree, nodesPhi, nCoefs); - mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. + mpi::barrier(mpi::comm_wrk); } - // 2b) save Func in bank and remove its coefficients if (Func != nullptr and !serial) { - // put Func in local representation if not already done if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); } } - // 3) mutiply for each node - std::vector> coeffpVec(N); // to put pointers to the multiplied coefficient for each orbital in serial case - std::vector multipliedCoeffVec; // just to ensure that the data from multipliedCoeff is not deleted, since we point to it. 
- std::vector> ix2coef(N); // to find the index in for example rotCoeffVec[] corresponding to a serialIx + std::vector> coeffpVec(N); + std::vector multipliedCoeffVec; + std::vector> ix2coef(N); DoubleVector NODEP = DoubleVector::Zero(nCoefs); DoubleVector NODEF = DoubleVector::Zero(nCoefs); @@ -1620,78 +1363,65 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f #pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { MWNode node(*(refNodes[n]), false); - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - - // 3a) make values for f at this node - // 3a1) get coordinates of quadrature points for this node - Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs); + int node_ix = indexVec_ref[n]; + Eigen::MatrixXd pts; double fval[nCoefs]; Coord r; double *originalCoef = nullptr; MWNode<3> *Fnode = nullptr; if (Func == nullptr) { - node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). + node.getExpandedChildPts(pts); for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? + for (int d = 0; d < D; d++) r[d] = pts(d, j); fval[j] = f.evalf(r); } } else { Fnode = Func->real().findNode(node.getNodeIndex()); if (Fnode == nullptr) { - node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). + node.getExpandedChildPts(pts); for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? 
+ for (int d = 0; d < D; d++) r[d] = pts(d, j); fval[j] = f.evalf(r); } } else { originalCoef = Fnode->getCoefs(); for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j]; - Fnode->attachCoefs(fval); // note that each thread has its own copy + Fnode->attachCoefs(fval); Fnode->mwTransform(Reconstruction); Fnode->cvTransform(Forward); } } DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size()); int i = 0; - // 3b) fetch all orbitals at this node - std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); - for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node + std::vector orbjVec; + for (int j : node2orbVec[node_ix]) { int orb_node_ix = orb2node[j][node_ix]; orbjVec.push_back(j); for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k]; - // 3c) transform to grid node.attachCoefs(&(multipliedCoeff(0, i))); node.mwTransform(Reconstruction); node.cvTransform(Forward); - // 3d) multiply - for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication? 
- // 3e) transform back to mw + for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; node.cvTransform(Backward); node.mwTransform(Compression); i++; } if (Func != nullptr and originalCoef != nullptr) { - // restablish original values Fnode->attachCoefs(originalCoef); } - // 3f) save multiplied nodes for (int i = 0; i < orbjVec.size(); i++) { #pragma omp critical { ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size(); - coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers + coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); } } #pragma omp critical - { - // this ensures that multipliedCoeff is not deleted, when getting out of scope - multipliedCoeffVec.push_back(std::move(multipliedCoeff)); - } - node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor + { multipliedCoeffVec.push_back(std::move(multipliedCoeff)); } + node.attachCoefs(nullptr); } } else { - // MPI int count1 = 0; int count2 = 0; TaskManager tasks(max_n); @@ -1699,70 +1429,55 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f int n = tasks.next_task(); if (n < 0) break; MWNode node(*(refNodes[n]), false); - // 3a) make values for f - // 3a1) get coordinates of quadrature points for this node - Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs); - node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). + Eigen::MatrixXd pts; + node.getExpandedChildPts(pts); double fval[nCoefs]; Coord r; MWNode Fnode(*(refNodes[n]), false); if (Func == nullptr) { for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? 
+ for (int d = 0; d < D; d++) r[d] = pts(d, j); fval[j] = f.evalf(r); } } else { int nIdx = Func->real().getIx(node.getNodeIndex()); count1++; if (nIdx < 0) { - // use the function f instead of Func count2++; for (int j = 0; j < nCoefs; j++) { for (int d = 0; d < D; d++) r[d] = pts(d, j); fval[j] = f.evalf(r); } } else { - Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank + Func->real().getNodeCoeff(nIdx, fval); Fnode.attachCoefs(fval); Fnode.mwTransform(Reconstruction); Fnode.cvTransform(Forward); } } - // 3b) fetch all orbitals at this node - DoubleMatrix coeffBlock(nCoefs, N); // largest possible used size + DoubleMatrix coeffBlock(nCoefs, N); std::vector orbjVec; nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size()); - // 3c) transform to grid - for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ? - // TODO: select only nodes that are end nodes? + for (int j = 0; j < orbjVec.size(); j++) { node.attachCoefs(coeffBlock.col(j).data()); node.mwTransform(Reconstruction); node.cvTransform(Forward); - // 3d) multiply double *coefs = node.getCoefs(); for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i]; - // 3e) transform back to mw node.cvTransform(Backward); node.mwTransform(Compression); - // 3f) save multiplied nodes nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs); } - node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor - Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor + node.attachCoefs(nullptr); + Fnode.attachCoefs(nullptr); } - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! 
+ mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); } - // 5) reconstruct trees using multiplied nodes. - - // only serial case can use OMP, because MPI cannot be used by threads if (serial) { - // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main - // operation is writing the coefficient into the tree) - #pragma omp parallel for schedule(static) for (int j = 0; j < N; j++) { if (j < N) { @@ -1770,7 +1485,6 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].alloc(1); out[j].real().clear(); out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy"); - // 6) reconstruct trees from end nodes out[j].real().mwTransform(BottomUp); out[j].real().calcSquareNorm(); } @@ -1787,23 +1501,22 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f } else { for (int j = 0; j < N; j++) { if (not mpi::my_func(j) and not all) continue; - // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) - std::vector coeffpVec; // - std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx in refTree + std::vector coeffpVec; + std::map ix2coef; int ix = 0; - std::vector pointerstodelete; // list of temporary arrays to clean up + std::vector pointerstodelete; for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - double *dataVec; // will be allocated by bank + double *dataVec; nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); // unmultiplied nodes have been deleted - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once + assert(nodeidVec[n] - max_ix >= 0); + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); ix2coef[nodeidVec[n] - max_ix] = ix++; - coeffpVec.push_back(&dataVec[shift]); // 
list of coeff pointers + coeffpVec.push_back(&dataVec[shift]); shift += nCoefs; } } @@ -1812,12 +1525,10 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].alloc(1); out[j].real().clear(); out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy"); - // 6) reconstruct trees from end nodes out[j].real().mwTransform(BottomUp); out[j].real().calcSquareNorm(); out[j].real().resetEndNodeTable(); - // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped - if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi + if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); } } else { if (Phi[j].hasImag()) { @@ -1826,7 +1537,6 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy"); out[j].imag().mwTransform(BottomUp); out[j].imag().calcSquareNorm(); - // out[j].imag().crop(prec, 1.0, false); if (nrefine > 0) Phi[j].imag().crop(prec, 1.0, false); } } @@ -1846,7 +1556,6 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) { int N = Bra.size(); ComplexVector result = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { - // The bra is sent to the owner of the ket if (my_func(Bra[i]) != my_func(Ket[i])) { MSG_ABORT("same indices should have same ownership"); } result[i] = dot(Bra[i], Ket[i]); if (not mrcpp::mpi::my_func(i)) Bra[i].free(); @@ -1855,68 +1564,45 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) { return result; } -/** @brief Compute Löwdin orthonormalization matrix - * - * @param Phi: orbitals to orthonomalize - * - * Computes the inverse square root of the orbital overlap matrix S^(-1/2) - */ ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi) { ComplexMatrix S_tilde = calc_overlap_matrix(Phi); ComplexMatrix S_m12 = math_utils::hermitian_matrix_pow(S_tilde, -1.0 / 2.0); return S_m12; } -/** @brief Orbital 
transformation out_j = sum_i inp_i*U_ij - * - * NOTE: OrbitalVector is considered a ROW vector, so rotation - * means matrix multiplication from the right - * - * MPI: Rank distribution of output vector is the same as input vector - * - */ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { int N = BraKet.size(); ComplexMatrix S = ComplexMatrix::Zero(N, N); DoubleMatrix Sreal = S.real(); MultiResolutionAnalysis<3> *mra = BraKet.vecMRA; - // 1) make union tree without coefficients - mrcpp::FunctionTree<3> refTree(*mra); + FunctionTree<3> refTree(*mra); mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); - // get a list of all nodes in union grid, as defined by their indices std::vector scalefac; std::vector coeffVec_ref; - std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes - int max_ix; // largest index value (not used here) + std::vector indexVec_ref; + std::vector parindexVec_ref; + int max_ix; refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree); int max_n = indexVec_ref.size(); - // only used for serial case: std::vector> coeffVec(N); - std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVec; + std::vector> orb2node(N); - bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + bool serial = mrcpp::mpi::wrk_size == 1; mrcpp::BankAccount nodesBraKet; - // In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank if (serial) { - // 2) make list of all coefficients, and their reference indices - // for different orbitals, indexVec will give the same index for the same node in space - std::vector parindexVec; // serialIx of the parent nodes - std::vector indexVec; // serialIx of the nodes + std::vector parindexVec; + std::vector indexVec; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid BraKet[j].complex().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2node[j][ix] = orb_node_ix++; @@ -1924,36 +1610,33 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { node2orbVec[ix].push_back(j); } } - } else { // MPI case - // 2) send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(BraKet, refTree, nodesBraKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); } - // 3) make dot product for all the nodes and accumulate into S int ibank = 0; #pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread + ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - std::vector orbVec; // identifies which orbitals use this node + int node_ix = indexVec_ref[n]; + std::vector orbVec; if (serial and node2orbVec[node_ix].size() <= 0) continue; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; - // In the serial case we copy the coeff coeffBlock. 
In the mpi case coeffBlock is provided by the bank if (serial) { - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int shift = sizecoeff - sizecoeffW; if (parindexVec_ref[n] < 0) shift = 0; ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVec[node_ix]) { int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbVec.push_back(j); @@ -1970,7 +1653,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { } } } - } else { // MPI case + } else { ComplexMatrix coeffBlock(csize, N); nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec); @@ -1999,13 +1682,11 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { for (int i = 0; i < N; i++) { for (int j = 0; j <= i; j++) { - if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri + if (i != j) S(j, i) = std::conj(S(i, j)); } } - // Assumes linearity: result is sum of all nodes contributions mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); - // multiply by CompFunction multiplicative factor ComplexVector Fac = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { @@ -2021,7 +1702,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { return S; } ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { - // NB: should be spinseparated at this point! 
if (BraKet[0].iscomplex()) { return calc_overlap_matrix_cplx(BraKet); } int N = BraKet.size(); @@ -2029,42 +1709,33 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { MultiResolutionAnalysis<3> *mra = BraKet.vecMRA; - // 1) make union tree without coefficients - mrcpp::FunctionTree<3> refTree(*mra); + FunctionTree<3> refTree(*mra); mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); - // get a list of all nodes in union grid, as defined by their indices std::vector scalefac; std::vector coeffVec_ref; - std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes - int max_ix; // largest index value (not used here) + std::vector indexVec_ref; + std::vector parindexVec_ref; + int max_ix; refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree); int max_n = indexVec_ref.size(); - // only used for serial case: std::vector> coeffVec(N); - std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVec; + std::vector> orb2node(N); - bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + bool serial = mrcpp::mpi::wrk_size == 1; mrcpp::BankAccount nodesBraKet; - // In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank if (serial) { - // 2) make list of all coefficients, and their reference indices - // for different orbitals, indexVec will give the same index for the same node in space - std::vector parindexVec; // serialIx of the parent nodes - std::vector indexVec; // serialIx of the nodes + std::vector parindexVec; + std::vector indexVec; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2node[j][ix] = orb_node_ix++; @@ -2072,36 +1743,33 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { node2orbVec[ix].push_back(j); } } - } else { // MPI case - // 2) send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(BraKet, refTree, nodesBraKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); } - // 3) make dot product for all the nodes and accumulate into S int ibank = 0; #pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread + ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - std::vector orbVec; // identifies which orbitals use this node + int node_ix = indexVec_ref[n]; + std::vector orbVec; if (serial and node2orbVec[node_ix].size() <= 0) continue; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; - // In the serial case we copy the coeff coeffBlock. 
In the mpi case coeffBlock is provided by the bank if (serial) { - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int shift = sizecoeff - sizecoeffW; if (parindexVec_ref[n] < 0) shift = 0; DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVec[node_ix]) { int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbVec.push_back(j); @@ -2118,7 +1786,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { } } } - } else { // MPI case + } else { DoubleMatrix coeffBlock(csize, N); nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec); @@ -2147,14 +1815,12 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { for (int i = 0; i < N; i++) { for (int j = 0; j <= i; j++) { - if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri + if (i != j) S(j, i) = std::conj(S(i, j)); } } - // Assumes linearity: result is sum of all nodes contributions mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); - // multiply by CompFunction multiplicative factor ComplexVector Fac = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { if (!mrcpp::mpi::my_func(BraKet[i])) continue; @@ -2168,16 +1834,11 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { return S; } -/** @brief Compute the overlap matrix S_ij = - * - * Will take the conjugate of bra before integrating - */ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket) { - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); bool braisreal = !Bra[0].iscomplex(); bool ketisreal = !Ket[0].iscomplex(); if (braisreal or ketisreal) { - // temporary solution: copy as complex trees if (braisreal) { for (int i = 0; i < Bra.size(); i++) { 
Bra[i].CompD[0]->CopyTreeToComplex(Bra[i].CompC[0]); @@ -2210,18 +1871,15 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk); - // 1) make union tree without coefficients for Bra (supposed smallest) - mrcpp::FunctionTree<3> refTree(*mra); + FunctionTree<3> refTree(*mra); mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk); - // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero. int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); - // get a list of all nodes in union grid, as defined by their indices std::vector coeffVec_ref; - std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes + std::vector indexVec_ref; + std::vector parindexVec_ref; std::vector scalefac; int max_ix; @@ -2229,32 +1887,22 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect int max_n = indexVec_ref.size(); max_ix++; - bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + bool serial = mrcpp::mpi::wrk_size == 1; - // only used for serial case: std::vector> coeffVecBra(N); - std::map> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVecBra; + std::vector> orb2nodeBra(N); std::vector> coeffVecKet(M); - std::map> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVecKet; + std::vector> orb2nodeKet(M); 
mrcpp::BankAccount nodesBra; mrcpp::BankAccount nodesKet; - // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank if (serial) { - // 2) make list of all coefficients, and their reference indices - // for different orbitals, indexVec will give the same index for the same node in space - // TODO? : do not copy coefficients, but use directly the pointers - // could OMP parallelize, but is fast anyway - std::vector parindexVec; // serialIx of the parent nodes - std::vector indexVec; // serialIx of the nodes + std::vector parindexVec; + std::vector indexVec; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid Bra[j].complex().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeBra[j][ix] = orb_node_ix++; @@ -2264,7 +1912,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } for (int j = 0; j < M; j++) { Ket[j].complex().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeKet[j][ix] = orb_node_ix++; @@ -2273,46 +1920,43 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } - } else { // MPI case - // 2) send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(Bra, refTree, nodesBra); save_nodes(Ket, refTree, nodesKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); } - // 3) make dot product for all the nodes and accumulate into S int totsiz = 0; int totget = 0; int mxtotsiz = 0; int ibank = 0; - // the omp crashes sometime for unknown reasons? 
#pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread + ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - std::vector orbVecBra; // identifies which Bra orbitals use this node - std::vector orbVecKet; // identifies which Ket orbitals use this node + std::vector orbVecBra; + std::vector orbVecKet; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; if (serial) { - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int node_ix = indexVec_ref[n]; + int shift = sizecoeff - sizecoeffW; ComplexMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size()); ComplexMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size()); if (parindexVec_ref[n] < 0) shift = 0; - for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVecBra[node_ix]) { int orb_node_ix = orb2nodeBra[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift]; orbVecBra.push_back(j); } - for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVecKet[node_ix]) { int orb_node_ix = orb2nodeKet[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift]; orbVecKet.push_back(j); @@ -2339,12 +1983,12 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } } - } else { // MPI case + } else { ComplexMatrix coeffBlockBra(csize, N); ComplexMatrix coeffBlockKet(csize, M); - nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts - nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); 
// get Ket parts + nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); + nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); totsiz += orbVecBra.size() * orbVecKet.size(); mxtotsiz += N * M; totget += orbVecBra.size() + orbVecKet.size(); @@ -2382,11 +2026,8 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } - // 4) collect results from all MPI. Linearity: result is sum of all node contributions - mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); - // multiply by CompFunction multiplicative factor ComplexVector FacBra = ComplexVector::Zero(N); ComplexVector FacKet = ComplexVector::Zero(M); for (int i = 0; i < N; i++) { @@ -2403,7 +2044,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect for (int j = 0; j < M; j++) { S(i, j) *= std::conj(FacBra[i]) * FacKet[j]; } } - // restore input if (braisreal) { for (int i = 0; i < Bra.size(); i++) { delete Bra[i].CompC[0]; @@ -2423,14 +2063,11 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect return S; } -/** @brief Compute the overlap matrix S_ij = - * - */ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) { if (Bra[0].iscomplex() or Ket[0].iscomplex()) { return calc_overlap_matrix_cplx(Bra, Ket); } - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); MultiResolutionAnalysis<3> *mra = Bra.vecMRA; @@ -2438,18 +2075,15 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K int M = Ket.size(); ComplexMatrix S = ComplexMatrix::Zero(N, M); - // 1) make union tree without coefficients for Bra (supposed smallest) - mrcpp::FunctionTree<3> refTree(*mra); + FunctionTree<3> refTree(*mra); mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk); - // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero. 
int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); - // get a list of all nodes in union grid, as defined by their indices std::vector coeffVec_ref; - std::vector indexVec_ref; // serialIx of the nodes - std::vector parindexVec_ref; // serialIx of the parent nodes + std::vector indexVec_ref; + std::vector parindexVec_ref; std::vector scalefac; int max_ix; @@ -2457,31 +2091,21 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K int max_n = indexVec_ref.size(); max_ix++; - bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + bool serial = mrcpp::mpi::wrk_size == 1; - // only used for serial case: std::vector> coeffVecBra(N); - std::map> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVecBra; + std::vector> orb2nodeBra(N); std::vector> coeffVecKet(M); - std::map> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node - std::vector> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in - // the orbital given the node index in the reference tree + std::map> node2orbVecKet; + std::vector> orb2nodeKet(M); mrcpp::BankAccount nodesBra; mrcpp::BankAccount nodesKet; - // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank if (serial) { - // 2) make list of all coefficients, and their reference indices - // for different orbitals, indexVec will give the same index for the same node in space - // TODO? 
: do not copy coefficients, but use directly the pointers - // could OMP parallelize, but is fast anyway - std::vector parindexVec; // serialIx of the parent nodes - std::vector indexVec; // serialIx of the nodes + std::vector parindexVec; + std::vector indexVec; for (int j = 0; j < N; j++) { - // make vector with all coef pointers and their indices in the union grid Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeBra[j][ix] = orb_node_ix++; @@ -2491,7 +2115,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } for (int j = 0; j < M; j++) { Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree); - // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeKet[j][ix] = orb_node_ix++; @@ -2500,45 +2123,42 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } - } else { // MPI case - // 2) send own nodes to bank, identifying them through the serialIx of refTree + } else { save_nodes(Bra, refTree, nodesBra); save_nodes(Ket, refTree, nodesKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); } - // 3) make dot product for all the nodes and accumulate into S int totsiz = 0; int totget = 0; int mxtotsiz = 0; int ibank = 0; #pragma omp parallel if (serial) { - DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread - // NB: dynamic does give strange errors? 
+ DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); #pragma omp for schedule(static) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - std::vector orbVecBra; // identifies which Bra orbitals use this node - std::vector orbVecKet; // identifies which Ket orbitals use this node + std::vector orbVecBra; + std::vector orbVecKet; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; if (serial) { - int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree - int shift = sizecoeff - sizecoeffW; // to copy only wavelet part + int node_ix = indexVec_ref[n]; + int shift = sizecoeff - sizecoeffW; DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size()); DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size()); if (parindexVec_ref[n] < 0) shift = 0; - for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVecBra[node_ix]) { int orb_node_ix = orb2nodeBra[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift]; orbVecBra.push_back(j); } - for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node + for (int j : node2orbVecKet[node_ix]) { int orb_node_ix = orb2nodeKet[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift]; orbVecKet.push_back(j); @@ -2557,12 +2177,12 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } } - } else { // MPI case + } else { DoubleMatrix coeffBlockBra(csize, N); DoubleMatrix coeffBlockKet(csize, M); - nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts - nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts + nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); + 
nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); totsiz += orbVecBra.size() * orbVecKet.size(); mxtotsiz += N * M; totget += orbVecBra.size() + orbVecKet.size(); @@ -2590,11 +2210,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } - // 4) collect results from all MPI. Linearity: result is sum of all node contributions - mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); - // multiply by CompFunction multiplicative factor ComplexVector FacBra = ComplexVector::Zero(N); ComplexVector FacKet = ComplexVector::Zero(M); for (int i = 0; i < N; i++) { @@ -2614,11 +2231,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K return S; } -/** @brief Orthogonalize the functions in Bra against all orbitals in Ket - * - */ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket) { - // TODO: generalize for cases where Ket functions are not orthogonal to each other? ComplexMatrix S = calc_overlap_matrix(Bra, Ket); int N = Bra.size(); int M = Ket.size(); @@ -2638,9 +2251,6 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket } } -/** @brief Orthogonalize the Bra against Ket - * - */ template void orthogonalize(double prec, CompFunction &Bra, CompFunction &Ket) { ComplexDouble overlap = dot(Bra, Ket); double sq_norm = Ket.getSquareNorm(); diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h index 2d2c5732f..562f68ed0 100644 --- a/src/utils/CompFunction.h +++ b/src/utils/CompFunction.h @@ -1,4 +1,53 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. 
+ * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see <https://www.gnu.org/licenses/>. + * + * For information on the complete list of contributors to MRCPP, see: + * <https://mrcpp.readthedocs.io/> + */ + #pragma once +/** + * @file + * @brief Composite multicomponent function types (real/complex) on MRCPP multiresolution trees. + * + * This header defines: + * - @ref CompFunctionData : POD metadata describing a multicomponent function. + * - @ref TreePtr : Small owning handle to up to four component trees (real/complex), + * with optional MPI shared-memory backing. + * - @ref CompFunction : A high-level wrapper that owns/addresses component trees, + * provides algebra (add/multiply/dot), projection, scaling, + * norms, and utilities. + * - Helpers for deep copies, linear combinations, products, projections, and orthogonalization. + * - @ref CompFunctionVector : Convenience container for 3D functions with utilities for + * rotations and overlap matrices. + * + * Components are stored as MRCPP @ref FunctionTree "FunctionTree" instances. + * Both real (`double`) and complex (`ComplexDouble`) representations are supported. + * + * Parallel notes: + * - If built with MPI and `is_shared == true`, @ref TreePtr can allocate backing storage + * in an MPI shared-memory window (per @ref mpi::comm_share) to reduce duplication. + * - Distribution utilities (e.g., @ref CompFunctionVector::distribute) use the runtime in + * `mpi_utils.h`.
+ */ #include "mpi_utils.h" #include "trees/FunctionTreeVector.h" @@ -7,46 +56,83 @@ using namespace Eigen; namespace mrcpp { +/** + * @brief Lightweight, trivially-copiable metadata for a multicomponent function. + * + * This POD accompanies the trees comprising a @ref CompFunction. It holds flags about + * real/complex storage, conjugation, component counts, user-defined labels, and on-disk + * layout hints. Arrays have fixed size (4) to simplify MPI packing and shallow copies. + * + * @tparam D Spatial dimension of the function (1–3 supported by MRCPP). + */ template struct CompFunctionData { - // additional data that describe the overall multicomponent function (defined by user): - // occupancy, quantum number, norm, etc. - int Ncomp{0}; // number of components defined - int rank{-1}; // rank (index) if part of a vector - int conj{0}; // soft conjugate (all components) - int CompFn1{0}; - int CompFn2{0}; - int isreal{0}; // trees are defined for T=double - int iscomplex{0}; // trees are defined for T=DoubleComplex - double CompFd1{0.0}; - double CompFd2{0.0}; - double CompFd3{0.0}; - // additional data that describe each component (defined by user): - // occupancy, quantum number, norm, etc. - // Note: defined with fixed size to ease copying and MPI send - int n1[4]{0, 0, 0, 0}; // 0: neutral. otherwise different values are orthogonal to each other (product = 0) - int n2[4]{0, 0, 0, 0}; - int n3[4]{0, 0, 0, 0}; - int n4[4]{0, 0, 0, 0}; - // multiplicative scalar for the function. So far only actively used to take care of imag factor in momentum operator. + /** @name Global function descriptors (user-defined) */ + ///@{ + int Ncomp{0}; ///< Number of components actually defined/allocated (0–4). + int rank{-1}; ///< Rank (index) inside an external vector or basis set. + int conj{0}; ///< Soft-conjugate flag for algebra (applied to all components). + int CompFn1{0}; ///< Free integer tag (user purpose). + int CompFn2{0}; ///< Free integer tag (user purpose). 
+ int isreal{0}; ///< 1 if component trees are real-valued (`T=double`). + int iscomplex{0}; ///< 1 if component trees are complex-valued (`T=ComplexDouble`). + double CompFd1{0.0};///< Free double tag (user purpose). + double CompFd2{0.0};///< Free double tag (user purpose). + double CompFd3{0.0};///< Free double tag (user purpose). + ///@} + + /** @name Per-component user metadata (fixed-size slots 0..3) */ + ///@{ + int n1[4]{0, 0, 0, 0}; ///< Integer label; unequal labels are treated orthogonal in some workflows. + int n2[4]{0, 0, 0, 0}; ///< Additional integer label (user purpose). + int n3[4]{0, 0, 0, 0}; ///< Additional integer label (user purpose). + int n4[4]{0, 0, 0, 0}; ///< Additional integer label (user purpose). + + /** + * @brief Per-component multiplicative factor. + * + * Often used to carry factors like *i* for momentum-like operators without + * explicitly modifying stored coefficients. + */ ComplexDouble c1[4]{{1.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}}; - double d1[4]{0.0, 0.0, 0.0, 0.0}; - double d2[4]{0.0, 0.0, 0.0, 0.0}; - double d3[4]{0.0, 0.0, 0.0, 0.0}; - // used for storage on disk - int type{0}; - int order{1}; - int scale{0}; - int depth{0}; - int boxes[3] = {0, 0, 0}; - int corner[3] = {0, 0, 0}; - - // used internally - int shared{0}; - int Nchunks[4]{0, 0, 0, 0}; // number of chunks of each component tree + + double d1[4]{0.0, 0.0, 0.0, 0.0}; ///< Free double tag (user purpose) per component. + double d2[4]{0.0, 0.0, 0.0, 0.0}; ///< Free double tag (user purpose) per component. + double d3[4]{0.0, 0.0, 0.0, 0.0}; ///< Free double tag (user purpose) per component. + ///@} + + /** @name On-disk/storage layout hints (optional) */ + ///@{ + int type{0}; ///< Serialization type code. + int order{1}; ///< Polynomial order or filter order hint. + int scale{0}; ///< Root scale / global scale offset. + int depth{0}; ///< Max depth. + int boxes[3] = {0, 0, 0}; ///< Root box tiling (D components used). 
+ int corner[3] = {0, 0, 0}; ///< Root spatial corner (D components used). + ///@} + + /** @name Internal runtime fields */ + ///@{ + int shared{0}; ///< 1 if this function uses shared-memory trees. + int Nchunks[4]{0, 0, 0, 0}; ///< Chunk count for each component (used for MPI shipping). + ///@} }; +/** + * @brief Owning pointer wrapper for up to four component trees (real and/or complex). + * + * Optionally allocates per-communicator shared memory windows when constructed + * with @p share = true and MPI shared memory is available (see @ref mpi::comm_share + * and @ref mpi::shared_memory_size). + * + * @tparam D Spatial dimension (1–3). + */ template class TreePtr final { public: + /** + * @brief Construct an empty handle. + * @param share If true and MPI is enabled, create shared-memory windows + * for real and complex storage sized per @ref mpi::shared_memory_size (MB). + */ explicit TreePtr(bool share) : shared_mem_real(nullptr) , shared_mem_cplx(nullptr) { @@ -62,6 +148,7 @@ template class TreePtr final { } } + /// Destructor: frees shared windows and any allocated trees. ~TreePtr() { if (this->shared_mem_real != nullptr) delete this->shared_mem_real; if (this->shared_mem_cplx != nullptr) delete this->shared_mem_cplx; @@ -72,132 +159,457 @@ template class TreePtr final { this->cplx[i] = nullptr; } } - CompFunctionData data; - int &Ncomp = data.Ncomp; // number of components defined - int &rank = data.rank; // rank (index) if part of a vector - int &conj = data.conj; // soft conjugate - int &isreal = data.isreal; // T=double - int &iscomplex = data.iscomplex; // T=DoubleComplex - int &share = data.shared; - int *Nchunks = data.Nchunks; + /** @name Metadata forwarding (aliases into @ref data) */ + ///@{ + CompFunctionData data; ///< Attached function metadata. + int &Ncomp = data.Ncomp; ///< Number of active components. + int &rank = data.rank; ///< External rank/index tag. + int &conj = data.conj; ///< Soft conjugation flag. 
+ int &isreal = data.isreal; ///< Real storage flag. + int &iscomplex = data.iscomplex; ///< Complex storage flag. + int &share = data.shared; ///< Shared-memory flag. + int *Nchunks = data.Nchunks; ///< Per-component chunk counts. + ///@} + + /** True if shared-memory windows were requested/allocated. */ bool is_shared = false; + friend class CompFunction; protected: - FunctionTree *real[4]; // Real function - FunctionTree *cplx[4]; // Complex function + /** Component trees (owned). Slots 0..3 are valid when @ref Ncomp > slot. */ + FunctionTree *real[4]; ///< Real components. + FunctionTree *cplx[4]; ///< Complex components. + + /** Optional backing shared-memory windows (one per value type). */ SharedMemory *shared_mem_real; SharedMemory *shared_mem_cplx; }; +/** + * @brief High-level multicomponent function wrapper on MRCPP trees. + * + * A @ref CompFunction manages up to four component trees, either real or complex, + * and exposes utilities such as allocation, projection, algebraic operations, + * normalization, conjugation, and data shipping. + * + * The class shares its internal state through a `std::shared_ptr>` + * to enable lightweight copies and move semantics, while retaining clear + * ownership of the underlying trees. + * + * @tparam D Spatial dimension (1–3). + */ template class CompFunction { public: + /** + * @name Construction + * Constructors optionally attach an @ref MultiResolutionAnalysis context, + * choose component count, and enable shared memory. + */ + ///@{ + /** @brief Construct empty function bound to @p mra (no components allocated). */ CompFunction(MultiResolutionAnalysis &mra); + /** @brief Construct unbound/empty function (MRA set later via allocation). */ CompFunction(); + /** @brief Construct with @p n1 components (0..4). */ CompFunction(int n1); + /** + * @brief Construct with @p n1 components and shared-memory preference. + * @param n1 Number of components to allocate (0..4). 
+ * @param share If true, try to use MPI shared memory for tree storage. + */ CompFunction(int n1, bool share); + /** + * @brief Construct from metadata @p indata. + * @param indata Initial metadata (copied). + * @param alloc If true, allocate trees according to @p indata.Ncomp. + */ CompFunction(const CompFunctionData &indata, bool alloc = false); + /** @brief Copy constructor: shares underlying pointer (trees may be deep-copied by helpers). */ CompFunction(const CompFunction &compfunc); + /** @brief Move constructor. */ CompFunction(CompFunction &&compfunc); + /** @brief Copy assignment. */ CompFunction &operator=(const CompFunction &compfunc); + ///@} + + /** Virtual destructor. Trees are owned by the shared @ref TreePtr and freed accordingly. */ virtual ~CompFunction() = default; - FunctionTree **CompD; // = func_ptr->real so that we can use name CompD instead of func_ptr.real - FunctionTree **CompC; // = func_ptr->cplx + /** @name Raw component access (compatibility aliases) */ + ///@{ + /** + * @brief Pointer-to-array of real component trees (alias of internal storage). + * @warning Valid only when @ref isreal() is true. + */ + FunctionTree **CompD; + /** + * @brief Pointer-to-array of complex component trees (alias of internal storage). + * @warning Valid only when @ref iscomplex() is true. + */ + FunctionTree **CompC; + ///@} + /** Optional human-readable name. */ std::string name; - // additional data that describe each component (defined by user): + /** @name Metadata accessors */ + ///@{ + /** @brief Return a copy of the current metadata. 
*/ CompFunctionData data() const { return func_ptr->data; } - int Ncomp() const { return func_ptr->data.Ncomp; } // number of components defined - int rank() const { return func_ptr->data.rank; } // rank (index) if part of a vector - int conj() const { return func_ptr->data.conj; } // soft conjugate - int isreal() const { return func_ptr->data.isreal; } // T=double - int iscomplex() const { return func_ptr->data.iscomplex; } // T=DoubleComplex - void defreal() { func_ptr->data.isreal = 1; } // define as real - void defcomplex() { func_ptr->data.iscomplex = 1; } // define as complex + int Ncomp() const { return func_ptr->data.Ncomp; } ///< Number of components. + int rank() const { return func_ptr->data.rank; } ///< External index/rank. + int conj() const { return func_ptr->data.conj; } ///< Soft conjugation flag. + int isreal() const { return func_ptr->data.isreal; } ///< Real storage flag. + int iscomplex() const { return func_ptr->data.iscomplex; } ///< Complex storage flag. + ///@} + + /** @name Mutators for storage type flags */ + ///@{ + /** @brief Declare that this function stores real-valued components. */ + void defreal() { func_ptr->data.isreal = 1; } + /** @brief Declare that this function stores complex-valued components. */ + void defcomplex() { func_ptr->data.iscomplex = 1; } + ///@} + + /** @return 1 if using shared-memory storage. */ int share() const { return func_ptr->data.shared; } - int *Nchunks() const { return func_ptr->data.Nchunks; } // number of chunks of each component tree + /** @return Per-component chunk counts (used for MPI shipping). */ + int *Nchunks() const { return func_ptr->data.Nchunks; } + + /** + * @brief Copy metadata and optionally allocate components (without copying tree data). + * @param alloc If true, allocate tree containers for the copied component count. + * @return A new @ref CompFunction sharing no nodes/coefficients with the source. 
+ */ CompFunction paramCopy(bool alloc = false) const; + + /** + * @brief Integrate the function over the domain. + * @return Complex integral (real-only functions return real part in `.real()`). + */ ComplexDouble integrate() const; + + /** + * @brief L2 norm of the function. + * @return \f$\|f\|_2\f$ as a double. + */ double norm() const; + + /** + * @brief Square L2 norm of the function. + * @return \f$\|f\|_2^2\f$ as a double. + */ double getSquareNorm() const; + + /** + * @brief Allocate @p nalloc component trees. + * @param nalloc Number of components (0..4). Existing components preserved if possible. + * @param zero If true, initialize coefficients to zero. + */ void alloc(int nalloc = 1, bool zero = true); - void alloc_comp(int i = 0); // allocate one specific component + + /** + * @brief Allocate a single component tree. + * @param i Component index (0..3). + */ + void alloc_comp(int i = 0); + + /** + * @brief Attach an externally created real tree as component @p i. + * @param tree Ownership is transferred to this object. + * @param i Component index (0..3). + */ void setReal(FunctionTree *tree, int i = 0); + + /** + * @brief Attach an externally created complex tree as component @p i. + * @copydetails setReal + */ void setCplx(FunctionTree *tree, int i = 0); + + /** @brief Set/get external rank/index label. */ void setRank(int i) { func_ptr->rank = i; }; const int getRank() const { return func_ptr->rank; }; + + /** + * @brief In-place linear update: @f$f \gets f + c \, g@f$. + * @param c Complex scalar. + * @param inp Addend function (components must be layout-compatible). + */ void add(ComplexDouble c, CompFunction inp); + /** + * @brief Remove coefficients/nodes below precision @p prec. + * @param prec Relative (or absolute) precision threshold. + * @return Number of removed nodes or a non-negative status. + */ int crop(double prec); + + /** + * @brief Multiply the entire function by a complex scalar in-place. + * @param c Complex factor. 
+ */ void rescale(ComplexDouble c); + + /** + * @brief Release all component trees and reset to empty. + * + * Metadata is preserved unless tied to tree content. + */ void free(); + + /** @return Total memory footprint of nodes (bytes or implementation-defined units). */ int getSizeNodes() const; + + /** @return Total number of nodes across all component trees. */ int getNNodes() const; + + /** @brief Flush cached MRA-level data (filters, norms) from component trees. */ void flushMRAData(); + + /** @brief Flush cached function-level data (aux norms, temporaries). */ void flushFuncData(); + + /** @brief Snapshot of the current function metadata (same as @ref data()). */ CompFunctionData getFuncData() const; + + /** @name Component accessors (non-const/const). */ + ///@{ FunctionTree &real(int i = 0); FunctionTree &complex(int i = 0); const FunctionTree &real(int i = 0) const; const FunctionTree &complex(int i = 0) const; + ///@} - // NB: All below should be revised. Now only for backwards compatibility to ComplexFunction class - - void free(int type) { free(); } - bool hasReal() const { return isreal(); } - bool hasImag() const { return iscomplex(); } - bool isShared() const { return share(); } - bool conjugate() const { return conj(); } + /** @name Backwards-compatibility helpers (legacy ComplexFunction interface) */ + ///@{ + void free(int type) { free(); } ///< Ignored @p type; frees all. + bool hasReal() const { return isreal(); } ///< True if real storage is active. + bool hasImag() const { return iscomplex(); } ///< True if complex storage is active. + bool isShared() const { return share(); } ///< True if shared-memory is active. + bool conjugate() const { return conj(); } ///< True if conjugation is requested. + /** @brief Apply Hermitian adjoint (conjugation + operator-specific flips as implemented). 
*/ void dagger(); - FunctionTree &imag(int i = 0); // does not make sense now - const FunctionTree &imag(int i = 0) const; // does not make sense now + /** @brief Legacy imaginary-component accessor kept for backwards compatibility; previously annotated "does not make sense now". */ + FunctionTree &imag(int i = 0); + /** @brief Const overload of the legacy imag() accessor; previously annotated "does not make sense now". */ + const FunctionTree &imag(int i = 0) const; + ///@} + + /** @brief Shared state (trees + metadata). */ std::shared_ptr> func_ptr; }; +/** @name Helpers: copying and algebra on @ref CompFunction + * Functions operate componentwise on underlying trees and obey precision controls. + */ +///@{ +/** + * @brief Ensure @p out is complex-valued, copying/embedding a real @p inp if needed. + * @tparam D Dimension. + */ template void CopyToComplex(CompFunction &out, const CompFunction &inp); + +/** @brief Deep-copy @p inp into *@p out (allocate if needed). */ template void deep_copy(CompFunction *out, const CompFunction &inp); +/** @brief Deep-copy @p inp into @p out (allocate if needed). */ template void deep_copy(CompFunction &out, const CompFunction &inp); -template void add(CompFunction &out, ComplexDouble a, CompFunction inp_a, ComplexDouble b, CompFunction inp_b, double prec, bool conjugate = false); -template void linear_combination(CompFunction &out, const std::vector &c, std::vector> &inp, double prec, bool conjugate = false); -template void multiply(CompFunction &out, CompFunction inp_a, CompFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); + +/** + * @brief Compute @f$out = a \, inp\_a + b \, inp\_b@f$ with adaptive precision. + * @param prec Target precision controlling refinement/cropping. + * @param conjugate If true, apply soft conjugation to inputs as required.
+ */ template -void multiply(double prec, CompFunction &out, double coef, CompFunction inp_a, CompFunction inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); +void add(CompFunction &out, ComplexDouble a, CompFunction inp_a, + ComplexDouble b, CompFunction inp_b, double prec, bool conjugate = false); + +/** + * @brief Linear combination of many inputs: @f$out = \sum_k c_k \, inp_k@f$. + * @param c Coefficients (size must match @p inp). + * @param inp Input functions (modified only for temporary workspace). + * @param prec Target precision. + * @param conjugate Whether to conjugate inputs (soft). + */ +template +void linear_combination(CompFunction &out, const std::vector &c, + std::vector> &inp, double prec, bool conjugate = false); + +/** + * @brief Pointwise product: @f$out = inp\_a \cdot inp\_b@f$ with refinement control. + * @param prec Target precision (relative by default). + * @param absPrec If true, treat @p prec as absolute precision. + * @param useMaxNorms If true, use max norms in error control heuristics. + * @param conjugate If true, apply soft conjugation to first factor. + */ +template +void multiply(CompFunction &out, CompFunction inp_a, CompFunction inp_b, + double prec, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); + +/** + * @brief Scaled product with loop control: @f$out = coef \cdot inp\_a \cdot inp\_b@f$. + * @param maxIter Limit iterative refinement steps (-1 for default). + * @copydetails multiply(CompFunction&,CompFunction,CompFunction,double,bool,bool,bool) + */ +template +void multiply(double prec, CompFunction &out, double coef, + CompFunction inp_a, CompFunction inp_b, int maxIter = -1, + bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); + +/** @brief Density from a (possibly complex) function: @f$out = |inp|^2@f$. 
*/ template void make_density(CompFunction &out, CompFunction inp, double prec); -template void multiply(CompFunction &out, CompFunction inp_a, CompFunction inp_b, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); -template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine = 0, bool conjugate = false); -template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine = 0, bool conjugate = false); -template void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, double prec, int nrefine = 0, bool conjugate = false); -template void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, double prec, int nrefine = 0, bool conjugate = false); + +/** @overload */ +template +void multiply(CompFunction &out, CompFunction inp_a, CompFunction inp_b, + bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); + +/** @brief Multiply by an analytic representable real function @p f. */ +template +void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, + double prec, int nrefine = 0, bool conjugate = false); + +/** @brief Multiply by an analytic representable complex function @p f. */ +template +void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, + double prec, int nrefine = 0, bool conjugate = false); + +/** @brief Multiply a single tree by a representable real function. */ +template +void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, + double prec, int nrefine = 0, bool conjugate = false); + +/** @brief Multiply a single tree by a representable complex function. */ +template +void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, + double prec, int nrefine = 0, bool conjugate = false); + +/** + * @brief Complex inner product @f$\langle bra \,|\, ket \rangle@f$. 
+ * @return Complex inner product consistent with MRCPP normalization. + */ template ComplexDouble dot(CompFunction bra, CompFunction ket); + +/** + * @brief Node-wise norm dot helper (diagnostics / preconditioners). + * @return Real value summarizing node-level contributions. + */ template double node_norm_dot(CompFunction bra, CompFunction ket); +///@} + +/** @name Projection helpers (3D overloads and templated D) + * Project analytic functions onto the multiresolution basis. + */ +///@{ +/** + * @brief Project a real-valued lambda/function @p f onto @p out. + * @param prec Target precision. + */ void project(CompFunction<3> &out, std::function &r)> f, double prec); -void project_real(CompFunction<3> &out, std::function &r)> f, double prec); //overload of project is not always recognized by the compiler +/** @brief Real-valued projection (explicit name to avoid overload ambiguities on some compilers). */ +void project_real(CompFunction<3> &out, std::function &r)> f, double prec); +/** @brief Project a complex-valued lambda/function @p f onto @p out. */ void project(CompFunction<3> &out, std::function &r)> f, double prec); -void project_cplx(CompFunction<3> &out, std::function &r)> f, double prec); //overload of project is not always recognized by the compiler +/** @brief Complex-valued projection (explicit name to avoid overload ambiguities). */ +void project_cplx(CompFunction<3> &out, std::function &r)> f, double prec); + +/** @brief Project a representable real function onto @p out. */ template void project(CompFunction &out, RepresentableFunction &f, double prec); +/** @brief Project a representable complex function onto @p out. */ template void project(CompFunction &out, RepresentableFunction &f, double prec); +///@} + +/** + * @brief Orthogonalize @p Bra against @p Ket to precision @p prec (Gram–Schmidt-like). + * @param prec Target precision controlling projection refinement and cropping.
+ */ template void orthogonalize(double prec, CompFunction &Bra, CompFunction &Ket); +/** + * @brief Convenience container for 3D composite functions with shared MRA. + * + * Provides utilities for distribution and linear-algebra operations across + * the vector (e.g., rotations and overlap matrices). + */ class CompFunctionVector : public std::vector> { public: + /** @brief Construct a vector with @p N default-initialized functions. */ CompFunctionVector(int N = 0); + + /** @brief Common MRA pointer for all entries (optional but recommended). */ MultiResolutionAnalysis<3> *vecMRA; + + /** + * @brief Distribute internal storage across MPI workers (when enabled). + * + * Typically assigns component ownership/ranks and updates metadata so that + * subsequent parallel operations (add/multiply/dot) can proceed efficiently. + */ void distribute(); }; +/** @name Vector-level linear algebra and IO utilities */ +///@{ +/** + * @brief Apply a unitary (or general) complex rotation @p U in-place: @f$\Phi \gets \Phi U@f$. + * @param prec Optional precision for intermediate truncations (-1 to keep current). + */ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec = -1.0); + +/** + * @brief Apply a rotation @p U producing @p Psi: @f$\Psi \gets \Phi U@f$. + * @param prec Optional precision for intermediate truncations (-1 to keep current). + */ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0); -void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1); -CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, CompFunction<3> *Func = nullptr, int nrefine = 1, bool all = false); + +/** + * @brief Store per-node coefficient blocks of @p Phi into @p account. + * @param refTree Reference tree defining the union grid/blocking. + * @param sizes Optional fixed block size; -1 to auto-size. 
+ */ +void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, + BankAccount &account, int sizes = -1); + +/** + * @brief Multiply a vector of functions by a representable function @p f. + * @param prec Target precision (-1 to inherit default). + * @param Func Optional workspace function (reused). + * @param nrefine Number of refinement passes (>=0). + * @param all If true, apply to all components; else honor component flags. + * @return Result vector (same size as @p Phi). + */ +CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, + double prec = -1.0, CompFunction<3> *Func = nullptr, + int nrefine = 1, bool all = false); + +/** + * @brief Set a library-global default MRA used by convenience constructors. + * @param MRA Non-owning pointer (caller keeps it alive). + */ void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA); + +/** + * @brief Vectorized inner products: returns @f$\langle Bra_i \,|\, Ket_i \rangle@f$ for all i. + */ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket); + +/** @brief Compute the (symmetric) Löwdin overlap matrix @f$S@f$ for @p Phi. */ ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi); + +/** @brief Overlap matrix of a single set against itself. */ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet); + +/** @brief Overlap matrix between two sets @p Bra and @p Ket. */ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket); + +/** + * @brief Orthogonalize the functions in @p Bra against all functions in @p Ket to precision @p prec. + * @param prec Precision target (relative unless the implementation states otherwise).
+ */ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket); +///@} -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/Plotter.cpp b/src/utils/Plotter.cpp index c29b3ee2e..524bd3631 100644 --- a/src/utils/Plotter.cpp +++ b/src/utils/Plotter.cpp @@ -33,10 +33,6 @@ using namespace Eigen; namespace mrcpp { -/** @returns New Plotter object - * - * @param[in] o: Plot origin, default `(0, 0, ... , 0)` - */ template Plotter::Plotter(const Coord &o) : O(o) { @@ -46,49 +42,20 @@ Plotter::Plotter(const Coord &o) setSuffix(Plotter::Grid, ".grid"); } -/** @brief Set file extension for output file - * - * @param[in] t: Plot type (`Plotter::Line`, `::Surface`, `::Cube`, `::Grid`) - * @param[in] s: Extension string, default `.line`, `.surf`, `.cube`, `.grid` - * - * @details The file name you decide for the output will get a predefined - * suffix that differentiates between different types of plot. - */ template void Plotter::setSuffix(int t, const std::string &s) { this->suffix.insert(std::pair(t, s)); } -/** @brief Set the point of origin for the plot - * - * @param[in] o: Plot origin, default `(0, 0, ... , 0)` - */ template void Plotter::setOrigin(const Coord &o) { this->O = o; } -/** @brief Set boundary vectors A, B and C for the plot - * - * @param[in] a: A vector - * @param[in] b: B vector - * @param[in] c: C vector - */ template void Plotter::setRange(const Coord &a, const Coord &b, const Coord &c) { this->A = a; this->B = b; this->C = c; } -/** @brief Grid plot of a MWTree - * - * @param[in] tree: MWTree to plot - * @param[in] fname: File name for output, without extension - * - * @details Writes a file named fname + file extension (".grid" as default) - * to be read by geomview to visualize the grid (of endNodes) where the - * multiresolution function is defined. In MPI, each process will write a - * separate file, and will print only nodes owned by itself (pluss the - * rootNodes). 
- */ template void Plotter::gridPlot(const MWTree &tree, const std::string &fname) { println(20, "----------Grid Plot-----------"); std::stringstream file; @@ -99,21 +66,11 @@ template void Plotter::gridPlot(const MWTree &tr printout(20, std::endl); } -/** @brief Parametric plot of a function - * - * @param[in] npts: Number of points along A - * @param[in] func: Function to plot - * @param[in] fname: File name for output, without extension - * - * @details Plots the function func parametrically with npts[0] along the - * vector A starting from the origin O to a file named fname + file extension - * (".line" as default). - */ template void Plotter::linePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "----------Line Plot-----------"); std::stringstream file; file << fname << this->suffix[Plotter::Line]; - if (verifyRange(1)) { // Verifies only A vector + if (verifyRange(1)) { Eigen::MatrixXd coords = calcLineCoordinates(npts[0]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -125,21 +82,11 @@ template void Plotter::linePlot(const std::array void Plotter::surfPlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "--------Surface Plot----------"); std::stringstream file; file << fname << this->suffix[Plotter::Surface]; - if (verifyRange(2)) { // Verifies A and B vectors + if (verifyRange(2)) { Eigen::MatrixXd coords = calcSurfCoordinates(npts[0], npts[1]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -151,22 +98,11 @@ template void Plotter::surfPlot(const std::array void Plotter::cubePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "----------Cube Plot-----------"); std::stringstream file; file << fname << this->suffix[Plotter::Cube]; - if (verifyRange(3)) { // Verifies A, B and C vectors + if (verifyRange(3)) { Eigen::MatrixXd coords = 
calcCubeCoordinates(npts[0], npts[1], npts[2]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -178,11 +114,6 @@ template void Plotter::cubePlot(const std::array Eigen::MatrixXd Plotter::calcLineCoordinates(int pts_a) const { MatrixXd coords; if (pts_a > 0) { @@ -197,11 +128,6 @@ template Eigen::MatrixXd Plotter::calcLineCoordinates( return coords; } -/** @brief Calculating coordinates to be evaluated - * - * @details Generating a vector of equidistant coordinates that makes up the - * area spanned by vectors A and B in D dimensions, starting from the origin O. - */ template Eigen::MatrixXd Plotter::calcSurfCoordinates(int pts_a, int pts_b) const { if (D < 2) MSG_ERROR("Cannot surfPlot less than 2D"); @@ -225,12 +151,6 @@ template Eigen::MatrixXd Plotter::calcSurfCoordinates( return coords; } -/** @brief Calculating coordinates to be evaluated - * - * @details Generating a vector of equidistant coordinates that makes up the - * volume spanned by vectors A, B and C in D dimensions, starting from - * the origin O. - */ template Eigen::MatrixXd Plotter::calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const { if (D < 3) MSG_ERROR("Cannot cubePlot less than 3D function"); @@ -257,12 +177,6 @@ template Eigen::MatrixXd Plotter::calcCubeCoordinates( return coords; } -/** @brief Evaluating a function in a set of predfined coordinates - * - * @details Given that the set of coordinates ("coords") has been calculated, - * this routine evaluates the function in these points and stores the results - * in the vector "values". 
- */ template Eigen::Matrix Plotter::evaluateFunction(const RepresentableFunction &func, const Eigen::MatrixXd &coords) const { auto npts = coords.rows(); if (npts == 0) MSG_ERROR("Empty coordinates"); @@ -276,13 +190,6 @@ template Eigen::Matrix Plotter:: return values; } -/** @brief Writing plot data to file - * - * @details This will write the contents of the "coords" matrix along with the - * function values to the file stream fout. File will contain on each line the - * point number (between 0 and nPoints), coordinates 1 through D and the - * function value. - */ template void Plotter::writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix &values) { if (coords.rows() != values.size()) INVALID_ARG_ABORT; std::ofstream &o = *this->fout; @@ -296,25 +203,18 @@ template void Plotter::writeData(const Eigen::MatrixXd } } -// Specialized for D=3 below template void Plotter::writeCube(const std::array &npts, const Eigen::Matrix &values) { NOT_IMPLEMENTED_ABORT } -// Specialized for D=3 below template void Plotter::writeNodeGrid(const MWNode &node, const std::string &color) { NOT_IMPLEMENTED_ABORT } -// Specialized for D=3 below template void Plotter::writeGrid(const MWTree &tree) { NOT_IMPLEMENTED_ABORT } -/** @brief Opening file for output - * - * @details Opens a file output stream fout for file named fname. - */ template void Plotter::openPlot(const std::string &fname) { if (fname.empty()) { if (this->fout == nullptr) { @@ -335,20 +235,11 @@ template void Plotter::openPlot(const std::string &fna } } -/** @brief Closing file - * - * @details Closes the file output stream fout. - */ template void Plotter::closePlot() { if (this->fout != nullptr) this->fout->close(); this->fout = nullptr; } -/** @brief Writing plot data to file - * - * @details This will write a cube file (readable by blob) of the function - * values previously calculated (the "values" vector). 
- */ template <> void Plotter<3>::writeCube(const std::array &npts, const Eigen::VectorXd &values) { std::ofstream &o = *this->fout; @@ -362,35 +253,30 @@ template <> void Plotter<3>::writeCube(const std::array &npts, const Eig o.setf(std::ios::scientific); o.precision(6); - // Origin o << std::setw(5) << 0; o << std::setw(15) << this->O[0]; o << std::setw(15) << this->O[1]; o << std::setw(15) << this->O[2] << std::endl; - // Vector A o << std::setw(5) << npts[0]; o << std::setw(15) << a[0]; o << std::setw(15) << a[1]; o << std::setw(15) << a[2] << std::endl; - // Vector B o << std::setw(5) << npts[1]; o << std::setw(15) << b[0]; o << std::setw(15) << b[1]; o << std::setw(15) << b[2] << std::endl; - // Vector C o << std::setw(5) << npts[2]; o << std::setw(15) << c[0]; o << std::setw(15) << c[1]; o << std::setw(15) << c[2] << std::endl; - // Function values o.precision(4); for (int n = 0; n < values.size(); n++) { o << std::setw(12) << values[n]; - if (n % 6 == 5) o << std::endl; // Line break after 6 values + if (n % 6 == 5) o << std::endl; } } @@ -419,12 +305,6 @@ template <> void Plotter<3>::writeNodeGrid(const MWNode<3> &node, const std::str << origin[0] << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] + length << color << std::endl; } -/** @brief Writing grid data to file - * - * @details This will write a grid file (readable by geomview) of the grid - * (of endNodes) where the multiresolution function is defined. Currently - * only working in 3D. 
- */ template <> void Plotter<3>::writeGrid(const MWTree<3> &tree) { std::ostream &o = *this->fout; o << "CQUAD" << std::endl; @@ -441,9 +321,7 @@ template <> void Plotter<3>::writeGrid(const MWTree<3> &tree) { } } -/** @brief Checks the validity of the plotting range */ template bool Plotter::verifyRange(int dim) const { - auto is_len_zero = [](Coord vec) { double vec_sq = 0.0; for (auto d = 0; d < D; d++) vec_sq += vec[d] * vec[d]; @@ -462,7 +340,6 @@ template bool Plotter::verifyRange(int dim) const { return true; } -/** @brief Compute step length to cover vector with `pts` points, including edges */ template Coord Plotter::calcStep(const Coord &vec, int pts) const { Coord step; for (auto d = 0; d < D; d++) step[d] = vec[d] / (pts - 1.0); diff --git a/src/utils/Plotter.h b/src/utils/Plotter.h index 9612dedec..b7cd635b8 100644 --- a/src/utils/Plotter.h +++ b/src/utils/Plotter.h @@ -24,6 +24,19 @@ */ #pragma once +/** + * @file + * @brief Plotting utilities for MRCPP functions and trees. + * + * This header declares a lightweight plotting helper that samples + * multivariate functions (or visualizes trees) on simple, equidistant + * grids derived from user-provided span vectors. It supports 1D (line), + * 2D (surface), and 3D (cube) outputs and can also dump tree grids. + * + * The plotting domain is parameterized by an origin @p O and up to three + * span vectors @p A, @p B, @p C (not required to be orthogonal). For an + * overview of the sampling conventions, see @ref mrcpp::Plotter. + */ #include @@ -36,68 +49,256 @@ namespace mrcpp { -/** @class Plotter +/** + * @class Plotter + * @tparam D Spatial dimension of the *function* being sampled (1–3). + * @tparam T Scalar type of the function values (e.g., double, ComplexDouble). * - * @brief Class for plotting multivariate functions + * @brief Sample multivariate functions on equidistant grids and write results. 
* - * This class will generate an equidistant grid in one (line), two (surf) - * or three (cube) dimensions, and subsequently evaluate the function on - * this grid. + * ### Domain definition + * The sampling region is specified by: + * - Origin **O** + * - Span vectors **A**, **B**, **C** * - * The grid is generated from the vectors A, B and C, relative to the origin O: - * - a linePlot will plot the line spanned by A, starting from O - * - a surfPlot will plot the area spanned by A and B, starting from O - * - a cubePlot will plot the volume spanned by A, B and C, starting from O + * The actual plot type determines how these are used: + * - **Line plot**: points along **A** starting at **O** + * - **Surface plot**: a 2D lattice in the parallelogram spanned by **A** and **B** + * - **Cube plot**: a 3D lattice in the parallelotope spanned by **A**, **B**, **C** * - * The vectors A, B and C do not necessarily have to be orthogonal. + * None of **A**, **B**, **C** need to be orthogonal. * - * The parameter `D` refers to the dimension of the _function_, not the - * dimension of the plot. + * ### Output + * This class writes simple text files (one value per line or a cube-like block) + * suitable for quick inspection or feeding into downstream visualization tools. + * Grid export for trees writes a mesh for node boxes (D=3). * + * @note The template parameter @p D reflects the *intrinsic* dimensionality of + * the function/tree. A 3D function can still be sampled along a 1D line using + * @ref linePlot by providing only **A** (and leaving **B**, **C** unused). */ - template class Plotter { public: + /** + * @brief Construct a plotter with a given origin. + * @param o Plot origin (defaults to the zero vector). + */ explicit Plotter(const Coord &o = {}); virtual ~Plotter() = default; + /** + * @brief Set the filename suffix for a plot type. + * + * @param t Plot type key (see @ref type). + * @param s Suffix including the dot (e.g., ".line", ".surf"). 
+ * + * @details The suffix is appended to the base filename passed to the + * plotting routines. Defaults are set in the constructor. + */ void setSuffix(int t, const std::string &s); + + /** + * @brief Set the plot origin. + * @param o New origin **O**. + */ void setOrigin(const Coord &o); + + /** + * @brief Define (or update) the plot span vectors. + * + * @param a Vector **A** (required). + * @param b Vector **B** (optional; used for 2D/3D sampling). + * @param c Vector **C** (optional; used for 3D sampling). + * + * @note Vectors are not required to be orthogonal. The number of points + * per span is given at call time for each plot type. + */ void setRange(const Coord &a, const Coord &b = {}, const Coord &c = {}); + /** + * @brief Write a grid visualization of a function tree. + * + * @param tree Multiresolution tree to visualize. + * @param fname Base filename (suffix for @ref Grid is appended). + * + * @details Exports the end-node grid (and roots) of @p tree. The concrete + * output is implementation-dependent; for D=3 it is a geomview-friendly + * mesh (see the .grid writer in the implementation). + * + * @warning Meaningful only when the implementation supports the given @p D. + */ void gridPlot(const MWTree &tree, const std::string &fname); + + /** + * @brief Sample a function along a line @f$ O + s\,A @f$. + * + * @param npts Number of equidistant points along **A**; use @c {N}. + * @param func Function to evaluate. + * @param fname Base filename (suffix for @ref Line is appended). + * + * @details Generates @c npts[0] positions: + * @f$ r_i = O + \frac{i}{N-1} A,\ i=0,\dots,N-1 @f$ + * and writes coordinates and values in text form. + * + * @pre @ref setRange must have set a non-zero **A**; otherwise this call + * will fail validation. + */ void linePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname); + + /** + * @brief Sample a function on a surface spanned by **A**, **B**. 
+ * + * @param npts Number of points along {**A**, **B**}; use @c {Na, Nb}. + * @param func Function to evaluate. + * @param fname Base filename (suffix for @ref Surface is appended). + * + * @details Generates positions + * @f$ r_{ij} = O + \frac{i}{N_a-1}A + \frac{j}{N_b-1}B @f$ and writes + * coordinates and values in text form. + * + * @pre @ref setRange must have set non-zero **A** and **B** when used in 2D/3D. + */ void surfPlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname); + + /** + * @brief Sample a function in a 3D block spanned by **A**, **B**, **C**. + * + * @param npts Number of points along {**A**, **B**, **C**}; use @c {Na, Nb, Nc}. + * @param func Function to evaluate. + * @param fname Base filename (suffix for @ref Cube is appended). + * + * @details Generates positions + * @f$ r_{ijk} = O + \frac{i}{N_a-1}A + \frac{j}{N_b-1}B + \frac{k}{N_c-1}C @f$ + * and writes values in a simple cube-like format suitable for volumetric viewers. + * + * @pre @ref setRange must have set non-zero **A**, **B**, **C** when used in 3D. + */ void cubePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname); - enum type { Line, Surface, Cube, Grid }; + /** + * @brief Plot type selector used for file suffix mapping. + */ + enum type { Line, /**< 1D sampling along **A** */ + Surface, /**< 2D sampling on **A**–**B** lattice */ + Cube, /**< 3D sampling on **A**–**B**–**C** lattice */ + Grid /**< Grid/mesh export for trees */ + }; protected: - Coord O{}; // Plot origin - Coord A{}; // Vector for line plot - Coord B{}; // Vector for surf plot - Coord C{}; // Vector for cube plot - std::ofstream fstrm{}; - std::ofstream *fout{nullptr}; - std::map suffix{}; + /** @name Plot domain and output state */ + ///@{ + Coord O{}; ///< Plot origin. + Coord A{}; ///< Span vector for line plots and first lattice axis. + Coord B{}; ///< Span vector for surface/cube plots (second axis). 
+ Coord C{}; ///< Span vector for cube plots (third axis). + std::ofstream fstrm{}; ///< Owned output stream storage. + std::ofstream *fout{nullptr}; ///< Active output stream (points to @ref fstrm). + std::map suffix{}; ///< Per-type filename suffix map. + ///@} + /** + * @brief Compute step size to place @p pts samples along a span. + * @param vec Span vector (**A**, **B**, or **C**). + * @param pts Number of points along the span, end points included (>= 2). + * @return Component-wise step @f$ \frac{\text{vec}}{pts-1} @f$. + * + * @note There is no guard for @p pts == 1: the division by @f$ pts-1 = 0 @f$ + * yields a non-finite step, so callers must request at least two points. + */ Coord calcStep(const Coord &vec, int pts) const; + + /** + * @brief Generate equidistant coordinates for a line plot. + * @param pts_a Points along **A**. + * @return Matrix of size (pts_a × D) with row-wise coordinates. + */ Eigen::MatrixXd calcLineCoordinates(int pts_a) const; + + /** + * @brief Generate equidistant coordinates for a surface plot. + * @param pts_a Points along **A**. + * @param pts_b Points along **B**. + * @return Matrix of size ((pts_a*pts_b) × D) with row-wise coordinates. + */ Eigen::MatrixXd calcSurfCoordinates(int pts_a, int pts_b) const; + + /** + * @brief Generate equidistant coordinates for a cube plot. + * @param pts_a Points along **A**. + * @param pts_b Points along **B**. + * @param pts_c Points along **C**. + * @return Matrix of size ((pts_a*pts_b*pts_c) × D) with row-wise coordinates. + */ Eigen::MatrixXd calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const; + /** + * @brief Evaluate a representable function on given coordinates. + * @param func Function to sample. + * @param coords Matrix of coordinates, one point per row (N × D). + * @return Column vector of values (size N). + * + * @note The implementation may use parallel evaluation (e.g., OpenMP) + * when available at build time.
+ */ Eigen::Matrix evaluateFunction(const RepresentableFunction &func, const Eigen::MatrixXd &coords) const; + /** + * @brief Write coordinates and values as text rows. + * @param coords Coordinates, one point per row (N × D). + * @param values Column vector (size N). + * + * @details Each output line contains D coordinates followed by the + * function value. Floating-point formatting is implementation-defined. + */ void writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix &values); + + /** + * @brief Write volumetric (cube) data. + * @param npts Lattice sizes {Na, Nb, Nc}. + * @param values Column vector of length Na*Nb*Nc. + * + * @details The generic implementation aborts via NOT_IMPLEMENTED_ABORT; only + * the D=3 specialization writes an actual cube file. + */ virtual void writeCube(const std::array &npts, const Eigen::Matrix &values); + /** + * @brief Write a grid/mesh representation of a tree. + * @param tree Tree whose node boxes should be exported. + * + * @details Only the D=3 specialization is implemented (geomview-like mesh); + * for other dimensionalities the call aborts via NOT_IMPLEMENTED_ABORT. + */ void writeGrid(const MWTree &tree); + + /** + * @brief Emit a single node's box edges/faces to the active stream. + * @param node Node to visualize. + * @param color Renderer-dependent color string (implementation-defined). + */ void writeNodeGrid(const MWNode &node, const std::string &color); private: + /** + * @brief Validate that required span vectors are non-zero. + * @param dim Required plot dimensionality (1, 2, or 3). + * @return @c true if all needed spans (**A**, **B**, **C**) are non-zero. + */ bool verifyRange(int dim) const; + + /** + * @brief Open/prepare the output file stream. + * @param fname Base filename plus suffix (if non-empty). + * + * @details If @p fname is empty, reuses the current stream; otherwise + * closes any previous stream and opens the new one.
+ */ void openPlot(const std::string &fname); + + /** + * @brief Close the output stream if open and reset state. + */ void closePlot(); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/Printer.cpp b/src/utils/Printer.cpp index 24585feb3..c7fd7637e 100644 --- a/src/utils/Printer.cpp +++ b/src/utils/Printer.cpp @@ -45,19 +45,6 @@ int Printer::printRank = 0; int Printer::printSize = 1; std::ostream *Printer::out = &std::cout; -/** @brief Initialize print environment - * - * @param[in] level: Desired print level of output - * @param[in] rank: MPI rank of current process - * @param[in] size: Total number of MPI processes - * @param[in] file: File name for printed output, will get "-{rank}.out" extension - * - * @details Only print statements with lower printlevel than level will be - * displayed. If a file name is given, each process will print to a separate - * file called {file}-{rank}.out. If no file name is given, only processes - * which initialize the printer with rank=0 will print to screen. By default, - * all ranks initialize with rank=0, i.e. all ranks print to screen by default. 
- */ void Printer::init(int level, int rank, int size, const char *file) { printLevel = level; printRank = rank; @@ -75,19 +62,13 @@ void Printer::init(int level, int rank, int size, const char *file) { } } else { if (printRank > 0) { - setPrintLevel(-1); // Higher ranks be quiet + setPrintLevel(-1); } } setScientific(); } -/** @brief Print information about MRCPP version and build configuration - * - * @param[in] level: Activation level for print statement - * - **/ void print::environment(int level) { - // clang-format off if (level > Printer::getPrintLevel()) return; printout(level, std::endl); @@ -124,28 +105,14 @@ void print::environment(int level) { printout(level, std::endl); print::separator(level, '-', 2); - // clang-format on } -/** @brief Print a full line of a single character - * - * @param[in] level: Activation level for print statement - * @param[in] c: Character to fill the line - * @param[in] newlines: Number of extra newlines - */ void print::separator(int level, const char &c, int newlines) { if (level > Printer::getPrintLevel()) return; printout(level, std::string(Printer::getWidth(), c)); for (int i = 0; i <= newlines; i++) printout(level, std::endl); } -/** @brief Print a text header - * - * @param[in] level: Activation level for print statement - * @param[in] txt: Header text - * @param[in] newlines: Number of extra newlines - * @param[in] c: Character to fill the first line - */ void print::header(int level, const std::string &txt, int newlines, const char &c) { if (level > Printer::getPrintLevel()) return; @@ -156,13 +123,6 @@ void print::header(int level, const std::string &txt, int newlines, const char & print::separator(level, '-', newlines); } -/** @brief Print a footer with elapsed wall time - * - * @param[in] level: Activation level for print statement - * @param[in] t: Timer to be evaluated - * @param[in] newlines: Number of extra newlines - * @param[in] c: Character to fill the last line - */ void print::footer(int level, const 
Timer &t, int newlines, const char &c) { if (level > Printer::getPrintLevel()) return; @@ -180,14 +140,6 @@ void print::footer(int level, const Timer &t, int newlines, const char &c) { print::separator(level, c, newlines); } -/** @brief Print a scalar value, including unit - * - * @param[in] level: Activation level for print statement - * @param[in] v: Scalar value to print - * @param[in] unit: Unit of scalar - * @param[in] p: Floating point precision - * @param[in] sci: Use scientific notation - */ void print::value(int level, const std::string &txt, double v, const std::string &unit, int p, bool sci) { if (level > Printer::getPrintLevel()) return; @@ -210,14 +162,6 @@ void print::value(int level, const std::string &txt, double v, const std::string println(level, o.str()); } -/** @brief Print tree parameters (nodes, memory) and wall time - * - * @param[in] level: Activation level for print statement - * @param[in] txt: Text string - * @param[in] n: Number of tree nodes - * @param[in] m: Memory usage (kB) - * @param[in] t: Wall time (sec) - */ void print::tree(int level, const std::string &txt, int n, int m, double t) { if (level > Printer::getPrintLevel()) return; @@ -258,13 +202,6 @@ void print::tree(int level, const std::string &txt, int n, int m, double t) { println(level, o.str()); } -/** @brief Print tree parameters (nodes, memory) and wall time - * - * @param[in] level: Activation level for print statement - * @param[in] txt: Text string - * @param[in] tree: Tree to be printed - * @param[in] timer: Timer to be evaluated - */ template void print::tree(int level, const std::string &txt, const MWTree &tree, const Timer &timer) { if (level > Printer::getPrintLevel()) return; @@ -274,12 +211,6 @@ template void print::tree(int level, const std::string &txt, print::tree(level, txt, n, m, t); } -/** @brief Print elapsed time from Timer - * - * @param[in] level: Activation level for print statement - * @param[in] txt: Text string - * @param[in] timer: Timer to be 
evaluated - */ void print::time(int level, const std::string &txt, const Timer &timer) { if (level > Printer::getPrintLevel()) return; @@ -297,11 +228,6 @@ void print::time(int level, const std::string &txt, const Timer &timer) { println(level, o.str()); } -/** @brief Print the current memory usage of this process, obtained from system - * - * @param[in] level: Activation level for print statement - * @param[in] txt: Text string - */ void print::memory(int level, const std::string &txt) { if (level > Printer::getPrintLevel()) return; @@ -334,4 +260,4 @@ template void print::tree<1>(int level, const std::string &txt, const MWTree<1> template void print::tree<2>(int level, const std::string &txt, const MWTree<2> &tree, const Timer &timer); template void print::tree<3>(int level, const std::string &txt, const MWTree<3> &tree, const Timer &timer); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/Printer.h b/src/utils/Printer.h index c021155e8..32f5f63a1 100644 --- a/src/utils/Printer.h +++ b/src/utils/Printer.h @@ -23,12 +23,30 @@ * */ -/** \file Printer.h - * Collection of assertions and a standard error/warn/info/debug - * message interface. +#pragma once +/** + * @file + * @brief Lightweight, level-based printing, diagnostics, and assertion helpers. + * + * This header provides: + * - A singleton-style @ref mrcpp::Printer that controls global print level, + * numeric formatting (scientific vs. fixed), width and precision, and the + * active output stream (stdout or a per-rank file). + * - A small set of convenience functions in @ref mrcpp::print for consistent, + * nicely formatted environment, headers/footers, timers, memory usage, and + * tree statistics. + * - A family of macros (e.g. @ref println, @ref MSG_ABORT) to emit messages + * with source context and optional termination semantics. + * + * ### Print-level convention + * Every printing API takes an integer *level*. 
Output is produced iff + * `level <= Printer::getPrintLevel()`. Internal MRCPP prints use \>= 10, + * leaving levels 0–9 available for host/user code control. * + * @warning The facilities herein are process-local; when used in MPI programs, + * each rank will emit messages independently unless explicitly gated + * by rank logic in the caller. */ -#pragma once #include #include @@ -41,33 +59,56 @@ namespace mrcpp { class Timer; template class MWTree; -/** @class Printer +/** + * @class Printer + * @brief Process-local controller for formatted, level-gated output. * - * @brief Convenience class to handle printed output + * @details + * The Printer class is used in a singleton-like fashion via static methods. + * Call init once near program start (optionally per MPI rank) to set: + * - global print level (messages with a higher level are suppressed), + * - rank/size metadata (used to route output), + * - destination stream (stdout or a per-rank file). * - * @details The ``Printer`` singleton class holds the current state of the print - * environment. All ``mrcpp::print`` functions, as well as the ``println`` and - * ``printout`` macros, take an integer print level as first argument. When the - * global ``mrcpp::Printer`` is initialized with a given print level, only print - * statements with a *lower* print level will be displayed. All internal printing - * in MRCPP is at print level 10 or higher, so there is some flexibility left - * (levels 0 through 9) for adjusting the print volume within the host program. + * After initialization, helper functions/macros (see mrcpp::print and the + * macros below) consult the configured level and stream. 
* + * @par Example + * @code{.cpp} + * // Rank 0 prints to screen, others remain silent: + * Printer::init(5, rank, size); + * println(2, "Hello at level 2"); // shown: 2 <= print level (5) + * println(8, "Debug details..."); // suppressed: 8 > print level (5) + * @endcode */ - class Printer final { public: + /** + * @brief Initialize printing environment. + * + * @param level Maximum verbosity to emit (inclusive). + * @param rank MPI rank of this process (default 0). + * @param size MPI world size (default 1). + * @param file Optional base filename. If provided and @p size>1, + * output is written to "{file}-{rank}.out"; otherwise to + * "{file}.out". If null, output goes to stdout. When + * @p file is null and @p rank>0, printing is disabled + * by setting the print level to -1. + * + * @note Also sets scientific notation as the default numeric format. + */ static void init(int level = 0, int rank = 0, int size = 1, const char *file = nullptr); - /** @brief Use scientific floating point notation, e.g. 1.0e-2 */ + /** @brief Use scientific floating-point notation (e.g., 1.23e-2). */ static void setScientific() { *out << std::scientific; } - /** @brief Use fixed floating point notation, e.g. 0.01 */ + /** @brief Use fixed floating-point notation (e.g., 0.0123). */ static void setFixed() { *out << std::fixed; } - /** @brief Set new line width for printed output - * @param[in] i: New width (number of characters) - * @returns Old width (number of characters) + /** + * @brief Set line width for formatted helpers. + * @param i New width in characters. + * @return Previous width. */ static int setWidth(int i) { int oldWidth = printWidth; @@ -75,9 +116,10 @@ class Printer final { return oldWidth; } - /** @brief Set new precision for floating point output - * @param[in] i: New precision (digits after comma) - * @returns Old precision (digits after comma) + /** + * @brief Set precision for floating-point output. + * @param i Digits after the decimal point. + * @return Previous precision.
*/ static int setPrecision(int i) { int oldPrec = printPrec; @@ -86,9 +128,10 @@ class Printer final { return oldPrec; } - /** @brief Set new print level - * @param[in] i: New print level - * @returns Old print level + /** + * @brief Set the global print level threshold. + * @param i New level; only messages with level <= @p i are printed. + * @return Previous print level. */ static int setPrintLevel(int i) { int oldLevel = printLevel; @@ -96,116 +139,251 @@ class Printer final { return oldLevel; } - /** @returns Current line width (number of characters) */ + /** @return Current line width (characters). */ static int getWidth() { return printWidth; } - /** @returns Current precision for floating point output (digits after comma) */ + /** @return Current floating-point precision (digits after decimal). */ static int getPrecision() { return printPrec; } - /** @returns Current print level */ + /** @return Current global print level threshold. */ static int getPrintLevel() { return printLevel; } + /** + * @brief Active output stream (stdout or a file). + * @warning Pointer is owned externally; @ref init manages it appropriately. + */ static std::ostream *out; private: - static int printWidth; - static int printLevel; - static int printPrec; - static int printRank; - static int printSize; + static int printWidth; ///< Line width used by @ref mrcpp::print helpers. + static int printLevel; ///< Global verbosity threshold. + static int printPrec; ///< Floating-point precision (digits). + static int printRank; ///< MPI rank (for routing decisions). + static int printSize; ///< MPI world size. - Printer() = delete; // No instances of this class + Printer() = delete; ///< Non-instantiable utility. + /// @brief Redirect all output to @p o (used internally by @ref init). static void setOutputStream(std::ostream &o) { out = &o; } }; +/** + * @namespace mrcpp::print + * @brief Nicely formatted, level-aware printing helpers. 
+ * + * These helpers produce standardized, aligned, and labeled output for common + * diagnostics: environment summaries, section headers/footers, timers, memory + * usage, and tree statistics. Each function is level-gated via + * @ref Printer::getPrintLevel(). + */ namespace print { + +/** + * @brief Print MRCPP and build environment information. + * @param level Activation level threshold. + * + * @details Includes library version, Git metadata, linear algebra backend, + * and parallelization mode (MPI/OpenMP). + */ void environment(int level); + +/** + * @brief Print a full separator line composed of @p c characters. + * @param level Activation level. + * @param c Filler character (e.g., '-', '='). + * @param newlines Number of extra trailing blank lines (default 0). + */ void separator(int level, const char &c, int newlines = 0); + +/** + * @brief Print a centered header with a framed title. + * @param level Activation level. + * @param txt Header text. + * @param newlines Extra trailing blank lines (default 0). + * @param c Filler character for the frame (default '='). + */ void header(int level, const std::string &txt, int newlines = 0, const char &c = '='); + +/** + * @brief Print a footer containing elapsed wall time and a closing frame. + * @param level Activation level. + * @param timer Timer whose @ref Timer::elapsed is shown. + * @param newlines Extra trailing blank lines (default 0). + * @param c Filler character for the closing line (default '='). + */ void footer(int level, const Timer &timer, int newlines = 0, const char &c = '='); + +/** + * @brief Print current process memory usage. + * @param level Activation level. + * @param txt Label to show before the value (aligned). + */ void memory(int level, const std::string &txt); + +/** + * @brief Print a labeled scalar with unit in aligned columns. + * @param level Activation level. + * @param txt Label. + * @param v Value. + * @param unit Unit string (optional). 
+ * @param p Precision; if negative uses @ref Printer::getPrecision. + * @param sci Scientific formatting when true, fixed when false. + */ void value(int level, const std::string &txt, double v, const std::string &unit = "", int p = -1, bool sci = true); + +/** + * @brief Print an elapsed time value from a @ref Timer. + * @param level Activation level. + * @param txt Label. + * @param timer Timer whose @ref Timer::elapsed is shown. + */ void time(int level, const std::string &txt, const Timer &timer); + +/** + * @brief Print tree statistics (nodes, memory, wall time). + * @param level Activation level. + * @param txt Label/section title. + * @param n Number of nodes. + * @param m Memory usage in kB. + * @param t Elapsed wall time in seconds. + */ void tree(int level, const std::string &txt, int n, int m, double t); -template void tree(int level, const std::string &txt, const MWTree &tree, const Timer &timer); + +/** + * @brief Print tree statistics extracted from an @ref MWTree and a @ref Timer. + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param level Activation level. + * @param txt Label/section title. + * @param tree Tree whose node/memory info is reported. + * @param timer Timer whose elapsed time is reported. + */ +template +void tree(int level, const std::string &txt, const MWTree &tree, const Timer &timer); + } // namespace print -// clang-format off +// ============================================================================ +// Macros : level-aware printing and diagnostics +// ============================================================================ -/** @brief Print text at the given print level, with newline */ -#define println(level, STR) \ +/** + * @def println(level, STR) + * @brief Print text followed by newline if @p level is enabled. 
+ */ +#define println(level, STR) \ { if (level <= mrcpp::Printer::getPrintLevel()) *mrcpp::Printer::out << STR << std::endl; } -/** @brief Print text at the given print level, without newline */ -#define printout(level, STR) \ +/** + * @def printout(level, STR) + * @brief Print text without newline if @p level is enabled. + */ +#define printout(level, STR) \ { if (level <= mrcpp::Printer::getPrintLevel()) *mrcpp::Printer::out << STR; } -/** @brief Print info message */ -#define MSG_INFO(STR) \ - { \ - *mrcpp::Printer::out << "Info: " << __FILE__ << ": " << __func__ << "(), line " << __LINE__ << ": " << STR << std::endl; \ +/** + * @def MSG_INFO(STR) + * @brief Emit an informational message with source location. + */ +#define MSG_INFO(STR) \ + { \ + *mrcpp::Printer::out << "Info: " << __FILE__ << ": " << __func__ \ + << "(), line " << __LINE__ << ": " << STR << std::endl; \ } -/** @brief Print warning message */ -#define MSG_WARN(STR) \ - { \ - *mrcpp::Printer::out << "Warning: " << __func__ << "(), line " << __LINE__ << ": " << STR << std::endl; \ +/** + * @def MSG_WARN(STR) + * @brief Emit a warning message with source location. + */ +#define MSG_WARN(STR) \ + { \ + *mrcpp::Printer::out << "Warning: " << __func__ << "(), line " << __LINE__ \ + << ": " << STR << std::endl; \ } -/** @brief Print error message, no abort*/ -#define MSG_ERROR(STR) \ - { \ - *mrcpp::Printer::out << "Error: " << __func__ << "(), line " << __LINE__ << ": " << STR << std::endl; \ +/** + * @def MSG_ERROR(STR) + * @brief Emit a non-fatal error message with source location. 
+ */ +#define MSG_ERROR(STR) \ + { \ + *mrcpp::Printer::out << "Error: " << __func__ << "(), line " << __LINE__ \ + << ": " << STR << std::endl; \ } -/** @brief Print error message and abort */ -#define MSG_ABORT(STR) \ - { \ - *mrcpp::Printer::out << "Error: " << __FILE__ << ": " << __func__ << "(), line " << __LINE__ << ": " << STR << std::endl; \ - abort(); \ +/** + * @def MSG_ABORT(STR) + * @brief Emit an error message with source location and abort the process. + */ +#define MSG_ABORT(STR) \ + { \ + *mrcpp::Printer::out << "Error: " << __FILE__ << ": " << __func__ \ + << "(), line " << __LINE__ << ": " << STR << std::endl; \ + abort(); \ } -/** @brief You have passed an invalid argument to a function */ -#define INVALID_ARG_ABORT \ - { \ - *mrcpp::Printer::out << "Error, invalid argument passed: " << __func__ << "(), line " << __LINE__ << std::endl; \ - abort(); \ +/** + * @def INVALID_ARG_ABORT + * @brief Abort with a standardized message for invalid arguments. + */ +#define INVALID_ARG_ABORT \ + { \ + *mrcpp::Printer::out << "Error, invalid argument passed: " << __func__ \ + << "(), line " << __LINE__ << std::endl; \ + abort(); \ } -/** @brief You have reached a point in the code that is not yet implemented */ -#define NOT_IMPLEMENTED_ABORT \ - { \ - *mrcpp::Printer::out << "Error: Not implemented, " << __FILE__ ", " << __func__ << "(), line " << __LINE__ << std::endl; \ - abort(); \ +/** + * @def NOT_IMPLEMENTED_ABORT + * @brief Abort with a standardized message for unimplemented code paths. 
+ */ +#define NOT_IMPLEMENTED_ABORT \ + { \ + *mrcpp::Printer::out << "Error: Not implemented, " << __FILE__ << ", " << __func__ \ + << "(), line " << __LINE__ << std::endl; \ + abort(); \ } -/** @brief You have reached a point that should not be reached, bug or inconsistency */ -#define NOT_REACHED_ABORT \ - { \ - *mrcpp::Printer::out << "Error, should not be reached: " << __func__ << "(), line " << __LINE__ << std::endl; \ - abort(); \ +/** + * @def NOT_REACHED_ABORT + * @brief Abort for code paths that should be logically unreachable. + */ +#define NOT_REACHED_ABORT \ + { \ + *mrcpp::Printer::out << "Error, should not be reached: " << __func__ \ + << "(), line " << __LINE__ << std::endl; \ + abort(); \ } -/** @brief You have reached an experimental part of the code, results cannot be trusted */ -#define NEEDS_TESTING \ - { \ - static bool __once = true; \ - if (__once) { \ - __once = false; \ - *mrcpp::Printer::out << "NEEDS TESTING: " << __FILE__ << ", " << __func__ << "(), line " << __LINE__ << std::endl; \ - } \ +/** + * @def NEEDS_TESTING + * @brief Emit a one-time notice that a code path is experimental. + * + * Prints exactly once per process at the first hit, then stays quiet. + */ +#define NEEDS_TESTING \ + { \ + static bool __once = true; \ + if (__once) { \ + __once = false; \ + *mrcpp::Printer::out << "NEEDS TESTING: " << __FILE__ << ", " << __func__ \ + << "(), line " << __LINE__ << std::endl; \ + } \ } -/** @brief You have hit a known bug that is yet to be fixed, results cannot be trusted */ -#define NEEDS_FIX(STR) \ - { \ - static bool __once = true; \ - if (__once) { \ - __once = false; \ - *mrcpp::Printer::out << "NEEDS FIX: " << __FILE__ << ", " << __func__ << "(), line " << __LINE__ << ": " << STR << std::endl; \ \ - } \ +/** + * @def NEEDS_FIX(STR) + * @brief Emit a one-time notice that a known issue affects this code path. + * @param STR Short description of the known issue. 
+ */ +#define NEEDS_FIX(STR) \ + { \ + static bool __once = true; \ + if (__once) { \ + __once = false; \ + *mrcpp::Printer::out << "NEEDS FIX: " << __FILE__ << ", " << __func__ \ + << "(), line " << __LINE__ << ": " << STR << std::endl; \ + } \ } -// clang-format on -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/Timer.cpp b/src/utils/Timer.cpp index 67bd0c713..54fd802ac 100644 --- a/src/utils/Timer.cpp +++ b/src/utils/Timer.cpp @@ -28,24 +28,15 @@ namespace mrcpp { -/** @returns New Timer object - * @param[in] start_timer: option to start timer immediately - */ Timer::Timer(bool start_timer) { if (start_timer) start(); } -/** @returns Copy of Timer object, including its current state - * @param[in] timer: Object to copy - */ Timer::Timer(const Timer &timer) : running(timer.running) , time_used(timer.time_used) , clock_start(timer.clock_start) {} -/** @returns Copy of Timer object, including its current state - * @param[in] timer: Object to copy - */ Timer &Timer::operator=(const Timer &timer) { if (this != &timer) { this->running = timer.running; @@ -55,28 +46,24 @@ Timer &Timer::operator=(const Timer &timer) { return *this; } -/** @brief Start timer from zero */ void Timer::start() { this->clock_start = now(); this->time_used = 0.0; this->running = true; } -/** @brief Resume timer from previous time */ void Timer::resume() { if (this->running) MSG_WARN("Timer already running"); this->clock_start = now(); this->running = true; } -/** @brief Stop timer */ void Timer::stop() { if (not this->running) MSG_WARN("Timer not running"); this->time_used += diffTime(now(), this->clock_start); this->running = false; } -/** @returns Current elapsed time, in seconds */ double Timer::elapsed() const { return (this->running) ? 
diffTime(now(), this->clock_start) : this->time_used; } diff --git a/src/utils/Timer.h b/src/utils/Timer.h index 4c02201a9..091812eca 100644 --- a/src/utils/Timer.h +++ b/src/utils/Timer.h @@ -29,33 +29,151 @@ namespace mrcpp { +/** + * @file + * @brief Wall-clock timing utilities. + */ + +/** + * @typedef timeT + * @brief Timestamp type used by Timer. + * + * @details + * Alias for `std::chrono::time_point`. + * The actual clock may map to a platform-specific high-resolution source. + * It typically offers sub-microsecond resolution but is not guaranteed to be + * steady on all standard libraries (i.e., it may jump if the underlying clock + * is adjusted). The @ref Timer class measures *wall* time, not CPU time. + */ using timeT = std::chrono::time_point; -/** @class Timer +/** + * @class Timer + * @brief Lightweight wall-time stopwatch with start/resume/stop semantics. + * + * @details + * `Timer` accumulates elapsed *wall* time across one or more running intervals. + * A newly constructed timer can optionally start immediately. + * + * ### State machine + * - **Stopped** (default when constructed with `start_timer == false`): + * - `elapsed()` returns the accumulated time. + * - `resume()` starts a new interval without clearing accumulated time. + * - `start()` clears accumulated time and starts fresh from zero. + * - **Running** (default when constructed with `start_timer == true`): + * - `elapsed()` returns the live time since the most recent start/resume, + * ignoring previously accumulated time until `stop()` is called. + * - `stop()` ends the current interval and adds it to the accumulation. + * + * ### Characteristics + * - Measures wall time (affected by system sleep/suspend). + * - Very low overhead; suitable for inner-loop timing in most cases. + * - Not thread-safe: do not share a single instance across threads without + * external synchronization. + * + * ### Example + * @code{.cpp} + * mrcpp::Timer t; // starts immediately by default + * // ... 
code section A ... + * t.stop(); + * double a = t.elapsed(); // seconds for section A * - * @brief Records wall time between the execution of two lines of source code + * t.resume(); + * // ... code section B ... + * t.stop(); + * double total = t.elapsed(); // seconds for A + B * + * t.start(); // reset and start from zero + * // ... code section C ... + * double live = t.elapsed(); // live time while running + * @endcode */ - class Timer final { public: + /** + * @brief Construct a timer. + * @param start_timer If true, the timer is started immediately with + * accumulated time cleared to zero. + * + * @note Default is `true` for convenience; pass `false` to construct + * a stopped timer and control the first start explicitly. + */ Timer(bool start_timer = true); + + /** + * @brief Copy constructor. + * @details Copies the running state, accumulated time, and the last start + * timestamp. If the source is running, the copy will also be + * running and will measure from the same start instant. + */ Timer(const Timer &timer); + + /** + * @brief Copy assignment. + * @details Assigns running state, accumulated time, and start timestamp. + * Self-assignment is a no-op. + * @return Reference to `*this`. + */ Timer &operator=(const Timer &timer); + /** + * @brief Start from zero. + * @details Resets the accumulated time to 0 and begins a new running + * interval starting "now". Use this to time a fresh region. + */ void start(); + + /** + * @brief Resume without clearing accumulated time. + * @details If the timer is stopped, begins a new running interval starting + * "now". If already running, a warning is emitted and the + * interval start is reset to "now", discarding the live interval. + */ void resume(); + + /** + * @brief Stop and accumulate. + * @details Ends the current running interval and adds its duration to the + * accumulated time. If already stopped, a warning is emitted and + * the time since the stored start timestamp is still added.
+ */ void stop(); + /** + * @brief Get elapsed time in seconds. + * @details + * - If the timer is **running**, returns the time since the most recent + * `start()` or `resume()` (not including previously accumulated time). + * - If the timer is **stopped**, returns the total accumulated time across + * all completed intervals since the last `start()`. + * + * @return Elapsed wall time in seconds as a `double`. + */ double elapsed() const; private: + /// @brief True if the timer is currently running. bool running{false}; + + /// @brief Accumulated time in seconds across completed intervals. double time_used{0.0}; + + /// @brief Timestamp when the current interval was started/resumed. timeT clock_start; + /** + * @brief Current timestamp helper. + * @return `high_resolution_clock::now()`. + */ timeT now() const; + + /** + * @brief Difference between two timestamps. + * @param t2 Later timestamp. + * @param t1 Earlier timestamp. + * @return `(t2 - t1)` expressed in seconds as a `double`. + */ double diffTime(timeT t2, timeT t1) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/details.cpp b/src/utils/details.cpp index b5bed3998..3801fb91a 100644 --- a/src/utils/details.cpp +++ b/src/utils/details.cpp @@ -37,16 +37,15 @@ namespace mrcpp { namespace details { + bool directory_exists(std::string path) { struct stat info; - int statRC = stat(path.c_str(), &info); if (statRC != 0) { - if (errno == ENOENT) { return 0; } // something along the path does not exist - if (errno == ENOTDIR) { return 0; } // something in path prefix is not a dir + if (errno == ENOENT) { return 0; } + if (errno == ENOTDIR) { return 0; } MSG_ABORT("Non-existent directory " + path); } - return (info.st_mode & S_IFDIR) ? 
true : false; } @@ -60,7 +59,6 @@ std::string find_filters() { break; } } - if (filters.empty()) { MSG_ABORT("Could not find a folder containing filters!"); } else { @@ -68,12 +66,10 @@ std::string find_filters() { } } -// helper function: parse a string and returns the nth integer number int get_val(char *line, int n) { char *p = line; int len = 0; for (int i = 0; i < n - 1; i++) { - // jump over n-1 first numbers while (*p < '0' || *p > '9') p++; while (*p >= '0' && *p <= '9') p++; } @@ -88,27 +84,26 @@ int get_val(char *line, int n) { return atoi(p); } -/** @brief returns the current memory usage of this process, in kB */ int get_memory_usage() { int mem_val = -1; FILE *file = fopen("/proc/self/statm", "r"); if (file != nullptr) { char line[80]; while (fgets(line, 80, file) != nullptr) { - mem_val = 4 * get_val(line, 6); // sixth number is data+stack in pages (4kB) + mem_val = 4 * get_val(line, 6); } fclose(file); } return mem_val; } -/** @brief checks if all elements of an array of doubles are equal */ -template bool are_all_equal(const std::array &exponent) { +template +bool are_all_equal(const std::array &exponent) { return std::all_of(exponent.begin(), exponent.end(), [ex = std::begin(exponent)](double i) { return i == *ex; }); } -/** @brief converts c_type arrays to std::arrays */ -template std::array convert_to_std_array(T *arr) { +template +std::array convert_to_std_array(T *arr) { auto ret_arr = std::array{}; for (auto d = 0; d < D; d++) { ret_arr[d] = arr[d]; } return ret_arr; @@ -125,5 +120,6 @@ template std::array convert_to_std_array(double *arr); template std::array convert_to_std_array(int *arr); template std::array convert_to_std_array(int *arr); template std::array convert_to_std_array(int *arr); -} // namespace details -} // namespace mrcpp + +} +} \ No newline at end of file diff --git a/src/utils/details.h b/src/utils/details.h index c88c16196..cb6e0b600 100644 --- a/src/utils/details.h +++ b/src/utils/details.h @@ -24,6 +24,18 @@ */ #pragma 
once +/** + * @file + * @brief Small cross-cutting utilities and helpers for MRCPP internals. + * + * This header declares: + * - Filesystem and environment helpers (e.g., locating MW filter folders). + * - Lightweight process information (Linux memory usage). + * - Tiny array algorithms (equality/any checks, C-array → std::array conversion). + * - Generic collection pretty-printer and a `std::array` stream operator. + * + * Most functions live in the `mrcpp::details` namespace to signal internal use. + */ #include #include @@ -31,16 +43,89 @@ #include namespace mrcpp { +/** + * @namespace mrcpp::details + * @brief Internal utilities; APIs may change without notice. + */ namespace details { + +/** + * @brief Check whether a path refers to an existing directory. + * @param path Path to check. + * @return `true` if the path exists and is a directory, otherwise `false`. + * @note The implementation uses `stat(2)` and is therefore + * POSIX-oriented. + */ bool directory_exists(std::string path); + +/** + * @brief Locate the directory containing multiresolution filter files. + * + * The search strategy prefers an explicit environment override and then + * compiled-in locations. + * + * @return Absolute/relative path to a directory with filter files. + * @note Aborts the process (via `MSG_ABORT`) if no suitable directory is found. + * + * @details + * The implementation checks (in order): + * 1. Environment variable `MWFILTERS_DIR`, if set and points to a directory. + * 2. Compiled-in source/install search paths (e.g., `mwfilters_source_dir()`, + * `mwfilters_install_dir()`). + */ std::string find_filters(); + +/** + * @brief Return the current process memory usage in kilobytes. + * @return Data+stack size in kB (sixth field of `/proc/self/statm`, assuming 4 kB pages), or -1 if the file cannot be opened. + * @note Implemented via `/proc/self/statm`; available on Linux only.
+ */ int get_memory_usage(); -template bool are_all_equal(const std::array &exponent); -template bool are_any(const std::array &col, const T eq) { + +/** + * @brief Check if all elements of a fixed-size array of doubles are equal. + * @tparam D Array length. + * @param exponent Input array. + * @return `true` if all elements compare equal to the first element; otherwise `false`. + * @warning Equality is tested with `==` (no tolerance). + */ +template +bool are_all_equal(const std::array &exponent); + +/** + * @brief Test whether any element of an array equals a given value. + * @tparam T Element type (must be equality comparable). + * @tparam D Array length (compile-time). + * @param col Array to scan. + * @param eq Value to compare against. + * @return `true` if at least one element satisfies `element == eq`. + * @complexity O(D). + */ +template +bool are_any(const std::array &col, const T eq) { return std::any_of(col.cbegin(), col.cend(), [eq](const T &el) { return el == eq; }); }; -template std::array convert_to_std_array(T *arr); -template auto stream_collection(const T &coll) -> std::string { + +/** + * @brief Convert a C-style pointer to a fixed-size `std::array`. + * @tparam T Element type. + * @tparam D Number of elements to copy. + * @param arr Pointer to at least `D` contiguous elements of type `T`. + * @return `std::array` with a shallow copy of the `D` elements. + * @warning Caller is responsible for ensuring `arr` has at least `D` valid elements. + */ +template +std::array convert_to_std_array(T *arr); + +/** + * @brief Render a collection to a compact bracketed string. + * @tparam T A range/collection with range-for iteration and streamable elements. + * @param coll Collection to print. + * @return String like `"[e0, e1, ...]"`. + * @note This utility underpins the `operator<<` overload for `std::array`. 
+ */ +template +auto stream_collection(const T &coll) -> std::string { std::ostringstream os; bool first = true; os << "["; @@ -52,9 +137,21 @@ template auto stream_collection(const T &coll) -> std::string { os << "]"; return os.str(); } + } // namespace details -template auto operator<<(std::ostream &os, const std::array &coll) -> std::ostream & { +/** + * @brief Stream insertion for `std::array`, producing a compact bracketed list. + * @tparam T Element type (must be stream-insertable). + * @tparam D Array length. + * @param os Output stream. + * @param coll Array to print. + * @return Reference to @p os. + * @sa mrcpp::details::stream_collection + */ +template +auto operator<<(std::ostream &os, const std::array &coll) -> std::ostream & { return (os << details::stream_collection(coll)); } -} // namespace mrcpp + +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp index 69a13f300..1fdb55f54 100644 --- a/src/utils/math_utils.cpp +++ b/src/utils/math_utils.cpp @@ -41,7 +41,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief Calculate \f$ m^e\f$ for integers (for convenience, not speed!) */ int math_utils::ipow(int m, int e) { if (e < 0) MSG_ABORT("Exponent cannot be negative: " << e) int result = 1; @@ -49,35 +48,18 @@ int math_utils::ipow(int m, int e) { return result; } -/** @brief Compute the norm of a matrix given as a vector - * - * The norm of the matrix is computed by iterating the following operation: - * \f$ x_n = M^t \cdot M \cdot x_{n-1} \f$ - * - * The norm of the matrix is obtained as: - * \f$ ||M|| \lim_{n \rightarrow \infty} ||x_n||/||x_{n-1}||\f$ - */ double math_utils::matrix_norm_2(const MatrixXd &M) { return M.lpNorm<2>(); } -/** Compute the norm of a matrix given as a vector. - * - * The norm of the matrix is obtained by taking the column with the - * largest norm. 
- */ double math_utils::matrix_norm_1(const MatrixXd &M) { return M.colwise().lpNorm<1>().maxCoeff(); } -/** Compute the infinity norm of a matrix given as a vector. - * The norm of the matrix is obtained by taking the row with the largest norm. - */ double math_utils::matrix_norm_inf(const MatrixXd &M) { return M.rowwise().lpNorm<1>().maxCoeff(); } -/** Compute the binomial coefficient n!/((n-j)! j!) */ double math_utils::binomial_coeff(int n, int j) { double binomial_n_j = 1.0; if (n < 0 || j < 0 || j > n) { @@ -95,11 +77,10 @@ double math_utils::binomial_coeff(int n, int j) { VectorXd math_utils::get_binomial_coefs(unsigned int order) { VectorXd coefs = VectorXd::Ones(order + 1); - for (int k = 0; k <= order; k++) { coefs[k] = math_utils::binomial_coeff(order, k); } + for (int k = 0; k <= (int)order; k++) { coefs[k] = math_utils::binomial_coeff(order, k); } return coefs; } -/** Compute k! = GAMMA(k+1) for integer argument k */ double math_utils::factorial(int n) { int k = 1; double fac_n = 1.0; @@ -117,7 +98,6 @@ double math_utils::factorial(int n) { return fac_n; } -/** Compute the tensor product of two matrices */ MatrixXd math_utils::tensor_product(const MatrixXd &A, const MatrixXd &B) { int Ar = A.rows(); int Ac = A.cols(); @@ -130,7 +110,6 @@ MatrixXd math_utils::tensor_product(const MatrixXd &A, const MatrixXd &B) { return tprod; } -/** Compute the tensor product of a matrix and a vector */ MatrixXd math_utils::tensor_product(const MatrixXd &A, const VectorXd &B) { int Ar = A.rows(); int Ac = A.cols(); @@ -140,7 +119,6 @@ MatrixXd math_utils::tensor_product(const MatrixXd &A, const VectorXd &B) { return tprod; } -/** Compute the tensor product of a matrix and a vector */ MatrixXd math_utils::tensor_product(const VectorXd &A, const MatrixXd &B) { int Ar = A.rows(); int Br = B.rows(); @@ -150,7 +128,6 @@ MatrixXd math_utils::tensor_product(const VectorXd &A, const MatrixXd &B) { return tprod; } -/** Compute the tensor product of a column vector and a row 
vector */ MatrixXd math_utils::tensor_product(const VectorXd &A, const VectorXd &B) { int Ar = A.rows(); int Br = B.rows(); @@ -159,20 +136,18 @@ MatrixXd math_utils::tensor_product(const VectorXd &A, const VectorXd &B) { return tprod; } -/** Compute the tensor product of a vector and itself */ void math_utils::tensor_self_product(const VectorXd &A, VectorXd &tprod) { int Ar = A.rows(); for (int i = 0; i < Ar; i++) { tprod.segment(i * Ar, Ar) = A(i) * A; } } -/** Compute the tensor product of a vector and itself */ void math_utils::tensor_self_product(const VectorXd &A, MatrixXd &tprod) { int Ar = A.rows(); for (int i = 0; i < Ar; i++) { tprod.block(i, 0, 1, Ar) = A(i) * A; } } -/** Matrix multiplication of the filter with the input coefficients */ -template void math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) { +template +void math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) { if constexpr (std::is_same::value) { Map f(in, kp1, kp1_dm1); Map g(out, kp1_dm1, kp1); @@ -193,11 +168,6 @@ template void math_utils::apply_filter(T *out, T *in, const MatrixX NOT_IMPLEMENTED_ABORT; } -/** Make a nD-representation from 1D-representations of separable functions. - * - * This method uses the "output" vector as initial input, in order to - * avoid the use of temporaries. - */ void math_utils::tensor_expand_coefs(int dim, int dir, int kp1, int kp1_d, const MatrixXd &primitive, VectorXd &expanded) { if (dir < dim - 1) { int idx = math_utils::ipow(kp1, dir + 1); @@ -234,30 +204,13 @@ void math_utils::tensor_expand_coords_3D(int kp1, const MatrixXd &primitive, Mat } } -/** @brief Compute the eigenvalues and eigenvectors of a Hermitian matrix - * - * @param A: matrix to diagonalize (not modified) - * @param b: vector to store eigenvalues - * - * Returns the matrix of eigenvectors and stores the eigenvalues in the input vector. 
- */ ComplexMatrix math_utils::diagonalize_hermitian_matrix(const ComplexMatrix &A, DoubleVector &diag) { Eigen::SelfAdjointEigenSolver es(A.cols()); es.compute(A); - diag = es.eigenvalues(); // real - return es.eigenvectors(); // complex + diag = es.eigenvalues(); + return es.eigenvectors(); } -/** @brief Compute the power of a Hermitian matrix - * - * @param A: matrix - * @param b: exponent - * - * The matrix is first diagonalized, then the diagonal elements are raised - * to the given power, and the diagonalization is reversed. Sanity check for - * eigenvalues close to zero, necessary for negative exponents in combination - * with slightly negative eigenvalues. - */ ComplexMatrix math_utils::hermitian_matrix_pow(const ComplexMatrix &A, double b) { DoubleVector diag; ComplexMatrix U = diagonalize_hermitian_matrix(A, diag); @@ -273,15 +226,6 @@ ComplexMatrix math_utils::hermitian_matrix_pow(const ComplexMatrix &A, double b) return U * B * U.adjoint(); } -/** @brief Compute the eigenvalues and eigenvectors of a Hermitian matrix block - * - * @param A: matrix to diagonalize (updated in place) - * @param U: matrix of eigenvectors - * @param nstart: upper left corner of block - * @param nsize: size of block - * - * Assumes that the given block is a proper Hermitian sub matrix. 
- */ void math_utils::diagonalize_block(ComplexMatrix &A, ComplexMatrix &U, int nstart, int nsize) { Eigen::SelfAdjointEigenSolver es(nsize); es.compute(A.block(nstart, nstart, nsize, nsize)); @@ -291,15 +235,15 @@ void math_utils::diagonalize_block(ComplexMatrix &A, ComplexMatrix &U, int nstar A.block(nstart, nstart, nsize, nsize) = ei_val.asDiagonal(); } -/** Calculate the distance between two points in n-dimensions */ -template double math_utils::calc_distance(const Coord &a, const Coord &b) { +template +double math_utils::calc_distance(const Coord &a, const Coord &b) { double r = 0.0; for (int i = 0; i < D; i++) { r += std::pow(a[i] - b[i], 2.0); } return std::sqrt(r); } -/** Calculate the cartesian_product A x B */ -template std::vector> math_utils::cartesian_product(std::vector A, std::vector B) { +template +std::vector> math_utils::cartesian_product(std::vector A, std::vector B) { std::vector> output; for (auto &a : A) { for (auto &b : B) output.push_back(std::vector{a, b}); @@ -307,8 +251,8 @@ template std::vector> math_utils::cartesian_product(std return output; } -/** Calculate the cartesian product between a matrix l_A and the vector B */ -template std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B) { +template +std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B) { std::vector> output; for (auto A : l_A) { for (auto &b : B) { @@ -320,9 +264,8 @@ template std::vector> math_utils::cartesian_product(std return output; } -/** Calculate the cartesian product between A vector and itself with A repeater, - ie. 
reapeat 4 is equal to the cartesian product A x A x A x A */ -template std::vector> math_utils::cartesian_product(std::vector A, int dim) { +template +std::vector> math_utils::cartesian_product(std::vector A, int dim) { std::vector> output; if (dim < 0) MSG_ABORT("Dimension has to be 1 or greater") if (dim == 1) { @@ -349,4 +292,4 @@ template std::vector> math_utils::cartesian_product(std::vec template std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B); template std::vector> math_utils::cartesian_product(std::vector A, int dim); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/math_utils.h b/src/utils/math_utils.h index 3eacfa10b..38cbf7203 100644 --- a/src/utils/math_utils.h +++ b/src/utils/math_utils.h @@ -29,59 +29,263 @@ */ #pragma once +/** + * @file + * @brief Linear algebra and small numerical helpers built on top of Eigen. + * + * This header exposes a compact set of utilities frequently used across MRCPP: + * - Common Eigen-based type aliases (`Vector`, `Matrix`, complex scalars). + * - Small combinatorial helpers (factorial, binomial, integer powers). + * - Tensor products and self–outer-products. + * - Matrix norms (1, 2, and ∞). + * - Multi-index/tensor expansion helpers for separable bases. + * - Hermitian eigendecompositions and block diagonalization. + * - Cartesian products of small sets/vectors. + * - Euclidean distance for MRCPP coordinates. + */ #include #include #include "MRCPP/mrcpp_declarations.h" -using IntVector = Eigen::VectorXi; -using DoubleVector = Eigen::VectorXd; -using ComplexVector = Eigen::VectorXcd; +/** @name Eigen type aliases + * @brief Short, explicit aliases for Eigen vectors/matrices used in MRCPP. + * @{ */ +using IntVector = Eigen::VectorXi; ///< Column vector of integers. +using DoubleVector = Eigen::VectorXd; ///< Column vector of doubles. +using ComplexVector= Eigen::VectorXcd; ///< Column vector of complex doubles. 
-using IntMatrix = Eigen::MatrixXi; -using DoubleMatrix = Eigen::MatrixXd; -using ComplexMatrix = Eigen::MatrixXcd; +using IntMatrix = Eigen::MatrixXi; ///< Integer matrix. +using DoubleMatrix = Eigen::MatrixXd; ///< Double-precision matrix. +using ComplexMatrix= Eigen::MatrixXcd; ///< Complex double-precision matrix. -using ComplexDouble = std::complex; +using ComplexDouble= std::complex; ///< Convenience alias for complex. +/** @} */ namespace mrcpp { +/** + * @namespace mrcpp::math_utils + * @brief Numerical utilities layered on Eigen; header-only declarations. + */ namespace math_utils { +/** + * @brief Binomial coefficient \f$\binom{n}{j}\f$. + * @param n Non-negative integer. + * @param j Non-negative integer, \f$0 \le j \le n\f$. + * @return \f$\frac{n!}{(n-j)!\,j!}\f$ as a double. + * @note For out-of-domain inputs, the implementation may log an error. + */ double binomial_coeff(int n, int j); + +/** + * @brief Pascal row of binomial coefficients. + * @param order Row index \f$n\f$. + * @return Vector \f$[\binom{n}{0}, \ldots, \binom{n}{n}]\f$. + */ Eigen::VectorXd get_binomial_coefs(unsigned int order); +/** + * @brief Factorial for non-negative integers. + * @param n \f$n \ge 0\f$. + * @return \f$n!\f$ as a double. + */ double factorial(int n); + +/** + * @brief Integer power \f$m^e\f$ for \f$e \ge 0\f$ (loop-based; exact for small ranges). + * @param m Base (integer). + * @param e Exponent (integer, \f$e \ge 0\f$). + * @return \f$m^e\f$ as an int. + */ int ipow(int m, int e); +/** @name Tensor/Kronecker products + * @brief Kronecker products and outer products for vectors/matrices. + * @{ + */ + +/** + * @brief Kronecker product \f$A \otimes B\f$. + */ Eigen::MatrixXd tensor_product(const Eigen::MatrixXd &A, const Eigen::MatrixXd &B); + +/** + * @brief Kronecker product of a matrix and a column vector (treated as \f$B\f$). 
+ */ Eigen::MatrixXd tensor_product(const Eigen::MatrixXd &A, const Eigen::VectorXd &B); + +/** + * @brief Kronecker product of a column vector and a matrix (treated as \f$B\f$). + */ Eigen::MatrixXd tensor_product(const Eigen::VectorXd &A, const Eigen::MatrixXd &B); + +/** + * @brief Outer product \f$A B^\top\f$ of two column vectors. + */ Eigen::MatrixXd tensor_product(const Eigen::VectorXd &A, const Eigen::VectorXd &B); +/** + * @brief Self outer-product \f$A \otimes A\f$ into a flat vector. + * @param A Input column vector. + * @param B Output vector (size must be \f$\mathrm{size}(A)^2\f$). + */ void tensor_self_product(const Eigen::VectorXd &A, Eigen::VectorXd &B); + +/** + * @brief Self outer-product \f$A A^\top\f$ into a matrix. + * @param A Input column vector. + * @param B Output matrix (square, same dimension as \f$A\f$). + */ void tensor_self_product(const Eigen::VectorXd &A, Eigen::MatrixXd &B); +/** @} */ +/** @name Matrix norms + * @brief Matrix norms computed with Eigen's `lpNorm` reductions. + * @{ + */ +/** + * @brief Infinity norm \f$\|M\|_\infty\f$ (max row 1-norm). + */ double matrix_norm_inf(const Eigen::MatrixXd &M); + +/** + * @brief 1-norm \f$\|M\|_1\f$ (max column 1-norm). + */ double matrix_norm_1(const Eigen::MatrixXd &M); + +/** + * @brief Coefficient-wise 2-norm \f$\|M\|_F\f$ (Frobenius norm) as returned by Eigen's `lpNorm<2>()`; this is not the spectral norm. + */ double matrix_norm_2(const Eigen::MatrixXd &M); +/** @} */ -template void apply_filter(T *out, T *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac); +/** + * @brief Apply a linear filter to a coefficient block (templated on scalar type). + * + * Conceptually computes and accumulates a matrix product of the form + * \f$G \leftarrow G + F^\top \cdot \mathrm{filter}\f$, where \f$F\f$ and \f$G\f$ + * are views over `in` and `out` with shapes derived from \p kp1 and \p kp1_dm1. + * + * @tparam T Scalar type (`double` or `ComplexDouble`). + * @param[out] out Output buffer (accumulation destination).
+ * @param[in] in Input buffer (interpreted as a matrix view). + * @param[in] filter Dense filter matrix to apply. + * @param[in] kp1 Leading polynomial order + 1 (per MR basis). + * @param[in] kp1_dm1 \f$\text{kp1}^{D-1}\f$ helper (stride in the mapped view). + * @param[in] fac If zero, overwrite the destination; otherwise accumulate. + * @warning Buffers must contain at least the required number of elements for the + * implicit matrix views. + */ +template +void apply_filter(T *out, T *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac); +/** + * @brief Expand separable 1D coefficient blocks into an \f$ \text{dim}\f$-D tensor layout. + * + * Recursively multiplies along dimensions using the columns of \p primitive. + * + * @param dim Spatial dimensionality (1–3). + * @param dir Current recursion direction (starting at 0). + * @param kp1 Polynomial order + 1. + * @param kp1_d \f$\text{kp1}^d\f$ where \f$d = \text{dim}\f$ (total coefficients). + * @param primitive Matrix with primitive 1D basis values per dimension. + * @param[in,out] expanded Buffer holding intermediate input and final expanded output. + */ void tensor_expand_coefs(int dim, int dir, int kp1, int kp1_d, const Eigen::MatrixXd &primitive, Eigen::VectorXd &expanded); +/** + * @brief Generate 2D sampling coordinates on a tensor grid spanned by primitive 1D points. + * @param kp1 Points per axis. + * @param primitive Matrix whose rows are the per-axis primitive coordinates. + * @param[out] expanded Output matrix of size \f$(\text{kp1}^2) \times 2\f$. + */ void tensor_expand_coords_2D(int kp1, const Eigen::MatrixXd &primitive, Eigen::MatrixXd &expanded); + +/** + * @brief Generate 3D sampling coordinates on a tensor grid spanned by primitive 1D points. + * @param kp1 Points per axis. + * @param primitive Matrix whose rows are the per-axis primitive coordinates. + * @param[out] expanded Output matrix of size \f$(\text{kp1}^3) \times 3\f$. 
+ */ void tensor_expand_coords_3D(int kp1, const Eigen::MatrixXd &primitive, Eigen::MatrixXd &expanded); +/** + * @brief Hermitian matrix power \f$A^b\f$ via eigendecomposition. + * @param A Hermitian (self-adjoint) complex matrix. + * @param b Real exponent. + * @return \f$U\,\mathrm{diag}(\lambda_i^b)\,U^\dagger\f$ where \f$A = U\,\mathrm{diag}(\lambda_i)\,U^\dagger\f$. + * @note Eigenvalues with magnitude near zero are guarded to avoid blow-ups for negative \p b. + */ ComplexMatrix hermitian_matrix_pow(const ComplexMatrix &A, double b); + +/** + * @brief Diagonalize a Hermitian matrix. + * @param A Input Hermitian matrix (not modified). + * @param[out] diag Real vector of eigenvalues (ascending). + * @return Matrix of eigenvectors as columns (unitary). + */ ComplexMatrix diagonalize_hermitian_matrix(const ComplexMatrix &A, DoubleVector &diag); + +/** + * @brief In-place diagonalization of a Hermitian sub-block. + * + * Replaces the \f$n_\text{size}\times n_\text{size}\f$ block of @p M at + * \f$(n_\text{start}, n_\text{start})\f$ with its eigenvalues on the diagonal + * and writes the corresponding eigenvectors into the same block of @p U. + * + * @param[in,out] M Matrix containing the Hermitian sub-block to diagonalize. + * @param[out] U Matrix receiving the block eigenvectors. + * @param nstart Upper-left index of the block. + * @param nsize Size of the (square) block. + */ void diagonalize_block(ComplexMatrix &M, ComplexMatrix &U, int nstart, int nsize); -template std::vector> cartesian_product(std::vector A, std::vector B); -template std::vector> cartesian_product(std::vector> l_A, std::vector B); -template std::vector> cartesian_product(std::vector a, int dim); +/** @name Cartesian products + * @brief Simple, small-container cartesian products (for enumeration tasks). + * @{ + */ +/** + * @brief Cartesian product \f$A \times B\f$. + * @tparam T Element type. + * @param A First list. + * @param B Second list. + * @return Vector of pairs `[a, b]`. 
+ */ +template +std::vector> cartesian_product(std::vector A, std::vector B); + +/** + * @brief Cartesian product \f{(l\_A) \times B\f}, where each element of @p l_A is itself a tuple. + * @tparam T Element type. + * @param l_A List of partial tuples. + * @param B Second list. + * @return Concatenated tuples. + */ +template +std::vector> cartesian_product(std::vector> l_A, std::vector B); + +/** + * @brief Repeated cartesian power \f$A^{\times \text{dim}}\f$. + * @tparam T Element type. + * @param a Base list. + * @param dim Number of repeats (\f$\ge 1\f$). + * @return All length-\p dim tuples with elements from \p a. + */ +template +std::vector> cartesian_product(std::vector a, int dim); +/** @} */ -template double calc_distance(const Coord &a, const Coord &b); +/** + * @brief Euclidean distance between two D-dimensional coordinates. + * @tparam D Dimension (compile-time). + * @param a First point. + * @param b Second point. + * @return \f$\sqrt{\sum_{i=1}^D (a_i-b_i)^2}\f$. + */ +template +double calc_distance(const Coord &a, const Coord &b); } // namespace math_utils -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/mpi_utils.cpp b/src/utils/mpi_utils.cpp index 77526375b..d6d58a165 100644 --- a/src/utils/mpi_utils.cpp +++ b/src/utils/mpi_utils.cpp @@ -31,11 +31,6 @@ namespace mrcpp { -/** @brief SharedMemory constructor - * - * @param[in] comm: Communicator sharing resources - * @param[in] sh_size: Memory size, in MB - */ template SharedMemory::SharedMemory(mrcpp::mpi_comm comm, int sh_size) : sh_start_ptr(nullptr) @@ -76,19 +71,6 @@ template SharedMemory::~SharedMemory() { #endif } -/** @brief Send FunctionTree to a given MPI rank using blocking communication - * - * @param[in] tree: FunctionTree to send - * @param[in] dst: MPI rank to send to - * @param[in] tag: unique identifier - * @param[in] comm: Communicator that defines ranks - * @param[in] nChunks: Number of memory chunks to send - * - * @details The number 
of memory chunks must be known before we can send the - * tree. This can be specified in the last argument if known a priori, in order - * to speed up communication, otherwise it will be communicated in a separate - * step before the main communication. - */ template void send_tree(FunctionTree &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) { #ifdef MRCPP_HAS_MPI auto &allocator = tree.getNodeAllocator(); @@ -108,19 +90,6 @@ template void send_tree(FunctionTree &tree, int dst, i #endif } -/** @brief Receive FunctionTree from a given MPI rank using blocking communication - * - * @param[in] tree: FunctionTree to write into - * @param[in] src: MPI rank to receive from - * @param[in] tag: unique identifier - * @param[in] comm: Communicator that defines ranks - * @param[in] nChunks: Number of memory chunks to receive - * - * @details The number of memory chunks must be known before we can receive the - * tree. This can be specified in the last argument if known a priori, in order - * to speed up communication, otherwise it will be communicated in a separate - * step before the main communication. - */ template void recv_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -146,16 +115,6 @@ template void recv_tree(FunctionTree &tree, int src, i #endif } -/** @brief Share a FunctionTree among MPI processes that share the same physical memory - * - * @param[in] tree: FunctionTree to write into - * @param[in] src: MPI rank that last updated the function - * @param[in] tag: unique identifier - * @param[in] comm: Communicator that defines ranks - * - * @details This function should be called every time a shared function is - * updated, in order to update the local memory of each MPI process. 
- */ template void share_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm) { #ifdef MRCPP_HAS_MPI Timer t1; diff --git a/src/utils/mpi_utils.h b/src/utils/mpi_utils.h index 062d1affa..46c6b42ac 100644 --- a/src/utils/mpi_utils.h +++ b/src/utils/mpi_utils.h @@ -24,43 +24,106 @@ */ #pragma once +/** + * @file + * @brief MPI-facing declarations and a lightweight shared-memory helper for MRCPP. + * + * This header provides: + * - Portable aliases for MPI types (working in non-MPI builds as no-ops). + * - Public globals describing the current MPI topology/roles used by MRCPP. + * - A templated @ref mrcpp::SharedMemory class to allocate a shared-memory + * window among ranks that share a physical node (MPI-3 RMA). + * - Prototypes for shipping trees between ranks: @ref send_tree, @ref recv_tree, @ref share_tree. + * + * @note All MPI symbols are guarded by `MRCPP_HAS_MPI`. In non-MPI builds, dummy + * typedefs are supplied so that client code can still compile. + */ #ifdef MRCPP_HAS_MPI -#include + #include #else -using MPI_Comm = int; -using MPI_Win = int; -using MPI_Request = int; + /// Fallback alias so non-MPI builds can compile client code. + using MPI_Comm = int; + /// Fallback alias so non-MPI builds can compile client code. + using MPI_Win = int; + /// Fallback alias so non-MPI builds can compile client code. + using MPI_Request = int; #endif namespace mrcpp { -using mpi_comm = MPI_Comm; -using mpi_win = MPI_Win; + +/// Alias for MPI communicator (portable across MPI/non-MPI builds). +using mpi_comm = MPI_Comm; +/// Alias for MPI window used by RMA (portable across MPI/non-MPI builds). +using mpi_win = MPI_Win; +/// Alias for MPI request handle (portable across MPI/non-MPI builds). using mpi_request = MPI_Request; + +/** + * @namespace mrcpp::mpi + * @brief Runtime MPI topology, role flags, and communicators used internally by MRCPP. 
+ * + * These externs are set during MRCPP's MPI initialization (see implementation) + * and describe how the current process participates in computation and data + * distribution. They are intentionally kept as simple PODs for easy broadcasting + * and logging. + */ namespace mpi { + +/// If true, the code may choose numerically exact variants of some algorithms. extern bool numerically_exact; -extern int shared_memory_size; +/// Requested per-node shared-memory window size (in MB) for shared allocations. +extern int shared_memory_size; +/// Rank of this process in `MPI_COMM_WORLD`. extern int world_rank; +/// Size of `MPI_COMM_WORLD`. extern int world_size; + +/// Rank within the MRCPP "worker" communicator. extern int wrk_rank; +/// Size of the MRCPP "worker" communicator. extern int wrk_size; + +/// Rank within the node-local shared-memory communicator. extern int share_rank; +/// Size of the node-local shared-memory communicator. extern int share_size; + +/// Rank inside the group communicator that clusters ranks by shared-memory groups. extern int sh_group_rank; + +/// True iff this rank belongs to the bank (data service) group. extern int is_bank; +/// True iff this rank is a worker (i.e., bank client). extern int is_bankclient; + +/// Number of ranks dedicated to the bank (data service). extern int bank_size; +/// Desired number of bank ranks per node (if configured). extern int bank_per_node; + +/// User/auto-configured OpenMP thread count hint for workers. extern int omp_threads; +/// If non-zero, honor the environment's OMP thread count for sizing decisions. extern int use_omp_num_threads; + +/// Total number of bank ranks (including any special managers). extern int tot_bank_size; + +/// Upper bound for usable MPI tags (implementation specific). extern int max_tag; + +/// World-rank of the special task-manager bank (if any). extern int task_bank; +/// Communicator for workers (orbital/function computations). 
extern MPI_Comm comm_wrk; +/// Communicator that groups ranks which share physical memory on the same node. extern MPI_Comm comm_share; +/// Communicator that orders ranks within a shared-memory group. extern MPI_Comm comm_sh_group; +/// Communicator that includes all bank ranks (and possibly clients for RPC). extern MPI_Comm comm_bank; } // namespace mpi @@ -68,34 +131,126 @@ extern MPI_Comm comm_bank; namespace mrcpp { -/** @class SharedMemory +/** + * @class SharedMemory + * @brief Thin RAII wrapper around an MPI-3 shared-memory window (per node). + * + * A `SharedMemory` instance allocates a node-local window using + * `MPI_Win_allocate_shared` (only when compiled with MPI). The window can be used + * to place data structures (e.g., coefficient chunks of a @ref FunctionTree) + * accessible by all ranks on the same physical node without explicit messaging. * - * @brief Shared memory block within a compute node + * @tparam T Element type of the memory window. * - * @details This class defines a shared memory window in a shared MPI - * communicator. In order to allocate a FunctionTree in shared memory, - * simply pass a SharedMemory object to the FunctionTree constructor. + * @par Usage + * - Construct on one or more ranks of `mpi::comm_share` to allocate a window. + * - Use `sh_start_ptr`/`sh_end_ptr`/`sh_max_ptr` to manage a simple bump allocator. + * - Call @ref clear to reset the bump pointer without freeing the window. + * + * @note In non-MPI builds, this class becomes a trivial holder and does not + * allocate any real shared memory. */ -template class SharedMemory { +template +class SharedMemory { public: + /** + * @brief Create (or attach to) a node-local shared-memory window. + * @param comm Node-local communicator (typically @ref mpi::comm_share). + * @param sh_size Window size in megabytes (MB). Only rank 0 in @p comm + * dictates the size; other ranks attach to it. 
+ * + * @details + * When `MRCPP_HAS_MPI` is enabled, this constructor calls: + * - `MPI_Win_allocate_shared` on @p comm with the requested size on rank 0 + * (and size 0 on others), + * - `MPI_Win_shared_query` so that every rank obtains a base pointer + * into the same window, + * - Initializes `sh_start_ptr`, `sh_end_ptr`, and `sh_max_ptr`. + */ SharedMemory(mrcpp::mpi_comm comm, int sh_size); + + /// Deleted copy constructor to avoid double-free of the MPI window. SharedMemory(const SharedMemory &mem) = delete; + /// Deleted copy assignment. SharedMemory &operator=(const SharedMemory &mem) = delete; + + /** + * @brief Destroy the shared window and release resources. + * Calls `MPI_Win_free` when built with MPI. + */ ~SharedMemory(); - void clear(); // show shared memory as entirely available + /** + * @brief Reset the bump pointer so the whole window appears free. + * Does not deallocate or shrink the MPI window. + */ + void clear(); - T *sh_start_ptr; // start of shared block - T *sh_end_ptr; // end of used part - T *sh_max_ptr; // end of shared block - mrcpp::mpi_win sh_win; // MPI window object - int rank; // rank among shared group + /// Pointer to the beginning of the shared window. + T *sh_start_ptr{nullptr}; + /// Pointer to one past the last used element (bump pointer). + T *sh_end_ptr{nullptr}; + /// Pointer to one past the last available element (capacity end). + T *sh_max_ptr{nullptr}; + /// Underlying MPI window handle. + mrcpp::mpi_win sh_win{}; + /// Rank of this process within the shared-memory communicator. 
+ int rank{0}; }; template class FunctionTree; -template void send_tree(FunctionTree &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true); -template void recv_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true); -template void share_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm); +/** + * @brief Send a @ref FunctionTree to another rank (blocking). + * + * Transfers node/structure (and optionally coefficient) chunks to @p dst + * using point-to-point MPI. If @p nChunks is negative, a small header with the + * number of chunks is sent first. + * + * @tparam D Dimensionality of the function. + * @tparam T Scalar type (`double` or @ref ComplexDouble). + * @param tree FunctionTree to send. + * @param dst Destination rank (in @p comm). + * @param tag Base MPI tag (chunk indices are offset from this). + * @param comm Communicator over which to send. + * @param nChunks Number of chunks to send; if `<0`, the count is sent first. + * @param coeff If true, also send coefficient chunks; otherwise only structure. + */ +template +void send_tree(FunctionTree &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true); + +/** + * @brief Receive a @ref FunctionTree from another rank (blocking). + * + * Reconstructs tree structure (and optionally coefficients) by receiving the + * same chunk layout produced by @ref send_tree. + * + * @tparam D Dimensionality of the function. + * @tparam T Scalar type (`double` or @ref ComplexDouble). + * @param tree Destination FunctionTree (reinitialized internally). + * @param src Source rank (in @p comm). + * @param tag Base MPI tag (must match sender). + * @param comm Communicator over which to receive. + * @param nChunks Number of chunks to receive; if `<0`, read the header first. + * @param coeff If true, receive coefficient chunks; otherwise only structure. 
+ */ +template +void recv_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true); + +/** + * @brief Share a @ref FunctionTree to all ranks in a node-local communicator. + * + * Used to mirror the latest version of a shared function across ranks that + * participate in a shared-memory group, without reconstructing the tree from scratch. + * + * @tparam D Dimensionality of the function. + * @tparam T Scalar type (`double` or @ref ComplexDouble). + * @param tree FunctionTree to disseminate. + * @param src Rank that owns the up-to-date copy (in @p comm). + * @param tag Base tag used to coordinate transfers. + * @param comm Communicator comprising the sharing ranks (e.g., @ref mpi::comm_share). + */ +template +void share_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm); } // namespace mrcpp diff --git a/src/utils/omp_utils.cpp b/src/utils/omp_utils.cpp index 67ccd3069..63c88aaaa 100644 --- a/src/utils/omp_utils.cpp +++ b/src/utils/omp_utils.cpp @@ -27,7 +27,6 @@ #include namespace mrcpp { -// By default we get OMP_NUM_THREADS int max_threads = mrcpp_get_max_threads(); void set_max_threads(int threads) { diff --git a/src/utils/omp_utils.h b/src/utils/omp_utils.h index 083387c27..9fc87a8af 100644 --- a/src/utils/omp_utils.h +++ b/src/utils/omp_utils.h @@ -24,31 +24,101 @@ */ #pragma once +/** + * @file + * @brief OpenMP utilities and portability shims for MRCPP. + * + * This header centralizes MRCPP's interaction with OpenMP so client code can + * compile and run both with and without OpenMP support. It provides: + * - A consistent way to query the number of threads/rank of a thread. + * - Lightweight lock helpers that compile away in non-OpenMP builds. + * - A global cap on MRCPP-managed threads to avoid oversubscription with Eigen. + * + * @note Eigen is explicitly forced to single-threaded mode via + * `EIGEN_DONT_PARALLELIZE` to prevent nested parallelism when MRCPP + * runs OpenMP regions. 
+ */ +/// Disable Eigen's internal multi-threading to avoid oversubscription. #define EIGEN_DONT_PARALLELIZE #ifdef MRCPP_HAS_OMP -#include -#define mrcpp_get_max_threads() omp_get_max_threads() -#define mrcpp_get_num_threads() mrcpp::max_threads -#define mrcpp_get_thread_num() omp_get_thread_num() -#define MRCPP_INIT_OMP_LOCK() omp_init_lock(&this->omp_lock) -#define MRCPP_DESTROY_OMP_LOCK() omp_destroy_lock(&this->omp_lock) -#define MRCPP_SET_OMP_LOCK() omp_set_lock(&this->omp_lock) -#define MRCPP_UNSET_OMP_LOCK() omp_unset_lock(&this->omp_lock) -#define MRCPP_TEST_OMP_LOCK() omp_test_lock(&this->omp_lock) + #include + + /** + * @name Thread query helpers (OpenMP build) + * @{ + */ + + /// Maximum number of threads OpenMP may use for parallel regions. + #define mrcpp_get_max_threads() omp_get_max_threads() + + /** + * Number of threads MRCPP intends to use in parallel regions. + * + * @details This is capped by user/runtime policy via @ref mrcpp::set_max_threads + * and may be lower than @c omp_get_max_threads() to respect node-level limits. + */ + #define mrcpp_get_num_threads() mrcpp::max_threads + + /// Zero-based thread id within a running OpenMP parallel region. + #define mrcpp_get_thread_num() omp_get_thread_num() + /** @} */ + + /** + * @name Lightweight lock helpers (OpenMP build) + * @brief Macros that operate on a member `omp_lock_t omp_lock;`. 
+ * @{ + */ + #define MRCPP_INIT_OMP_LOCK() omp_init_lock(&this->omp_lock) + #define MRCPP_DESTROY_OMP_LOCK() omp_destroy_lock(&this->omp_lock) + #define MRCPP_SET_OMP_LOCK() omp_set_lock(&this->omp_lock) + #define MRCPP_UNSET_OMP_LOCK() omp_unset_lock(&this->omp_lock) + #define MRCPP_TEST_OMP_LOCK() omp_test_lock(&this->omp_lock) + /** @} */ + #else -#define mrcpp_get_max_threads() 1 -#define mrcpp_get_num_threads() 1 -#define mrcpp_get_thread_num() 0 -#define MRCPP_INIT_OMP_LOCK() -#define MRCPP_DESTROY_OMP_LOCK() -#define MRCPP_SET_OMP_LOCK() -#define MRCPP_UNSET_OMP_LOCK() -#define MRCPP_TEST_OMP_LOCK() + /** + * @name Thread/query helpers (non-OpenMP build) + * @brief Serial fallbacks so code compiles and runs without OpenMP. + * @{ + */ + #define mrcpp_get_max_threads() 1 ///< Always 1 in serial builds. + #define mrcpp_get_num_threads() 1 ///< Always 1 in serial builds. + #define mrcpp_get_thread_num() 0 ///< Single thread has id 0. + /** @} */ + + /** + * @name Lock helpers (non-OpenMP build) + * @brief No-ops in serial builds. + * @{ + */ + #define MRCPP_INIT_OMP_LOCK() + #define MRCPP_DESTROY_OMP_LOCK() + #define MRCPP_SET_OMP_LOCK() + #define MRCPP_UNSET_OMP_LOCK() + #define MRCPP_TEST_OMP_LOCK() + /** @} */ #endif namespace mrcpp { + +/** + * @brief Upper bound on threads MRCPP will request for OpenMP regions. + * + * @details This value is used by @c mrcpp_get_num_threads() and allows MRCPP + * to honor node-level thread budgeting (e.g., when co-scheduled with MPI or + * other threaded libraries). In non-OpenMP builds this remains 1. + */ extern int max_threads; + +/** + * @brief Set the global thread cap used by MRCPP parallel regions. + * @param threads Desired number of threads (clamped to at least 1). + * + * @note This does not change system-wide OpenMP settings; it only influences + * MRCPP's internal use (e.g., via @c mrcpp_get_num_threads()). 
+ */ void set_max_threads(int threads); -} // namespace mrcpp + +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp index 510e34a1e..b47ea1b4a 100644 --- a/src/utils/parallel.cpp +++ b/src/utils/parallel.cpp @@ -1,3 +1,28 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. + * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see . 
+ * + * For information on the complete list of contributors to MRCPP, see: + * + */ + #include #include #include @@ -27,9 +52,7 @@ using namespace std; namespace mrcpp { namespace omp { - int n_threads = mrcpp_get_max_threads(); - } // namespace omp using namespace Eigen; @@ -41,7 +64,6 @@ namespace mpi { bool numerically_exact = false; int shared_memory_size = 1000; -// these parameters set by initialize() int world_size = 1; int world_rank = 0; int wrk_size = 1; @@ -52,24 +74,24 @@ int sh_group_rank = 0; int is_bank = 0; int is_centralbank = 0; int is_bankclient = 1; -int is_bankmaster = 0; // only one bankmaster is_bankmaster +int is_bankmaster = 0; int bank_size = 0; int bank_per_node = 0; -int omp_threads = -1; // can be set to force number of threads -int use_omp_num_threads = -1; // can be set to use number of threads from env -int tot_bank_size = 0; // size of bank, including the task manager -int max_tag = 0; // max value allowed by MPI +int omp_threads = -1; +int use_omp_num_threads = -1; +int tot_bank_size = 0; +int max_tag = 0; vector bankmaster; -int task_bank = -1; // world rank of the task manager +int task_bank = -1; MPI_Comm comm_wrk; MPI_Comm comm_share; MPI_Comm comm_sh_group; MPI_Comm comm_bank; -int id_shift; // to ensure that nodes, orbitals and functions do not collide +int id_shift; -extern int metadata_block[3]; // can add more metadata in future +extern int metadata_block[3]; extern int const size_metadata = 3; void initialize() { @@ -81,22 +103,15 @@ void initialize() { MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - // divide the world into groups - // each group has its own group communicator definition - - // count the number of process per node MPI_Comm node_comm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm); int node_rank, node_size; MPI_Comm_rank(node_comm, &node_rank); MPI_Comm_size(node_comm, &node_size); - // define independent group of MPI 
processes, that are not part of comm_wrk - // for now the new group does not include comm_share - comm_bank = MPI_COMM_WORLD; // clients and master - MPI_Comm comm_remainder; // clients only + comm_bank = MPI_COMM_WORLD; + MPI_Comm comm_remainder; - // set bank_size automatically if not defined by user if (world_size < 2) { bank_size = 0; } else if (bank_size < 0) { @@ -113,15 +128,13 @@ void initialize() { bankmaster.resize(bank_size); for (int i = 0; i < bank_size; i++) { - bankmaster[i] = world_size - i - 1; // rank of the bankmasters + bankmaster[i] = world_size - i - 1; } if (world_rank < world_size - bank_size) { - // everything which is left is_bank = 0; is_centralbank = 0; is_bankclient = 1; } else { - // special group of centralbankmasters is_bank = 1; is_centralbank = 1; is_bankclient = 0; @@ -129,53 +142,41 @@ void initialize() { } MPI_Comm_split(MPI_COMM_WORLD, is_bankclient, world_rank, &comm_remainder); - // split world into groups that can share memory MPI_Comm_split_type(comm_remainder, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share); MPI_Comm_rank(comm_share, &share_rank); MPI_Comm_size(comm_share, &share_size); - // define a rank of the group MPI_Comm_split(comm_remainder, share_rank, world_rank, &comm_sh_group); - // mpiShRank is color (same color->in same group) - // MPI_worldrank is key (orders rank within the groups) - // we define a new orbital rank, so that the orbitals within - // a shared memory group, have consecutive ranks MPI_Comm_rank(comm_sh_group, &sh_group_rank); wrk_rank = share_rank + sh_group_rank * world_size; MPI_Comm_split(comm_remainder, 0, wrk_rank, &comm_wrk); - // 0 is color (same color->in same group) - // mpiOrbRank is key (orders rank in the group) MPI_Comm_rank(comm_wrk, &wrk_rank); MPI_Comm_size(comm_wrk, &wrk_size); - // if bank_size is large enough, we reserve one as "task manager" tot_bank_size = bank_size; if (bank_size <= 2 and bank_size > 0) { - // use the first bank as task manager task_bank = 
bankmaster[0]; } else if (bank_size > 1) { - // reserve one bank for task management only bank_size--; - task_bank = bankmaster[bank_size]; // the last rank is reserved as task manager + task_bank = bankmaster[bank_size]; } - // determine the maximum value alowed for mpi tags void *val; int flag; - MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &val, &flag); // max value allowed by MPI for tags + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &val, &flag); max_tag = *(int *)val / 2; - id_shift = max_tag / 2; // half is reserved for non orbital. + id_shift = max_tag / 2; - MPI_Comm comm_share_world; // all that share the memory + MPI_Comm comm_share_world; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share_world); - int n_bank_thisnode; // number of banks on this node + int n_bank_thisnode; MPI_Allreduce(&is_bank, &n_bank_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world); - int n_wrk_thisnode; // number of workers on this node + int n_wrk_thisnode; MPI_Allreduce(&is_bankclient, &n_wrk_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world); int omp_threads_available = thread::hardware_concurrency(); @@ -183,42 +184,13 @@ void initialize() { int nthreads = 1; int my_OMP_NUM_THREADS = mrcpp_get_max_threads(); MPI_Bcast(&my_OMP_NUM_THREADS, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (use_omp_num_threads) { // we assume that the user has set the environment variable - // OMP_NUM_THREADS, such that the total number of threads that can be used on each node is - // OMP_NUM_THREADS * (number of MPI processes per node) - // NB: OMP_NUM_THREADS is the number of threads for all MPI processes on one node. - // The bank need only one thread, and can give "their" remaining share to workers. 
+ if (use_omp_num_threads) { int total_omp_threads_per_node = my_OMP_NUM_THREADS * (n_bank_thisnode + n_wrk_thisnode); nthreads = (total_omp_threads_per_node - n_bank_thisnode) / n_wrk_thisnode; } else { - // we determine the number of threads by detecting what is available - // determine the number of threads we can assign to each mpi worker. - // mrcpp_get_num_procs is total number of hardware logical threads accessible by this mpi - // NB: We assume that half of them are physical cores (not easily detectable). - // mrcpp_get_max_threads is OMP_NUM_THREADS (environment variable) but is NOT USED. - // omp_threads_available is the total number of logical threads available on this compute-node - // We assume that half of them are physical cores. - // - // five conditions should be satisfied: - // 1) the total number of threads used on the compute-node must not exceed thread::hardware_concurrency()/2 - // 2) no one use more than omp_get_num_procs()/2 - // 3) Bank needs only one thread - // 4) workers need as many threads as possible (but all workers use same number of threads) - // 5) at least one thread - if (is_bankclient) nthreads = (omp_threads_available / 2 - n_bank_thisnode) / n_wrk_thisnode; // 1) and 4) - // cout< 0) { if (omp_threads != nthreads and world_rank == 0) { cout << "Warning: recommended number of threads is " << nthreads << endl; @@ -227,7 +199,7 @@ void initialize() { nthreads = omp_threads; } } - nthreads = max(1, nthreads); // 5) + nthreads = max(1, nthreads); if (nthreads * n_wrk_thisnode + n_bank_thisnode < omp_threads_available / 3 and world_rank == 0) { std::cout << "WARNING: only " << nthreads * n_wrk_thisnode + n_bank_thisnode << " threads used per node while " << omp_threads_available << " logical cpus are accessible " << std::endl; @@ -239,7 +211,6 @@ void initialize() { mrcpp::set_max_threads(nthreads); if (is_bank) { - // bank is open until end of program if (is_centralbank) { dataBank.open(); } finalize(); exit(EXIT_SUCCESS); @@ 
-256,7 +227,7 @@ void finalize() { println(4, " max data in bank " << dataBank.get_maxtotalsize() << " MB "); dataBank.close(); } - MPI_Barrier(MPI_COMM_WORLD); // to ensure everybody got here + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif } @@ -267,10 +238,6 @@ void barrier(MPI_Comm comm) { #endif } -/********************************* - * Orbital related MPI functions * - *********************************/ - bool grand_master() { return (world_rank == 0 and is_bankclient) ? true : false; } @@ -279,29 +246,24 @@ bool share_master() { return (share_rank == 0) ? true : false; } -/** @brief Test if function belongs to this MPI rank */ bool my_func(int j) { return ((j) % wrk_size == wrk_rank) ? true : false; } -/** @brief Test if function belongs to this MPI rank */ bool my_func(const CompFunction<3> &func) { return my_func(func.rank()); } -/** @brief Test if function belongs to this MPI rank */ bool my_func(CompFunction<3> *func) { return my_func(func->rank()); } -/** @brief Free all function pointers not belonging to this MPI rank */ void free_foreign(CompFunctionVector &Phi) { for (CompFunction<3> &i : Phi) { if (not my_func(i)) i.free(); } } -/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(IntVector &vec, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -309,7 +271,6 @@ void allreduce_vector(IntVector &vec, MPI_Comm comm) { #endif } -/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(DoubleVector &vec, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -317,7 +278,6 @@ void allreduce_vector(DoubleVector &vec, MPI_Comm comm) { #endif } -/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(ComplexVector &vec, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -325,7 +285,6 @@ void allreduce_vector(ComplexVector &vec, MPI_Comm comm) { #endif } -/** @brief Add up each 
entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(IntMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -333,7 +292,6 @@ void allreduce_matrix(IntMatrix &mat, MPI_Comm comm) { #endif } -/** @brief Add up each entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -341,7 +299,6 @@ void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) { #endif } -/** @brief Add up each entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -349,11 +306,9 @@ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) { #endif } -// send a component function with MPI void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI for (int i = 0; i < func.Ncomp(); i++) { - // make sure that Nchunks is up to date if (func.isreal()) func.Nchunks()[i] = func.CompD[i]->getNChunks(); else @@ -369,7 +324,6 @@ void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) #endif } -// receive a component function with MPI void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -385,7 +339,6 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { #endif } -/** Update a shared function after it has been changed by one of the MPI ranks. 
*/ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { if (func.isShared()) { #ifdef MRCPP_HAS_MPI @@ -399,40 +352,31 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { } } -/** @brief Add all mpi function into rank zero */ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) { -/* 1) Each odd rank send to the left rank - 2) All odd ranks are "deleted" (can exit routine) - 3) new "effective" ranks are defined within the non-deleted ranks - effective rank = rank/fac , where fac are powers of 2 - 4) repeat - */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; // powers of 2 + int fac = 1; while (fac < comm_size) { if ((comm_rank / fac) % 2 == 0) { - // receive int src = comm_rank + fac; if (src < comm_size) { CompFunction<3> func_i; int tag = 3333 + src; recv_function(func_i, src, tag, comm); - func.add(1.0, func_i); // add in place using union grid + func.add(1.0, func_i); func.crop(prec); } } if ((comm_rank / fac) % 2 == 1) { - // send int dest = comm_rank - fac; if (dest >= 0) { int tag = 3333 + comm_rank; send_function(func, dest, tag, comm); - break; // once data is sent we are done + break; } } fac *= 2; @@ -441,39 +385,30 @@ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) { #endif } -/** @brief make union tree and send into rank zero */ template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) { -/* 1) Each odd rank send to the left rank - 2) All odd ranks are "deleted" (can exit routine) - 3) new "effective" ranks are defined within the non-deleted ranks - effective rank = rank/fac , where fac are powers of 2 - 4) repeat - */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; // powers of 2 + int fac = 1; while (fac < comm_size) { 
if ((comm_rank / fac) % 2 == 0) { - // receive int src = comm_rank + fac; if (src < comm_size) { int tag = 3333 + src; mrcpp::FunctionTree<3, T> tree_i(tree.getMRA()); mrcpp::recv_tree(tree_i, src, tag, comm, -1, false); - tree.appendTreeNoCoeff(tree_i); // make union grid + tree.appendTreeNoCoeff(tree_i); } } if ((comm_rank / fac) % 2 == 1) { - // send int dest = comm_rank - fac; if (dest >= 0) { int tag = 3333 + comm_rank; mrcpp::send_tree(tree, dest, tag, comm, -1, false); - break; // once data is sent we are done + break; } } fac *= 2; @@ -482,14 +417,7 @@ template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, #endif } -/** @brief make union tree without coeff and send to all - */ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector> &Phi, MPI_Comm comm) { - /* 1) make union grid of own orbitals - 2) make union grid with others orbitals (sent to rank zero) - 3) rank zero broadcast func to everybody - */ - int N = Phi.size(); for (int j = 0; j < N; j++) { if (not my_func(j)) continue; @@ -501,14 +429,7 @@ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tre #endif } -/** @brief make union tree without coeff and send to all - */ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector> &Phi, MPI_Comm comm) { - /* 1) make union grid of own orbitals - 2) make union grid with others orbitals (sent to rank zero) - 3) rank zero broadcast func to everybody - */ - int N = Phi.size(); for (int j = 0; j < N; j++) { if (not my_func(j)) continue; @@ -521,28 +442,24 @@ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tre #endif } -/** @brief Distribute rank zero function to all ranks */ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) { -/* use same strategy as a reduce, but in reverse order */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; // powers of 2 + int fac = 1; 
while (fac < comm_size) fac *= 2; fac /= 2; while (fac > 0) { if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) { - // receive int src = comm_rank - fac; int tag = 4334 + comm_rank; recv_function(func, src, tag, comm); } if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) { - // send int dst = comm_rank + fac; int tag = 4334 + dst; if (dst < comm_size) send_function(func, dst, tag, comm); @@ -553,28 +470,24 @@ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) { #endif } -/** @brief Distribute rank zero function to all ranks */ template void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) { -/* use same strategy as a reduce, but in reverse order */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; // powers of 2 + int fac = 1; while (fac < comm_size) fac *= 2; fac /= 2; while (fac > 0) { if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) { - // receive int src = comm_rank - fac; int tag = 4334 + comm_rank; mrcpp::recv_tree(tree, src, tag, comm, -1, false); } if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) { - // send int dst = comm_rank + fac; int tag = 4334 + dst; if (dst < comm_size) mrcpp::send_tree(tree, dst, tag, comm, -1, false); diff --git a/src/utils/parallel.h b/src/utils/parallel.h index 395cc1174..417b1d852 100644 --- a/src/utils/parallel.h +++ b/src/utils/parallel.h @@ -1,4 +1,46 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. 
+ * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see <https://www.gnu.org/licenses/>. + * + * For information on the complete list of contributors to MRCPP, see: + * <https://mrcpp.readthedocs.io/> + */ + #pragma once +/** + * @file + * @brief MPI/OpenMP orchestration and collectives for MRCPP. + * + * This header declares the process/thread orchestration utilities and the + * common collective/point-to-point helpers used by MRCPP to distribute + * multiresolution data structures across MPI ranks (and optionally coordinate + * with OpenMP threads). It provides: + * + * - Initialization/finalization of the MRCPP MPI environment. + * - Rank/topology helpers (e.g., “grand master”, ownership checks). + * - Typed send/recv/broadcast for @ref mrcpp::CompFunction and trees. + * - Element-wise allreduce helpers for Eigen vectors/matrices. + * + * All MPI symbols are no-ops in non-MPI builds (compiled without + * `MRCPP_HAS_MPI`), allowing the same interface to work in serial.
+ */ #include @@ -12,61 +54,241 @@ using namespace Eigen; -using IntVector = Eigen::VectorXi; +using IntVector = Eigen::VectorXi; using DoubleVector = Eigen::VectorXd; -using ComplexVector = Eigen::VectorXcd; +using ComplexVector= Eigen::VectorXcd; -using IntMatrix = Eigen::MatrixXi; +using IntMatrix = Eigen::MatrixXi; using DoubleMatrix = Eigen::MatrixXd; -using ComplexMatrix = Eigen::MatrixXcd; +using ComplexMatrix= Eigen::MatrixXcd; namespace mrcpp { +/** + * @namespace mrcpp::omp + * @brief OpenMP runtime hints used by the parallel layer. + */ namespace omp { -extern int n_threads; +extern int n_threads; ///< Number of OpenMP threads MRCPP intends to use. } // namespace omp -class Bank; -extern Bank dataBank; +class Bank; ///< Forward declaration of the in-memory data bank. +extern Bank dataBank; ///< Global bank instance used by bank ranks. +/** + * @namespace mrcpp::mpi + * @brief MPI utilities, communicators, and collectives. + * + * Functions in this namespace act as thin wrappers around MPI and encode + * MRCPP’s distribution policy for component functions and trees. + */ namespace mpi { +/** @brief World ranks assigned to bank masters (control/data services). */ extern std::vector bankmaster; +/** + * @brief Initialize MRCPP’s MPI environment and process topology. + * + * Sets up communicators (workers, shared-memory groups, bank group), + * partitions ranks into worker/bank roles, and configures OpenMP thread + * counts per rank. Safe to call exactly once at program start. + */ void initialize(); + +/** + * @brief Finalize MRCPP’s MPI environment. + * + * Performs a global barrier, closes the global data bank (if present), and + * calls `MPI_Finalize()` in MPI builds. Safe to call once at program exit. + */ void finalize(); + +/** + * @brief Rank barrier on a given communicator. + * @param comm MPI communicator to synchronize. + * + * In non-MPI builds this is a no-op. 
+ */ void barrier(MPI_Comm comm); +/** + * @brief Whether this rank is the global worker “grand master”. + * @return @c true iff world rank is 0 and the rank is a worker (not a bank). + */ bool grand_master(); + +/** + * @brief Whether this rank is the master of its shared-memory group. + * @return @c true iff rank is 0 within @ref mpi::comm_share. + */ bool share_master(); +/** + * @name Ownership helpers + * @brief Determine whether an object/function is owned by this rank. + * @{ + */ + +/** + * @brief Ownership test for an index. + * @param j Global function index. + * @return @c true if @c j maps to this rank under MRCPP’s block-cyclic policy. + */ bool my_func(int j); + +/** + * @brief Ownership test for a component function (const ref). + * @param func Component function to test. + * @return @c true if @p func belongs to this rank (by @c func.rank()). + */ bool my_func(const CompFunction<3> &func); + +/** + * @brief Ownership test for a component function (pointer). + * @param func Pointer to component function. + * @return @c true if @p func belongs to this rank (by @c func->rank()). + */ bool my_func(CompFunction<3> *func); +/** @} */ -// bool my_unique_orb(const Orbital &orb); +/** + * @brief Free memory held by functions not owned by this rank. + * @param Phi Vector of component functions; foreign entries are freed in place. + */ void free_foreign(CompFunctionVector &Phi); +/** + * @name Point-to-point transfers for component functions + * @brief Send/receive/share a @ref mrcpp::CompFunction across ranks. + * @{ + */ + +/** + * @brief Send a component function to a destination rank. + * @param func Function to send. + * @param dst Destination world rank. + * @param tag Message tag base (submessages will offset from this). + * @param comm Communicator (default: worker communicator). + * + * Sends the function header followed by its component trees. Assumes the + * receiver uses @ref recv_function with the same @p tag and @p comm. 
+ */ void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk); + +/** + * @brief Receive a component function from a source rank. + * @param func Function to receive into (resized as needed). + * @param src Source world rank. + * @param tag Message tag base (must match sender). + * @param comm Communicator (default: worker communicator). + */ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk); + +/** + * @brief Update shared-memory replicas of a function after modification. + * @param func Function to share (must be marked shared). + * @param src Rank that produced the update. + * @param tag Base tag for the transfer. + * @param comm Communicator that defines the sharing group. + * + * Only has effect if the function was allocated in shared memory. + */ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm); +/** @} */ +/** + * @brief Reduce (sum/accumulate) a function onto rank 0 of @p comm. + * @param prec Cropping precision applied after each accumulation. + * @param func Function buffer holding the local contribution; on rank 0 it + * becomes the global sum; on other ranks it may be left unchanged. + * @param comm Communicator over which to reduce. + * + * Uses a binary-tree pattern to send odd ranks to preceding even ranks; the + * receiver adds and crops to control growth. + */ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm); + +/** + * @brief Broadcast a function from rank 0 to all ranks in @p comm. + * @param func Buffer to receive (or hold, on root) the broadcasted function. + * @param comm Communicator to broadcast over. + * + * Implements a reverse of the binary-tree pattern used by + * @ref reduce_function. 
+ */ void broadcast_function(CompFunction<3> &func, MPI_Comm comm); -template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm); -template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, std::vector> &Phi, MPI_Comm comm); -template void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm); +/** + * @name Tree collectives (no coefficient payload) + * @brief Perform collectives on @ref mrcpp::FunctionTree without coefficients. + * @{ + */ + +/** + * @brief Reduce (union) grids from all ranks to rank 0, excluding coeffs. + * @tparam T Coefficient scalar type of the tree. + * @param tree Output/input tree on each rank; on rank 0 it becomes the union. + * @param comm Communicator. + */ +template +void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm); -template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, std::vector> &Phi, MPI_Comm comm); +/** + * @brief Build local union grid, reduce to rank 0, then broadcast to all. + * @tparam T Coefficient scalar type. + * @param tree Target tree to hold the global union grid (no coeffs). + * @param Phi Vector of trees whose grids contribute to the union. + * @param comm Communicator. + */ +template +void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, + std::vector> &Phi, + MPI_Comm comm); +/** + * @brief Broadcast a no-coeff tree from rank 0 to all ranks. + * @tparam T Coefficient scalar type. + * @param tree Tree to broadcast/receive. + * @param comm Communicator. + */ +template +void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm); + +/** + * @brief Build union grid from owned components in @p Phi, allreduce to all. + * @tparam T Coefficient scalar type. + * @param tree Output tree receiving the global union grid. + * @param Phi Vector of component functions contributing their grids. + * @param comm Communicator. 
+ */ +template +void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, + std::vector> &Phi, + MPI_Comm comm); +/** @} */ + +/** + * @name Element-wise allreduce (sum) helpers + * @brief Sum across ranks into every rank for Eigen containers. + * @{ + */ + +/** @brief In-place element-wise sum allreduce for integer vectors. */ void allreduce_vector(IntVector &vec, MPI_Comm comm); +/** @brief In-place element-wise sum allreduce for double vectors. */ void allreduce_vector(DoubleVector &vec, MPI_Comm comm); +/** @brief In-place element-wise sum allreduce for complex vectors. */ void allreduce_vector(ComplexVector &vec, MPI_Comm comm); + +/** @brief In-place element-wise sum allreduce for integer matrices. */ void allreduce_matrix(IntMatrix &vec, MPI_Comm comm); +/** @brief In-place element-wise sum allreduce for double matrices. */ void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm); +/** @brief In-place element-wise sum allreduce for complex matrices. */ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm); +/** @} */ } // namespace mpi -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/periodic_utils.cpp b/src/utils/periodic_utils.cpp index af8f753e2..afc23d80d 100644 --- a/src/utils/periodic_utils.cpp +++ b/src/utils/periodic_utils.cpp @@ -76,8 +76,6 @@ template void index_manipulation(NodeIndex &idx, const std::array void coord_manipulation(Coord &r, const std::array &periodic) { for (auto i = 0; i < D; i++) { r[i] *= 0.5; diff --git a/src/utils/periodic_utils.h b/src/utils/periodic_utils.h index f79a893d3..1ea95aaf2 100644 --- a/src/utils/periodic_utils.h +++ b/src/utils/periodic_utils.h @@ -24,12 +24,98 @@ */ #pragma once +/** + * @file + * @brief Periodic boundary utilities for node indices and real-space coordinates. 
+ * + * This header declares helpers for enforcing periodic boundary conditions (PBC) + * on both discrete tree indices (@ref mrcpp::NodeIndex) and continuous + * coordinates (@ref mrcpp::Coord). The utilities are templated on dimension + * @p D and support selectively periodic directions via a boolean mask. + * + * Typical use cases: + * - Normalizing a node index to the canonical unit cell before lookup. + * - Wrapping real-space coordinates into the primary cell when sampling or + * exporting data. + * - Applying periodicity per axis (e.g., 2D slab periodic in x/y but not z). + */ #include "MRCPP/mrcpp_declarations.h" + namespace mrcpp { +/** + * @namespace mrcpp::periodic + * @brief Helpers for periodic index/coordinate manipulation. + * + * The functions here assume MRCPP’s convention where the canonical cell is the + * unit hypercube, and indices/coordinates are normalized accordingly: + * - Discrete indices: @ref NodeIndex logically cover tiles of the unit cell + * at a given resolution (scale). These helpers re-map out-of-range indices + * back into the unit cell modulo the periodic axes. + * - Continuous coordinates: @ref Coord (double-valued) are wrapped by + * subtracting/adding integer lattice vectors along periodic axes so that + * the result lies in the half-open interval [0, 1) per periodic dimension. + */ namespace periodic { -template bool in_unit_cell(NodeIndex idx); -template void index_manipulation(NodeIndex &idx, const std::array &periodic); -template void coord_manipulation(Coord &r, const std::array &periodic); + +/** + * @brief Check whether a node index lies inside the unit cell. + * + * @tparam D Spatial dimension. + * @param idx Node index to test (scale and per-dimension integer indices). + * @return @c true if @p idx is within the canonical unit cell bounds in all + * dimensions; @c false if any component is outside. 
+ * + * @details “Inside” means the discrete index components fall in the valid range + * for the node’s scale with no modular wrap required. This does not modify + * @p idx and performs a pure check. Use @ref index_manipulation to fold an + * index back into the unit cell when periodicity is intended. + */ +template +bool in_unit_cell(NodeIndex idx); + +/** + * @brief Fold a node index into the unit cell under per-axis periodicity. + * + * @tparam D Spatial dimension. + * @param[in,out] idx Node index to normalize; on return, the per-axis integer + * index components are mapped into the unit-cell range for + * the node’s scale when the corresponding axis is periodic. + * @param periodic Boolean mask of length @p D; @c true marks an axis as + * periodic, @c false leaves that axis unchanged (no wrapping). + * + * @details + * For each periodic axis, the index component is reduced modulo the extent at + * the node’s scale so that the resulting index is in-range. For non-periodic + * axes, the index is left as-is (and may remain out-of-bounds if provided so). + * + * @note This function is idempotent for already in-range indices on periodic + * axes and is a no-op for non-periodic axes. + */ +template +void index_manipulation(NodeIndex &idx, const std::array &periodic); + +/** + * @brief Wrap a coordinate into the unit cell under per-axis periodicity. + * + * @tparam D Spatial dimension. + * @param[in,out] r Coordinate to normalize; each periodic component is wrapped + * into the half-open interval [0, 1). + * @param periodic Boolean mask of length @p D; @c true marks an axis as + * periodic, @c false leaves that axis unchanged (no wrapping). + * + * @details + * For each periodic axis, the component @f$r_d@f$ is replaced by + * @f$r_d - \lfloor r_d \rfloor@f$, producing a value in [0, 1). Non-periodic + * axes are not modified. This is equivalent to applying @c std::floor-based + * fractional reduction to each periodic component. 
+ * + * @warning If your simulation cell is scaled or shifted relative to the unit + * cube, convert to reduced coordinates before calling this function, or adjust + * the values accordingly (e.g., divide by box length) and convert back. + */ +template +void coord_manipulation(Coord &r, const std::array &periodic); + } // namespace periodic -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/utils/tree_utils.cpp b/src/utils/tree_utils.cpp index 333544f6e..8cc22d564 100644 --- a/src/utils/tree_utils.cpp +++ b/src/utils/tree_utils.cpp @@ -39,11 +39,6 @@ namespace mrcpp { -/** Calculate the threshold for the wavelet norm. - * - * Calculates the threshold that has to be met in the wavelet norm in order to - * guarantee the precision in the function representation. Depends on the - * square norm of the function and the requested relative accuracy. */ template bool tree_utils::split_check(const MWNode &node, double prec, double split_fac, bool abs_prec) { bool split = false; if (prec > 0.0) { @@ -64,8 +59,6 @@ template bool tree_utils::split_check(const MWNode &no return split; } -/** Traverse tree along the Hilbert path and find nodes of any rankId. - * Returns one nodeVector for the whole tree. GenNodes disregarded. */ template void tree_utils::make_node_table(MWTree &tree, MWNodeVector &table) { TreeIterator it(tree, TopDown, Hilbert); it.setReturnGenNodes(false); @@ -81,8 +74,6 @@ template void tree_utils::make_node_table(MWTree &tree } } -/** Traverse tree along the Hilbert path and find nodes of any rankId. - * Returns one nodeVector per scale. GenNodes disregarded. 
*/ template void tree_utils::make_node_table(MWTree &tree, std::vector> &table) { TreeIterator it(tree, TopDown, Hilbert); it.setReturnGenNodes(false); @@ -90,7 +81,6 @@ template void tree_utils::make_node_table(MWTree &tree MWNode &node = it.getNode(); if (node.getDepth() == 0) continue; int depth = node.getDepth() + tree.getNNegScales(); - // Add one more element if (depth + 1 > table.size()) table.push_back(MWNodeVector()); table[depth].push_back(&node); } @@ -98,18 +88,11 @@ template void tree_utils::make_node_table(MWTree &tree while (it.next()) { MWNode &node = it.getNode(); int depth = node.getDepth() + tree.getNNegScales(); - // Add one more element if (depth + 1 > table.size()) table.push_back(MWNodeVector()); table[depth].push_back(&node); } } -/** Make children scaling coefficients from parent - * Other node info are not used/set - * coeff_in are not modified. - * The output is written directly into the 8 children scaling coefficients. - * NB: ASSUMES that the children coefficients are separated by Children_Stride! - */ template void tree_utils::mw_transform(const MWTree &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite) { int operation = Reconstruction; int kp1 = tree.getKp1(); @@ -127,8 +110,6 @@ template void tree_utils::mw_transform(const MWTree &t ftlim = 1; ftlim2 = 2; ftlim3 = 4; - // NB: Careful: tmpcoeff tmpcoeff2 are not initialized to zero - // must not read these unitialized values! } overwrite = 0.0; @@ -137,9 +118,6 @@ template void tree_utils::mw_transform(const MWTree &t for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff + gt * kp1_d; for (int ft = 0; ft < ftlim; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. 
The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_in + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -153,13 +131,10 @@ template void tree_utils::mw_transform(const MWTree &t } if (D > 1) { i++; - mask = 2; // 1 << i; + mask = 2; for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff2 + gt * kp1_d; for (int ft = 0; ft < ftlim2; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -176,13 +151,10 @@ template void tree_utils::mw_transform(const MWTree &t overwrite = 1.0; if (b_overwrite) overwrite = 0.0; i++; - mask = 4; // 1 << i; + mask = 4; for (int gt = 0; gt < tDim; gt++) { - T *out = coeff_out + gt * stride; // write right into children + T *out = coeff_out + gt * stride; for (int ft = 0; ft < ftlim3; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff2 + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -220,12 +192,6 @@ template void tree_utils::mw_transform(const MWTree &t // NOT_IMPLEMENTED_ABORT; //} -/** Make parent from children scaling coefficients - * Other node info are not used/set - * coeff_in are not modified. - * The output is read directly from the 8 children scaling coefficients. - * NB: ASSUMES that the children coefficients are separated by Children_Stride! 
- */ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride) { int operation = Compression; int kp1 = tree.getKp1(); @@ -245,9 +211,6 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * for (int gt = 0; gt < tDim; gt++) { T *out = coeff_out + gt * kp1_d; for (int ft = 0; ft < ftlim; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_in + ft * stride; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -260,13 +223,10 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * overwrite = 0.0; } i++; - mask = 2; // 1 << i; + mask = 2; for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff + gt * kp1_d; for (int ft = 0; ft < ftlim2; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_out + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -279,14 +239,10 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * overwrite = 0.0; } i++; - mask = 4; // 1 << i; + mask = 4; for (int gt = 0; gt < tDim; gt++) { T *out = coeff_out + gt * kp1_d; - // T *out = coeff_out + gt * N_coeff; for (int ft = 0; ft < ftlim3; ft++) { - // Operate in direction i only if the bits along other - // directions are identical. 
The bit of the direction we - // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); diff --git a/src/utils/tree_utils.h b/src/utils/tree_utils.h index 56c8c7d79..52d7a564b 100644 --- a/src/utils/tree_utils.h +++ b/src/utils/tree_utils.h @@ -29,16 +29,149 @@ #include "utils/math_utils.h" namespace mrcpp { +/** + * @file + * @brief Utilities for inspecting and transforming Multiwavelet (MW) trees. + * + * @details + * This header declares helper routines that operate on MRCPP tree structures: + * - adaptive refinement decisions based on wavelet norms, + * - creation of per-scale or flat node tables (Hilbert-ordered), + * - forward and backward multiwavelet transforms between parent/children + * scaling coefficients. + * + * Unless otherwise stated, functions are **not** thread-safe; synchronize at + * a higher level if multiple threads may act on the same tree or buffers. + */ namespace tree_utils { -template bool split_check(const MWNode &node, double prec, double split_fac, bool abs_prec); +/** + * @brief Decide whether a node should be split (refined) based on its wavelet norm. + * + * @tparam D Spatial dimension of the MW tree. + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param node Node to be tested. + * @param prec Target accuracy (relative by default). Non-positive disables splitting. + * @param split_fac Scale-dependent factor. If `> MachineZero`, the threshold is + * scaled by \f$2^{-0.5 \cdot \text{split\_fac} \cdot (s+1)}\f$ + * where `s` is the node scale; this makes refinement stricter + * at finer scales. + * @param abs_prec When `true`, interpret `prec` as an **absolute** tolerance. + * When `false`, use a **relative** tolerance multiplied by + * \f$\|f\|\f$ (square-norm taken from the owning tree). 
+ * + * @return `true` if the node’s wavelet norm exceeds the computed threshold and + * the node should be refined; `false` otherwise. + * + * @details + * The decision compares \f$\|\mathbf{w}\|\f$ (node wavelet norm) to a threshold: + * \f[ + * \tau = \max(2\,\text{MachinePrec},\; + * \text{prec} \times (\text{abs\_prec} ? 1 : \|f\|) \times \text{scale\_fac}) + * \f] + * where \f$\text{scale\_fac}\f$ is determined by `split_fac` as described above. + * If the owning tree’s square norm is zero and `abs_prec == false`, a fallback + * of \f$\|f\|=1\f$ is used. + */ +template +bool split_check(const MWNode &node, double prec, double split_fac, bool abs_prec); + +/** + * @brief Build a flat, Hilbert-ordered table of all non-root nodes in a tree. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param tree Input MW tree. + * @param table Output vector receiving pointers to all internal and leaf nodes + * (root depth 0 is skipped). Nodes are traversed in a Hilbert + * space-filling curve order; generator nodes are excluded. + * + * @details + * Useful for linear passes (e.g., I/O, diagnostics, custom sweeps) where a + * contiguous list of nodes is required. + */ +template +void make_node_table(MWTree &tree, MWNodeVector &table); + +/** + * @brief Build per-scale Hilbert-ordered node tables. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param tree Input MW tree. + * @param table Output vector of vectors. Index `d` stores node pointers whose + * depth corresponds to `d - tree.getNNegScales()`. Each inner + * vector is Hilbert-ordered; generator nodes are excluded. + * + * @details + * This form is convenient for level-wise processing such as multigrid cycles, + * visualization, or per-scale statistics. 
+ */ +template +void make_node_table(MWTree &tree, std::vector> &table); -template void make_node_table(MWTree &tree, MWNodeVector &table); -template void make_node_table(MWTree &tree, std::vector> &table); +/** + * @brief Forward MW transform: build children scaling coefficients from a parent block. + * + * @tparam D Spatial dimension (implemented for 1, 2, 3). + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param tree Tree providing filter and arity/meta information. + * @param coeff_in Pointer to the parent block (size = `kp1^D` entries), + * laid out in standard MRCPP order. + * @param coeff_out Pointer to the destination buffer for **children** blocks. + * This routine writes (or accumulates) into `2^D` child + * blocks separated by `stride` elements each. + * @param readOnlyScaling If `true`, operate as if only scaling components are + * present (skips mixing with wavelets internally). + * @param stride Stride, in elements, between consecutive child blocks + * inside `coeff_out`. Must be at least `kp1^D`. + * @param overwrite When `true` (default), assign into `coeff_out`. + * When `false`, accumulate (add) into existing values. + * + * @pre + * - `coeff_out` points to sufficient writable storage: + * at least `2^D * stride` elements of type `T`. + * - `coeff_in` points to at least `kp1^D` elements. + * + * @post + * - The `2^D` children scaling blocks are produced in-place in `coeff_out`. + * + * @note + * Complexity is \f$O(2^D \cdot k^{D+1})\f$ for polynomial order `k` (where `kp1 = k+1`). + * For `D > 3` the routine is not implemented. 
+ */ +template +void mw_transform(const MWTree &tree, + T *coeff_in, + T *coeff_out, + bool readOnlyScaling, + int stride, + bool overwrite = true); -template void mw_transform(const MWTree &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool overwrite = true); // template void mw_transform_back(MWTree &tree, T *coeff_in, T *coeff_out, int stride); -template void mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride); + +/** + * @brief Backward MW transform (3D specialization): build the parent block from children. + * + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param tree Tree providing filter and arity/meta information. + * @param coeff_in Pointer to the concatenated **children** blocks (8 blocks in 3D), + * each of size `kp1^3`, separated by `stride` elements. + * @param coeff_out Pointer to the **parent** block storage (size `kp1^3`). + * @param stride Stride, in elements, between consecutive children blocks. + * + * @pre + * - `coeff_in` provides at least `8 * stride` elements. + * - `coeff_out` provides at least `kp1^3` writable elements. + * + * @post + * - The parent scaling block is reconstructed into `coeff_out`. + * + * @note + * Only the \f$D=3\f$ variant is provided. Use @ref mw_transform for the forward direction. 
+ */ +template +void mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride); } // namespace tree_utils } // namespace mrcpp From 218178f2de7aaa0fc99758d4e2f156034b4306d7 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 08:17:35 +0300 Subject: [PATCH 11/51] Update Bank.cpp --- src/utils/Bank.cpp | 276 +++++++++++++++++---------------------------- 1 file changed, 104 insertions(+), 172 deletions(-) diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp index f8c111a53..b924b233d 100644 --- a/src/utils/Bank.cpp +++ b/src/utils/Bank.cpp @@ -1,3 +1,28 @@ +/* + * MRCPP, a numerical library based on multiresolution analysis and + * the multiwavelet basis which provide low-scaling algorithms as well as + * rigorous error control in numerical computations. + * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. + * + * This file is part of MRCPP. + * + * MRCPP is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * MRCPP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with MRCPP. If not, see <https://www.gnu.org/licenses/>.
+ * + * For information on the complete list of contributors to MRCPP, see: + * + */ + #include "Printer.h" #include "Timer.h" @@ -8,35 +33,31 @@ namespace mrcpp { using namespace Eigen; using namespace std; -int metadata_block[3]; // can add more metadata in future +int metadata_block[3]; int const size_metadata = 3; Bank::~Bank() { - // delete all data and accounts } struct Blockdata_struct { - std::vector data; // to store the incoming data. One column for each orbital on the same node. - int N_rows = 0; // the number of coefficients in one column of the block. - std::map id2data; // internal index of the data in the block - std::vector id; // the id of each column. Either nodeid, or orbid + std::vector data; + int N_rows = 0; + std::map id2data; + std::vector id; }; struct OrbBlock_struct { - std::vector data; // pointer to the data - std::map id2data; // internal index of the data in the block - std::vector id; // the nodeid of the data - // note that N_rows can be different inside the same orbblock: root node have scaling and wavelets, other nodes have only wavelets + std::vector data; + std::map id2data; + std::vector id; }; struct mem_struct { - std::vector chunk_p; // vector with allocated chunks - int p = -1; // position of next available memory (not allocated if < 0) - // on Betzy 1024*1024*4 ok, 1024*1024*2 NOT ok: leads to memory fragmentation (on "Betzy" 2023) - int chunk_size = 1024 * 1024 * 4; // chunksize (in number of doubles). 
data_p[i]+chunk_size is end of chunk i + std::vector chunk_p; + int p = -1; + int chunk_size = 1024 * 1024 * 4; int account = -1; double *get_mem(int size) { - if (p < 0 or size > chunk_size or p + size > chunk_size) { // allocate new chunk of memory + if (p < 0 or size > chunk_size or p + size > chunk_size) { if (size > 1024 * 1024) { - // make a special chunk just for this double *m_p = new double[size]; chunk_p.push_back(m_p); p = -1; @@ -52,12 +73,12 @@ struct mem_struct { return m_p; } }; -std::map *> get_nodeid2block; // to get block from its nodeid (all coeff for one node) -std::map *> get_orbid2block; // to get block from its orbid +std::map *> get_nodeid2block; +std::map *> get_orbid2block; std::map mem; -int const MIN_SCALE = -999; // Smaller than smallest scale +int const MIN_SCALE = -999; int naccounts = 0; void Bank::open() { @@ -75,7 +96,6 @@ void Bank::open() { int next_task = 0; int tot_ntasks = 0; std::map> readytasks; - // The bank never goes out of this loop until it receives a close message! 
while (true) { MPI_Recv(messages, message_size, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, comm_bank, &status); if (printinfo) @@ -83,26 +103,23 @@ void Bank::open() { << messages[2] << std::endl; int message = messages[0]; - // can be called directly: if (message == CLOSE_BANK) { if (is_bank and printinfo) std::cout << "Bank is closing" << std::endl; this->clear_bank(); - break; // close bank, i.e stop listening for incoming messages + break; } else if (message == GET_MAXTOTDATA) { - int maxsize_int = maxsize / 1024; // convert into MB + int maxsize_int = maxsize / 1024; MPI_Send(&maxsize_int, 1, MPI_INT, status.MPI_SOURCE, 1171, comm_bank); continue; } else if (message == GET_TOTDATA) { - int maxsize_int = totcurrentsize / 1024; // convert into MB + int maxsize_int = totcurrentsize / 1024; MPI_Send(&maxsize_int, 1, MPI_INT, status.MPI_SOURCE, 1172, comm_bank); continue; } else if (message == NEW_ACCOUNT) { - // we just have to pick out a number that is not already assigned int account = (max_account_id + 1) % 1000000000; - while (get_deposits.count(account)) account = (account + 1) % 1000000000; // improbable this is used + while (get_deposits.count(account)) account = (account + 1) % 1000000000; max_account_id = account; naccounts++; - // create default content get_deposits[account] = new std::vector; get_deposits[account]->resize(1); get_id2ix[account] = new std::map; @@ -120,8 +137,6 @@ void Bank::open() { continue; } - // the following is only accessible through an account - int account = messages[1]; auto it_dep = get_deposits.find(account); if (it_dep == get_deposits.end() || it_dep->second == nullptr) { @@ -129,7 +144,7 @@ void Bank::open() { MSG_ABORT("Account error"); } std::vector &deposits = *get_deposits[account]; - std::map &id2ix = *get_id2ix[account]; // gives zero if id is not defined + std::map &id2ix = *get_id2ix[account]; std::map &id2qu = *get_id2qu[account]; std::vector &queue = *get_queue[account]; std::map &orbid2block = 
*get_orbid2block[account]; @@ -144,7 +159,6 @@ void Bank::open() { if (message == CLOSE_ACCOUNT) { get_numberofclients[account]--; if (get_numberofclients[account] == 0) { - // all clients have closed the account. We remove the account. remove_account(account); } } @@ -153,33 +167,29 @@ void Bank::open() { this->clear_bank(); for (auto const &block : nodeid2block) { if (block.second.data.size() > 0) { - currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB - totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; // converted into kB + currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; + totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; } } nodeid2block.clear(); orbid2block.clear(); - // send message that it is ready (value of message is not used) MPI_Ssend(&message, 1, MPI_INT, status.MPI_SOURCE, 77, comm_bank); } else if (message == GET_NODEDATA or message == GET_NODEBLOCK) { - // NB: has no queue system yet - int nodeid = messages[2]; // which block to fetch from + int nodeid = messages[2]; if (nodeid2block.count(nodeid)) { Blockdata_struct &block = nodeid2block[nodeid]; - int dataindex = 0; // internal index of the data in the block + int dataindex = 0; int size = 0; if (message == GET_NODEDATA) { - int orbid = messages[3]; // which part of the block to fetch - dataindex = block.id2data[orbid]; // column of the data in the block - size = block.N_rows; // number of doubles to fetch + int orbid = messages[3]; + dataindex = block.id2data[orbid]; + size = block.N_rows; if (size != messages[4]) std::cout << "ERROR nodedata has wrong size" << std::endl; double *data_p = block.data[dataindex]; if (size > 0) MPI_Send(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } else { - // send entire block. First make one contiguous superblock - // Prepare the data as one contiguous block if (block.data.size() == 0) std::cout << "Zero size blockdata! 
" << nodeid << " " << block.N_rows << std::endl; MatrixXd DataBlock(block.N_rows, block.data.size()); size = block.N_rows * block.data.size(); @@ -187,49 +197,43 @@ void Bank::open() { for (int j = 0; j < block.data.size(); j++) { for (int i = 0; i < block.N_rows; i++) { DataBlock(i, j) = block.data[j][i]; } } - dataindex = 0; // start from first column - // send info about the size of the superblock - metadata_block[0] = nodeid; // nodeid - metadata_block[1] = block.data.size(); // number of columns - metadata_block[2] = size; // total size = rows*columns + dataindex = 0; + metadata_block[0] = nodeid; + metadata_block[1] = block.data.size(); + metadata_block[2] = size; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); - // send info about the id of each column MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank); if (size > 0) MPI_Send(DataBlock.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } } else { if (printinfo) std::cout << " block " << nodeid << " does not exist " << std::endl; - // Block with this id does not exist. 
if (message == GET_NODEDATA) { - int size = messages[4]; // number of doubles to send + int size = messages[4]; if (size == 0) { std::cout << "WARNING: GET_NODEDATA asks for zero size data" << std::endl; metadata_block[2] = size; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 3, comm_bank); } else { - std::vector zero(size, 0.0); // send zeroes + std::vector zero(size, 0.0); MPI_Ssend(zero.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } } else { metadata_block[0] = nodeid; - metadata_block[1] = 0; // number of columns - metadata_block[2] = 0; // total size = rows*columns + metadata_block[1] = 0; + metadata_block[2] = 0; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); } } } else if (message == GET_ORBBLOCK) { - // NB: BLOCKDATA has no queue system yet - int orbid = messages[2]; // which block to fetch from + int orbid = messages[2]; if (orbid2block.count(orbid)) { OrbBlock_struct &block = orbid2block[orbid]; if (block.data.size() == 0) std::cout << "Zero size blockdata! C " << orbid << " " << std::endl; - // send entire block. First make one contiguous superblock - // Prepare the data as one contiguous block int size = 0; for (int j = 0; j < block.data.size(); j++) { int nodeid = block.id[j]; - int Nrows = nodeid2block[nodeid].N_rows; // note that root nodes have scaling and wavelets, while other nodes have only wavelets -> N_rows is not a constant. 
+ int Nrows = nodeid2block[nodeid].N_rows; size += Nrows; } std::vector coeff(size); @@ -239,33 +243,28 @@ void Bank::open() { int Nrows = nodeid2block[nodeid].N_rows; for (int i = 0; i < Nrows; i++) { coeff[ij++] = block.data[j][i]; } } - // send info about the size of the superblock metadata_block[0] = orbid; - metadata_block[1] = block.data.size(); // number of columns - metadata_block[2] = size; // total size = rows*columns + metadata_block[1] = block.data.size(); + metadata_block[2] = size; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank); MPI_Send(coeff.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } else { - // it is possible and allowed that the block has not been written if (printinfo) std::cout << " block does not exist " << orbid << " " << orbid2block.count(orbid) << std::endl; - // Block with this id does not exist. metadata_block[0] = orbid; - metadata_block[1] = 0; // number of columns - metadata_block[2] = 0; // total size = rows*columns + metadata_block[1] = 0; + metadata_block[2] = 0; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); } } else if (message == GET_FUNCTION or message == GET_FUNCTION_AND_WAIT or message == GET_FUNCTION_AND_DELETE or message == GET_FUNCTION or message == GET_DATA) { - // withdrawal int id = messages[2]; if (message == GET_DATA and messages[3] > MIN_SCALE) { NodeIndex<3> nIdx; nIdx.setScale(messages[4]); nIdx.setTranslation({messages[2], messages[5], messages[6]}); if (nIdx2id.count(nIdx) == 0) { - // data is not yet saved, but one can hope it will be created at some stage id = nIdx2id.size(); nIdx2id[nIdx] = id; } else { @@ -276,18 +275,15 @@ void Bank::open() { if (id2ix.count(id) == 0 or ix == 0) { if (printinfo) std::cout << world_rank << " not found " << id << " " << message << std::endl; if (message == GET_FUNCTION or message == 
GET_FUNCTION_AND_DELETE) { - // do not wait for the orbital to arrive int found = 0; if (printinfo) std::cout << world_rank << " sending found 0 to " << status.MPI_SOURCE << std::endl; MPI_Send(&found, 1, MPI_INT, status.MPI_SOURCE, 117, comm_bank); } else { - // the id does not exist. Put in queue and Wait until it is defined if (printinfo) std::cout << world_rank << " queuing " << id << " " << id2ix.count(id) << ix << std::endl; if (id2qu[id] == 0) { queue.push_back({id, {status.MPI_SOURCE}}); id2qu[id] = queue.size() - 1; } else { - // somebody is already waiting for this id. queue in queue queue[id2qu[id]].clients.push_back(status.MPI_SOURCE); } } @@ -310,18 +306,16 @@ void Bank::open() { if (message == GET_DATA) { MPI_Send(deposits[ix].data, deposits[ix].datasize, MPI_DOUBLE, status.MPI_SOURCE, 1, comm_bank); } } } else if (message == SAVE_NODEDATA) { - int nodeid = messages[2]; // which block to write - int orbid = messages[3]; // which part of the block - int size = messages[4]; // number of doubles + int nodeid = messages[2]; + int orbid = messages[3]; + int size = messages[4]; - // test if the block exists already if (printinfo) std::cout << world_rank << " save data nodeid " << nodeid << " size " << size << std::endl; - // append the incoming data Blockdata_struct &block = nodeid2block[nodeid]; - block.id2data[orbid] = nodeid2block[nodeid].data.size(); // internal index of the data in the block - double *data_p = mem[account]->get_mem(size); // new double[size]; - currentsize[account] += size / 128; // converted into kB - totcurrentsize += size / 128; // converted into kB + block.id2data[orbid] = nodeid2block[nodeid].data.size(); + double *data_p = mem[account]->get_mem(size); + currentsize[account] += size / 128; + totcurrentsize += size / 128; this->maxsize = std::max(totcurrentsize, this->maxsize); block.data.push_back(data_p); block.id.push_back(orbid); @@ -329,18 +323,15 @@ void Bank::open() { block.N_rows = size; OrbBlock_struct &orbblock = 
orbid2block[orbid]; - orbblock.id2data[nodeid] = orbblock.data.size(); // internal index of the data in the block + orbblock.id2data[nodeid] = orbblock.data.size(); orbblock.data.push_back(data_p); orbblock.id.push_back(nodeid); - // orbblock.N_rows.push_back(size); MPI_Recv(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 1, comm_bank, &status); if (printinfo) std::cout << " written block " << nodeid << " id " << orbid << " subblocks " << nodeid2block[nodeid].data.size() << std::endl; } else if (message == SAVE_FUNCTION or message == SAVE_DATA) { - // make a new deposit int id = messages[2]; if (message == SAVE_DATA and messages[4] > MIN_SCALE) { - // has to find or create unique id from NodeIndex. Use the same internal mapping for all trees NodeIndex<3> nIdx; nIdx.setScale(messages[4]); nIdx.setTranslation({messages[2], messages[5], messages[6]}); @@ -355,27 +346,26 @@ void Bank::open() { if (id2ix[id]) { std::cout << "WARNING: id " << id << " exists already" << " " << status.MPI_SOURCE << " " << message << " " << messages[1] << std::endl; - ix = id2ix[id]; // the deposit exist from before. 
Will be overwritten + ix = id2ix[id]; exist_flag = 1; if (message == SAVE_DATA and !deposits[ix].hasdata) { datasize = messages[3]; exist_flag = 0; - // deposits[ix].data = new double[datasize]; deposits[ix].data = mem[account]->get_mem(datasize); - currentsize[account] += datasize / 128; // converted into kB - totcurrentsize += datasize / 128; // converted into kB + currentsize[account] += datasize / 128; + totcurrentsize += datasize / 128; this->maxsize = std::max(totcurrentsize, this->maxsize); deposits[ix].hasdata = true; } } else { - ix = deposits.size(); // NB: ix is now index of last element + 1 + ix = deposits.size(); deposits.resize(ix + 1); if (message == SAVE_FUNCTION) deposits[ix].orb = new CompFunction<3>(0); if (message == SAVE_DATA) { datasize = messages[3]; - deposits[ix].data = mem[account]->get_mem(datasize); // new double[datasize]; - currentsize[account] += datasize / 128; // converted into kB - totcurrentsize += datasize / 128; // converted into kB + deposits[ix].data = mem[account]->get_mem(datasize); + currentsize[account] += datasize / 128; + totcurrentsize += datasize / 128; this->maxsize = std::max(totcurrentsize, this->maxsize); deposits[ix].hasdata = true; } @@ -397,33 +387,30 @@ void Bank::open() { MPI_Recv(deposits[ix].data, datasize, MPI_DOUBLE, deposits[ix].source, 1, comm_bank, &status); } if (id2qu[deposits[ix].id] != 0) { - // someone is waiting for those data. 
Send to them int iq = id2qu[deposits[ix].id]; if (deposits[ix].id != queue[iq].id) std::cout << ix << " Bank queue accounting error " << std::endl; for (int iqq : queue[iq].clients) { if (message == SAVE_FUNCTION) { send_function(*deposits[ix].orb, iqq, 1, comm_bank); } if (message == SAVE_DATA) { MPI_Send(deposits[ix].data, messages[3], MPI_DOUBLE, iqq, 1, comm_bank); } } - queue[iq].clients.clear(); // cannot erase entire queue[iq], because that would require to shift all the - // id2qu value larger than iq + queue[iq].clients.clear(); queue[iq].id = -1; id2qu.erase(deposits[ix].id); } - // Task manager members: } else if (message == INIT_TASKS) { tot_ntasks = messages[2]; next_task = 0; } else if (message == GET_NEXTTASK) { int task = next_task; - if (next_task >= tot_ntasks) task = -1; // flag to show all tasks are assigned + if (next_task >= tot_ntasks) task = -1; MPI_Send(&task, 1, MPI_INT, status.MPI_SOURCE, 1, comm_bank); next_task++; } else if (message == PUT_READYTASK) { readytasks[messages[2]].push_back(messages[3]); } if (message == DEL_READYTASK) { - for (int i = 0; i < readytasks[messages[2]].size(); i++) { // we expect small sizes + for (int i = 0; i < readytasks[messages[2]].size(); i++) { if (readytasks[messages[2]][i] == messages[3]) { readytasks[messages[2]].erase(readytasks[messages[2]].begin() + i); break; @@ -445,7 +432,6 @@ void Bank::open() { #endif } -// Ask to close the Bank void Bank::close() { #ifdef MRCPP_HAS_MPI int messages[message_size]; @@ -484,7 +470,7 @@ void Bank::remove_account(int account) { currentsize[account] -= deposits[ix].datasize / 128; totcurrentsize -= deposits[ix].datasize / 128; } - if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; // indicate that it does not exist + if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; deposits[ix].hasdata = false; } deposits.clear(); @@ -502,8 +488,8 @@ void Bank::remove_account(int account) { std::map &orbid2block = *get_orbid2block[account]; 
for (auto const &block : nodeid2block) { - currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB - totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; // converted into kB + currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; + totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; } nodeid2block.clear(); orbid2block.clear(); @@ -518,7 +504,6 @@ void Bank::remove_account(int account) { } int Bank::openAccount(int iclient, MPI_Comm comm) { - // NB: this is a collective call, since we need all the accounts to be synchronized int account_id[1] = {-1}; #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -545,7 +530,6 @@ int Bank::openAccount(int iclient, MPI_Comm comm) { } int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { - // NB: this is a collective call, since we need all the accounts to be synchronized int account_id = -1; #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -558,7 +542,6 @@ int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { MPI_Send(messages, 2, MPI_INT, task_bank, 0, comm_bank); MPI_Recv(&account_id, 1, MPI_INT, task_bank, 1, comm_bank, &status); if (tot_bank_size == bank_size) { - // make a dummy account so that all account_id are synchronized int account_id_i; for (int i = 0; i < bank_size; i++) { if (bankmaster[i] != task_bank) { @@ -581,7 +564,6 @@ int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { } void Bank::closeAccount(int account_id) { -// The account will in reality not be removed before everybody has sent a close message #ifdef MRCPP_HAS_MPI MPI_Status status; int messages[message_size]; @@ -592,7 +574,6 @@ void Bank::closeAccount(int account_id) { } void Bank::closeTaskManager(int account_id) { -// The account will in reality not be removed before everybody has sent a close message #ifdef MRCPP_HAS_MPI MPI_Status status; int messages[message_size]; @@ -634,13 +615,6 @@ std::vector 
Bank::get_totalsize() { return tot; } -// Accounts: (clients) - -// save orbital in Bank with identity id - -// get orbital with identity id. -// If wait=0, return immediately with value zero if not available (default) -// else, wait until available int BankAccount::get_func(int id, CompFunction<3> &func, int wait) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -667,8 +641,6 @@ int BankAccount::get_func(int id, CompFunction<3> &func, int wait) { return 1; } -// get orbital with identity id, and delete from bank. -// return immediately with value zero if not available int BankAccount::get_func_del(int id, CompFunction<3> &orb) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -689,10 +661,8 @@ int BankAccount::get_func_del(int id, CompFunction<3> &orb) { return 1; } -// save function in Bank with identity id int BankAccount::put_func(int id, CompFunction<3> &func) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_FUNCTION; messages[1] = account_id; @@ -703,44 +673,38 @@ int BankAccount::put_func(int id, CompFunction<3> &func) { return 1; } -// save data in Bank with identity id . datasize MUST have been set already. NB:not tested int BankAccount::put_data(int id, int size, double *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = id; messages[3] = size; - messages[4] = MIN_SCALE; // to indicate that it is defined by id + messages[4] = MIN_SCALE; MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); #endif return 1; } -// save data in Bank with identity id . datasize MUST have been set already. 
NB:not tested int BankAccount::put_data(int id, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = id; - messages[3] = size * 2; // save as twice as many doubles - messages[4] = MIN_SCALE; // to indicate that it is defined by id + messages[3] = size * 2; + messages[4] = MIN_SCALE; MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); #endif return 1; } -// save data in Bank with identity nIdx. datasize MUST have been set already. NB:not tested int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; @@ -756,15 +720,13 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) { return 1; } -// save data in Bank with identity nIdx. datasize MUST have been set already. 
NB:not tested int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = nIdx.getTranslation(0); - messages[3] = size * 2; // save as twice as many doubles + messages[3] = size * 2; messages[4] = nIdx.getScale(); messages[5] = nIdx.getTranslation(1); messages[6] = nIdx.getTranslation(2); @@ -775,7 +737,6 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { return 1; } -// get data with identity id int BankAccount::get_data(int id, int size, double *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -790,7 +751,6 @@ int BankAccount::get_data(int id, int size, double *data) { return 1; } -// get data with identity id int BankAccount::get_data(int id, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -800,13 +760,11 @@ int BankAccount::get_data(int id, int size, ComplexDouble *data) { messages[2] = id; messages[3] = MIN_SCALE; MPI_Send(messages, 4, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); - // fetch as twice as many doubles MPI_Recv(data, size * 2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status); #endif return 1; } -// get data with identity id int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -825,7 +783,6 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) { return 1; } -// get data with identity id int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -839,84 +796,72 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { messages[5] = nIdx.getTranslation(1); messages[6] = nIdx.getTranslation(2); MPI_Send(messages, 7, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); - // fetch as twice as many doubles MPI_Recv(data, size * 2, MPI_DOUBLE, 
bankmaster[id % bank_size], 1, comm_bank, &status); #endif return 1; } -// save data in Bank with identity id as part of block with identity nodeid. int BankAccount::put_nodedata(int id, int nodeid, int size, double *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to nodeid int messages[message_size]; messages[0] = SAVE_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; // which block - messages[3] = id; // id within block - messages[4] = size; // size of this data + messages[2] = nodeid; + messages[3] = id; + messages[4] = size; MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank); #endif return 1; } -// save data in Bank with identity id as part of block with identity nodeid. -// NB: Complex is stored as two doubles int BankAccount::put_nodedata(int id, int nodeid, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI - // for now we distribute according to nodeid int messages[message_size]; messages[0] = SAVE_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; // which block - messages[3] = id; // id within block - messages[4] = 2 * size; // size of this data + messages[2] = nodeid; + messages[3] = id; + messages[4] = 2 * size; MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Send(data, 2 * size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank); #endif return 1; } -// get data with identity id int BankAccount::get_nodedata(int id, int nodeid, int size, double *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; - // get the column with identity id int messages[message_size]; messages[0] = GET_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; // which block - messages[3] = id; // id within block. 
- messages[4] = size; // expected size of data + messages[2] = nodeid; + messages[3] = id; + messages[4] = size; MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Recv(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status); #endif return 1; } -// get data with identity id int BankAccount::get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; - // get the column with identity id int messages[message_size]; messages[0] = GET_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; // which block - messages[3] = id; // id within block. - messages[4] = size; // expected size of data + messages[2] = nodeid; + messages[3] = id; + messages[4] = size; MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Recv(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status); #endif return 1; } -// get all data for nodeid (same nodeid, different orbitals) int BankAccount::get_nodeblock(int nodeid, double *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; - // get the entire superblock and also the id of each column int messages[message_size]; messages[0] = GET_NODEBLOCK; messages[1] = account_id; @@ -932,11 +877,9 @@ int BankAccount::get_nodeblock(int nodeid, double *data, std::vector &idVec return 1; } -// get all data for nodeid (same nodeid, different orbitals) int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; - // get the entire superblock and also the id of each column int messages[message_size]; messages[0] = GET_NODEBLOCK; messages[1] = account_id; @@ -952,12 +895,10 @@ int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector return 1; } -// get all data with identity orbid (same orbital, different nodes) int BankAccount::get_orbblock(int orbid, double *&data, std::vector &nodeidVec, int 
bankstart) { #ifdef MRCPP_HAS_MPI MPI_Status status; int nodeid = wrk_rank + bankstart; - // get the entire superblock and also the nodeid of each column int messages[message_size]; messages[0] = GET_ORBBLOCK; messages[1] = account_id; @@ -973,12 +914,10 @@ int BankAccount::get_orbblock(int orbid, double *&data, std::vector &nodeid return 1; } -// get all data with identity orbid (same orbital, different nodes) int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector &nodeidVec, int bankstart) { #ifdef MRCPP_HAS_MPI MPI_Status status; int nodeid = wrk_rank + bankstart; - // get the entire superblock and also the nodeid of each column int messages[message_size]; messages[0] = GET_ORBBLOCK; messages[1] = account_id; @@ -994,7 +933,6 @@ int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector return 1; } -// creator. NB: collective BankAccount::BankAccount(int iclient, MPI_Comm comm) { this->account_id = dataBank.openAccount(iclient, comm); #ifdef MRCPP_HAS_MPI @@ -1002,18 +940,14 @@ BankAccount::BankAccount(int iclient, MPI_Comm comm) { #endif } -// destructor BankAccount::~BankAccount() { - // The account will in reality not be removed before everybody has sent a delete message dataBank.closeAccount(this->account_id); } -// closes account and reopen a new empty account. NB: account_id will change void BankAccount::clear(int iclient, MPI_Comm comm) { this->account_id = dataBank.clearAccount(this->account_id, iclient, comm); } -// creator. 
NB: collective TaskManager::TaskManager(int ntasks, int iclient, MPI_Comm comm) { this->n_tasks = ntasks; if (bank_size == 0) return; @@ -1023,9 +957,7 @@ TaskManager::TaskManager(int ntasks, int iclient, MPI_Comm comm) { #endif } -// destructor TaskManager::~TaskManager() { - // The account will in reality not be removed before everybody has sent a delete message if (this->account_id < 0) return; dataBank.closeTaskManager(this->account_id); } @@ -1095,4 +1027,4 @@ std::vector TaskManager::get_readytask(int i, int del) { return readytasks; } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From aee5b7841f33276159f6b9835309cb5007137485 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 09:44:08 +0300 Subject: [PATCH 12/51] Done doxygen in .h and removed by .cpp within treebuilders folder --- src/treebuilders/ConvolutionCalculator.cpp | 203 +------------- src/treebuilders/ConvolutionCalculator.h | 250 +++++++++++++++++- src/treebuilders/CopyAdaptor.cpp | 118 +-------- src/treebuilders/CopyAdaptor.h | 95 ++++++- .../CrossCorrelationCalculator.cpp | 98 +------ src/treebuilders/CrossCorrelationCalculator.h | 91 ++++++- src/treebuilders/DefaultCalculator.h | 60 ++++- src/treebuilders/DerivativeCalculator.cpp | 173 +----------- src/treebuilders/DerivativeCalculator.h | 160 ++++++++++- src/treebuilders/MapCalculator.h | 112 +++++++- src/treebuilders/MultiplicationAdaptor.h | 106 +++++++- src/treebuilders/MultiplicationCalculator.h | 119 ++++++++- src/treebuilders/OperatorAdaptor.h | 64 ++++- src/treebuilders/PHCalculator.cpp | 63 +---- src/treebuilders/PHCalculator.h | 110 +++++++- src/treebuilders/PowerCalculator.h | 106 +++++++- src/treebuilders/ProjectionCalculator.cpp | 126 +-------- src/treebuilders/ProjectionCalculator.h | 89 ++++++- src/treebuilders/SplitAdaptor.h | 56 +++- src/treebuilders/SquareCalculator.h | 116 +++++++- ...meEvolution_CrossCorrelationCalculator.cpp | 102 +------ 
...TimeEvolution_CrossCorrelationCalculator.h | 134 ++++++++-- src/treebuilders/TreeAdaptor.h | 111 +++++++- src/treebuilders/TreeBuilder.cpp | 106 +------- src/treebuilders/TreeBuilder.h | 113 +++++++- src/treebuilders/TreeCalculator.h | 81 +++++- src/treebuilders/WaveletAdaptor.h | 95 ++++++- src/treebuilders/complex_apply.cpp | 79 +----- src/treebuilders/complex_apply.h | 96 ++++++- src/treebuilders/grid.cpp | 239 +---------------- src/treebuilders/grid.h | 236 +++++++++++++++-- src/treebuilders/map.cpp | 79 ------ src/treebuilders/map.h | 104 +++++++- src/treebuilders/multiply.cpp | 149 ----------- src/treebuilders/multiply.h | 181 ++++++++++++- src/treebuilders/project.cpp | 100 +------ src/treebuilders/project.h | 115 +++++++- 37 files changed, 2697 insertions(+), 1738 deletions(-) diff --git a/src/treebuilders/ConvolutionCalculator.cpp b/src/treebuilders/ConvolutionCalculator.cpp index 24e9be125..ae1d4012f 100644 --- a/src/treebuilders/ConvolutionCalculator.cpp +++ b/src/treebuilders/ConvolutionCalculator.cpp @@ -23,54 +23,6 @@ * */ -/** - * @file ConvolutionCalculator.cpp - * @brief Adaptive node-wise application kernel for separable convolution operators. - * - * @details - * This file implements the templated class - * mrcpp::ConvolutionCalculator, which is the **workhorse** used by the - * adaptive `TreeBuilder` when applying a separable convolution operator - * (#mrcpp::ConvolutionOperator) to a multiresolution function tree - * (#mrcpp::FunctionTree). 
- * - * At a high level, for each **target** node \f$ g \f$ (in the output tree) - * the calculator: - * - determines the **band** of **source** nodes \f$ f \f$ that can - * contribute via the operator's bandwidth model, - * - estimates cheap **screening bounds** using precomputed operator norms, - * the local source/target norms, and a precision policy, - * - for surviving pairs \f$ (g,f) \f$, performs a sequence of small - * **tensor contractions** (one per Cartesian direction) to apply the - * separable operator component(s) and accumulates the result into \f$ g \f$. - * - * The class also: - * - precomputes **band-size factors** per depth and component-combination to - * drive thresholding, - * - supports **periodic worlds** and optional **unit-cell manipulation** - * (near-field vs. far-field selection), - * - collects **per-thread timings** and **operator-usage statistics**. - * - * ### Screening model (outline) - * Let \f$ \mathcal{O} = \sum_i \bigotimes_{d=1}^D O_i^{(d)} \f$ be the - * separable expansion (terms indexed by \f$ i \f$). For a source node - * \f$ f \f$ and target node \f$ g \f$, the calculator estimates - * \f[ - * \| \mathcal{O}_i f \| \;\lesssim\; - * \Big(\prod_{d=1}^D \|O_i^{(d)}\|\Big)\; \|f\|\; s(i, \Delta \ell) - * \f] - * where \f$ s(\cdot) \f$ is a band-size factor depending on depth and the - * component combination, and compares the bound to a target threshold - * \f$ \tau(g) \sim \texttt{prec} \cdot \sqrt{\|g\|^2 / N_\text{terms}} \f$. - * Only terms that can exceed \f$ \tau(g) \f$ are explicitly applied. - * - * ### BLAS vs. Eigen - * If BLAS is available, the directional contractions can be carried out via - * GEMM. Otherwise, an Eigen-based path is used. Both routes compute - * \f$ G \leftarrow F^\top O \f$ in each direction and **accumulate** on the - * last direction to the target buffer. 
- */ - #include "ConvolutionCalculator.h" #include "operators/ConvolutionOperator.h" #include "operators/OperatorState.h" @@ -94,21 +46,6 @@ using Eigen::MatrixXi; namespace mrcpp { -/** - * @brief Construct a calculator for applying a convolution operator. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (`double` or `ComplexDouble`). - * @param p Target precision used for screening and adaptivity. - * @param o Separable convolution operator to apply. - * @param f Source function tree (input). - * @param depth Maximum operator depth considered for band-size tables. - * - * @details - * Initializes per-term **band-size tables** (used in screening) and - * allocates per-thread timers. The `depth` argument is upper-bounded by - * `MaxDepth`. - */ template ConvolutionCalculator::ConvolutionCalculator(double p, ConvolutionOperator &o, FunctionTree &f, int depth) : maxDepth(depth) @@ -120,9 +57,6 @@ ConvolutionCalculator::ConvolutionCalculator(double p, ConvolutionOperator initTimers(); } -/** - * @brief Destructor: clear timers and print aggregated operator statistics. - */ template ConvolutionCalculator::~ConvolutionCalculator() { clearTimers(); this->operStat.flushNodeCounters(); @@ -130,9 +64,6 @@ template ConvolutionCalculator::~ConvolutionCalculator for (int i = 0; i < this->bandSizes.size(); i++) { delete this->bandSizes[i]; } } -/** - * @brief Allocate per-thread timers for band construction, calculation, and norm updates. - */ template void ConvolutionCalculator::initTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -142,9 +73,6 @@ template void ConvolutionCalculator::initTimers() { } } -/** - * @brief Release per-thread timers. 
- */ template void ConvolutionCalculator::clearTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -157,9 +85,6 @@ template void ConvolutionCalculator::clearTimers() { this->norm_t.clear(); } -/** - * @brief Print a compact report of thread-wise timings. - */ template void ConvolutionCalculator::printTimers() const { int oldprec = Printer::setPrecision(1); int nThreads = mrcpp_get_max_threads(); @@ -175,15 +100,6 @@ template void ConvolutionCalculator::printTimers() con Printer::setPrecision(oldprec); } -/** - * @brief Precompute per-depth band-size factors for all operator terms. - * - * @details - * For each raw operator term and each depth, builds a table of the number of - * source nodes formally falling within the **Cartesian bandwidth box** for - * every component-combination (gt,ft). These factors are later used to scale - * screening thresholds. - */ template void ConvolutionCalculator::initBandSizes() { for (int i = 0; i < this->oper->size(); i++) { // IMPORTANT: only 0-th dimension! @@ -196,19 +112,6 @@ template void ConvolutionCalculator::initBandSizes() { } } -/** - * @brief Compute band-size factor for a given depth from a bandwidth model. - * - * @param[out] bs Table to be filled (rows: depth, cols: component-pairs plus a max column). - * @param[in] depth Operator depth relative to root. - * @param[in] bw Bandwidth model (per-depth widths per component index). - * - * @details - * For each component pair \f$(g_t,f_t)\f$, the routine forms the Cartesian - * product of directional half-widths to estimate the number of contributing - * source nodes and stores it in \p bs. The last column stores the row-wise - * maximum for quick access. 
- */ template void ConvolutionCalculator::calcBandSizeFactor(MatrixXi &bs, int depth, const BandWidth &bw) { for (int gt = 0; gt < this->nComp; gt++) { for (int ft = 0; ft < this->nComp; ft++) { @@ -229,18 +132,6 @@ template void ConvolutionCalculator::calcBandSizeFacto bs(depth, this->nComp2) = bs.row(depth).maxCoeff(); } -/** - * @brief Build the band of source nodes affected by the operator for a given target node. - * - * @param[in] gNode Target node (in the output tree). - * @param[out] idx_band Matching indices of the source nodes added to the band. - * @returns A vector of pointers to the source nodes \f$ f \f$. - * - * @details - * The band is the intersection between the operator's bandwidth box centered - * at \p gNode and the function-tree world box, respecting periodicity and - * (optionally) unit-cell filtering when `manipulateOperator` is enabled. - */ template MWNodeVector *ConvolutionCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { auto *band = new MWNodeVector; @@ -281,21 +172,8 @@ template MWNodeVector *ConvolutionCalculator::ma return band; } -/** - * @brief Recursive helper to enumerate all source indices inside the bandwidth box. - * - * @param[out] band Vector of pointers to source nodes added along the recursion. - * @param[out] idx_band Parallel vector of node indices corresponding to \p band. - * @param[in] idx Current multi-index (mutated along recursion). - * @param[in] nbox Side lengths of the bandwidth box. - * @param[in] dim Current dimension to recurse on. - * - * @details - * If **unit-cell manipulation** is enabled, nodes are included/excluded based - * on their membership in the first unit cell (for periodic worlds) and the - * `onUnitcell` flag. 
- */ -template void ConvolutionCalculator::fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim) { +template +void ConvolutionCalculator::fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim) { int l_start = idx[dim]; for (int j = 0; j < nbox[dim]; j++) { // Recurse until dim == 0 @@ -337,18 +215,6 @@ template void ConvolutionCalculator::fillOperBand(MWNo idx[dim] = l_start; } -/** - * @brief Compute contributions to a single **target** node by scanning its band. - * - * @param[in,out] node Target node (coefficients are accumulated here). - * - * @details - * - Builds the source band for the target node. - * - Computes a **local target threshold** from the node's tree norm and `prec`. - * - Loops over band nodes and component combinations, performing **screening**. - * - For surviving pairs, applies all operator terms via `applyOperComp`. - * - Updates node norms at the end. - */ template void ConvolutionCalculator::calcNode(MWNode &node) { auto &gNode = static_cast &>(node); gNode.zeroCoefs(); @@ -402,20 +268,6 @@ template void ConvolutionCalculator::calcNode(MWNode void ConvolutionCalculator::applyOperComp(OperatorState &os) { double fNorm = os.fNode->getComponentNorm(os.ft); int o_depth = os.fNode->getScale() - this->oper->getOperatorRoot(); @@ -429,24 +281,6 @@ template void ConvolutionCalculator::applyOperComp(Ope } } -/** - * @brief Apply a single operator term to a single source node (low-level path). - * - * @param i Index of the operator term in the separable expansion. - * @param os Operator state (nodes, buffers, norms, component indices). - * - * @details - * For each direction: - * - Fetch the operator-block at the required translation (\f$ \Delta \ell \f$) - * and depth \f$ o\_depth \f$; multiply the running contraction with its norm - * and keep a raw pointer to its coefficient block. - * - If the translation is outside bandwidth, return early. 
- * After the per-direction setup: - * - Form an **upper bound** as product of directional norms times the - * source-threshold and compare to the target-threshold. - * - If active, dispatch to `tensorApplyOperComp` to carry out the contraction - * and accumulate into the target node buffer. - */ template void ConvolutionCalculator::applyOperator(int i, OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -480,19 +314,6 @@ template void ConvolutionCalculator::applyOperator(int } } -/** - * @brief Perform the directional tensor contractions for one operator term. - * - * @param os Operator state (holds mapped buffers for in-place contractions). - * - * @details - * The contraction sequence computes, for each direction \f$ d \f$, - * \f$ G \leftarrow F^\top O^{(d)} \f$, with **accumulation** on the last - * direction. If a directional block is `nullptr`, an identity map is used - * (i.e., pure transposition). - * - * Both a BLAS path (disabled here) and an Eigen path are implemented. - */ template void ConvolutionCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -544,16 +365,6 @@ template void ConvolutionCalculator::tensorApplyOperCo //#endif } -/** - * @brief Ensure parent nodes exist up to the operator root (periodic worlds). - * - * @param tree Target/output tree. - * - * @details - * When operating in periodic settings, parent nodes above the root scale - * may be required for coarse contributions; this helper guarantees their - * presence prior to work scheduling. - */ template void ConvolutionCalculator::touchParentNodes(MWTree &tree) const { if (not manipulateOperator) { const auto oper_scale = this->oper->getOperatorRoot(); @@ -570,16 +381,6 @@ template void ConvolutionCalculator::touchParentNodes( } } -/** - * @brief Create the initial list of target nodes to process. - * - * @param tree Target/output tree. 
- * @returns A vector of pointers to existing nodes to be processed. - * - * @details - * For periodic trees, parent nodes above the root are first touched to ensure - * consistency; then a flat node table is produced via `tree_utils::make_node_table`. - */ template MWNodeVector *ConvolutionCalculator::getInitialWorkVector(MWTree &tree) const { auto *nodeVec = new MWNodeVector; if (tree.isPeriodic()) touchParentNodes(tree); diff --git a/src/treebuilders/ConvolutionCalculator.h b/src/treebuilders/ConvolutionCalculator.h index 8ac4b5d34..8f671e084 100644 --- a/src/treebuilders/ConvolutionCalculator.h +++ b/src/treebuilders/ConvolutionCalculator.h @@ -25,6 +25,18 @@ #pragma once +/** + * @file + * @brief Adaptive multiwavelet convolution driver. + * + * @details + * Declares @ref mrcpp::ConvolutionCalculator, a tree-walking calculator that + * applies a (possibly non-local) @ref ConvolutionOperator to an input + * @ref FunctionTree with adaptive precision control. The calculator + * orchestrates band construction, per-node operator application, and optional + * operator manipulation (e.g., unit-cell projections for periodic problems). + */ + #include "TreeCalculator.h" #include "operators/OperatorStatistics.h" #include "trees/FunctionTreeVector.h" @@ -33,64 +45,286 @@ namespace mrcpp { -template class ConvolutionCalculator final : public TreeCalculator { +/** + * @class ConvolutionCalculator + * @brief Performs adaptive convolution of a function tree with a convolution operator. + * + * @tparam D Spatial dimensionality (1–3). + * @tparam T Coefficient scalar type (`double` or `ComplexDouble`). + * + * @details + * The calculator traverses the output tree (owned by the base + * @ref TreeCalculator) and, for each node, applies the convolution operator to + * the relevant neighborhood (an operator *band*). 
Band sizes are derived from + * the operator bandwidth and the current tree depth, and can be further tuned + * by a user-supplied per-node precision function @ref setPrecFunction. + * + * The implementation records timing and operator statistics per band/component + * to aid profiling, and can optionally manipulate the operator prior to + * application (see @ref startManipulateOperator). + * + * ### Lifetime / ownership + * - @ref ConvolutionCalculator does **not** own the operator nor the input + * function tree; it stores non-owning pointers. + * - Timers and internal matrices are allocated and cleared by + * @ref initTimers / @ref clearTimers . + */ +template +class ConvolutionCalculator final : public TreeCalculator { public: + /** + * @brief Construct a calculator for \f$ g = \mathcal{O}\{f\} \f$. + * + * @param p Target accuracy (relative or absolute depending on usage). + * @param o Convolution operator to apply. + * @param f Input function tree \f$ f \f$. + * @param depth Maximum traversal depth for the output tree + * (defaults to @c MaxDepth for the MRA). + * + * @pre @p o and @p f must remain valid for the lifetime of the calculator. + */ ConvolutionCalculator(double p, ConvolutionOperator &o, FunctionTree &f, int depth = MaxDepth); + + /// @brief Destructor. Releases timers and internal band-size tables. ~ConvolutionCalculator() override; - MWNodeVector *getInitialWorkVector(MWTree &tree) const override; + /** + * @brief Produce the initial work vector of nodes for the output tree. + * + * @param tree Output tree that will receive the convolution result. + * @return Pointer to a heap-allocated vector of nodes to start from. + * + * @details + * The initial set typically includes end nodes (or generator nodes in + * banded neighborhoods) where the operator action is non-zero. + * The caller (base class) assumes ownership of the returned vector. 
+ */ + MWNodeVector* getInitialWorkVector(MWTree &tree) const override; + /** + * @brief Set a per-node precision function. + * + * @param prec_func A functor returning the local tolerance for a node index. + * + * @details + * When provided, the calculator uses @p prec_func(idx) to refine the target + * precision locally (e.g., tighter near singularities), typically in + * conjunction with the global precision passed to the constructor. + */ void setPrecFunction(const std::function &idx)> &prec_func) { this->precFunc = prec_func; } + + /** + * @brief Enable unit-cell manipulation of the operator band. + * + * @param excUnit Value stored in `onUnitcell` (unit-cell selection flag). + * + * @details + * When enabled, source nodes are included in or excluded from the operator + * band based on their membership in the first unit cell (periodic worlds) + * and the `onUnitcell` flag set from @p excUnit. + */ void startManipulateOperator(bool excUnit) { this->manipulateOperator = true; this->onUnitcell = excUnit; } private: + // ---- Configuration / inputs ------------------------------------------------ + + /// @brief Maximum operator depth considered for the band-size tables. int maxDepth; + + /// @brief Global target precision (interpreted by implementation). double prec; + + /// @brief Toggle for unit-cell manipulation of the operator band. bool manipulateOperator{false}; + + /// @brief Unit-cell selector used by @ref fillOperBand when manipulation is enabled. bool onUnitcell{false}; + + /// @brief Non-owning pointer to the convolution operator. ConvolutionOperator *oper; + + /// @brief Non-owning pointer to the input function tree f(r). FunctionTree *fTree; - std::vector band_t; - std::vector calc_t; - std::vector norm_t; + // ---- Instrumentation ------------------------------------------------------- + + /// @brief Per-thread timers for operator band building. + std::vector band_t; + + /// @brief Per-thread timers for the main convolution kernels. 
+ std::vector calc_t; + + /// @brief Per-thread timers for norm updates. + std::vector norm_t; + + /// @brief Aggregated operator-usage statistics. OperatorStatistics operStat; - std::vector bandSizes; + + // ---- Band-size modeling ---------------------------------------------------- + + /** + * @brief Precomputed band-size factors per depth/component. + * + * @details + * Each matrix has shape `(maxDepth+1) × (nComp2+1)`, where `nComp = 2^D` + * and `nComp2 = nComp * nComp`. Column `k = gt * nComp + ft` holds the + * band-size factor for the target (`gt`) / source (`ft`) component pair + * at a given depth; the last column (`nComp2`) holds the row-wise maximum. + */ + std::vector bandSizes; + + /** + * @brief Optional local precision override. + * + * @details + * Defaults to a neutral functor returning 1.0. When set by + * @ref setPrecFunction, it scales or replaces the global precision on a + * per-node basis. + */ std::function &idx)> precFunc = [](const NodeIndex &idx) { return 1.0; }; + /// @brief Number of component blocks (2^D) in a multiwavelet tensor. static const int nComp = (1 << D); + + /// @brief Number of component-pair interactions ( (2^D) × (2^D) ). static const int nComp2 = (1 << D) * (1 << D); - MWNodeVector *makeOperBand(const MWNode &gNode, std::vector> &idx_band); + // ---- Band construction helpers -------------------------------------------- + + /** + * @brief Build an operator band (list of source nodes) around @p gNode. + * + * @param gNode Target node (in the output tree). + * @param idx_band Output: indices of the source nodes added to the band. + * @return Heap-allocated node vector corresponding to @p idx_band. + * + * @details + * The band is the intersection of the operator's bandwidth box centered + * at @p gNode with the world box of the source tree. The returned vector + * holds pointers to the source nodes in the band. 
+ */ + MWNodeVector* makeOperBand(const MWNode &gNode, std::vector> &idx_band); + + /** + * @brief Recursive fill of an operator band. + * + * @param band Destination node vector to append to. + * @param idx_band Output: node indices parallel to @p band. + * @param idx Current multi-index (mutated along the recursion). + * @param nbox Side lengths of the bandwidth box per dimension. + * @param dim Current dimension (0..D-1) being expanded. + */ void fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim); + // ---- Timing / statistics lifecycle ----------------------------------------- + + /// @brief Allocate per-thread timers. void initTimers(); + + /// @brief Release per-thread timers. void clearTimers(); + + /// @brief Print a compact report of thread-wise timings. void printTimers() const; + // ---- Band-size factors ----------------------------------------------------- + + /// @brief Precompute the @ref bandSizes tables for all operator terms. void initBandSizes(); + + /** + * @brief Lookup the band-size factor for a component pair at a depth. + * + * @param i Index of the operator term whose table is queried. + * @param depth Depth used as row index into the table. + * @param os Current operator-state (provides `gt` and `ft`). + * @return Precomputed size factor. + */ int getBandSizeFactor(int i, int depth, const OperatorState &os) const { int k = os.gt * this->nComp + os.ft; return (*this->bandSizes[i])(depth, k); } + /** + * @brief Compute the band-size factors for all component pairs at a depth. + * + * @param bs Destination matrix; row @p depth is filled (last column = row maximum). + * @param depth Operator depth (row of @p bs) to compute. + * @param bw Operator bandwidth descriptor. + */ void calcBandSizeFactor(Eigen::MatrixXi &bs, int depth, const BandWidth &bw); + // ---- Core calculation hooks (TreeCalculator overrides) --------------------- + + /** + * @brief Compute the output contribution for a single node. + * + * @param node Output node to update. 
+ * + * @details + * Builds the relevant operator band around @p node, applies the operator to + * the input tree restricted to that band, and accumulates the result into + * @p node's coefficients. Precision is controlled by @ref prec and + * @ref precFunc. + */ void calcNode(MWNode &node) override; + + /** + * @brief Post-processing after a full tree sweep. + * + * @details + * Prints per-thread timing information, clears timers, and re-initializes + * the timing infrastructure for possible subsequent sweeps. + */ void postProcess() override { printTimers(); clearTimers(); initTimers(); } + // ---- Operator application kernels ------------------------------------------ + + /** + * @brief Apply all operator terms for the current target/source component pair. + * + * @param os Operator state (component indices, buffers, thresholds, etc.). + */ void applyOperComp(OperatorState &os); + + /** + * @brief Apply a single operator term to a single source node. + * + * @param i Index of the operator term in the separable expansion. + * @param os Operator state (nodes, buffers, norms, component indices). + */ void applyOperator(int i, OperatorState &os); + + /** + * @brief Perform the directional tensor contractions for one operator term. + * + * @param os Operator state (holds the buffers used by the contractions). + * + * @details + * One contraction is carried out per Cartesian direction; the result is + * accumulated into the target buffer on the last direction. + */ void tensorApplyOperComp(OperatorState &os); + // ---- Tree maintenance ------------------------------------------------------- + + /** + * @brief Ensure parent nodes exist up to the operator root (periodic worlds). + * + * @param tree Target/output tree. + * + * @details + * In periodic settings, parent nodes above the root scale may be required + * for coarse contributions; this helper creates them beforehand. 
+ */ void touchParentNodes(MWTree &tree) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/CopyAdaptor.cpp b/src/treebuilders/CopyAdaptor.cpp index 9acefcbea..cc4a3a308 100644 --- a/src/treebuilders/CopyAdaptor.cpp +++ b/src/treebuilders/CopyAdaptor.cpp @@ -23,75 +23,12 @@ * */ -/** - * @file CopyAdaptor.cpp - * @brief Tree adaptor that **copies** (follows) an existing grid structure, - * optionally widened by a user-specified bandwidth. - * - * @details - * `mrcpp::CopyAdaptor` is a `TreeAdaptor` used with `TreeBuilder` to produce an - * output function tree whose refinement pattern mirrors one or more **reference - * trees**. It decides whether a node should be split solely by inspecting the - * presence of the corresponding **children** (and their integer-neighbor shifts) - * in the reference trees. - * - * This adaptor is typically used to: - * - replicate an input grid for **fixed-grid operations** (e.g., local - * derivative applies where no adaptivity is desired), and - * - **widen** the grid along selected directions to accommodate operators - * whose stencils reach into neighboring nodes (e.g., first/second derivative - * stencils). The widening is controlled by a per-dimension integer - * bandwidth \f$ \text{bandWidth}[d] \ge 0 \f$. - * - * ### Split criterion - * For a candidate node `node` and each of its children `c` (in tensor-product - * sense), the adaptor checks, for each dimension \f$d\in\{0,\dots,D-1\}\f$, - * every integer shift \f$ \delta \in [-\text{bandWidth}[d],\text{bandWidth}[d]] \f$: - * - * 1. Form the child index `bwIdx = node.child(c)` and add the shift on the - * current dimension: `bwIdx[d] += δ`. - * 2. If **any** reference `FunctionTree` contains that child index, the adaptor - * returns **true** (requesting the split). - * - * If no such child is found in any reference, the adaptor returns **false**. 
- * - * ### Notes - * - This adaptor is **purely topological**; it does not inspect coefficients. - * - If `bw == nullptr`, all bandwidths default to `0` (exact copy of the - * reference grid). - * - The reference set can be a single tree or a vector of trees; the union of - * their reachable children (with bandwidth widening) drives the output grid. - * - * ### Example - * @code - * int bw[3] = {1, 0, 0}; // widen one node on each side in x - * CopyAdaptor<3,double> pre(out_inp, maxScale, bw); - * TreeBuilder<3,double> builder; - * DefaultCalculator<3,double> calc; // no-op; we only want to build the grid - * builder.build(out, calc, pre, -1); // fixed grid construction - * @endcode - */ - #include "CopyAdaptor.h" #include namespace mrcpp { -/** - * @brief Construct a copy adaptor that follows a single reference tree. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type (`double` or `ComplexDouble`). - * @param t Reference function tree to follow. - * @param ms Maximum scale allowed for splitting (forwarded to `TreeAdaptor`). - * @param bw Optional pointer to an array of length `D` with per-dimension - * integer bandwidths. If `nullptr`, all bandwidths are set to `0`. - * - * @details - * The adaptor will request a split whenever a corresponding child (possibly - * shifted by up to `bw[d]` in each dimension) exists in the reference tree. - */ template CopyAdaptor::CopyAdaptor(FunctionTree &t, int ms, int *bw) : TreeAdaptor(ms) { @@ -99,20 +36,6 @@ CopyAdaptor::CopyAdaptor(FunctionTree &t, int ms, int *bw) tree_vec.push_back(std::make_tuple(1.0, &t)); } -/** - * @brief Construct a copy adaptor that follows the **union** of several trees. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * @param t Vector of `(coef, tree*)` pairs; only the tree pointers matter for - * the splitting logic, the coefficients are ignored. - * @param ms Maximum scale allowed for splitting. - * @param bw Optional per-dimension bandwidth array. 
If `nullptr`, zeros. - * - * @details - * A split is requested if **any** tree in `t` contains the candidate child - * (within the bandwidth neighborhood). - */ template CopyAdaptor::CopyAdaptor(FunctionTreeVector &t, int ms, int *bw) : TreeAdaptor(ms) @@ -120,18 +43,6 @@ CopyAdaptor::CopyAdaptor(FunctionTreeVector &t, int ms, int *bw) setBandWidth(bw); } -/** - * @brief Set the per-dimension bandwidths used to widen the copied grid. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * @param bw Pointer to an integer array of length `D`. If `nullptr`, all - * bandwidths are set to `0`. - * - * @note Negative entries are treated as `0` by the caller contract; this - * function simply copies the values. The split loop ranges over - * `[-bandWidth[d], +bandWidth[d]]`. - */ template void CopyAdaptor::setBandWidth(int *bw) { for (int d = 0; d < D; d++) { if (bw != nullptr) { @@ -142,33 +53,6 @@ template void CopyAdaptor::setBandWidth(int *bw) { } } -/** - * @brief Decide whether a node should be split to mirror (and widen) a reference grid. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * @param node Candidate node in the output tree. - * @return `true` if any reference tree contains a corresponding child - * (considering bandwidth shifts), `false` otherwise. - * - * @details - * For each tensor child `c` of `node` and each dimension `d`, the method scans - * integer offsets `bw ∈ [-bandWidth[d], +bandWidth[d]]`. The candidate child - * index is formed as: - * - * @code - * NodeIndex bwIdx = idx.child(c); - * bwIdx[d] += bw; - * @endcode - * - * If any reference tree contains `bwIdx`, a split is requested immediately. - * The search stops on the first positive hit. - * - * @complexity - * \f$ \mathcal{O}\big(T \cdot C \cdot \prod_{d=0}^{D-1} (2\,\text{bandWidth}[d]+1)\big) \f$, - * where `T` is the number of reference trees and `C` is the number of tensor - * children per node. 
- */ template bool CopyAdaptor::splitNode(const MWNode &node) const { const NodeIndex &idx = node.getNodeIndex(); for (int c = 0; c < node.getTDim(); c++) { @@ -196,4 +80,4 @@ template class CopyAdaptor<1, ComplexDouble>; template class CopyAdaptor<2, ComplexDouble>; template class CopyAdaptor<3, ComplexDouble>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/CopyAdaptor.h b/src/treebuilders/CopyAdaptor.h index a7825cca0..f26fbf354 100644 --- a/src/treebuilders/CopyAdaptor.h +++ b/src/treebuilders/CopyAdaptor.h @@ -25,22 +25,113 @@ #pragma once +/** + * @file + * @brief Adaptor that copies data from one or more source trees into a target tree. + * + * @details + * Declares @ref mrcpp::CopyAdaptor, a lightweight @ref TreeAdaptor that + * drives adaptive traversal/refinement for copy operations. The adaptor + * decides whether to split/visit nodes based on a per-dimension band–width + * window around the regions populated in the source tree(s) and on a + * max-scale constraint provided at construction. + */ + #include "TreeAdaptor.h" #include "trees/FunctionTreeVector.h" namespace mrcpp { -template class CopyAdaptor final : public TreeAdaptor { +/** + * @class CopyAdaptor + * @brief Adaptor for reproducing (copying) function-tree structure/coefficients. + * + * @tparam D Spatial dimensionality (1–3). + * @tparam T Coefficient scalar type. + * + * @details + * A @ref TreeAdaptor used by generic tree algorithms to: + * - restrict refinement to a maximum scale (`ms`), and + * - gate splitting to nodes that fall within a per-dimension *band width* + * neighborhood around the non-empty region of one or more **source trees**. + * + * This enables efficient copying/subsetting of a function tree (or a + * @ref FunctionTreeVector) into a new tree while avoiding unnecessary + * refinement outside the area of interest. 
+ */ +template +class CopyAdaptor final : public TreeAdaptor { public: + /** + * @brief Construct an adaptor using a single source tree. + * + * @param t Source tree to mirror/copy from. + * @param ms Maximum scale (depth) allowed for refinement in the target. + * @param bw Pointer to an array of length @c D with per-dimension band + * half-widths (in node/grid units). Values control how far + * from the source support we keep refining; if `nullptr`, + * all bandwidths are set to zero. + * + * @note The adaptor appends a `(1.0, &t)` entry to its internal tree vector. + */ CopyAdaptor(FunctionTree &t, int ms, int *bw); + + /** + * @brief Construct an adaptor using multiple source trees. + * + * @param t Collection of source trees whose union of supports guides + * refinement/visitation. + * @param ms Maximum scale (depth) allowed for refinement in the target. + * @param bw Pointer to an array of length @c D with per-dimension band + * half-widths (in node/grid units). See the single-tree + * constructor for interpretation. + */ CopyAdaptor(FunctionTreeVector &t, int ms, int *bw); private: + /** + * @brief Per-dimension refinement band half-widths. + * + * @details + * For dimension @c d, only nodes whose index lies within + * @c bandWidth[d] boxes of the populated region of the source will be + * considered for splitting. A value of zero limits refinement strictly to + * the currently populated footprint. + */ int bandWidth[D]; + + /** + * @brief Source tree collection used to drive the copy operation. + * + * @details + * When constructed from a single tree, this vector contains exactly one + * entry referencing that tree; otherwise it is a by-value copy of the + * user-provided vector. The trees themselves are not owned. + */ FunctionTreeVector tree_vec; + + /** + * @brief Initialize the @ref bandWidth array from a user buffer. + * + * @param bw Pointer to an array of length @c D, or `nullptr` to set all + * bandwidths to zero; values are copied as given (no clamping). 
+ */ + void setBandWidth(int *bw); + + /** + * @brief Decide whether a node should be split during traversal. + * + * @param node Node under consideration in the *target* tree. + * @return `true` if any source tree in @ref tree_vec contains a child of + * @p node (shifted by up to `bandWidth[d]` along dimension `d`); `false` otherwise. + * + * @details + * For each child of @p node and each dimension `d`, the child index is + * shifted by every offset in `[-bandWidth[d], +bandWidth[d]]` and looked + * up in the source tree(s); the first hit requests the split. The check + * is purely topological: coefficients are not inspected. + */ bool splitNode(const MWNode &node) const override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/CrossCorrelationCalculator.cpp b/src/treebuilders/CrossCorrelationCalculator.cpp index b68e71945..cb81b7de2 100644 --- a/src/treebuilders/CrossCorrelationCalculator.cpp +++ b/src/treebuilders/CrossCorrelationCalculator.cpp @@ -23,53 +23,6 @@ * */ -/** - * @file CrossCorrelationCalculator.cpp - * @brief Assembly of 2D cross–correlation operator blocks from a 1D kernel, - * for Legendre and Interpolating scaling bases. - * - * @details - * This module implements the node-wise assembly of a separable 2D operator that - * represents the *cross–correlation* between adjacent 1D kernel segments. 
- * Given a 1D kernel stored as an `MWTree<1>` (accessed via - * `CrossCorrelationCalculator::kernel`), the calculator: - * - * - selects the appropriate **precomputed** cross–correlation matrices - * \f$L\f$ and \f$R\f$ from a `CrossCorrelationCache` depending on the - * scaling basis (Legendre or Interpolating) and the local polynomial order, - * - extracts the relevant 1D kernel coefficient blocks at indices shifted by - * the child offset of the current 2D node, - * - forms the 2D block by the linear combination - * \f[ - * \mathbf{v}_o^{(i)} \;=\; L \,\mathbf{v}_a \;+\; R \,\mathbf{v}_b, - * \f] - * where \f$\mathbf{v}_a\f$ and \f$\mathbf{v}_b\f$ are the 1D kernel - * coefficient segments corresponding to the left/right neighboring child - * positions induced by the current 2D node child \f$i\f$, - * - applies a scale factor \f$ 2^{-\,(\text{scale}+1)/2} \f$ and a global - * normalization factor derived from the world-box scaling to obtain the - * final coefficient block for the 2D operator node. - * - * The assembled coefficients are then compressed (wavelet transform in - * `Compression` mode), marked as present, and their norms are computed for - * downstream thresholding and application. - * - * ### Indexing convention - * For a node with index \f$\ell = (\ell_0,\ell_1)\f$ and a specific tensor - * child \f$i\f$, the child index `l = idx.child(i)` induces two 1D offsets - * \f[ - * \ell_a = \ell_1 - \ell_0 - 1, - * \qquad - * \ell_b = \ell_1 - \ell_0, - * \f] - * which select adjacent 1D kernel nodes at the next finer scale. These are - * mapped to 1D node indices \f$(\text{scale}+1,\ell_a)\f$ and - * \f$(\text{scale}+1,\ell_b)\f$. - * - * @note At the moment, only **uniform scaling factors** are supported; the code - * reads the scaling factor for dimension 0 and assumes it is uniform. 
- */ - #include "CrossCorrelationCalculator.h" #include "trees/FunctionTree.h" #include "trees/MWNode.h" @@ -80,22 +33,6 @@ using Eigen::VectorXd; namespace mrcpp { -/** - * @brief Build the cross–correlation block for a 2D operator node. - * - * @param node Output 2D operator node to be filled (overwrites coefficients). - * - * @details - * - Zeros existing coefficients. - * - Detects the scaling basis of the underlying MRA (`Interpol` or `Legendre`). - * - Retrieves the corresponding `CrossCorrelationCache` and dispatches to - * `applyCcc()` which performs the actual linear algebra using cached - * matrices \f$L,R\f$ and the 1D kernel tree referenced by this calculator. - * - Applies compression (`mwTransform(Compression)`), marks coefficients as - * present, and computes norms (`calcNorms()`). - * - * @throws Emits an error if the scaling type is unsupported. - */ void CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); int type = node.getMWTree().getMRA().getScalingBasis().getScalingType(); @@ -119,37 +56,6 @@ void CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } -/** - * @brief Assemble one 2D node from cached cross–correlation matrices and a 1D kernel. - * - * @tparam T Tag of the scaling basis cache (`Interpol` or `Legendre`). - * @param node Output 2D operator node to be filled. - * @param ccc Cross–correlation cache for the selected scaling basis. - * - * @details - * Let \f$k\f$ denote the 1D kernel function tree pointed to by - * `this->kernel`. 
For each tensor child \f$i\f$ of the current 2D node: - * - compute the child index \f$l = \text{idx.child}(i)\f$ where - * \f$\text{idx}\f$ is the node index, - * - form the adjacent 1D indices \f$\ell_a = l_1-l_0-1\f$ and - * \f$\ell_b = l_1-l_0\f$ at scale \f$s = \text{node.getScale()}+1\f$, - * - fetch the 1D coefficient vectors \f$\mathbf{v}_a,\mathbf{v}_b\f$ from - * the kernel tree at \f$(s,\ell_a)\f$ and \f$(s,\ell_b)\f$, - * - compute the 2D segment - * \f[ - * \mathbf{v}_o^{(i)} \;=\; L \,\mathbf{v}_a \;+\; R \,\mathbf{v}_b, - * \f] - * where \f$L,R\f$ are read from the cache for the node order - * (`ccc.getLMatrix(node.getOrder())`, `ccc.getRMatrix(node.getOrder())`), - * - store \f$\mathbf{v}_o^{(i)}\f$ in the appropriate slot of the 2D node - * coefficient buffer after applying the normalization - * \f$ \sqrt{\text{scaling\_factor}}\, 2^{-s/2} \f$. - * - * The method writes directly into `node.getCoefs()` and does not allocate - * intermediate node structures beyond temporary vectors. - * - * @note Only uniform world-box scaling factors are supported at present. - */ template void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, CrossCorrelationCache &ccc) { const MatrixXd &lMat = ccc.getLMatrix(node.getOrder()); const MatrixXd &rMat = ccc.getRMatrix(node.getOrder()); @@ -183,10 +89,8 @@ template void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros double two_n = std::pow(2.0, -scale / 2.0); for (int i = 0; i < t_dim * kp1_d; i++) { auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0); - // Implemented for uniform scaling factors (dimension 0). For non-uniform - // scaling a per-dimension normalization would be required. 
coefs[i] = std::sqrt(scaling_factor) * two_n * vec_o(i); } } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/CrossCorrelationCalculator.h b/src/treebuilders/CrossCorrelationCalculator.h index b97e5e6da..af80cdaa7 100644 --- a/src/treebuilders/CrossCorrelationCalculator.h +++ b/src/treebuilders/CrossCorrelationCalculator.h @@ -25,22 +25,109 @@ #pragma once +/** + * @file + * @brief Cross-correlation tree calculator for 2D functions with a 1D kernel. + * + * @details + * Declares @ref mrcpp::CrossCorrelationCalculator, a concrete + * @ref TreeCalculator that assembles the coefficient blocks of 2D operator + * nodes from a one–dimensional kernel tree, node by node, for the Legendre + * and Interpolating scaling bases. The implementation uses a + * @ref CrossCorrelationCache to obtain the precomputed cross–correlation + * matrices for the node order. + */ + #include "TreeCalculator.h" #include "core/CrossCorrelationCache.h" namespace mrcpp { +/** + * @class CrossCorrelationCalculator + * @brief Applies a cached cross–correlation with a 1D kernel to a 2D tree. + * + * @details + * This calculator specializes @ref TreeCalculator for 2D nodes + * (`TreeCalculator<2>`). For each node, @ref calcNode zeroes the + * coefficients, combines coefficient blocks of the 1D @ref kernel with the + * cached cross–correlation matrices, then compresses the node, marks its + * coefficients as present, and computes its norms. + * + * ### Design notes + * - The kernel is provided as a `FunctionTree<1>` and is **not owned** by the + * calculator (the caller must guarantee its lifetime). + * - Internally, @ref applyCcc is parametrized on the scaling-basis type tag + * (`Interpol` or `Legendre`) via the template parameter `T` of + * @ref CrossCorrelationCache, so one implementation serves both bases. 
+ * - A @ref CrossCorrelationCache supplies the precomputed left/right + * cross–correlation matrices for the node order (`getLMatrix`, + * `getRMatrix`). + */ class CrossCorrelationCalculator final : public TreeCalculator<2> { public: + /** + * @brief Construct a calculator using a given 1D kernel. + * + * @param k Reference to a 1D function tree representing the correlation + * kernel. The pointer is stored; the object must outlive the + * calculator. + * + * @note No ownership is transferred; `k` must remain valid for the entire + * calculation. + */ CrossCorrelationCalculator(FunctionTree<1> &k) : kernel(&k) {} private: + /** + * @brief Non-owning pointer to the 1D kernel used in the cross–correlation. + */ FunctionTree<1> *kernel; + /** + * @brief Compute the cross–correlation coefficients for a single 2D node. + * + * @param node Output 2D operator node to fill; existing coefficients are + * overwritten. + * + * @details + * This override zeroes the node's coefficients, detects the scaling basis + * of the underlying MRA (`Interpol` or `Legendre`), retrieves the matching + * @ref CrossCorrelationCache, and dispatches to @ref applyCcc. The node is + * then compressed, its coefficients are marked as present, and its norms + * are computed. An error is emitted for unsupported scaling types. + * + * @warning The method assumes that the base calculator has already + * orchestrated any required refinement and that input/output tree + * storage is valid for @p node. + */ void calcNode(MWNode<2> &node) override; - template void applyCcc(MWNode<2> &node, CrossCorrelationCache &ccc); + /** + * @brief Apply the cached cross–correlation matrices to a node for one scaling basis. + * + * @tparam T Scaling-basis type tag of the cache (`Interpol` or + * `Legendre`). + * @param node Output 2D operator node to be filled. 
+ * @param ccc Cross–correlation cache for the scaling basis selected by + * @p T; provides the precomputed left/right matrices for the + * node order (`getLMatrix`, `getRMatrix`). + * + * @details + * Performs the basis-specific core computation for each child of @p node: + * - forms the two adjacent 1D indices induced by the child index, + * - fetches the corresponding coefficient vectors from the 1D + * @ref kernel tree, + * - combines them with the left/right matrices obtained from @p ccc, + * - scales the result and writes it into the node's coefficients. + * + * The separation into this templated helper allows + * @ref calcNode to dispatch on the scaling basis of the underlying MRA + * without duplicating logic. + */ + template + void applyCcc(MWNode<2> &node, CrossCorrelationCache &ccc); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/DefaultCalculator.h b/src/treebuilders/DefaultCalculator.h index 4a1a4ce54..90074ccdf 100644 --- a/src/treebuilders/DefaultCalculator.h +++ b/src/treebuilders/DefaultCalculator.h @@ -25,23 +25,77 @@ #pragma once +/** + * @file + * @brief Trivial calculator that clears coefficients and norms on each node. + * + * @details + * Declares @ref mrcpp::DefaultCalculator, a minimal + * @ref TreeCalculator implementation that: + * - iterates over a vector of nodes **sequentially** (no OpenMP), + * - for each node, clears coefficient/flag state via + * @ref MWNode::clearHasCoefs and resets stored norms via + * @ref MWNode::clearNorms. + * + * This is useful as a baseline or as a final cleanup pass when no numerical + * operator needs to be applied but tree state must be normalized. + */ + #include "TreeCalculator.h" namespace mrcpp { -template class DefaultCalculator final : public TreeCalculator { +/** + * @class DefaultCalculator + * @brief Minimal calculator that performs per-node cleanup. + * + * @tparam D Spatial dimension of the tree. 
+ * @tparam T Scalar coefficient type (`double`, `std::complex`, …). + * + * @details + * The calculator eschews OpenMP parallelism for its node-vector traversal + * because the work per node is trivial and sequential iteration is typically + * faster (lower overhead). If parallel traversal is desired, use or derive + * from an alternative calculator that enables OpenMP in + * `calcNodeVector`. + */ +template +class DefaultCalculator final : public TreeCalculator { public: - // Reimplementation without OpenMP, the default is faster this way + /** + * @brief Process a vector of nodes sequentially. + * + * @param nodeVec Container of node pointers to process. + * + * @details + * Calls @ref calcNode on each entry in order. The method deliberately + * avoids OpenMP to minimize overhead for very small, constant-time work. + * + * @complexity Linear in `nodeVec.size()`. + */ void calcNodeVector(MWNodeVector &nodeVec) override { int nNodes = nodeVec.size(); for (int n = 0; n < nNodes; n++) { calcNode(*nodeVec[n]); } } private: + /** + * @brief Clear coefficient presence flags and stored norms for a node. + * + * @param node The node whose local state will be reset. + * + * @details + * - @ref MWNode::clearHasCoefs marks that the node no longer has valid + * coefficients. + * - @ref MWNode::clearNorms removes any cached norm values. + * + * This does **not** modify topology (no splitting/merging) and does not + * change coefficient arrays beyond clearing the "has coefs" state. 
+ */ void calcNode(MWNode &node) override { node.clearHasCoefs(); node.clearNorms(); } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp index db4a398b7..e07ca1a1c 100644 --- a/src/treebuilders/DerivativeCalculator.cpp +++ b/src/treebuilders/DerivativeCalculator.cpp @@ -23,43 +23,6 @@ * */ -/** - * @file DerivativeCalculator.cpp - * @brief Node-wise application of multiresolution **derivative operators**. - * - * @details - * This module implements the computational kernels used to apply a - * #mrcpp::DerivativeOperator to multiresolution coefficient trees. - * The calculator works node-by-node and supports both: - * - * - **Zero-bandwidth (local) operators** — e.g. ABGV-00 type operators that - * act diagonally (per cell) in non-applied directions; handled by - * DerivativeCalculator::applyOperator_bw0(). - * - **Finite-bandwidth operators** — e.g. ABGV-55/PH/BS operators that couple - * nearest neighbors along the application direction; handled by - * DerivativeCalculator::applyOperator(). - * - * The apply pipeline for each output (g) node: - * 1. Build the **operator band** of input (f) nodes affected by the operator - * at the current depth (makeOperBand()). - * 2. For each combination of tensor components \f$(f_t,g_t)\f$, gather the - * 1D operator blocks from the pre-built #mrcpp::OperatorTree components. - * 3. Perform the separated **tensor contraction** - * (tensorApplyOperComp()) across dimensions, using identity where the - * operator is not applied. - * 4. Apply a **scaling normalization** based on the world-box scaling factor - * and the derivative order. - * 5. Compute node norms for downstream thresholding and diagnostics. - * - * The class collects **per-thread timing** and **operator-usage statistics** - * to aid profiling and load balancing. - * - * @note Scaling normalization: - * The derivative w.r.t. 
direction `applyDir` is normalized by the - * world-box scaling factor of that direction raised to the operator - * order. See the notes near the end of calcNode() overloads. - */ - #include "DerivativeCalculator.h" #include "operators/DerivativeOperator.h" #include "operators/OperatorState.h" @@ -79,21 +42,6 @@ using Eigen::MatrixXd; namespace mrcpp { -/** - * @brief Construct a derivative calculator. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient scalar type (e.g., double or ComplexDouble). - * @param dir Direction along which the derivative is applied (0-based, < D). - * @param o Derivative operator to apply. - * @param f Input function tree (source of coefficients). - * - * @throws Aborts if `dir` is outside \f$[0,D)\f$. - * - * @details - * The constructor stores references to the operator and the input tree, - * validates the application direction, and initializes per-thread timers. - */ template DerivativeCalculator::DerivativeCalculator(int dir, DerivativeOperator &o, FunctionTree &f) : applyDir(dir) @@ -103,17 +51,11 @@ DerivativeCalculator::DerivativeCalculator(int dir, DerivativeOperator initTimers(); } -/** - * @brief Flush usage counters and print aggregate statistics on destruction. - */ template DerivativeCalculator::~DerivativeCalculator() { this->operStat.flushNodeCounters(); println(10, this->operStat); } -/** - * @brief Initialize per-thread timers (band construction / calc / norms). - */ template void DerivativeCalculator::initTimers() { int nThreads = mrcpp_get_max_threads(); for (int i = 0; i < nThreads; i++) { @@ -123,18 +65,12 @@ template void DerivativeCalculator::initTimers() { } } -/** - * @brief Clear local timer storage. - */ template void DerivativeCalculator::clearTimers() { this->band_t.clear(); this->calc_t.clear(); this->norm_t.clear(); } -/** - * @brief Print per-thread timing statistics gathered during application. 
- */ template void DerivativeCalculator::printTimers() const { int oldprec = Printer::setPrecision(1); int nThreads = mrcpp_get_max_threads(); @@ -150,23 +86,7 @@ template void DerivativeCalculator::printTimers() cons Printer::setPrecision(oldprec); } -/** - * @brief Apply a **local (zero-bandwidth)** derivative operator to a single node. - * - * @param[in] inpNode Source node (input function). - * @param[out] outNode Destination node (output after derivative). - * - * @details - * Uses applyOperator_bw0() which assumes the operator couples only the same - * spatial cell in non-applied directions. Identity is implicitly used in - * directions other than `applyDir`. - * - * After the tensor contraction, multiplies by \f$(\text{scale-factor})^{-p}\f$ - * where `p = oper->getOrder()` to account for physical coordinate scaling, - * then updates norms. - */ template void DerivativeCalculator::calcNode(MWNode &inpNode, MWNode &outNode) { - // if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw"); outNode.zeroCoefs(); int nComp = (1 << D); T tmpCoefs[outNode.getNCoefs()]; @@ -176,35 +96,20 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); if (abs(scaling_factor - 1.0) > MachineZero) { for (int i = 0; i < outNode.getNCoefs(); i++) outNode.getCoefs()[i] *= scaling_factor; } - outNode.calcNorms(); // norms are used by downstream screening + outNode.calcNorms(); } -/** - * @brief Apply a **finite-bandwidth** derivative operator to a single node. - * - * @param gNode Destination/output node (will be overwritten). - * - * @details - * 1. Build the operator band (list of input nodes influencing `gNode`) along - * the apply direction (makeOperBand()). - * 2. For each band node and tensor-component pair, gather operator slices - * from the #mrcpp::OperatorTree and perform the tensor product. - * 3. 
Apply coordinate scaling normalization by dividing by - * \f$(\text{scaleFactor})^{\text{order}}\f$ in the applied direction. - * 4. Update norms and timing statistics. - */ template void DerivativeCalculator::calcNode(MWNode &gNode) { gNode.zeroCoefs(); @@ -213,7 +118,6 @@ template void DerivativeCalculator::calcNode(MWNode os(gNode, tmpCoefs); this->operStat.incrementGNodeCounters(gNode); - // Build band of input nodes that affect gNode this->band_t[mrcpp_get_thread_num()].resume(); std::vector> idx_band; MWNodeVector fBand = makeOperBand(gNode, idx_band); @@ -235,7 +139,6 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); for (int i = 0; i < gNode.getNCoefs(); i++) gNode.getCoefs()[i] /= scaling_factor; this->calc_t[mrcpp_get_thread_num()].stop(); @@ -245,19 +148,6 @@ template void DerivativeCalculator::calcNode(MWNodenorm_t[mrcpp_get_thread_num()].stop(); } -/** - * @brief Build the **operator band** of input nodes that influence a given output node. - * - * @param gNode Output node for which to gather contributing input nodes. - * @param idx_band Output list of input node indices (aligned with returned vector). - * @return Vector of pointers to input nodes in @p fTree that lie within the - * operator bandwidth along `applyDir`. - * - * @details - * The band extends `width = oper->getMaxBandWidth()` cells to the left/right - * of the output index along the applied direction. Out-of-bounds indices are - * skipped; periodicity is handled by FunctionTree::getRootIndex(). 
- */ template MWNodeVector DerivativeCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { assert(this->applyDir >= 0); assert(this->applyDir < D); @@ -265,13 +155,11 @@ template MWNodeVector DerivativeCalculator::make MWNodeVector band; const NodeIndex &idx_0 = gNode.getNodeIndex(); - // Assumes given width only in applyDir, otherwise width = 0 int width = this->oper->getMaxBandWidth(); for (int w = -width; w <= width; w++) { NodeIndex idx_w(idx_0); idx_w[this->applyDir] += w; - // returns -1 if out of bounds and 0 for periodic int rIdx_w = this->fTree->getRootIndex(idx_w); if (rIdx_w >= 0) { idx_band.push_back(idx_w); @@ -281,17 +169,6 @@ template MWNodeVector DerivativeCalculator::make return band; } -/** - * @brief Apply a single **zero-bandwidth** operator component to one input node. - * - * @param os Operator state (holds pointers/scratch and component indices). - * - * @details - * Fetches the operator block at the current depth with translation 0 in all - * directions. In non-applied directions, activates identity by passing - * `nullptr` in the operator pointers, which signals tensorApplyOperComp() to - * copy/accumulate. - */ template void DerivativeCalculator::applyOperator_bw0(OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -310,10 +187,8 @@ template void DerivativeCalculator::applyOperator_bw0( oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oIdx == 0 or oIdx == 3) { - // Identity in direction d oData[d] = nullptr; } else { - // Outside identity block: contributes zero return; } } @@ -322,19 +197,6 @@ template void DerivativeCalculator::applyOperator_bw0( tensorApplyOperComp(os); } -/** - * @brief Apply a single **finite-bandwidth** operator component to one input node. - * - * @param os Operator state (holds pointers/scratch and component indices). - * - * @details - * For each dimension: - * - Determine the relative translation from input to output node. 
- * - Check that translation lies within the operator bandwidth. - * - Fetch the corresponding #mrcpp::OperatorNode data. - * - In non-applied directions, only the central identity block (translation 0, - * component 0 or 3) contributes; otherwise the term is skipped. - */ template void DerivativeCalculator::applyOperator(OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -350,7 +212,6 @@ template void DerivativeCalculator::applyOperator(Oper int oTransl = fIdx[d] - gIdx[d]; - // Bandwidth check in each direction int a = (os.gt & (1 << d)) >> d; int b = (os.ft & (1 << d)) >> d; int idx = (a << 1) + b; @@ -365,10 +226,8 @@ template void DerivativeCalculator::applyOperator(Oper oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oTransl == 0 and (oIdx == 0 or oIdx == 3)) { - // Identity in direction d oData[d] = nullptr; } else { - // Zero contribution return; } } @@ -377,19 +236,6 @@ template void DerivativeCalculator::applyOperator(Oper tensorApplyOperComp(os); } -/** - * @brief Perform the separated **tensor contraction** for one operator term. - * - * @param os Operator state (provides temporary buffers and operator slices). - * - * @details - * For each dimension i: - * - Map the \f$k\times k^{D-1}\f$ slice of the input into `f`, - * - Multiply by the \f$k\times k\f$ operator block if present - * (otherwise use identity), - * - Transpose-accumulate into the next staging buffer `g`. - * On the last dimension, accumulate into the output buffer. 
- */ template void DerivativeCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -398,13 +244,12 @@ template void DerivativeCalculator::tensorApplyOperCom Eigen::Map> g(aux[i + 1], os.kp1_dm1, os.kp1); if (oData[i] != nullptr) { Eigen::Map op(oData[i], os.kp1, os.kp1); - if (i == D - 1) { // last dim: accumulate + if (i == D - 1) { g.noalias() += f.transpose() * op; } else { g.noalias() = f.transpose() * op; } } else { - // Identity in dimension i if (i == D - 1) { g.noalias() += f.transpose(); } else { @@ -414,22 +259,10 @@ template void DerivativeCalculator::tensorApplyOperCom } } -/** - * @brief Provide the initial work vector for a tree traversal. - * - * @param tree Output tree where results will be stored. - * @return A vector of pointers to the leaf/end nodes of @p tree. - * - * @details - * The derivative application uses a fixed grid determined by the operator. - * This helper asks the tree to provide a snapshot of its end-node table to - * seed the traversal. - */ template MWNodeVector *DerivativeCalculator::getInitialWorkVector(MWTree &tree) const { return tree.copyEndNodeTable(); } -// Explicit instantiations template class DerivativeCalculator<1, double>; template class DerivativeCalculator<2, double>; template class DerivativeCalculator<3, double>; diff --git a/src/treebuilders/DerivativeCalculator.h b/src/treebuilders/DerivativeCalculator.h index 347554a46..b89d97b4d 100644 --- a/src/treebuilders/DerivativeCalculator.h +++ b/src/treebuilders/DerivativeCalculator.h @@ -25,45 +25,187 @@ #pragma once +/** + * @file + * @brief Derivative calculator on multiresolution function trees. + * + * @details + * Declares @ref mrcpp::DerivativeCalculator, a @ref TreeCalculator that applies a + * directional differential operator to an input @ref FunctionTree and writes the + * result into the calculator's target tree. 
The implementation constructs a + * scale-aware operator “band” around each output node and evaluates the + * derivative using tensorized component applications while optionally collecting + * timing and bandwidth statistics. + */ + #include "TreeCalculator.h" #include "operators/OperatorStatistics.h" namespace mrcpp { -template class DerivativeCalculator final : public TreeCalculator { +/** + * @class DerivativeCalculator + * @brief Applies a directional derivative operator to a function tree. + * + * @tparam D Spatial dimension of the tree (1–3 typical). + * @tparam T Scalar coefficient type (e.g., `double`, `std::complex`). + * + * @details + * The calculator computes \f$ g = \partial_{x_{dir}}(f) \f$ where: + * - `dir` selects the Cartesian direction \f$0 \le dir < D\f$, + * - `oper` encapsulates the discretized derivative stencils/filters, + * - `fTree` is the source tree and the calculator’s target is the destination. + * + * The traversal is driven by the base @ref TreeCalculator; for each output + * node, a localized operator band is constructed (see @ref makeOperBand) and + * the operator is applied in a tensorized fashion (see + * @ref tensorApplyOperComp). Optional timing is gathered per phase (band + * building, application, norm updates) and summarized on completion. + */ +template +class DerivativeCalculator final : public TreeCalculator { public: + /** + * @brief Construct a derivative calculator. + * + * @param dir Direction index of the derivative (0-based, `< D`). + * @param o Reference to the derivative operator to apply. + * @param f Reference to the **input/source** function tree. + * + * @note The destination/output tree is owned by the base + * @ref TreeCalculator (`this->outTree()`). + */ DerivativeCalculator(int dir, DerivativeOperator &o, FunctionTree &f); + + /// @brief Virtual destructor; prints and clears timers in @ref postProcess. ~DerivativeCalculator() override; + /** + * @brief Provide the initial work vector for traversal. 
+ * + * @param tree Output tree on which work will be scheduled. + * @return Pointer to a newly created vector of nodes to process first. + * + * @details + * Returns the snapshot of the end-node table of @p tree obtained from + * `tree.copyEndNodeTable()`. + */ MWNodeVector *getInitialWorkVector(MWTree &tree) const override; + + /** + * @brief Compute the derivative for a node pair. + * + * @param fNode Source node from @ref fTree (input). + * @param gNode Destination node on the output tree (result of \f$\partial_{dir} f\f$). + * + * @details + * Zero-bandwidth path: applies the operator to @p fNode through + * @ref applyOperator_bw0 (no band is built), rescales, and updates the norms of @p gNode. + */ void calcNode(MWNode &fNode, MWNode &gNode); private: - int applyDir; - FunctionTree *fTree; - DerivativeOperator *oper; + // --- Configuration and inputs ------------------------------------------------- + + int applyDir; ///< Direction index along which to differentiate. + FunctionTree *fTree{nullptr};///< Source function tree (input). + DerivativeOperator *oper{nullptr}; ///< Differential operator to apply. - std::vector band_t; - std::vector calc_t; - std::vector norm_t; - OperatorStatistics operStat; + // --- Timing/statistics -------------------------------------------------------- + std::vector band_t; ///< Timers for band construction, one per thread. + std::vector calc_t; ///< Timers for operator application, one per thread. + std::vector norm_t; ///< Timers for norm updates, one per thread. + OperatorStatistics operStat;///< Aggregate operator and bandwidth statistics. + + // --- Work preparation --------------------------------------------------------- + + /** + * @brief Build the operator "band" (neighborhood) for an output node. + * + * @param gNode Output (destination) node. + * @param idx_band Output: list of source node indices involved by bandwidth. 
+ * @return A vector of node pointers representing the band to be processed. + * + * @details + * The band holds the input nodes whose index lies within + * `oper->getMaxBandWidth()` cells of @p gNode along the applied direction; + * indices outside the domain are skipped. + */ MWNodeVector makeOperBand(const MWNode &gNode, std::vector> &idx_band); + /// @brief Initialize the per-thread timers (band construction, calculation, norms). void initTimers(); + + /// @brief Clear the band, calculation and norm timer vectors. void clearTimers(); + + /// @brief Print the per-thread timing statistics gathered during application. void printTimers() const; + // --- TreeCalculator interface ------------------------------------------------- + + /** + * @brief Per-node callback from the traversal engine. + * + * @param node Output node to compute; pulls required input contributions. + * + * @details + * For each output @p node, constructs the operator band (via + * @ref makeOperBand), then delegates the actual stencil application to + * @ref applyOperator / @ref tensorApplyOperComp. Norm and flag updates are + * performed as needed. + */ void calcNode(MWNode &node) override; + + /** + * @brief Hook invoked after a traversal pass. + * + * @details Prints timing statistics, clears timers, and re-initializes + * them to be ready for subsequent passes. + */ void postProcess() override { printTimers(); clearTimers(); initTimers(); } + // --- Operator application ----------------------------------------------------- + + /** + * @brief Apply the derivative operator to the current band/state. + * + * @param os Operator state for the current output node and component pair. + * + * @details + * Gathers the per-dimension operator blocks (identity in the non-applied + * directions, skipping out-of-band terms) and calls @ref tensorApplyOperComp. + */ void applyOperator(OperatorState &os); + + /** + * @brief Specialized path for zero-bandwidth (local) derivative application. 
+ * + * @param os Operator state for the current output node and component pair. + * + * @details + * When the derivative is strictly local in the discretization (bandwidth 0), + * this fast path avoids neighborhood assembly and directly applies the + * local stencil. + */ void applyOperator_bw0(OperatorState &os); + + /** + * @brief Tensorized component application of the operator. + * + * @param os Operator state (contains gt/ft component ids, indices, buffers). + * + * @details + * Performs dimension-wise application of the derivative operator using + * separable tensor components, respecting the grid/scale layout in + * @ref OperatorState. + */ void tensorApplyOperComp(OperatorState &os); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/MapCalculator.h b/src/treebuilders/MapCalculator.h index 33f799ee9..8af401713 100644 --- a/src/treebuilders/MapCalculator.h +++ b/src/treebuilders/MapCalculator.h @@ -25,29 +25,129 @@ #pragma once +/** + * @file + * @brief Node-wise nonlinear mapping calculator for multiresolution trees. + * + * @details + * This header defines @ref mrcpp::MapCalculator, a concrete + * @ref mrcpp::TreeCalculator that applies a *pointwise* value mapping + * (nonlinear allowed) to the coefficients of @ref mrcpp::FunctionTree nodes. + * + * The calculator: + * 1. Locates the corresponding input node (creating a *temporary copy* when + * needed) via `FunctionTree::getNode(idx)`. + * 2. Brings that input node to **value space**: inverse multi-wavelet (MW) + * reconstruction followed by a forward coefficient transform (CV) to obtain + * per-point values. + * 3. Applies the user-supplied functor `fmap : T → T` elementwise. + * 4. Transforms the output node back to the compressed representation + * (CV backward, MW compression), sets flags, and updates norms. 
+ * + * The calculator operates node-by-node and is typically orchestrated by the + * surrounding @ref TreeCalculator driver (which handles traversal, work queues, + * and adaptive refinement). + */ + #include "TreeCalculator.h" namespace mrcpp { -template class MapCalculator final : public TreeCalculator { +/** + * @class MapCalculator + * @brief Node-local nonlinear mapping (value transform) on a function tree. + * + * @tparam D Spatial dimension (e.g., 1, 2, 3). + * @tparam T Coefficient/value scalar type (e.g., `double`, `ComplexDouble`). + * + * @details + * `MapCalculator` evaluates a user-provided mapping functor `fmap` on the node + * samples corresponding to the input tree @p inp and writes the transformed + * values into the output tree managed by the base @ref TreeCalculator. + * + * ### Transform pipeline per node + * For a given output node `node_o` at index `idx`: + * - Acquire an **input** node copy: `MWNode node_i = func->getNode(idx)`. + * - Convert coefficients to point samples: + * - `node_i.mwTransform(Reconstruction);` + * - `node_i.cvTransform(Forward);` + * - Apply `fmap` to each sample: `coefs_o[j] = fmap(coefs_i[j]);` + * - Convert back to the compressed representation: + * - `node_o.cvTransform(Backward);` + * - `node_o.mwTransform(Compression);` + * - Finalize bookkeeping: `setHasCoefs()` and `calcNorms()`. + * + * @note + * The calculator assumes the **input** and **output** trees are compatible + * (same MRA order/domain and node layouts as scheduled by the driver). Any + * missing input nodes are generated on-the-fly by `getNode(idx)` as a *copy*. + * + * @warning + * The mapping functor @p fm **must** be thread-safe and side-effect free, + * as nodes can be processed in parallel by the base calculator. + */ +template +class MapCalculator final : public TreeCalculator { public: + /** + * @brief Construct a node-mapping calculator. + * + * @param fm Elementwise mapping functor `T → T` (copied/moved in). 
+ * @param inp Input function tree providing source coefficients. + * + * @pre @p inp is initialized on a valid MRA compatible with the target + * output tree managed by the driver. + */ MapCalculator(FMap fm, FunctionTree &inp) : func(&inp) , fmap(std::move(fm)) {} private: + /// Pointer to the input function tree (non-owning). FunctionTree *func; + + /// Elementwise mapping functor applied to node samples. FMap fmap; + + /** + * @brief Compute mapped coefficients for one output node. + * + * @param node_o Output node to fill (topology assumed prepared by driver). + * + * @details + * - Fetch input node at the same index (creates a temporary node copy). + * - Perform MW reconstruction and CV forward transforms on the input copy. + * - Apply @ref fmap to each sample and write into @p node_o. + * - Restore compressed representation of @p node_o (CV backward, MW compress). + * - Mark coefficients present and update node norms. + * + * @par Complexity + * \f$O(n_\text{coef})\f$ per node (excluding transform costs), where + * \f$n_\text{coef}\f$ is the number of local coefficients. + * + * @par Thread safety + * Independent across nodes when the driver runs in parallel. The functor + * @ref fmap must be reentrant. + */ void calcNode(MWNode &node_o) override { const NodeIndex &idx = node_o.getNodeIndex(); - int n_coefs = node_o.getNCoefs(); + const int n_coefs = node_o.getNCoefs(); T *coefs_o = node_o.getCoefs(); - // This generates missing nodes - MWNode node_i = func->getNode(idx); // Copy node + + // Obtain a temporary input node copy at the same index. + MWNode node_i = func->getNode(idx); + + // Bring input node to value space (reconstruction → forward CV). node_i.mwTransform(Reconstruction); node_i.cvTransform(Forward); + + // Apply the non-linear map pointwise.
const T *coefs_i = node_i.getCoefs(); - for (int j = 0; j < n_coefs; j++) { coefs_o[j] = fmap(coefs_i[j]); } + for (int j = 0; j < n_coefs; ++j) { + coefs_o[j] = fmap(coefs_i[j]); + } + + // Return to compressed representation and finalize bookkeeping. node_o.cvTransform(Backward); node_o.mwTransform(Compression); node_o.setHasCoefs(); @@ -55,4 +155,4 @@ template class MapCalculator final : public TreeCalculator class MultiplicationAdaptor : public TreeAdaptor { +/** + * @class MultiplicationAdaptor + * @brief Refinement rule for the product of two function trees. + * + * @tparam D Spatial dimension (e.g., 1, 2, 3). + * @tparam T Coefficient/value scalar type (e.g., `double`, `ComplexDouble`). + * + * @details + * The adaptor is typically used to drive construction of an output grid for + * \f$f_0 \cdot f_1\f$. At each node index it: + * 1. Retrieves the corresponding nodes from both input trees. + * 2. Computes the square-rooted maximum scaling and wavelet norms + * \f$(S_i, W_i)\f$. + * 3. Forms the estimate + * \f$\text{multNorm} = W_0 S_1 + W_1 S_0 + W_0 W_1\f$. + * 4. Requests a split if `multNorm > prec` and at least one input node is not + * a leaf. This effectively avoids refining deeper than either input grid, + * because when both inputs have zero wavelet contribution at a node, + * `multNorm == 0` and no further refinement is triggered. + * + * @note The input vector @ref trees must contain **exactly two** trees; a + * runtime error is emitted otherwise. + * + * @warning The adaptor reads norms from the input trees during `splitNode`. + * The member @ref trees is `mutable` to allow this from a `const` context. + */ +template +class MultiplicationAdaptor : public TreeAdaptor { public: + /** + * @brief Construct the multiplication refinement rule. + * + * @param pr Refinement threshold \f$\text{prec}\f$ for the multNorm estimate. + * @param ms Maximum scale/depth hint passed to @ref TreeAdaptor base. 
+ * @param t The pair of input trees as a @ref FunctionTreeVector. + * + * @pre `t.size() == 2` + */ MultiplicationAdaptor(double pr, int ms, FunctionTreeVector &t) : TreeAdaptor(ms) , prec(pr) , trees(t) {} + ~MultiplicationAdaptor() override = default; protected: + /// Refinement threshold used against `multNorm`. double prec; + + /** + * @brief Input trees used to estimate the product's local complexity. + * + * @details + * Marked `mutable` so that `splitNode` can retrieve node views from a + * `const` context without implying logical modification. + */ mutable FunctionTreeVector trees; + /** + * @brief Decide whether an output node should be split. + * + * @param node The (prospective) output node whose index determines + * which input nodes are inspected. + * @return `true` if `multNorm > prec` **and** at least one of the input + * nodes is not a leaf; otherwise `false`. + * + * @details + * - Retrieves the corresponding nodes from both input trees at the same + * @ref NodeIndex. + * - Computes + * \f$S_i=\sqrt{\text{max scaling square norm}},\; + * W_i=\sqrt{\text{max wavelet square norm}}\f$. + * - Forms \f$\text{multNorm}=W_0 S_1 + W_1 S_0 + W_0 W_1\f$. + * - Triggers refinement when the estimate exceeds @ref prec, except when + * both inputs are already leaf nodes at this index. + * + * @throws Emits `MSG_ERROR` if `trees.size() != 2`. + */ bool splitNode(const MWNode &node) const override { if (this->trees.size() != 2) MSG_ERROR("Invalid tree vec size: " << this->trees.size()); + auto &pNode0 = get_func(trees, 0).getNode(node.getNodeIndex()); auto &pNode1 = get_func(trees, 1).getNode(node.getNodeIndex()); + + // Square roots convert stored square norms to norms. 
double maxW0 = std::sqrt(pNode0.getMaxWSquareNorm()); double maxW1 = std::sqrt(pNode1.getMaxWSquareNorm()); double maxS0 = std::sqrt(pNode0.getMaxSquareNorm()); double maxS1 = std::sqrt(pNode1.getMaxSquareNorm()); - // The wavelet contribution (in the product of node0 and node1) can be approximated as + // Estimated wavelet contribution in the product node. double multNorm = maxW0 * maxS1 + maxW1 * maxS0 + maxW0 * maxW1; - // Note: this never refine deeper than one scale more than input tree grids, because when wavelets are zero - // for both input trees, multPrec=0 In addition, we force not to refine deeper than input tree grids - if (multNorm > this->prec and not(pNode0.isLeafNode() and pNode1.isLeafNode())) { + // Never refine beyond both input grids' leaf level. + if (multNorm > this->prec && !(pNode0.isLeafNode() && pNode1.isLeafNode())) { return true; } else { return false; @@ -65,4 +159,4 @@ template class MultiplicationAdaptor : public TreeAdaptor class MultiplicationCalculator final : public TreeCalculator { +/** + * @class MultiplicationCalculator + * @brief Computes the pointwise product of several input trees into an output tree. + * + * @tparam D Spatial dimension (e.g., 1, 2, 3). + * @tparam T Coefficient scalar type (`double` or `ComplexDouble`). + * + * @details + * Let \f$\{f_i\}\f$ be the input trees (with optional scalar prefactors + * provided externally via `get_coef(prod_vec, i)`) and let + * \f$g = \prod_i f_i\f$ denote the pointwise product. This calculator fills + * the coefficients of the output node corresponding to a given + * @ref NodeIndex by: + * + * \f[ + * \mathbf{c}^{(g)} \leftarrow + * \prod_i \left( c_i \; \mathbf{c}^{(f_i)} \right), + * \f] + * + * where \f$c_i\f$ is the scalar multiplier returned by `get_coef` and + * \f$\mathbf{c}^{(f_i)}\f$ are the (reconstructed, forward-transformed) + * coefficients of the input node. When `T` is complex, \f$\mathbf{c}^{(f_i)}\f$ + * may be conjugated as described below. 
+ * + * ### Conjugation rules (complex case) + * - If `func_i.conjugate()` is true, that input’s coefficients are conjugated. + * - Additionally, if the calculator is constructed with `conjugate=true`, + * the **first** input (index 0) is conjugated. The two conditions are XOR’d + * (`xor`), so a per-tree conjugation flag can cancel the global one. + * + * @note Missing input nodes are generated on demand by `FunctionTree::getNode`. + */ +template +class MultiplicationCalculator final : public TreeCalculator { public: + /** + * @brief Construct a product calculator. + * + * @param inp Vector of input trees to be multiplied. + * @param conjugate If `true`, apply complex conjugation to the **first** + * input factor (useful for ⟨bra|ket⟩-like operations). + * Ignored for real types. + */ MultiplicationCalculator(const FunctionTreeVector &inp, bool conjugate = false) : prod_vec(inp) , conj(conjugate) {} private: + /// Collection of input trees and (optionally) their scalar prefactors. FunctionTreeVector prod_vec; + + /// Global conjugation switch for the first factor (complex types only). bool conj; + /** + * @brief Compute coefficients for one output node by multiplying inputs. + * + * @param node_o Output node to be filled/updated. + * + * @details + * Steps performed: + * 1. Initialize output coefficients to unity. + * 2. For each input tree `i`: + * - Fetch scalar factor `c_i = get_coef(prod_vec, i)`. + * - Materialize copy of matching input node (`getNode(idx)`), + * reconstruct (`mwTransform(Reconstruction)`), + * and forward transform to value space (`cvTransform(Forward)`). + * - Multiply output coefficients element-wise by + * `c_i * coefs_i[j]` (or `c_i * conj(coefs_i[j])` per rules above). + * 3. Transform output node back (`cvTransform(Backward)`), + * compress (`mwTransform(Compression)`), mark as having coefficients, + * and update norms.
+ * + * @note Uses helper functions `get_func(prod_vec, i)` and + * `get_coef(prod_vec, i)` provided by the @ref FunctionTreeVector API. + */ void calcNode(MWNode &node_o) { const NodeIndex &idx = node_o.getNodeIndex(); T *coefs_o = node_o.getCoefs(); - for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; } + + // 1) Initialize output coefficients to multiplicative identity. + for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = static_cast(1.0); } + + // 2) Multiply contributions from each input factor. for (int i = 0; i < this->prod_vec.size(); i++) { T c_i = get_coef(this->prod_vec, i); FunctionTree &func_i = get_func(this->prod_vec, i); - // This generates missing nodes - MWNode node_i = func_i.getNode(idx); // Copy node + + // Materialize and prepare input node coefficients. + MWNode node_i = func_i.getNode(idx); // copy/materialize node_i.mwTransform(Reconstruction); node_i.cvTransform(Forward); + const T *coefs_i = node_i.getCoefs(); int n_coefs = node_i.getNCoefs(); + if constexpr (std::is_same::value) { - if (func_i.conjugate() xor (conj and i == 0)) { // NB: take complex conjugate of "bra" + // Conjugate rule: per-tree flag XOR global-first-factor flag. + bool do_conj = func_i.conjugate() xor (conj && i == 0); + if (do_conj) { for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * std::conj(coefs_i[j]); } } else { for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; } @@ -63,6 +168,8 @@ template class MultiplicationCalculator final : public TreeC for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; } } } + + // 3) Finalize output node state. 
node_o.cvTransform(Backward); node_o.mwTransform(Compression); node_o.setHasCoefs(); @@ -70,4 +177,4 @@ template class MultiplicationCalculator final : public TreeC } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/OperatorAdaptor.h b/src/treebuilders/OperatorAdaptor.h index d4bd0d1d8..524fcf7a3 100644 --- a/src/treebuilders/OperatorAdaptor.h +++ b/src/treebuilders/OperatorAdaptor.h @@ -25,27 +25,85 @@ #pragma once +/** + * @file + * @brief Adaptor that targets operator-sensitive regions for refinement. + * + * @details + * This adaptor specializes @c WaveletAdaptor in 2D to refine only those nodes + * that are (i) aligned with a coordinate axis (translation index 0 along @f$x@f$ + * or @f$y@f$) and (ii) exhibit non-zero **wavelet** content. This pattern is + * useful when applying kernels/operators whose singular support or strongest + * variation is concentrated along axes (e.g., banded/operator stencils), so + * we avoid refining benign regions. + * + * The refinement trigger is implemented in @ref OperatorAdaptor::splitNode. + */ + #include "WaveletAdaptor.h" namespace mrcpp { +/** + * @class OperatorAdaptor + * @brief Wavelet-driven 2D refinement biased to axis-aligned nodes. + * + * @details + * A node is marked for splitting iff: + * - **Axis proximity:** its translation index satisfies @c idx[0]==0 or + * @c idx[1]==0 (i.e., the node touches the @f$x@f$- or @f$y@f$-axis at its scale). + * - **Wavelet energy present:** at least one non-scaling component has a + * positive norm. In 2D, component indices are conventionally: + * - 0: scaling (S), + * - 1..3: wavelet components (W). + * + * Combining these filters refines only the regions that are both “near” the + * axes and relevant to operator action (non-zero wavelet content), keeping the + * mesh compact elsewhere.
+ * + * @see WaveletAdaptor + */ class OperatorAdaptor final : public WaveletAdaptor<2> { public: + /** + * @brief Construct an adaptor with precision and depth controls. + * + * @param pr Target precision/tolerance forwarded to the base adaptor. + * @param ms Maximum scale (upper bound on refinement depth) forwarded to the base adaptor. + * @param ap If @c true, enable the base adaptor's optional parent-aware + * behavior (propagation specifics depend on @ref WaveletAdaptor). + */ OperatorAdaptor(double pr, int ms, bool ap = false) : WaveletAdaptor<2>(pr, ms, ap) {} protected: + /** + * @brief Decide whether a node should be split. + * + * @param node The candidate node. + * @return @c true if the node lies on an axis (either translation index + * is zero) **and** has non-zero wavelet component norm; otherwise + * @c false. + * + * @details + * - **Axis check:** @c idx[0]==0 || idx[1]==0. + * - **Wavelet check:** any component @c i in {1,2,3} has + * @c node.getComponentNorm(i) > 0.0. + * + * Component 0 (scaling) is intentionally ignored in the wavelet check to + * avoid refining nodes that carry only low-frequency/scaling content. + */ bool splitNode(const MWNode<2> &node) const override { const auto &idx = node.getNodeIndex(); - bool chkTransl = (idx[0] == 0 or idx[1] == 0); + bool chkTransl = (idx[0] == 0 || idx[1] == 0); bool chkCompNorm = false; for (int i = 1; i < 4; i++) { if (node.getComponentNorm(i) > 0.0) chkCompNorm = true; } - return chkTransl and chkCompNorm; + return chkTransl && chkCompNorm; } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/PHCalculator.cpp b/src/treebuilders/PHCalculator.cpp index 2b54ef730..ea3bf5ced 100644 --- a/src/treebuilders/PHCalculator.cpp +++ b/src/treebuilders/PHCalculator.cpp @@ -56,20 +56,6 @@ using Eigen::MatrixXd; namespace mrcpp { -/** - * @brief Construct a PHCalculator and load its stencil blocks. - * - * @param[in] basis Scaling basis (type and order). 
- * @param[in] n Derivative order (1 or 2 supported). - * - * @throws NOT_IMPLEMENTED_ABORT if n <= 0 or n >= 3. - * - * @details - * Based on \p n the constructor selects the corresponding set of PH derivative - * blocks and loads them from disk. Supported files are: - * - Legendre: L_ph_deriv_1.txt, L_ph_deriv_2.txt - * - Interpol: I_ph_deriv_1.txt, I_ph_deriv_2.txt - */ PHCalculator::PHCalculator(const ScalingBasis &basis, int n) : diff_order(n) { if (this->diff_order <= 0) NOT_IMPLEMENTED_ABORT; @@ -78,29 +64,6 @@ PHCalculator::PHCalculator(const ScalingBasis &basis, int n) if (this->diff_order >= 3) NOT_IMPLEMENTED_ABORT; } -/** - * @brief Read PH derivative blocks from text files for the given basis. - * - * @param[in] basis Scaling basis (provides type and order). - * @param[in] n Character '1' or '2' selecting derivative order. - * - * @details - * The file format is: - * - First line per order k+1 (k+1 = 2..29): an integer "order" sentinel. - * - Followed by a 3*(k+1) by (k+1) table (row-major in the file) containing - * the vertically stacked blocks: - * [ S_{+1} ; S_{0} ; S_{-1} ] - * - * Only the block triple corresponding to the active basis order (kp1 = k+1) - * is kept: - * - S_p1 = rows [0*kp1 .. 1*kp1-1] - * - S_0 = rows [1*kp1 .. 2*kp1-1] - * - S_m1 = rows [2*kp1 .. 3*kp1-1] - * - * @note - * - Supported scaling orders: 0..28 for Interpol/Legendre (kp1 in 2..29). - * - Files are discovered via details::find_filters(). - */ void PHCalculator::readSMatrix(const ScalingBasis &basis, char n) { std::string file; std::string path = details::find_filters(); @@ -136,30 +99,6 @@ void PHCalculator::readSMatrix(const ScalingBasis &basis, char n) { } } -/** - * @brief Fill 2D node coefficients by applying the PH derivative stencil. - * - * @param[in,out] node 2D MW node to populate (coefficients in scaling basis). - * - * @details - * Let idx = (i0, i1) be the 2D node index and l = i1 - i0. 
Depending on l, - * the appropriate neighbour coupling block is selected: - * - l = +1 : right neighbour uses S_p1 - * - l = 0 : interior uses S_0 (diagonal) with off-diagonals S_{-1}, S_{+1} - * - l = -1 : left neighbour uses S_m1 - * - * The coefficient tensor is laid out as 4 contiguous tiles for the four - * tensor children, each of size kp1_d = (k+1)^2. For each tile we accumulate - * the matrix-product contribution and rescale by - * two_np1 = 2^{diff_order * (scale+1)}. - * - * Finally, coefficients are transformed to MW (Compression), marked present, - * and node norms are updated. - * - * @note - * - For periodic trees, indices outside the world box are ignored (no write). - * - The switch default does nothing by design (periodic handling is upstream). - */ void PHCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); @@ -208,4 +147,4 @@ void PHCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); // update node/component norms } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/PHCalculator.h b/src/treebuilders/PHCalculator.h index 2c3b18510..affb1848f 100644 --- a/src/treebuilders/PHCalculator.h +++ b/src/treebuilders/PHCalculator.h @@ -25,24 +25,132 @@ #pragma once +/** + * @file + * @brief 2D calculator that applies a three-point stencil in a scaling basis. + * + * @details + * This component specializes the generic @ref TreeCalculator for 2D trees to + * perform operations that can be expressed through **shifted overlap/stencil + * matrices** of a scaling basis. The operator is represented by three + * precomputed banded blocks + * @f$S_{-1}, S_{0}, S_{+1}@f$, which correspond to interactions with the + * left, center, and right neighbor positions along a given axis in the + * multiresolution grid. These blocks are assembled from a provided + * @ref ScalingBasis and then applied node-wise in @ref calcNode. 
+ * + * Typical use-cases include discrete differential operators (e.g., first/second + * derivatives) or narrow-band filters that can be written as a three-point + * stencil in the scaling-function coefficient space. The effective stencil + * "width" (derivative order) is indicated by @ref diff_order. + */ + #include #include "TreeCalculator.h" namespace mrcpp { +/** + * @class PHCalculator + * @brief Applies a scaling-basis three-point stencil on a 2D multiresolution tree. + * + * @details + * The calculator preloads three overlap/stencil matrices derived from a + * @ref ScalingBasis: + * - @ref S_m1 : shifted block for offset @f$-1@f$, + * - @ref S_0 : central block for offset @f$0@f$, + * - @ref S_p1 : shifted block for offset @f$+1@f$. + * + * During @ref calcNode, these blocks are combined to transform the node's + * coefficients according to the chosen stencil (e.g., a centered finite + * difference of order @ref diff_order). The exact algebra depends on the + * basis; see the implementation of @ref readSMatrix. + * + * The class is marked @c final because it provides a complete node-level + * implementation tailored to a 3-band stencil and is not intended for further + * subclassing. + */ class PHCalculator final : public TreeCalculator<2> { public: + /** + * @brief Construct the calculator and preload stencil blocks. + * + * @param basis Scaling basis from which the banded overlap/stencil + * matrices are derived. The basis determines support, + * moments, and thus the actual entries of the @f$S@f$ + * blocks. + * @param n Nominal stencil/derivative order (e.g., 1 for first + * derivative, 2 for second). The value is stored in + * @ref diff_order and may influence how the three + * blocks are combined inside @ref calcNode. + * + * @post + * - @ref diff_order is set from @p n. + * - @ref S_m1, @ref S_0, @ref S_p1 are populated via @ref readSMatrix(). 
+ */ PHCalculator(const ScalingBasis &basis, int n); private: + /** + * @brief Logical order of the stencil/differential operator to apply. + * + * @details + * This does not change the *size* of the precomputed blocks, but can alter + * how @ref S_m1, @ref S_0, and @ref S_p1 are linearly combined inside + * @ref calcNode (e.g., centered first vs. second derivative weights). + */ const int diff_order; + + /// @name Precomputed banded overlap/stencil blocks + /// @{ + /// Block corresponding to a shift of @f$-1@f$ grid unit(s). Eigen::MatrixXd S_m1; + /// Central (unshifted) block. Eigen::MatrixXd S_0; + /// Block corresponding to a shift of @f$+1@f$ grid unit(s). Eigen::MatrixXd S_p1; + /// @} + /** + * @brief Node-level application of the three-point stencil. + * + * @param node Target 2D node whose coefficient vector is transformed + * in-place according to the assembled operator. The method + * is invoked by the traversal implemented in the base + * @ref TreeCalculator. + * + * @details + * Conceptually, this computes (in scaling space) + * @f[ + * \mathbf{c}_{\text{out}} \;\leftarrow\; + * w_{-1}\,S_{-1}\,\mathbf{c}_{-1} \;+\; + * w_{0}\, S_{0}\, \mathbf{c}_{0} \;+\; + * w_{+1}\,S_{+1}\,\mathbf{c}_{+1}, + * @f] + * where weights @f$w_{\cdot}@f$ depend on @ref diff_order and the chosen + * discrete scheme, and @f$\mathbf{c}_{k}@f$ denotes the coefficient vector + * at relative offset @f$k \in \{-1,0,+1\}@f$. The exact assembly is + * implementation-specific and consistent with the supplied @ref ScalingBasis. + */ void calcNode(MWNode<2> &node); + + /** + * @brief Load the stencil blocks for one derivative order. + * + * @param basis Scaling basis whose type and order select the blocks to read. + * @param n Character selecting the derivative order, not a neighbor + * offset: '1' for the first derivative, '2' for the second. + * It is the digit in the data file name (for example + * `L_ph_deriv_1.txt` or `I_ph_deriv_2.txt`).
+ * + * @details + * Reads the text file of precomputed blocks for the basis family and + * derivative order, found via details::find_filters(), and keeps the + * triple matching the order of @p basis. One call fills all three of + * @ref S_p1, @ref S_0 and @ref S_m1; @p n never selects a single block. + */ void readSMatrix(const ScalingBasis &basis, char n); }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/PowerCalculator.h b/src/treebuilders/PowerCalculator.h index 79147fc4b..e35b1d4a7 100644 --- a/src/treebuilders/PowerCalculator.h +++ b/src/treebuilders/PowerCalculator.h @@ -25,30 +25,128 @@ #pragma once +/** + * @file + * @brief Node-wise power transform calculator for multiresolution trees. + * + * @details + * This header defines @ref mrcpp::PowerCalculator, a concrete + * @ref TreeCalculator that raises the coefficients of an input function tree + * to a scalar power, writing results into an output tree during the standard + * calculator traversal. + * + * The transform is applied **locally per node** in scaling space: + * 1. Fetch/copy the corresponding input node (creating it if missing). + * 2. Apply multiresolution reconstruction (wavelet → scaling) and then a + * forward coefficient transform to obtain coefficient values suitable + * for pointwise operations. + * 3. Compute @c coefs_out[j] = pow(coefs_in[j], power) for each coefficient. + * 4. Apply the inverse coefficient transform and multiresolution compression. + * 5. Mark coefficients present and update node norms. + * + * The scalar @ref power is a real number. For complex-valued trees, + * @c std::pow(std::complex<>, double) is used. + */ + #include "TreeCalculator.h" namespace mrcpp { -template class PowerCalculator final : public TreeCalculator { +/** + * @class PowerCalculator + * @brief Raises node coefficients of an input tree to a fixed power. + * + * @tparam D Spatial dimension of the tree (e.g., 1, 2, or 3).
+ * @tparam T Coefficient scalar type (e.g., @c double or @c ComplexDouble). + * + * @details + * This calculator implements a **pointwise power** operation in coefficient + * space. For each visited node in the output tree, it pulls the corresponding + * node from the input tree (creating it if necessary), reconstructs to scaling + * space, and applies: + * @f[ + * \forall j:\quad c^{\text{out}}_j \leftarrow \big(c^{\text{in}}_j\big)^{\,p} + * @f] + * where @f$p@f$ is the exponent @ref power. + * + * ### Precision & grid handling + * The class delegates traversal, splitting, and precision handling to the + * base @ref TreeCalculator. It performs only the node-local algebra and the + * required forward/backward transforms. + * + * ### Complex inputs + * For complex-valued @p T, the standard overload @c std::pow(T,double) is used. + * Note that if @p T is real and coefficients are negative while @ref power is + * non-integer, the result may be @c NaN; this calculator does not alter that + * behavior. + */ +template +class PowerCalculator final : public TreeCalculator { public: + /** + * @brief Construct a power calculator for a given input tree. + * + * @param inp Reference to the input function tree whose node coefficients + * are the base values in the power operation. + * @param pow Exponent @f$p@f$ to apply pointwise to all node coefficients. + * + * @note The calculator does not own @p inp; the caller must ensure that + * the referenced tree remains valid for the calculator's lifetime. + */ PowerCalculator(FunctionTree &inp, double pow) : power(pow) , func(&inp) {} private: + /** + * @brief Scalar exponent used in @c std::pow for all coefficients. + */ double power; + + /** + * @brief Non-owning pointer to the input function tree. + */ FunctionTree *func; + /** + * @brief Node-level power application. + * + * @param node_o Output node whose coefficients are overwritten with + * @f$\big(c^{\text{in}}\big)^{\,p}@f$ at the corresponding + * location in the input tree.
+ * + * @details + * Steps performed: + * - Retrieve the corresponding input node @c node_i from @ref func + * using the same @ref NodeIndex (this may create a missing node). + * - @c node_i.mwTransform(Reconstruction) to convert wavelet → scaling. + * - @c node_i.cvTransform(Forward) to access coefficient array in the + * appropriate local basis. + * - For each coefficient index @c j, compute: + * @code + * coefs_o[j] = std::pow(coefs_i[j], power); + * @endcode + * - Apply @c node_o.cvTransform(Backward) and + * @c node_o.mwTransform(Compression) to restore representation. + * - Set the "has coefficients" flag and refresh node norms. + * + * @warning If @p T is a real type and @ref power is non-integer, negative + * input coefficients can lead to @c NaN. This is the standard + * behavior of @c std::pow and is not intercepted here. + */ void calcNode(MWNode &node_o) override { const NodeIndex &idx = node_o.getNodeIndex(); int n_coefs = node_o.getNCoefs(); T *coefs_o = node_o.getCoefs(); - // This generates missing nodes - MWNode node_i = func->getNode(idx); // Copy node + + // Generate/copy input node at the same index. 
+ MWNode node_i = func->getNode(idx); node_i.mwTransform(Reconstruction); node_i.cvTransform(Forward); + const T *coefs_i = node_i.getCoefs(); for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::pow(coefs_i[j], this->power); } + node_o.cvTransform(Backward); node_o.mwTransform(Compression); node_o.setHasCoefs(); @@ -56,4 +154,4 @@ template class PowerCalculator final : public TreeCalculator } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/ProjectionCalculator.cpp b/src/treebuilders/ProjectionCalculator.cpp index f72fde8cd..d6ca9fc77 100644 --- a/src/treebuilders/ProjectionCalculator.cpp +++ b/src/treebuilders/ProjectionCalculator.cpp @@ -23,42 +23,6 @@ * */ -/** - * @file ProjectionCalculator.cpp - * @brief Compute scaling/wavelet coefficients by projecting an analytic (or - * otherwise representable) function onto the MW basis on a given node. - * - * @details - * The projection proceeds by evaluating the input function at a set of - * expanded child quadrature/collocation points associated with the node, - * then transforming these samples into scaling coefficients and finally - * compressing into the wavelet representation: - * - * 1. `node.getExpandedChildPts(exp_pts)` returns a D×N matrix of evaluation - * points in *local* node coordinates, where N equals `node.getNCoefs()`. - * 2. Each point is rescaled by the per-dimension world-box scaling factors - * (`scaling_factor[d]`) so that the user function is evaluated in - * physical coordinates. - * 3. The raw samples are written into the node coefficient buffer and - * converted to scaling coefficients via `cvTransform(Backward)`. - * 4. `mwTransform(Compression)` moves the representation to compressed MW - * form (wavelets across scales, scaling on roots). - * 5. Bookkeeping: mark coefficients present and update (square-)norms. 
- * - * The calculator is stateless across nodes; it assumes that the caller - * (TreeBuilder) handles traversal and refinement decisions (via an adaptor). - * - * @note - * - The assertion `exp_pts.cols() == node.getNCoefs()` guards consistency - * between the quadrature layout and the node’s coefficient count. - * - `scaling_factor` is typically extracted from the world box and allows - * non-unit, per-axis domain scaling. - * - This implementation works for both real and complex coefficient types. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (`double` or `ComplexDouble`). - */ - #include "ProjectionCalculator.h" #include "trees/MWNode.h" #include @@ -67,24 +31,6 @@ using Eigen::MatrixXd; namespace mrcpp { -/** - * @brief Project a single node by sampling the input function on the node's - * expanded child grid and transforming samples into MW coefficients. - * - * @param[in,out] node The MW node whose coefficients are to be computed. - * - * @pre `node.getExpandedChildPts(exp_pts)` provides exactly `getNCoefs()` - * columns (asserted). - * @post - * - Node coefficients represent the function in compressed MW form. - * - `node.setHasCoefs()` is set and node norms are updated. - * - * @implementation - * - Samples are taken at expanded child points, rescaled by - * `scaling_factor[d]`, and passed to `func->evalf`. - * - `cvTransform(Backward)` maps collocation values → scaling coefficients. - * - `mwTransform(Compression)` converts to MW compressed representation. 
- */ template void ProjectionCalculator::calcNode(MWNode &node) { MatrixXd exp_pts; @@ -99,82 +45,12 @@ void ProjectionCalculator::calcNode(MWNode &node) { coefs[i] = this->func->evalf(r); } - node.cvTransform(Backward); // collocation values -> scaling coefficients - node.mwTransform(Compression); // scaling/wavelet compression on the node - node.setHasCoefs(); // mark that the node now owns valid coefs - node.calcNorms(); // update norms for refinement/threshholding -} - -/* -------------------------------------------------------------------------- - * Legacy (interpolating) variant - * - * The block below shows an older, somewhat faster interpolating approach - * that assumes an interpolating scaling basis. It is kept as reference; - * it performs quadrature using cached roots/weights and writes block - * coefficients directly before compressing. Enable with care as it - * assumes specific basis properties (Interpol). - * - * template - * void ProjectionCalculator::calcNode(MWNode &node) { ... 
} - * -------------------------------------------------------------------------- */ - -/* Old interpolating version, somewhat faster -template -void ProjectionCalculator::calcNode(MWNode &node) { - const ScalingBasis &sf = node.getMWTree().getMRA().getScalingBasis(); - if (sf.getScalingType() != Interpol) { - NOT_IMPLEMENTED_ABORT; - } - int quadratureOrder = sf.getQuadratureOrder(); - getQuadratureCache(qc); - const VectorXd &pts = qc.getRoots(quadratureOrder); - const VectorXd &wgts = qc.getWeights(quadratureOrder); - - double tmp_coefs[node.getNCoefs()]; - - int scale = node.getScale(); - int kp1_d = node.getKp1_d(); - - double scaleFactor = 1.0 / std::pow(2.0, scale + 1.0); - double sqrtScaleFactor = std::sqrt(scaleFactor); - double point[D]; - - static int tDim = 1 << D; - for (int cIdx = 0; cIdx < tDim; cIdx++) { - NodeIndex nIdx(node.getNodeIndex(), cIdx); - const int *l = nIdx.getTranslation(); - - int indexCounter[D]; - for (int i = 0; i < D; i++) { - indexCounter[i] = 0; - } - - for (int i = 0; i < kp1_d; i++) { - double coef = 1.0; - for (int j = 0; j < D; j++) { - point[j] = scaleFactor * (pts(indexCounter[j]) + l[j]); - coef *= std::sqrt(wgts(indexCounter[j])) * sqrtScaleFactor; - } - - tmp_coefs[i] = coef * this->func->evalf(point); - - indexCounter[0]++; - for (int j = 0; j < D - 1; j++) { - if (indexCounter[j] == quadratureOrder) { - indexCounter[j] = 0; - indexCounter[j + 1]++; - } - } - } - node.setCoefBlock(cIdx, kp1_d, tmp_coefs); - } + node.cvTransform(Backward); node.mwTransform(Compression); node.setHasCoefs(); node.calcNorms(); } -*/ -/// Explicit template instantiations template class ProjectionCalculator<1, double>; template class ProjectionCalculator<2, double>; template class ProjectionCalculator<3, double>; diff --git a/src/treebuilders/ProjectionCalculator.h b/src/treebuilders/ProjectionCalculator.h index 067c41422..a5d5708b1 100644 --- a/src/treebuilders/ProjectionCalculator.h +++ b/src/treebuilders/ProjectionCalculator.h @@ -25,20 
+25,103 @@ #pragma once +/** + * @file + * @brief Node-wise projector from an analytic/representable function to a + * multiresolution function tree. + * + * @details + * This calculator implements the core **projection kernel** that takes a + * user-provided @ref RepresentableFunction and, node by node, produces + * multiresolution coefficients for an output tree managed by the surrounding + * @ref TreeCalculator pipeline. + * + * The calculator is agnostic to the scheduling/refinement policy: it only + * defines how a *single* node is computed (see @ref calcNode). The driving + * logic (initial work list, adaptors, termination) is handled by + * @ref TreeCalculator and its collaborators. + * + * ### Coordinate scaling + * A per-dimension @p scaling_factor is supplied at construction. It is applied + * consistently during node evaluation to support anisotropic grid scalings, + * unit conversions, or jacobian-like preconditioning. Use a vector of ones to + * disable scaling. + */ + #include "TreeCalculator.h" namespace mrcpp { -template class ProjectionCalculator final : public TreeCalculator { +// Forward declaration; the concrete definition is provided by MRCPP headers. +template class RepresentableFunction; + +/** + * @class ProjectionCalculator + * @brief Projects a @ref RepresentableFunction onto the active output tree. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type (e.g., `double`, `ComplexDouble`). + * + * @details + * For each node requested by the @ref TreeCalculator scheduler, this class: + * 1. Builds or fetches the corresponding input stencil/quadrature on the + * node’s support. + * 2. Evaluates the supplied @ref RepresentableFunction at those points, + * applying the provided per-axis @ref scaling_factor. + * 3. Computes the node’s scaling/wavelet coefficients, writes them to the + * output tree, and updates norms/metadata. 
+ * + * The calculator itself does **not** decide where to refine; use an adaptor + * (e.g., wavelet- or operator-based) with @ref TreeCalculator to drive + * adaptivity from residuals or norm estimates. + */ +template +class ProjectionCalculator final : public TreeCalculator { public: - ProjectionCalculator(const RepresentableFunction &inp_func, const std::array &sf) + /** + * @brief Construct a projector from an analytic/representable function. + * + * @param[in] inp_func Function to be projected. The pointer is + * stored and must remain valid for the lifetime + * of the calculator. + * @param[in] sf Per-dimension scaling factors applied to local + * coordinates before evaluating @p inp_func. + * Set to `{1, …, 1}` for no scaling. + * + * @note The output target (tree) and traversal policy are provided by the + * owning @ref TreeCalculator context; this constructor only binds the + * callable and the evaluation scaling. + */ + ProjectionCalculator(const RepresentableFunction &inp_func, + const std::array &sf) : func(&inp_func) , scaling_factor(sf) {} private: + /// Source function to be sampled on each node’s stencil. const RepresentableFunction *func; + + /// Per-axis multiplicative coordinate scaling used during evaluation. const std::array scaling_factor; + + /** + * @brief Compute a single node of the output tree. + * + * @param[in,out] node Target node whose coefficients and norms are produced. + * + * @details + * The typical implementation flow is: + * - derive the node’s physical coordinates from its @ref NodeIndex and + * apply @ref scaling_factor, + * - evaluate @ref func at the required sample points, + * - assemble scaling/wavelet coefficients and write them into @p node, + * - set “has coefficients” flags and update per-component norms. 
+ * + * Thread-safety: the method only mutates @p node and uses read-only access + * to @ref func and @ref scaling_factor, so it is safe under the usual + * per-node parallel scheduling employed by @ref TreeCalculator. + */ void calcNode(MWNode &node) override; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/SplitAdaptor.h b/src/treebuilders/SplitAdaptor.h index 7e81bbe8b..d1c73e679 100644 --- a/src/treebuilders/SplitAdaptor.h +++ b/src/treebuilders/SplitAdaptor.h @@ -25,19 +25,73 @@ #pragma once +/** + * @file + * @brief Minimal adaptor that unconditionally (or never) splits nodes. + * + * @details + * This adaptor implements the @ref TreeAdaptor interface with a constant + * split policy: depending on a boolean flag passed at construction, + * every node presented by the traversal will either: + * - be **always split** (when `split == true`), or + * - be **never split** (when `split == false`). + * + * This is useful for: + * - unit tests and benchmarks (forcing a fixed refinement pattern), + * - creating a uniform grid up to a maximum scale, + * - disabling refinement while still running a calculator over an existing grid. + */ + #include "TreeAdaptor.h" namespace mrcpp { -template class SplitAdaptor final : public TreeAdaptor { +/** + * @class SplitAdaptor + * @brief Constant split/no-split adaptor for tree refinement. + * + * @tparam D Spatial dimension of the tree. + * @tparam T Scalar coefficient type (defaults to `double`). + * + * @details + * The adaptor inherits the depth/scale controls from @ref TreeAdaptor (e.g., + * the *maximum scale* passed to the base constructor). The split decision + * itself is independent of node content and simply mirrors the `split` flag. + * + * ### Refinement semantics + * - If `split == true`, any node that the base class allows to be refined + * (e.g., below the maximum scale) will be marked for splitting. 
+ * - If `split == false`, no node will be split by this adaptor (even if below + * the maximum scale). + */ +template +class SplitAdaptor final : public TreeAdaptor { public: + /** + * @brief Construct a constant split adaptor. + * + * @param[in] ms Maximum scale (or equivalent depth limit) forwarded to + * @ref TreeAdaptor. Nodes at or beyond this scale will not be + * refined by the base logic regardless of @p sp. + * @param[in] sp Split policy: `true` to always split (subject to base + * constraints), `false` to never split. + */ SplitAdaptor(int ms, bool sp) : TreeAdaptor(ms) , split(sp) {} private: + /// Constant split policy applied to every visited node. bool split; + /** + * @brief Decide whether to split a node. + * + * @param[in] node Node under consideration (unused). + * @return `true` if this adaptor is configured to split; otherwise `false`. + * + * @note The base class may still veto refinement (e.g., beyond max scale). + */ bool splitNode(const MWNode &node) const override { return this->split; } }; diff --git a/src/treebuilders/SquareCalculator.h b/src/treebuilders/SquareCalculator.h index 015b90f82..63c33f94d 100644 --- a/src/treebuilders/SquareCalculator.h +++ b/src/treebuilders/SquareCalculator.h @@ -25,46 +25,154 @@ #pragma once +/** + * @file + * @brief Element-wise squaring of function-tree coefficients (with optional complex conjugation). + * + * @details + * This calculator evaluates one of the following pointwise operations on an input function + * represented by a multiresolution @ref FunctionTree: + * + * - **Algebraic square**: \f$ g(\mathbf r) = f(\mathbf r)^2 \f$ + * - **Squared magnitude** (Hermitian square): \f$ g(\mathbf r) = f(\mathbf r)\,f(\mathbf r)^* = |f(\mathbf r)|^2 \f$ + * + * The choice is controlled by the constructor's `conjugate` flag (see below). For real + * coefficient types the two definitions coincide. + * + * Implementation sketch per node: + * 1. 
Pull (or generate) the input node at the same index as the output node. + * 2. Convert to scaling coefficients (multiwavelet reconstruction), then to function + * values at the node's collocation points. + * 3. Apply the element-wise operation (square or squared magnitude). + * 4. Transform the values back to coefficients, compress, mark as having coefficients, and refresh norms. + */ + #include "TreeCalculator.h" namespace mrcpp { -template class SquareCalculator final : public TreeCalculator { +/** + * @class SquareCalculator + * @brief Per-node square / squared-magnitude operator for function trees. + * + * @tparam D Spatial dimension of the tree. + * @tparam T Scalar coefficient type (e.g., `double` or `ComplexDouble`). + * + * @details + * Let \f$f\f$ be the input function represented by `func`. This calculator writes + * an output tree \f$g\f$ such that, for each node and each collocation point: + * + * - If `conjugate == false`: + * - Real `T`: \f$ g = f^2 \f$ + * - Complex `T`: \f$ g = f^2 \f$ + * - If `conjugate == true`: + * - Real `T`: \f$ g = f^2 \f$ (same as above) + * - Complex `T`: \f$ g = f\,\overline{f} = |f|^2 \f$ + * + * Additionally, if the input tree is marked internally as "conjugated" (via its + * soft-conjugation flag), the implementation respects that view such that + * `conjugate == true` still produces \f$|f|^2\f$ and `conjugate == false` produces + * \f$(\overline{f})^2\f$ for complex `T`. See the truth table in @ref calcNode. + * + * ### Transform pipeline + * Each output node is computed by: + * - reconstructing the corresponding input node to scaling space + * (`mwTransform(Reconstruction)`), + * - converting scaling coefficients to collocation-point values (`cvTransform(Forward)`), + * - applying the element-wise operation on that array of point values, + * - mapping back (`cvTransform(Backward)`, `mwTransform(Compression)`), + * - finalizing (`setHasCoefs()`, `calcNorms()`).
+ * + * @note The calculator is stateless across nodes and can be scheduled in parallel by + * the tree execution engine as long as nodes are independent. + */ +template +class SquareCalculator final : public TreeCalculator { public: + /** + * @brief Construct a square (or squared-magnitude) calculator. + * + * @param[in] inp Input function tree \f$f\f$. + * @param[in] conjugate If `true` and `T` is complex, compute the squared magnitude + * \f$|f|^2\f$ (i.e., multiply by the complex conjugate). If + * `false`, compute the algebraic square \f$f^2\f$. + * + * @note For real `T`, `conjugate` has no effect; \f$|f|^2 = f^2\f$. + */ SquareCalculator(FunctionTree &inp, bool conjugate = false) : func(&inp) , conj(conjugate) {} private: + /// Pointer to the input function tree \f$f\f$. FunctionTree *func; + /// Operation switch: `false` ⇒ \f$f^2\f$; `true` ⇒ (for complex) \f$|f|^2\f$. bool conj; + /** + * @brief Compute one output node by squaring the corresponding input node. + * + * @param[in,out] node_o Output node to be written at the current index. + * + * @details + * Steps: + * 1. Acquire a copy of the input node at the same index: `node_i = func->getNode(idx)`. + * 2. Transform `node_i` to coefficient space (`mwTransform(Reconstruction)`, + * then `cvTransform(Forward)`). + * 3. For each coefficient \f$c_j\f$: + * - If `T` is real: \f$c_j \leftarrow c_j^2\f$. + * - If `T` is complex: + * - Respect the input tree's soft conjugation flag (`func->conjugate()`). + * - Apply the following table to compute `coefs_o[j]`: + * + * | `func->conjugate()` | `conj` | result | + * |:--------------------:|:------:|:---------------------------------------| + * | false | false | \f$c_j \cdot c_j = c_j^2\f$ | + * | false | true | \f$c_j \cdot \overline{c_j} = |c_j|^2\f$ | + * | true | false | \f$\overline{c_j}\cdot \overline{c_j} = (\overline{c_j})^2\f$ | + * | true | true | \f$\overline{c_j}\cdot c_j = |c_j|^2\f$ | + * + * 4. 
Map the result back (`cvTransform(Backward)`, `mwTransform(Compression)`), + * then finalize flags and norms. + * + * @complexity Linear in the number of coefficients of the node: \f$O(n_{\text{coefs}})\f$. + */ void calcNode(MWNode &node_o) { const NodeIndex &idx = node_o.getNodeIndex(); int n_coefs = node_o.getNCoefs(); T *coefs_o = node_o.getCoefs(); - // This generates missing nodes - MWNode node_i = func->getNode(idx); // Copy node + + // Acquire / materialize the input node at the same index + MWNode node_i = func->getNode(idx); // Copy node (may generate missing nodes) node_i.mwTransform(Reconstruction); node_i.cvTransform(Forward); + const T *coefs_i = node_i.getCoefs(); + if constexpr (std::is_same::value) { if (func->conjugate()) { if (conj) { + // |f|^2: conj(c) * c for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * coefs_i[j]; } } else { + // (conj f)^2: conj(c) * conj(c) for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * std::conj(coefs_i[j]); } } } else { if (conj) { + // |f|^2: c * conj(c) for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * std::conj(coefs_i[j]); } } else { + // f^2: c * c for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; } } } } else { + // Real case: f^2 for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; } } + + // Map back and finalize node_o.cvTransform(Backward); node_o.mwTransform(Compression); node_o.setHasCoefs(); @@ -72,4 +180,4 @@ template class SquareCalculator final : public TreeCalculato } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp index f855155b7..962963d6f 100644 --- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp +++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp @@ -23,57 +23,6 @@ * */ -/** - * @file 
TimeEvolution_CrossCorrelationCalculator.cpp - * @brief Compute node-local coefficients for the time-evolution cross–correlation - * contribution on a 2D multiwavelet node. - * - * @details - * This calculator assembles, for each 2D node, the coefficients produced by a - * cross–correlation kernel combined with a set of precomputed power integrals - * \( J_m \). Conceptually, for each child-translation \( i \in \{0,\dots,t\_dim-1\} \) - * and for each local polynomial pair \( (p,j) \) (with \(0\le p,j \le k\)), - * we accumulate - * - * \f[ - * \mathrm{vec\_o}_{i,(p,j)} - * \;+=\; - * \sum_{k=0}^{K(i,p,j)} - * J_{2k+p+j}^{(l_b)} - * \; \cdot \; - * \mathrm{CC}[k](p,j) , - * \f] - * - * where: - * - \(k\) is the polynomial order (node order), - * - \(l_b\) is the child offset along the 1D index difference (second minus first), - * - \(\mathrm{CC}[k](p,j)\) are entries of the cross–correlation matrices - * (one per \(k\)), and - * - \(J_{m}^{(l_b)}\) are power integrals looked up from - * `J_power_inetgarls[scale+1][l_b][m]` (note: member name “inetgarls” is kept as-is). - * - * The result vector `vec_o` of length `t_dim * kp1_d` (with `t_dim = 4` in 2D and - * `kp1_d = (k+1)^2`) is written into the node coefficient buffer. The node is then - * compressed (`mwTransform(Compression)`), marked as having coefficients, and its - * norms are updated. - * - * @note - * - Only the Legendre scaling basis is currently supported here; Interpol is rejected. - * - The member flag `imaginary` selects whether the imaginary or real parts of the - * \(J\)-integrals are used. - * - The code assumes the cross–correlation matrices have been pre-populated in - * `cross_correlation->Matrix[k]`, consistent with the node order. - * - No world-box rescaling is applied in this routine (values are directly assigned). - * - * @warning - * - The routine relies on external consistency: - * * `J_power_inetgarls[scale+1]` must exist for the node’s scale. 
- * * `J_power_inetgarls[...][l_b]` must cover all accessed indices `2*k+p+j`. - * * `cross_correlation->Matrix[k]` must be dimension-compatible with `(p,j)`. - * - If these invariants are violated, out-of-bounds access may occur upstream; - * the caller is responsible for preparing inputs correctly. - */ - #include "TimeEvolution_CrossCorrelationCalculator.h" #include "trees/FunctionTree.h" #include "trees/MWNode.h" @@ -84,20 +33,6 @@ using Eigen::VectorXd; namespace mrcpp { -/** - * @brief Assemble time-evolution cross–correlation coefficients on a 2D node, - * then compress to MW form. - * - * @param[in,out] node The target multiwavelet node (D=2). - * - * @details - * 1. Zero current coefficients. - * 2. Dispatch based on scaling basis type: - * - **Legendre**: compute through #applyCcc. - * - **Interpol**: rejected (not implemented for this calculator). - * 3. Compress (`mwTransform(Compression)`), mark coefficients present, and - * update node norms. - */ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); int type = node.getMWTree().getMRA().getScalingBasis().getScalingType(); @@ -119,44 +54,15 @@ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } -/** - * @brief Core assembly routine for Legendre scaling basis. - * - * @param[in,out] node The target 2D node. - * - * @details - * Let `t_dim = node.getTDim()` (in 2D this is 4) and `kp1_d = (k+1)^2` with - * `k = node.getOrder()`. For each child index `i` we compute its child index - * difference `l_b = l[1] - l[0]` and accumulate - * - * \f[ - * \mathrm{vec\_o}[i,(p,j)] - * \;+=\; - * \sum_{k=0}^{K} - * J_{2k+p+j}^{(l_b)} \cdot \mathrm{CC}[k](p,j), - * \f] - * - * writing the final `vec_o` into the node coefficient buffer without further - * rescaling in this routine. If `imaginary == true`, the imaginary parts of - * the \(J\)-integrals are used; otherwise the real parts are used. 
- * - * @pre - * - `this->J_power_inetgarls[node.getScale() + 1]` is allocated and populated. - * - `cross_correlation->Matrix[k]` exists for all accessed `k` and is - * indexable at `(p,j)`, with `0 <= p,j <= node.getOrder()`. - */ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { - // Node configuration - int t_dim = node.getTDim(); // e.g. 4 in 2D - int kp1_d = node.getKp1_d(); // (k + 1)^2 + int t_dim = node.getTDim(); + int kp1_d = node.getKp1_d(); VectorXd vec_o = VectorXd::Zero(t_dim * kp1_d); const NodeIndex<2> &idx = node.getNodeIndex(); - // Access precomputed J-power integrals for the node scale (+1 by convention). auto &J_power_inetgarls = *this->J_power_inetgarls[node.getScale() + 1]; - // Loop over children and local basis pairs (p, j) for (int i = 0; i < t_dim; i++) { NodeIndex<2> l = idx.child(i); int l_b = l[1] - l[0]; @@ -164,7 +70,6 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { int vec_o_segment_index = 0; for (int p = 0; p <= node.getOrder(); p++) for (int j = 0; j <= node.getOrder(); j++) { - // Accumulate up to the largest admissible 2k+p+j supported by J_power_inetgarls[l_b] for (int k = 0; 2 * k + p + j < J_power_inetgarls[l_b].size(); k++) { double J; if (this->imaginary) @@ -172,8 +77,6 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { else J = J_power_inetgarls[l_b][2 * k + p + j].real(); - // Note: Eigen reads matrices row-major from file by default in this setup; - // hence the comment about transposition in the original code. vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += J * cross_correlation->Matrix[k](p, j); } @@ -181,7 +84,6 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { } } - // Write assembled values into the node coefficient buffer (no additional scaling here). 
double *coefs = node.getCoefs(); for (int i = 0; i < t_dim * kp1_d; i++) { coefs[i] = vec_o(i); diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h index f2a68295f..917f00fed 100644 --- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h +++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h @@ -25,6 +25,33 @@ #pragma once +/** + * @file + * @brief Time-evolution calculator based on cross-correlation kernels. + * + * @details + * This header declares a node-local calculator that evaluates contributions + * required for (imaginary or real time) Schrödinger evolution using + * precomputed *J-power* integrals and a cross-correlation driver. + * + * Conceptually, for a two-dimensional function tree \f$f(\mathbf r)\f$ the + * calculator applies (per node) a correlation-type update of the form + * \f[ + * g(\mathbf r) \;=\; \big(K * f\big)(\mathbf r) + * \;=\; \int_{\mathbb R^2} K(\mathbf r - \mathbf r')\, f(\mathbf r')\, d\mathbf r' , + * \f] + * where the kernel \f$K\f$ and its (power) moments are provided through + * the cross-correlation infrastructure and the \c JpowerIntegrals table. + * + * The boolean switch #imaginary selects which component of the complex-valued + * kernel (or of the assembled integral) is used: + * - `imaginary == false` → **real** part; + * - `imaginary == true` → **imaginary** part. + * + * The class is invoked by the tree execution engine (see @ref TreeCalculator) + * and operates independently on each @ref MWNode. + */ + #include "TreeCalculator.h" #include "core/CrossCorrelationCache.h" #include "core/SchrodingerEvolution_CrossCorrelation.h" @@ -32,34 +59,111 @@ namespace mrcpp { -/** @class TimeEvolution_CrossCorrelationCalculator - * - * @brief An efficient way to calculate ... (work in progress) +/** + * @class TimeEvolution_CrossCorrelationCalculator + * @brief Node calculator for Schrödinger time evolution via cross-correlation. 
* - * @details An efficient way to calculate ... having the form - * \f$ \ldots = \ldots \f$ + * @details + * This calculator evaluates nodewise contributions needed for real- or + * imaginary-time propagation in 2D using a cross-correlation representation + * of the evolution operator. Precomputed integrals of the form + * \f$ J_m = \int x^m\,K(x)\,dx \f$ (and higher-dimensional analogs) are + * supplied through a map of @ref JpowerIntegrals instances indexed by + * scale (the entry for `node.getScale() + 1` is looked up for each node). * + * ### Responsibilities + * - Pull required cross-correlation data from a + * @ref SchrodingerEvolution_CrossCorrelation instance. + * - Select **real** or **imaginary** contribution according to #imaginary. + * - Assemble the per-node update and write the result to the output node. * + * ### Threading / Parallelism + * The class itself holds only non-owning pointers and simple references + * to shared, read-only tables. It is thus re-entrant across nodes. + * Synchronization and scheduling are handled at the @ref TreeCalculator layer. * + * @note All pointer members are **non-owning**; the caller must ensure they + * remain valid for the lifetime of the calculator. */ class TimeEvolution_CrossCorrelationCalculator final : public TreeCalculator<2> { public: - TimeEvolution_CrossCorrelationCalculator(std::map &J, SchrodingerEvolution_CrossCorrelation *cross_correlation, bool imaginary) + /** + * @brief Construct the calculator with auxiliary integral tables and a driver. + * + * @param[in] J + * Map from scale (looked up as `node.getScale() + 1`) to the corresponding + * @ref JpowerIntegrals table. The calculator does **not** take ownership. + * @param[in] cross_correlation + * Pointer to a @ref SchrodingerEvolution_CrossCorrelation driver that + * exposes kernel accessors / caches needed to assemble the correlation + * at node level. Non-owning. + * @param[in] imaginary + * If `true`, use the **imaginary part** of the accumulated contribution; + * otherwise use the **real part**.
+ * + * @warning The tables pointed to by the map entries and the driver pointer must outlive this calculator (the map itself is copied). + */ + TimeEvolution_CrossCorrelationCalculator(std::map &J, + SchrodingerEvolution_CrossCorrelation *cross_correlation, + bool imaginary) : J_power_inetgarls(J) , cross_correlation(cross_correlation) , imaginary(imaginary) {} - // private: - std::map J_power_inetgarls; - SchrodingerEvolution_CrossCorrelation *cross_correlation; - - /// @brief If False then the calculator is using th real part of integrals, otherwise - the imaginary part. - bool imaginary; + /** + * @brief Compute the contribution for one output node. + * + * @param[in,out] node + * The node to be written. The implementation typically: + * 1) gathers the necessary kernel moments / cache entries, + * 2) accumulates the cross-correlation at the node resolution, + * 3) commits coefficients and refreshes norms/flags. + * + * @note The exact algebra (e.g., reconstruction/compression steps) is + * implemented in the corresponding source file. + */ void calcNode(MWNode<2> &node) override; - // template + /** + * @brief Apply the cross-correlation operator at the granularity of a single node. + * + * @param[in,out] node The node to which the operator is applied. + * + * @details + * This helper encapsulates the node-local application of the correlation + * kernel using the caches provided by #cross_correlation and the moment + * tables from #J_power_inetgarls. The #imaginary flag governs whether + * the real or imaginary component of the final integral is extracted. + * + * @see calcNode + */ void applyCcc(MWNode<2> &node); - // template void applyCcc(MWNode<2> &node, CrossCorrelationCache &ccc); + + // --------------------------------------------------------------------- + // Public state (non-owning) — kept public to match existing interfaces. + // --------------------------------------------------------------------- + + /** + * @brief Precomputed kernel moment/integral tables, indexed by scale (`node.getScale() + 1`).
+ * + * @note Non-owning pointers; the map must remain valid externally. + */ + std::map J_power_inetgarls; + + /** + * @brief Cross-correlation driver (non-owning). + * + * Provides access to kernel caches and auxiliary data needed to assemble + * the correlation at a given node. + */ + SchrodingerEvolution_CrossCorrelation *cross_correlation; + + /** + * @brief Component selector for complex contributions. + * + * If `false`, the **real** part is used; if `true`, the **imaginary** part. + */ + bool imaginary; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/TreeAdaptor.h b/src/treebuilders/TreeAdaptor.h index 80cecb09e..8698034cf 100644 --- a/src/treebuilders/TreeAdaptor.h +++ b/src/treebuilders/TreeAdaptor.h @@ -25,36 +25,137 @@ #pragma once +/** + * @file + * @brief Generic adapter that decides whether tree nodes should be refined (split). + * + * @details + * `TreeAdaptor` provides a lightweight, policy-style interface used by the tree + * execution engine to determine which nodes of a @ref MWTree should be split + * (refined). Concrete adaptors implement the decision rule in the protected + * pure-virtual @ref splitNode method. + * + * Typical workflow: + * 1. Construct a concrete adaptor (e.g., one that inspects norms, wavelet + * content, error estimates, etc.). + * 2. Optionally set a maximum refinement scale with @ref setMaxScale. + * 3. Call @ref splitNodeVector to examine an input list of nodes and append any + * newly-created children to an output list for further processing. + * + * The adaptor enforces two built-in guards in @ref splitNodeVector: + * - **Branch nodes** (internal/structural nodes without coefficients) are + * skipped. + * - Nodes deeper than the allowed scale threshold are skipped: + * `node.getScale() + 2 > maxScale`. + * + * @note The `+2` slack prevents overshooting the configured refinement ceiling + * when subsequent passes may still need headroom (implementation detail). 
+ */ + #include "MRCPP/mrcpp_declarations.h" #include "trees/MWNode.h" namespace mrcpp { -template class TreeAdaptor { +/** + * @class TreeAdaptor + * @brief Abstract base class for node-refinement policies. + * + * @tparam D Spatial dimension of the tree. + * @tparam T Coefficient value type (e.g., `double`, `ComplexDouble`). + * + * @details + * Concrete adaptors derive from this class and implement @ref splitNode to + * express the criterion that decides whether a given leaf node should be + * refined. The base class owns the *maximum scale* guard and the helper that + * performs splitting and collects children. + */ +template +class TreeAdaptor { public: - TreeAdaptor(int ms) + /** + * @brief Construct with an initial maximum refinement scale. + * @param ms Maximum scale (depth) allowed for refinement. + * + * Nodes at scales for which `node.getScale() + 2 > ms` will **not** be + * split by @ref splitNodeVector, regardless of the policy decision. + */ + explicit TreeAdaptor(int ms) : maxScale(ms) {} + + /// Virtual destructor (polymorphic base). virtual ~TreeAdaptor() = default; + /** + * @brief Change the maximum refinement scale. + * @param ms New ceiling for refinement depth. + * + * @see maxScale + */ void setMaxScale(int ms) { this->maxScale = ms; } + /** + * @brief Apply the refinement policy to a batch of nodes and collect children. + * + * @param[out] out + * Vector that will receive pointers to the **newly created children** + * (across all nodes that are decided to be split). + * @param[in] inp + * Vector of candidate nodes to be tested for splitting. + * + * @details + * For each node in @p inp the routine: + * - skips the node if it is a **branch node** (see `MWNode::isBranchNode`); + * - enforces the scale guard `node.getScale() + 2 > maxScale`; + * - calls the policy @ref splitNode; if `true`, it creates the children + * (`node.createChildren(true)`) and appends them to @p out. 
+ * + * @note Ownership of nodes remains with the tree; this function only pushes + * pointers to existing/newly created nodes into @p out. + */ void splitNodeVector(MWNodeVector &out, MWNodeVector &inp) const { for (int n = 0; n < inp.size(); n++) { MWNode &node = *inp[n]; - // Can be BranchNode in operator application + + // Skip structural nodes (no coefficients) if (node.isBranchNode()) continue; + + // Enforce maximum scale guard with a +2 safety margin if (node.getScale() + 2 > this->maxScale) continue; + + // Delegate the decision to the concrete adaptor if (splitNode(node)) { node.createChildren(true); - for (int i = 0; i < node.getNChildren(); i++) out.push_back(&node.getMWChild(i)); + for (int i = 0; i < node.getNChildren(); i++) { + out.push_back(&node.getMWChild(i)); + } } } } protected: + /** + * @brief Maximum allowed refinement scale (depth) for newly created nodes. + * + * Nodes for which `node.getScale() + 2 > maxScale` are not considered for + * splitting within @ref splitNodeVector. + */ int maxScale; + /** + * @brief Decide whether a given leaf node should be refined. + * + * @param node Candidate node (guaranteed non-branch and within scale guard). + * @return `true` if the node must be split, `false` otherwise. + * + * @details + * Derived classes implement this method to express an application-specific + * refinement criterion (e.g., wavelet-norm threshold, operator bandwidth, + * error estimator, etc.). This method must be **pure** (no side-effects) + * with respect to the tree topology; @ref splitNodeVector performs the + * actual splitting. 
+ */ virtual bool splitNode(const MWNode &node) const = 0; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/TreeBuilder.cpp b/src/treebuilders/TreeBuilder.cpp index 91d25e47d..cb64ad044 100644 --- a/src/treebuilders/TreeBuilder.cpp +++ b/src/treebuilders/TreeBuilder.cpp @@ -23,23 +23,6 @@ * */ -/** - * @file TreeBuilder.cpp - * @brief Generic driver that orchestrates adaptive construction, refinement, - * and coefficient (re)calculation of multiwavelet trees. - * - * @details - * A TreeBuilder manages the high-level loop: - * 1) pick a work set of nodes (via a TreeCalculator-provided policy), - * 2) compute coefficients on those nodes (calculator), - * 3) estimate norms to drive thresholding, - * 4) ask the TreeAdaptor where to split next, - * 5) iterate until the work set is empty or a maximum iteration is reached. - * - * The builder never changes numerical kernels; it delegates all math to - * a TreeCalculator and all grid-refinement policy to a TreeAdaptor. - */ - #include "TreeBuilder.h" #include "TreeAdaptor.h" #include "TreeCalculator.h" @@ -52,28 +35,6 @@ namespace mrcpp { -/** - * @brief Adaptive build of a tree using a calculator/adaptor pair. - * - * @param[in,out] tree Target tree to be populated/refined. - * @param[in,out] calculator Computes node coefficients & provides initial work set. - * @param[in,out] adaptor Decides which nodes to split next (refinement policy). - * @param[in] maxIter Maximum refinement iterations; negative => unbounded. - * - * @details - * Loop invariant: - * - `workVec` holds the nodes to be (re)computed at the current iteration. - * - After computing, the builder updates an approximate squared norm - * (scaling + wavelet) to drive relative thresholding elsewhere. - * - The adaptor produces the next `workVec` by splitting according to - * its policy. 
If `maxIter >= 0` and `iter >= maxIter`, splitting is - * disabled and the loop terminates after coefficients are computed. - * - * @note - * The approximate norm written into `tree.squareNorm` is for thresholding and - * progress reporting only. A precise norm is expected to be recomputed later - * (e.g., after a bottom-up transform). - */ template void TreeBuilder::build(MWTree &tree, TreeCalculator &calculator, @@ -85,39 +46,33 @@ void TreeBuilder::build(MWTree &tree, MWNodeVector *newVec = nullptr; MWNodeVector *workVec = calculator.getInitialWorkVector(tree); - double sNorm = 0.0; // accumulated scaling contribution (approx.) - double wNorm = 0.0; // accumulated wavelet contribution (approx.) + double sNorm = 0.0; + double wNorm = 0.0; int iter = 0; while (workVec->size() > 0) { printout(10, " -- #" << std::setw(3) << iter << ": Calculated "); printout(10, std::setw(6) << workVec->size() << " nodes "); - // 1) Compute coefficients on current work set calc_t.resume(); calculator.calcNodeVector(*workVec); calc_t.stop(); - // 2) Update approximate norms used for thresholding/progress only norm_t.resume(); if (iter == 0) sNorm = calcScalingNorm(*workVec); wNorm += calcWaveletNorm(*workVec); if (sNorm < 0.0 || wNorm < 0.0) { - // Propagate "unknown" / invalid norm tree.squareNorm = -1.0; } else { - // Approximate norm (exact one will be recomputed later) tree.squareNorm = sNorm + wNorm; } println(10, std::setw(24) << tree.squareNorm); norm_t.stop(); - // 3) Decide and perform refinement for the next iteration split_t.resume(); newVec = new MWNodeVector; if (iter >= maxIter && maxIter >= 0) { - // Respect iteration cap: stop splitting workVec->clear(); } adaptor.splitNodeVector(*newVec, *workVec); @@ -128,7 +83,6 @@ void TreeBuilder::build(MWTree &tree, iter++; } - // Invalidate cached end-node table because the grid changed tree.resetEndNodeTable(); delete workVec; @@ -138,17 +92,6 @@ void TreeBuilder::build(MWTree &tree, print::time(10, "Time split", split_t); } 
-/** - * @brief Remove all coefficients from the tree (fixed grid), using the calculator - * to "clear" node data. - * - * @param[in,out] tree Target MW tree. - * @param[in,out] calculator Calculator invoked to clear coefficients for nodes. - * - * @details - * - The grid topology is preserved. - * - `tree.squareNorm` is reset. - */ template void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Clearing tree"); @@ -156,7 +99,7 @@ void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculat Timer clean_t; MWNodeVector nodeVec; tree_utils::make_node_table(tree, nodeVec); - calculator.calcNodeVector(nodeVec); // calculator is responsible for zeroing/clearing + calculator.calcNodeVector(nodeVec); clean_t.stop(); tree.clearSquareNorm(); @@ -167,28 +110,13 @@ void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculat print::separator(10, ' '); } -/** - * @brief Split (refine) the current leaf nodes according to an adaptor policy. - * - * @param[in,out] tree Target tree to refine. - * @param[in,out] adaptor Adaptor that decides which nodes to split. - * @param[in] passCoefs If true, transfer parent coefficients to children - * (preserving function representation). - * - * @return Number of newly created child nodes (i.e., number of splits * children). - * - * @details - * - The end-node table is reset after refinement. - * - If `passCoefs == true` and a refined node remains a branch node, the parent - * distributes its coefficients to the children (e.g., via projection / exact transfer). 
- */ template int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const { println(10, " == Refining tree"); Timer split_t; - MWNodeVector newVec; // newly created nodes (unused beyond counting) - MWNodeVector *workVec = tree.copyEndNodeTable(); // current leaves + MWNodeVector newVec; + MWNodeVector *workVec = tree.copyEndNodeTable(); adaptor.splitNodeVector(newVec, *workVec); @@ -196,7 +124,6 @@ int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, boo for (int i = 0; i < workVec->size(); i++) { MWNode &node = *(*workVec)[i]; if (node.isBranchNode()) { - // Transfer coefficients from parent to children node.giveChildrenCoefs(true); } } @@ -216,17 +143,6 @@ int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, boo return newVec.size(); } -/** - * @brief Recalculate coefficients on the calculator-provided work set - * without refinement. - * - * @param[in,out] tree Target tree. - * @param[in,out] calculator Calculator used to compute node coefficients. - * - * @details - * Computes on the initial work vector (as defined by the calculator) and then - * recomputes the exact squared norm of the tree. - */ template void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Calculating tree"); @@ -245,12 +161,6 @@ void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculato print::time(10, "Time calc", calc_t); } -/** - * @brief Sum of scaling contributions (approximate) across a vector of nodes. - * - * @param[in] vec Node vector from the current iteration. - * @return Approximate sum of scaling norms for nodes with depth >= 0. - */ template double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { double sNorm = 0.0; @@ -261,12 +171,6 @@ double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { return sNorm; } -/** - * @brief Sum of wavelet contributions (approximate) across a vector of nodes. - * - * @param[in] vec Node vector from the current iteration. 
- * @return Approximate sum of wavelet norms for nodes with depth >= 0. - */ template double TreeBuilder::calcWaveletNorm(const MWNodeVector &vec) const { double wNorm = 0.0; diff --git a/src/treebuilders/TreeBuilder.h b/src/treebuilders/TreeBuilder.h index 81c32afe6..8f45cf0e9 100644 --- a/src/treebuilders/TreeBuilder.h +++ b/src/treebuilders/TreeBuilder.h @@ -29,16 +29,123 @@ namespace mrcpp { -template class TreeBuilder final { +/** + * @class TreeBuilder + * @brief Orchestrates adaptive construction and refinement of @ref MWTree objects. + * + * @tparam D Spatial dimension of the tree. + * @tparam T Coefficient value type (e.g., `double`, `ComplexDouble`). + * + * @details + * `TreeBuilder` coordinates three roles during adaptive computations: + * - a **calculator** (@ref TreeCalculator) that evaluates node data + * (coefficients, norms, metadata) on the current grid, + * - an **adaptor** (@ref TreeAdaptor) that decides which nodes should + * be refined (split), + * - the **tree** (@ref MWTree) that stores topology and coefficients. + * + * A typical adaptive build loop is: + * 1. @ref calc to populate coefficients/norms on the current grid, + * 2. @ref split to refine nodes selected by the adaptor, + * 3. repeat (1–2) until no more splits occur or `maxIter` is reached. + * + * Some calculators maintain internal statistics/timers and may need a final + * post-processing step; @ref build calls into the calculator appropriately. + */ +template +class TreeBuilder final { public: - void build(MWTree &tree, TreeCalculator &calculator, TreeAdaptor &adaptor, int maxIter) const; + /** + * @brief Adaptive build: iterate (calc → split) up to @p maxIter times. + * + * @param[in,out] tree Target tree to (re)build/refine. + * @param[in,out] calculator Calculator used to fill coefficients/norms on the current grid. + * @param[in,out] adaptor Refinement policy deciding which nodes to split. 
+ * @param[in] maxIter Maximum number of refinement iterations; a negative value means unbounded, 0 disables refinement. + * + * @details + * Starting from the calculator's initial work vector, each iteration: + * - computes the nodes of the current work vector (`calculator.calcNodeVector`), + * - updates the approximate squared norm stored in the tree, + * - asks the adaptor to split those nodes (`adaptor.splitNodeVector`); + * the newly created children become the next work vector, + * - skips the splitting step once `iter >= maxIter` (only when `maxIter >= 0`). + * + * The loop ends when the work vector is empty; the end-node table is then reset. + */ + void build(MWTree &tree, + TreeCalculator &calculator, + TreeAdaptor &adaptor, + int maxIter) const; + + /** + * @brief Clear node data in @p tree using the provided @p calculator policy. + * + * @param[in,out] tree Tree whose nodes should be cleared. + * @param[in,out] calculator Calculator that defines how to reset per-node state. + * + * @details + * Resets coefficient flags and cached norms to a consistent "empty" state. + * This is useful before reusing a tree structure for another computation. + */ void clear(MWTree &tree, TreeCalculator &calculator) const; + + /** + * @brief Compute/refresh coefficients and norms on the current grid. + * + * @param[in,out] tree Tree to evaluate. + * @param[in,out] calculator Calculator that implements per-node computation. + * + * @details + * Traverses the active nodes (calculator-dependent strategy) and ensures + * each leaf has consistent coefficients (scaling/wavelet) and derived norms. + */ void calc(MWTree &tree, TreeCalculator &calculator) const; + + /** + * @brief Refine the tree topology according to @p adaptor policy. + * + * @param[in,out] tree Tree subject to refinement. + * @param[in,out] adaptor Adaptor deciding which nodes to split. + * @param[in] passCoefs + * If `true`, propagate or initialize child coefficients immediately + * (calculator-dependent behavior); if `false`, only topology changes + * are performed and coefficients are left for a subsequent @ref calc. + * + * @return Number of **new nodes** created (sum of all children inserted). 
+ * + * @details + * The method collects candidate leaves, applies the adaptor’s + * `splitNodeVector`, and updates the tree topology. Implementations may + * perform light-weight coefficient seeding when `passCoefs==true` to + * improve the next calculation pass. + */ int split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const; private: + /** + * @brief Aggregate the total scaling-norm over a set of nodes. + * + * @param vec Vector of node pointers to be reduced. + * @return Sum (or calculator-defined aggregation) of scaling coefficients' norm. + * + * @details + * Utility used by build loops for convergence checks and diagnostics. + * The precise definition of “scaling norm” follows the node’s basis. + */ double calcScalingNorm(const MWNodeVector &vec) const; + + /** + * @brief Aggregate the total wavelet-norm over a set of nodes. + * + * @param vec Vector of node pointers to be reduced. + * @return Sum (or calculator-defined aggregation) of wavelet coefficients' norm. + * + * @details + * Utility used by build loops for refinement heuristics and stopping criteria. + * The precise definition of “wavelet norm” follows the node’s basis. + */ double calcWaveletNorm(const MWNodeVector &vec) const; }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/TreeCalculator.h b/src/treebuilders/TreeCalculator.h index 1bf41f407..5d10e3c10 100644 --- a/src/treebuilders/TreeCalculator.h +++ b/src/treebuilders/TreeCalculator.h @@ -29,13 +29,69 @@ namespace mrcpp { -template class TreeCalculator { +/** + * @class TreeCalculator + * @brief Abstract base for per-node computations on @ref MWTree. + * + * @tparam D Spatial dimension of the multiwavelet tree. + * @tparam T Coefficient value type (e.g. `double`, `ComplexDouble`). 
+ * + * @details + * A `TreeCalculator` defines how to **evaluate/update a single node** + * (via the pure-virtual @ref calcNode) and provides utilities to apply + * that logic over a set of nodes, possibly in parallel. + * + * Typical usage (in conjunction with @ref TreeBuilder): + * - derive a calculator and implement @ref calcNode, + * - obtain a worklist with @ref getInitialWorkVector, + * - call @ref calcNodeVector to process all nodes, + * - optionally override @ref postProcess for statistics/timers. + * + * ### Parallelism + * @ref calcNodeVector uses OpenMP (if enabled) with `schedule(guided)` + * and a thread count provided by the `mrcpp_get_num_threads()` macro. + * Implementations of @ref calcNode must be **thread-safe** w.r.t. other + * nodes in the worklist. Avoid shared mutable state unless properly + * synchronized. + */ +template +class TreeCalculator { public: + /// @brief Default constructor. TreeCalculator() = default; + + /// @brief Virtual destructor. virtual ~TreeCalculator() = default; - virtual MWNodeVector *getInitialWorkVector(MWTree &tree) const { return tree.copyEndNodeTable(); } + /** + * @brief Build the initial list of nodes to process. + * + * @param[in,out] tree The tree whose nodes should be evaluated. + * @return Heap-allocated vector of node pointers representing the initial + * work set (typically the **current leaf nodes**). + * + * @details + * The default implementation returns a copy of the tree's end-node table + * (`tree.copyEndNodeTable()`). Callers are responsible for deleting the + * returned container when done. + */ + virtual MWNodeVector* getInitialWorkVector(MWTree &tree) const { + return tree.copyEndNodeTable(); + } + /** + * @brief Evaluate all nodes in @p nodeVec (parallelized when available). + * + * @param[in,out] nodeVec Container of node pointers to be processed. + * + * @details + * Invokes @ref calcNode for each entry. 
Uses OpenMP with guided scheduling + * and `mrcpp_get_num_threads()` to determine the thread count. + * After processing all nodes, calls @ref postProcess once. + * + * @note The container is treated as read-only regarding its topology; + * implementations of @ref calcNode should not insert/remove nodes. + */ virtual void calcNodeVector(MWNodeVector &nodeVec) { #pragma omp parallel shared(nodeVec) num_threads(mrcpp_get_num_threads()) { @@ -50,8 +106,27 @@ template class TreeCalculator { } protected: + /** + * @brief Perform the calculator's core work on a single node. + * + * @param[in,out] node Target node. Implementations typically: + * - ensure transforms are in the correct space (MW/CV) as needed, + * - compute/update coefficients and derived norms/flags, + * - leave the node in a consistent state for subsequent passes. + * + * @warning This method is called concurrently on different nodes. + * Do not mutate shared global state without synchronization. + */ virtual void calcNode(MWNode &node) = 0; + + /** + * @brief Optional hook executed once after @ref calcNodeVector finishes. + * + * @details + * Override to flush accumulators, update statistics, print timers, etc. + * Default implementation is a no-op. + */ virtual void postProcess() {} }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/WaveletAdaptor.h b/src/treebuilders/WaveletAdaptor.h index 829039bf4..231c16f7d 100644 --- a/src/treebuilders/WaveletAdaptor.h +++ b/src/treebuilders/WaveletAdaptor.h @@ -31,27 +31,114 @@ namespace mrcpp { -template class WaveletAdaptor : public TreeAdaptor { +/** + * @class WaveletAdaptor + * @brief Refinement policy based on wavelet-norm error indicators. + * + * @tparam D Spatial dimension of the multiwavelet tree. + * @tparam T Coefficient value type (e.g., double, ComplexDouble). 
+ * + * @details + * This adaptor decides whether a node should be *split* (refined) by + * comparing its wavelet contribution against a precision target. + * Internally it relies on @ref mrcpp::tree_utils::split_check, which + * examines a node's (accumulated) wavelet norm relative to: + * + * - a global precision @ref prec (optionally absolute via @ref absPrec), + * - a user-provided, index-dependent scaling @ref precFunc (defaults to 1), + * - an extra scale-dependent attenuation factor @ref splitFac + * (used to bias refinement with depth). + * + * If the threshold is exceeded, @ref splitNode requests refinement. + * + * @code{.cpp} + * // Typical usage: + * WaveletAdaptor<3, double> adapt(1e-6, 20); // prec, maxScale + * adapt.setPrecFunction([](const NodeIndex<3>&){ return 2.0; }); // relax everywhere: threshold = 2 * prec (use < 1 to tighten) + * TreeBuilder<3, double> builder; + * builder.split(tree, adapt, false); // passCoefs = false + * @endcode + */ +template +class WaveletAdaptor : public TreeAdaptor { public: + /** + * @brief Construct a wavelet-based adaptor. + * + * @param pr Global target precision (relative unless @p ap is true). + * @param ms Maximum refinement scale (forwarded to @ref TreeAdaptor). + * @param ap If true, interpret @p pr as an **absolute** tolerance; + * otherwise use a **relative** tolerance w.r.t. function norm. + * @param sf Split-factor controlling depth bias (≥ 0). When > 0, + * the threshold is scaled by \f$2^{-0.5\,sf\,(s+1)}\f$ at scale s, + * encouraging deeper refinement only when warranted. + */ WaveletAdaptor(double pr, int ms, bool ap = false, double sf = 1.0) : TreeAdaptor(ms) , absPrec(ap) , prec(pr) , splitFac(sf) {} + + /// @brief Virtual destructor. ~WaveletAdaptor() override = default; - void setPrecFunction(const std::function &idx)> &prec_func) { this->precFunc = prec_func; } + /** + * @brief Provide a spatially varying precision multiplier. + * + * @param prec_func Function returning a factor (default 1.0) for + * a given node index. 
The effective threshold becomes + * `prec * prec_func(idx)` (plus depth scaling via @ref splitFac). + * + * @note Use this to tighten or relax refinement in specific regions, + * e.g. around features of interest. + */ + void setPrecFunction(const std::function &idx)> &prec_func) { + this->precFunc = prec_func; + } protected: + /// @brief If true, treat @ref prec as an absolute tolerance; otherwise relative. bool absPrec; + + /// @brief Base precision target used by the wavelet thresholding rule. double prec; + + /** + * @brief Scale-dependent attenuation of the threshold. + * + * @details A positive value reduces the threshold with depth, making + * refinement stricter at finer scales. Set to 0.0 to disable. + */ double splitFac; - std::function &idx)> precFunc = [](const NodeIndex &idx) { return 1.0; }; + /** + * @brief Per-node precision multiplier (defaults to identity). + * + * @details The effective threshold is `prec * precFunc(idx)` before + * applying the depth-dependent @ref splitFac scaling. + */ + std::function &idx)> precFunc = + [](const NodeIndex & /*idx*/) { return 1.0; }; + + /** + * @brief Decide whether a node should be split. + * + * @param node The candidate node. + * @return `true` if the node's wavelet norm exceeds the computed threshold. + * + * @details + * Computes a local tolerance as: + * \f[ + * \tau = \text{prec} \times \text{precFunc}(\text{idx}) + * \f] + * (relative to the function norm unless @ref absPrec is set), + * then applies an additional depth-dependent factor governed by + * @ref splitFac, and finally compares against the node's wavelet norm. 
+ */ bool splitNode(const MWNode &node) const override { auto precFac = this->precFunc(node.getNodeIndex()); // returns 1.0 by default return tree_utils::split_check(node, this->prec * precFac, this->splitFac, this->absPrec); } }; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/complex_apply.cpp b/src/treebuilders/complex_apply.cpp index 026152347..406d8baf0 100644 --- a/src/treebuilders/complex_apply.cpp +++ b/src/treebuilders/complex_apply.cpp @@ -23,52 +23,6 @@ * */ -/** - * @file complex_apply.cpp - * @brief Complex-valued application of multiresolution convolution operators. - * - * @details - * This module provides a **complex** front-end to the real-valued adaptive - * application pipeline used throughout MRCPP. A complex operator - * \f$ \mathcal{O} = \mathcal{O}_\mathrm{R} + i\,\mathcal{O}_\mathrm{I} \f$ - * acting on a complex function - * \f$ f = f_\mathrm{R} + i\,f_\mathrm{I} \f$ - * is evaluated via the standard decomposition: - * \f[ - * \mathcal{O} f - * = (\mathcal{O}_\mathrm{R} f_\mathrm{R} - \mathcal{O}_\mathrm{I} f_\mathrm{I}) - * \;+\; - * i\,(\mathcal{O}_\mathrm{I} f_\mathrm{R} + \mathcal{O}_\mathrm{R} f_\mathrm{I}). - * \f] - * - * Internally, the routine delegates every real application - * \f$ \mathcal{O}_\bullet f_\bullet \f$ to the standard adaptive `apply` for - * real data structures (see `apply.h`), and then combines the four real - * results to produce the complex output. - * - * ### Precision model and adaptivity - * The same adaptive refinement loop is honored as in the real case: - * - **Relative precision** (default): refine where local wavelet details exceed - * a fraction of the local norm. - * - **Absolute precision** (`absPrec = true`): refine until local details fall - * below a fixed absolute threshold. 
- * - * The `prec` parameter and `maxIter` semantics are identical to the real-valued - * `apply`: - * - `prec < 0` or `maxIter = 0` disables refinement, - * - `maxIter < 0` removes the iteration bound. - * - * ### Preconditions - * - All real and imaginary parts (operator and function) must share the same - * `MultiResolutionAnalysis`. - * - The output complex object should reference **empty** (uninitialized) trees - * at entry; the routine will construct their contents. - * - * @note This is a thin complex wrapper; all heavy lifting (bandwidth computation, - * adaptive splitting, transformations, norm updates) happens in the - * underlying real `apply`. - */ - #include "complex_apply.h" #include "ConvolutionCalculator.h" #include "CopyAdaptor.h" @@ -88,37 +42,6 @@ namespace mrcpp { -/** - * @brief Apply a complex convolution operator to a complex function (adaptive). - * - * @tparam D Spatial dimension (1, 2, or 3). - * - * @param[in] prec Target build precision for the adaptive application. - * @param[out] out Complex output function tree (real and imaginary parts filled). - * @param[in] oper Complex convolution operator (real and imaginary parts provided). - * @param[in] inp Complex input function tree (real and imaginary parts provided). - * @param[in] maxIter Maximum refinement iterations; `-1` means unbounded. - * @param[in] absPrec Use absolute (`true`) versus relative (`false`, default) precision. - * - * @details - * The routine evaluates - * \f[ - * \Re(\mathcal{O}f) = \mathcal{O}_\mathrm{R} f_\mathrm{R} - \mathcal{O}_\mathrm{I} f_\mathrm{I},\quad - * \Im(\mathcal{O}f) = \mathcal{O}_\mathrm{I} f_\mathrm{R} + \mathcal{O}_\mathrm{R} f_\mathrm{I} - * \f] - * by two real `apply` calls per part, followed by linear combinations via `add`. - * Temporary real trees are allocated on the same MRA as the input. 
- * - * ### Implementation notes - * - The real building blocks `apply(prec, ...)` are identical to the scalar path - * and include: bandwidth precomputation, adaptive refinement, top-down coarse - * contributions, bottom-up transforms, and norm updates. - * - Output parts are formed with `add(prec, ...)` to maintain consistent grid - * and transformation state. - * - * @warning The MRA of `inp.real`, `inp.imaginary`, `oper.real`, and - * `oper.imaginary` must match. No cross-MRA application is supported. - */ template void apply(double prec, ComplexObject> &out, @@ -147,4 +70,4 @@ template void apply<1>(double prec, int maxIter, bool absPrec); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/complex_apply.h b/src/treebuilders/complex_apply.h index 8ed9a0f17..3d634e5a9 100644 --- a/src/treebuilders/complex_apply.h +++ b/src/treebuilders/complex_apply.h @@ -29,28 +29,100 @@ namespace mrcpp { -/// @brief Stores pointers to real and imaginary parts of tree objects. -/// @tparam MWClass -template struct ComplexObject { - MWClass *real; - MWClass *imaginary; - - ComplexObject(MWClass &realPart, MWClass &imaginaryPart) - : real(&realPart) - , imaginary(&imaginaryPart) {} +/** + * @file + * @brief Complex wrapper utilities for multiwavelet trees and operators. + * + * @details + * This header declares a lightweight wrapper, @ref ComplexObject, that groups + * pointers to the real and imaginary parts of an object (e.g., a function + * tree or a convolution operator). It also declares an `apply` routine that + * applies a (possibly complex) convolution operator to a (possibly complex) + * function, writing the result to a (possibly complex) output. + * + * The pattern keeps real and imaginary parts as separate objects for memory + * locality and to reuse existing real-valued kernels, while allowing users to + * orchestrate complex arithmetic at a higher level. 
+ */ + +/** + * @brief Aggregates pointers to the real and imaginary parts of an object. + * + * @tparam MWClass Underlying class type of the wrapped objects + * (e.g., `FunctionTree` or `ConvolutionOperator`). + * + * @details + * The struct is a non-owning pair of pointers. It does **not** manage + * lifetime—callers must ensure both referenced objects outlive the wrapper. + * + * @note + * The members are intentionally public for ergonomic access in kernels. + */ +template +struct ComplexObject { + /** @brief Pointer to the real component (non-owning). */ + MWClass* real; + /** @brief Pointer to the imaginary component (non-owning). */ + MWClass* imaginary; + + /** + * @brief Construct from lvalue references to the real and imaginary parts. + * @param realPart Reference to the real component. + * @param imaginaryPart Reference to the imaginary component. + */ + ComplexObject(MWClass& realPart, MWClass& imaginaryPart) + : real(&realPart) + , imaginary(&imaginaryPart) {} }; // clang-format off //template class FunctionTree; //template class ConvolutionOperator; +/** + * @brief Apply a (complex) convolution operator to a (complex) function. + * + * @tparam D Spatial dimensionality of the multiwavelet representation. + * + * @param prec Target accuracy. If `absPrec == false`, this is interpreted + * as a **relative** tolerance; otherwise as an **absolute** tolerance. + * @param out Destination complex function trees (real/imag). On return, + * contains \f$ \text{oper} \{\text{inp}\} \f$ within the requested + * accuracy. + * @param oper Complex convolution operator (real/imag components). + * @param inp Input complex function trees to be transformed. + * @param maxIter Maximum number of adaptive refinement iterations. + * A negative value (default `-1`) leaves the iteration count unbounded; `0` disables refinement. + * @param absPrec When `true`, treat `prec` as absolute; when `false`, as relative. 
+ * + * @pre + * - `out.real`, `out.imaginary`, `inp.real`, `inp.imaginary`, + * `oper.real`, and `oper.imaginary` are non-null and represent + * consistent discretizations (same MRA/order/domain). + * + * @post + * - `out` holds the complex result. Implementations typically compute: + * \f[ + * \Re(\text{out}) = \Re(\text{oper})\Re(\text{inp}) + * - \Im(\text{oper})\Im(\text{inp}), + * \qquad + * \Im(\text{out}) = \Re(\text{oper})\Im(\text{inp}) + * + \Im(\text{oper})\Re(\text{inp}), + * \f] + * with adaptive refinement to honor `prec`. + * + * @note + * The exact refinement strategy and stopping criteria are backend-dependent. + * For reproducibility across runs/nodes, set the relevant MPI/OpenMP controls + * prior to calling. + */ template void apply ( - double prec, ComplexObject< FunctionTree > &out, - ComplexObject< ConvolutionOperator > &oper, ComplexObject< FunctionTree > &inp, + double prec, ComplexObject< FunctionTree >& out, + ComplexObject< ConvolutionOperator >& oper, ComplexObject< FunctionTree >& inp, int maxIter = -1, bool absPrec = false ); // clang-format on -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp index 11d94fc19..ca4c95a86 100644 --- a/src/treebuilders/grid.cpp +++ b/src/treebuilders/grid.cpp @@ -23,41 +23,6 @@ * */ -/** - * @file grid.cpp - * @brief Utilities for constructing, copying, clearing, and refining - * multiresolution grids and functions. - * - * @details - * This module provides a unified set of routines for: - * - * - **Uniform grid construction** by splitting all leaves a fixed number of times. - * - **Analytic-driven/adaptive grid construction** using a - * #mrcpp::RepresentableFunction as a splitter oracle. - * - **Gaussian-expansion–driven grid construction** that places resolution - * according to Gaussian positions and exponents (supports periodic and - * non-periodic worlds). 
- * - **Copying grids** (structure only) and **copying functions** (coefficients) - * between trees with the same #mrcpp::MultiResolutionAnalysis. - * - **Clearing** coefficients on an existing grid without altering its topology. - * - **Refining** an existing grid either uniformly, by precision-driven - * wavelet criteria, by another reference tree, or by an analytic function. - * - * All routines operate on #mrcpp::FunctionTree objects (and component-wise on - * #mrcpp::CompFunction where relevant). Behind the scenes, they use - * #mrcpp::TreeBuilder with different adaptors: - * - * - #mrcpp::SplitAdaptor: unconditional splitting. - * - #mrcpp::WaveletAdaptor: split by wavelet-based precision criterion. - * - #mrcpp::AnalyticAdaptor: split by analytic visibility/zero checks. - * - #mrcpp::CopyAdaptor: split to match an existing tree structure. - * - * @note Unless otherwise stated, all "build_grid" functions **extend** the - * current grid of the output tree; they do not clear it first. Use - * #copy_grid when you want the output to match another grid exactly - * (it clears first). - */ - #include "grid.h" #include "AnalyticAdaptor.h" #include "CopyAdaptor.h" @@ -73,46 +38,14 @@ namespace mrcpp { -/** - * @brief Build an **empty** grid by uniform refinement. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree whose grid is refined. - * @param[in] scales Number of uniform refinement sweeps to apply. - * - * @details - * Performs `scales` iterations of unconditional splitting on **all** current - * leaf nodes (using #mrcpp::SplitAdaptor). No coefficients are created; this - * only modifies the grid topology. - * - * @note Starts from the existing grid of @p out and extends it. 
- */ template void build_grid(FunctionTree &out, int scales) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; DefaultCalculator calculator; - SplitAdaptor adaptor(maxScale, true); // Splits all nodes + SplitAdaptor adaptor(maxScale, true); for (auto n = 0; n < scales; n++) builder.build(out, calculator, adaptor, 1); } -/** - * @brief Build an **empty** grid guided by an analytic function (adaptive). - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree whose grid will be extended. - * @param[in] inp Analytic function used as a splitting oracle. - * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). - * - * @details - * Uses #mrcpp::AnalyticAdaptor to ask the analytic function @p inp whether a - * node is visible at a given scale and whether it is identically zero on the - * node interval. Nodes are split until convergence or @p maxIter is reached. - * - * @note Requires @p inp to implement `isVisibleAtScale()` and - * `isZeroOnInterval()`. - */ template void build_grid(FunctionTree &out, const RepresentableFunction &inp, int maxIter) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -122,25 +55,6 @@ template void build_grid(FunctionTree &out, const Repr print::separator(10, ' '); } -/** - * @brief Build an **empty** grid guided by a Gaussian expansion (adaptive). - * - * @tparam D Spatial dimension. - * @param[out] out Output tree whose grid will be extended. - * @param[in] inp Gaussian expansion. - * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). - * - * @details - * For a non-periodic world: - * iterates over all Gaussians in @p inp and drives refinement with - * #mrcpp::AnalyticAdaptor using each Gaussian's position and exponent. - * - * For a periodic world: - * copies and reuses the same logic via temporary Gaussian objects so that - * periodic replication is handled consistently. 
- * - * Higher exponents imply finer resolution near the Gaussian center. - */ template void build_grid(FunctionTree &out, const GaussExp &inp, int maxIter) { if (!out.getMRA().getWorldBox().isPeriodic()) { auto maxScale = out.getMRA().getMaxScale(); @@ -152,7 +66,7 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i } } else { auto period = out.getMRA().getWorldBox().getScalingFactors(); - (void)period; // currently unused; kept to document intent + (void)period; for (auto i = 0; i < inp.size(); i++) { auto *gauss = inp.getFunc(i).copy(); build_grid(out, *gauss, maxIter); @@ -162,21 +76,6 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i print::separator(10, ' '); } -/** - * @brief Build an **empty** grid by taking the union with another MW tree. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree to be extended. - * @param[in] inp Input tree whose structure drives refinement. - * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). - * - * @details - * Uses #mrcpp::CopyAdaptor to ensure that any node that exists (and has - * children) in @p inp will also exist in @p out after the call. - * - * @warning @p out and @p inp must share the same #mrcpp::MultiResolutionAnalysis. - */ template void build_grid(FunctionTree &out, FunctionTree &inp, int maxIter) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); auto maxScale = out.getMRA().getMaxScale(); @@ -187,21 +86,6 @@ template void build_grid(FunctionTree &out, FunctionTr print::separator(10, ' '); } -/** - * @brief Build an **empty** grid by taking the union of several MW trees. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree to be extended. - * @param[in] inp Vector of (coef, tree) pairs. - * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). 
- * - * @details - * Uses #mrcpp::CopyAdaptor to extend @p out so that all nodes present in any - * of the input trees are represented in the resulting grid (union). - * - * @warning All trees must share the same #mrcpp::MultiResolutionAnalysis as @p out. - */ template void build_grid(FunctionTree &out, FunctionTreeVector &inp, int maxIter) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -214,68 +98,24 @@ template void build_grid(FunctionTree &out, FunctionTr print::separator(10, ' '); } -/** - * @brief Convenience overload: build a grid from a list of tree pointers. - */ template void build_grid(FunctionTree &out, std::vector *> &inp, int maxIter) { FunctionTreeVector inp_vec; for (auto *t : inp) inp_vec.push_back({1.0, t}); build_grid(out, inp_vec, maxIter); } -/** - * @brief Copy a function from one tree to the fixed grid of another. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree (grid must already exist). - * @param[in] inp Input tree (source of coefficients). - * - * @details - * Traverses the **current leaves** of @p out and copies the corresponding - * coefficients from @p inp where nodes align, using the addition kernel - * with fixed grid (no refinement). - * - * @note Overwrites existing coefficients in @p out; does not modify its grid. - */ template void copy_func(FunctionTree &out, FunctionTree &inp) { FunctionTreeVector tmp_vec; tmp_vec.push_back(std::make_tuple(1.0, &inp)); add(-1.0, out, tmp_vec); } -/** - * @brief Make @p out's grid an exact copy of @p inp's grid (clears first). - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[out] out Output tree to be rebuilt. - * @param[in] inp Input tree supplying the grid structure. - * - * @details - * Clears @p out completely (removes all nodes) and then extends its grid to - * match @p inp using #build_grid(out, inp). 
- * - * @warning @p out and @p inp must share the same #mrcpp::MultiResolutionAnalysis. - */ template void copy_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") out.clear(); build_grid(out, inp); } -/** - * @brief Component-wise grid copy for composite functions (clears first). - * - * @tparam D Spatial dimension. - * @param[out] out Destination composite function. - * @param[in] inp Source composite function. - * - * @details - * Recreates @p out with the same number of components and data parameters as - * @p inp, then for each component copies the grid using the tree-based - * #build_grid overload. - */ template void copy_grid(CompFunction &out, CompFunction &inp) { out.free(); out.func_ptr->data = inp.func_ptr->data; @@ -286,63 +126,23 @@ template void copy_grid(CompFunction &out, CompFunction &inp) { } } -/** - * @brief Clear coefficients on an existing grid (topology unchanged). - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[in,out] out Tree whose coefficients will be zeroed. - * - * @details - * Uses #mrcpp::TreeBuilder::clear with #mrcpp::DefaultCalculator to reset - * coefficients while preserving node structure. - */ template void clear_grid(FunctionTree &out) { TreeBuilder builder; DefaultCalculator calculator; builder.clear(out, calculator); } -/** - * @brief Uniformly refine a grid and **transfer scaling coefficients**. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[in,out] out Tree to refine. - * @param[in] scales Number of refinement sweeps. - * @return Number of nodes that were split. - * - * @details - * Splits all leaves `scales` times using #mrcpp::TreeBuilder::split with - * coefficient transfer to children, so the function representation remains - * unchanged while resolution increases. 
- */ template int refine_grid(FunctionTree &out, int scales) { auto nSplit = 0; auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; - SplitAdaptor adaptor(maxScale, true); // Splits all nodes + SplitAdaptor adaptor(maxScale, true); for (auto n = 0; n < scales; n++) { - nSplit += builder.split(out, adaptor, true); // Transfers coefs to children + nSplit += builder.split(out, adaptor, true); } return nSplit; } -/** - * @brief Precision-driven refinement using wavelet criteria. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[in,out] out Tree to refine. - * @param[in] prec Precision target for split checks. - * @param[in] absPrec If true, use absolute precision; otherwise relative. - * @return Number of nodes that were split. - * - * @details - * Uses #mrcpp::WaveletAdaptor to test split conditions based on wavelet - * coefficients against @p prec (absolute or relative). When splitting, scales - * are updated by transferring coefficients to the children. - */ template int refine_grid(FunctionTree &out, double prec, bool absPrec) { int maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -351,19 +151,6 @@ template int refine_grid(FunctionTree &out, double pre return nSplit; } -/** - * @brief Refine a grid to include all structure present in a reference tree. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[in,out] out Tree to refine (and receive coefficient transfer). - * @param[in] inp Reference tree that defines where @p out should split. - * @return Number of nodes that were split. - * - * @details - * Uses #mrcpp::CopyAdaptor to mirror structural refinement from @p inp into - * @p out and transfers coefficients to children where splits occur. 
- */ template int refine_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") auto maxScale = out.getMRA().getMaxScale(); @@ -373,20 +160,6 @@ template int refine_grid(FunctionTree &out, FunctionTr return nSplit; } -/** - * @brief Analytic-driven refinement using a representable function. - * - * @tparam D Spatial dimension. - * @tparam T Scalar coefficient type. - * @param[in,out] out Tree to refine. - * @param[in] inp Analytic function to act as a split oracle. - * @return Number of nodes that were split. - * - * @details - * Uses #mrcpp::AnalyticAdaptor to request refinement where @p inp is visible - * at scale and not identically zero on the cell. Coefficients are transferred - * upon splitting so the represented function remains unchanged. - */ template int refine_grid(FunctionTree &out, const RepresentableFunction &inp) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -395,8 +168,6 @@ template int refine_grid(FunctionTree &out, const Repr return nSplit; } -// -------------------- explicit instantiations -------------------- - template void copy_grid(CompFunction<1> &out, CompFunction<1> &inp); template void copy_grid(CompFunction<2> &out, CompFunction<2> &inp); template void copy_grid(CompFunction<3> &out, CompFunction<3> &inp); @@ -478,4 +249,4 @@ template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp); template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/grid.h b/src/treebuilders/grid.h index 1d7021f8b..be3e88376 100644 --- a/src/treebuilders/grid.h +++ b/src/treebuilders/grid.h @@ -25,24 +25,230 @@ #pragma once +/** + * @file + * @brief Grid 
construction, copying, clearing, and refinement helpers for multiresolution trees. + * + * @details + * This header declares a family of utilities to *construct* and *modify* the + * topology (grid) of @ref mrcpp::FunctionTree without necessarily computing or + * moving coefficients. The functions support several sources: + * analytic/representable functions, existing trees, vectors of trees, and + * explicit scale counts. + * + * ### Conventions + * - `D` is the spatial dimension (typically 1–3). + * - `T` is the coefficient scalar type (`double` or `std::complex`). + * - Functions named `build_grid` create (or enlarge) the *tree structure* + * of `out` to be adequate for representing the given input(s). + * - Functions named `copy_grid` copy only the *structure* (no coefficients). + * - `copy_func` copies coefficients onto the existing grid of `out`. + * - Functions named `refine_grid` add resolution either explicitly by a scale + * count or adaptively by a precision criterion. + * - Functions returning `int` report the number of nodes that were split + * (i.e., how many refinements were actually performed). 
+ */ + #include "functions/RepresentableFunction.h" #include "trees/FunctionTree.h" #include "trees/FunctionTreeVector.h" #include "utils/CompFunction.h" namespace mrcpp { -template void build_grid(FunctionTree &out, int scales); -template void build_grid(FunctionTree &out, const GaussExp &inp, int maxIter = -1); -template void build_grid(FunctionTree &out, const RepresentableFunction &inp, int maxIter = -1); -template void build_grid(FunctionTree &out, FunctionTree &inp, int maxIter = -1); -template void build_grid(FunctionTree &out, FunctionTreeVector &inp, int maxIter = -1); -template void build_grid(FunctionTree &out, std::vector *> &inp, int maxIter = -1); -template void copy_func(FunctionTree &out, FunctionTree &inp); -template void copy_grid(FunctionTree &out, FunctionTree &inp); -template void copy_grid(CompFunction &out, CompFunction &inp); -template void clear_grid(FunctionTree &out); -template int refine_grid(FunctionTree &out, int scales); -template int refine_grid(FunctionTree &out, double prec, bool absPrec = false); -template int refine_grid(FunctionTree &out, FunctionTree &inp); -template int refine_grid(FunctionTree &out, const RepresentableFunction &inp); -} // namespace mrcpp + +/** + * @brief Create a uniform grid of fixed depth. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Target tree whose topology will be (re)built. + * @param[in] scales Number of refinement steps from the current state. + * + * @details + * Starting from the current `out` topology (typically roots), subdivide each + * active end-node `scales` times so that a regular grid of depth increased by + * `scales` is obtained. No coefficients are computed or modified. + */ +template +void build_grid(FunctionTree &out, int scales); + +/** + * @brief Build an adaptive grid suitable for a Gaussian expansion. + * + * @tparam D Spatial dimension. + * @param[out] out Target **real** tree to receive the grid. 
+ * @param[in] inp Analytic Gaussian expansion used as refinement oracle. + * @param[in] maxIter Maximum refinement passes; negative means “unbounded” + * until convergence by the internal criterion. + * + * @details + * Iteratively refines the tree so that the structure can represent `inp` + * within the library’s default per-node criterion (e.g., band-limited model or + * local projection error). Coefficients are not guaranteed to be written. + */ +template +void build_grid(FunctionTree &out, const GaussExp &inp, int maxIter = -1); + +/** + * @brief Build an adaptive grid for a generic representable function. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Target tree. + * @param[in] inp Representable function serving as refinement oracle. + * @param[in] maxIter Maximum refinement passes; negative means unbounded. + * + * @details + * Uses evaluations/projections of `inp` to determine where refinement is + * needed so that the resulting grid can capture `inp` with the library’s + * default tolerance heuristic. + */ +template +void build_grid(FunctionTree &out, const RepresentableFunction &inp, int maxIter = -1); + +/** + * @brief Build a grid that can accommodate another tree’s resolution/support. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Target tree to be enlarged/refined. + * @param[in] inp Source tree whose structure (support + finest scales) is used. + * + * @details + * Ensures that `out` has at least the resolution present in `inp` wherever + * `inp` has support (a *grid union* operation). Coefficients are not copied. + */ +template +void build_grid(FunctionTree &out, FunctionTree &inp, int maxIter = -1); + +/** + * @brief Build a grid that is a union of a vector of trees. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Target tree. + * @param[in] inp Vector of trees whose supports/resolutions are merged. 
+ * @param[in] maxIter Optional iteration cap for staged refinement strategies. + */ +template +void build_grid(FunctionTree &out, FunctionTreeVector &inp, int maxIter = -1); + +/** + * @brief Build a grid that is a union of a list of tree pointers. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Target tree. + * @param[in] inp List of tree pointers to merge. + * @param[in] maxIter Optional iteration cap for staged refinement strategies. + */ +template +void build_grid(FunctionTree &out, std::vector *> &inp, int maxIter = -1); + +/** + * @brief Copy the coefficients of a tree onto the existing grid of another. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Destination tree (its grid is kept as is). + * @param[in] inp Source tree. + */ +template +void copy_func(FunctionTree &out, FunctionTree &inp); + +/** + * @brief Copy only the tree topology (no coefficients). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Destination tree (structure rebuilt). + * @param[in] inp Source tree whose topology is replicated. + */ +template +void copy_grid(FunctionTree &out, FunctionTree &inp); + +/** + * @brief Copy only the topology for all components of a composite function. + * + * @tparam D Spatial dimension. + * @param[out] out Destination composite function (components allocated as needed). + * @param[in] inp Source composite function. + * + * @details + * For each component present in @p inp, ensure @p out has a corresponding + * component with identical tree structure. Coefficients are not copied. + */ +template +void copy_grid(CompFunction &out, CompFunction &inp); + +/** + * @brief Clear the coefficients on an existing grid (topology is kept). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[out] out Tree to clear; MRA association remains intact. 
+ */ +template +void clear_grid(FunctionTree &out); + +/** + * @brief Refine uniformly by a fixed number of scales. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[in,out] out Tree to refine. + * @param[in] scales Number of subdivision steps to apply. + * @return Number of new end-nodes created by the refinement. + */ +template +int refine_grid(FunctionTree &out, int scales); + +/** + * @brief Adaptive refinement driven by a precision threshold. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[in,out] out Tree to refine. + * @param[in] prec Target precision (threshold). + * @param[in] absPrec If `true`, interpret @p prec as absolute tolerance; + * otherwise relative to a norm estimate. + * @return Number of new end-nodes created. + * + * @details + * Subdivides those nodes whose local error/indicator exceeds the requested + * threshold. The precise indicator depends on the library configuration + * (e.g., wavelet-norm-based splitting). + */ +template +int refine_grid(FunctionTree &out, double prec, bool absPrec = false); + +/** + * @brief Refine `out` so that its grid is at least as fine as `inp`. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[in,out] out Destination tree to refine. + * @param[in] inp Source tree providing the target finest scales. + * @return Number of new end-nodes created. + */ +template +int refine_grid(FunctionTree &out, FunctionTree &inp); + +/** + * @brief Adaptive refinement using a representable function as oracle. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type. + * @param[in,out] out Tree to refine. + * @param[in] inp Representable function guiding refinement. + * @return Number of new end-nodes created. 
+ * + * @details + * Samples or projects @p inp on candidate nodes and refines where the + * estimated local error is above the internal criterion, creating a grid + * appropriate for subsequently projecting @p inp. + */ +template +int refine_grid(FunctionTree &out, const RepresentableFunction &inp); + +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp index 8e76b907c..b1d4af10c 100644 --- a/src/treebuilders/map.cpp +++ b/src/treebuilders/map.cpp @@ -23,53 +23,6 @@ * */ -/** - * @file map.cpp - * @brief Adaptive mapping of multiresolution (MW) function trees through a user - * supplied scalar-to-scalar mapping. - * - * @details - * This module implements an adaptive **pointwise mapping** of an input - * #mrcpp::FunctionTree onto an output #mrcpp::FunctionTree by applying a user - * provided mapping function \f$f:\mathbb{R}\to\mathbb{R}\f$ to the function - * values represented on the MW grid. - * - * The mapping is realized via the standard MRCPP build loop: - * - On the **current** output grid, coefficients are computed by evaluating - * the input function and applying the mapping function (handled by - * #mrcpp::MapCalculator). - * - A **wavelet-based split criterion** (via #mrcpp::WaveletAdaptor) refines - * the grid wherever the mapped function requires more resolution to meet - * the requested precision. - * - This **refine–recompute** cycle repeats until convergence or a maximum - * number of iterations is reached. - * - * ### Precision semantics - * - If `absPrec == false` (default), the adaptor uses **relative precision**: - * refinement stops when wavelet coefficients are small compared to the - * current function norm, roughly \f$|d| < \varepsilon\,/\,\|f\|\f$. - * - If `absPrec == true`, the adaptor enforces an **absolute threshold**: - * \f$|d| < \varepsilon\f$. 
- * - * ### Responsibilities and caveats - * - MRCPP does **not** impose constraints on the mapping function; the user - * must ensure it is numerically safe (no division by zero, no overflow, etc.). - * - The mapping is **pointwise**: it does not solve PDEs or apply operators. - * For linear/nonlinear operators, consider specialized operator modules. - * - * ### Typical usage - * @code - * // Assume 'mra' is a configured MultiResolutionAnalysis - * FunctionTree<3,double> in(mra), out(mra); - * // ... build 'in' somehow (project analytic function, read from file, etc.) - * - * auto clamp_nonnegative = [](double x) { return x < 0.0 ? 0.0 : x; }; - * map<3>(1e-6, out, in, clamp_nonnegative, -1, false); // -1: unbounded maxIter, false: relative precision - * @endcode - * - * @see mrcpp::MapCalculator, mrcpp::WaveletAdaptor, mrcpp::TreeBuilder - */ - #include "map.h" #include "MapCalculator.h" #include "MultiplicationCalculator.h" @@ -85,37 +38,6 @@ namespace mrcpp { -/** - * @brief Adaptively map an input MW function through a scalar mapping function. - * - * @tparam D Spatial dimension (1, 2, or 3). - * - * @param[in] prec Target build precision (relative or absolute depending on @p absPrec). - * @param[out] out Output function tree to be constructed (should start empty). - * @param[in] inp Input function tree providing the source values. - * @param[in] fmap Mapping function \f$f:\mathbb{R}\to\mathbb{R}\f$ to apply pointwise. - * @param[in] maxIter Maximum refinement iterations (negative = unbounded). - * @param[in] absPrec If true: interpret @p prec as absolute; otherwise relative. - * - * @details - * Pipeline: - * 1. Create a #mrcpp::MapCalculator that evaluates @p inp and applies @p fmap. - * 2. Drive refinement with a #mrcpp::WaveletAdaptor at the MRA max scale, - * honoring @p prec and @p absPrec. - * 3. Build the output via #mrcpp::TreeBuilder until convergence or @p maxIter. - * 4. Perform bottom-up MW transform and square-norm computation for diagnostics. 
- * 5. Clean temporary/generated artifacts on the input tree. - * - * @note - * - The algorithm **extends** whatever grid @p out currently has. For a fresh build, - * ensure @p out is empty (no coefficients). - * - The input and output trees must belong to a compatible MRA setup. - * - * @warning - * The user is responsible for the numerical stability of @p fmap. - * Discontinuous or extremely steep mappings may require tighter precision or - * more iterations to resolve features adequately. - */ template void map(double prec, FunctionTree &out, @@ -145,7 +67,6 @@ void map(double prec, print::separator(10, ' '); } -// explicit instantiations template void map<1>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &inp, FMap fmap, int maxIter, bool absPrec); template void map<2>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &inp, FMap fmap, int maxIter, bool absPrec); template void map<3>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &inp, FMap fmap, int maxIter, bool absPrec); diff --git a/src/treebuilders/map.h b/src/treebuilders/map.h index d1f86e201..696c91ff5 100644 --- a/src/treebuilders/map.h +++ b/src/treebuilders/map.h @@ -25,11 +25,113 @@ #pragma once +/** + * @file + * @brief Nonlinear mapping utilities for multiresolution trees. + * + * @details + * Declares an adaptive routine that applies a user-supplied scalar mapping + * to a multiresolution function represented by a @ref mrcpp::FunctionTree. + * The routine produces an output tree whose grid is refined as needed to meet + * a requested precision. + * + * ### What “map” does + * Given an input scalar field \( f(\mathbf{r}) \) encoded by `inp`, and a + * scalar-to-scalar function `fmap : ℝ → ℝ`, this routine builds (or refines) + * the topology of `out` and computes coefficients so that + * \f[ + * g(\mathbf{r}) = \mathrm{fmap}\big(f(\mathbf{r})\big) + * \f] + * is represented to within the requested tolerance. 
+ * + * The mapping is *pointwise* in value space (nonlinear allowed) and the grid + * refinement is *adaptive*: nodes are split where approximation error indicates + * additional resolution is required. + * + * ### Typical uses + * - Envelope shaping (e.g., clamp, softplus, \f$x^p\f$). + * - Nonlinearities inside iterative solvers. + * - Post-processing fields (e.g., magnitude, thresholding). + * + * @note Only the **real** scalar case (`double` coefficients) is declared here. + * Complex-valued mappings typically require splitting real/imag components + * explicitly or using dedicated complex routines elsewhere in the library. + */ + #include "trees/FunctionTreeVector.h" namespace mrcpp { + template class FunctionTree; -template void map(double prec, FunctionTree &out, FunctionTree &inp, FMap fmap, int maxIter = -1, bool absPrec = false); +/** + * @brief Apply a scalar mapping to a function tree with adaptive refinement. + * + * @tparam D Spatial dimension (1–3 typical). + * + * @param[in] prec + * Target precision threshold used to control adaptive refinement. + * See @p absPrec for interpretation. + * @param[out] out + * Destination tree. Its topology will be enlarged/refined as needed and its + * coefficients overwritten with the mapped result. + * The tree must be associated with a valid MRA compatible with @p inp. + * @param[in] inp + * Source tree that encodes the input function \( f(\mathbf{r}) \). + * Logically read-only (will not be modified by a correct implementation). + * @param[in] fmap + * Scalar mapping functor of type `FMap` (typically equivalent + * to `std::function` or any callable with signature + * `double(double)`). It is applied pointwise to sample values of `inp`. + * @param[in] maxIter + * Maximum number of refinement passes. A negative value (default) requests + * unbounded passes until the internal convergence criterion is satisfied + * (e.g., no new nodes created or estimated error below @p prec everywhere). 
+ * @param[in] absPrec + * If `true`, interpret @p prec as an **absolute** tolerance on the local + * error indicator. If `false` (default), use a **relative** tolerance, + * typically scaled by an estimate of \f$\|f\|\f$ (implementation-defined). + * + * @pre + * - `out` and `inp` share compatible MRAs (same domain, basis order, etc.). + * - `fmap` must be pure (side-effect free) and thread-safe. + * + * @post + * - `out`’s topology and coefficients represent + * \f$ g(\mathbf{r}) = \mathrm{fmap}(f(\mathbf{r})) \f$ to within the requested + * tolerance, subject to the library’s split criterion. + * + * @par Precision semantics + * - *Absolute mode* (`absPrec=true`): the error indicator is compared directly + * to @p prec. + * - *Relative mode* (`absPrec=false`): the indicator is scaled by a norm of + * the input (e.g., tree square-norm), so @p prec represents a relative + * threshold. + * + * @par Parallelization + * The routine may exploit OpenMP/MPI internally. Supplying a thread-safe + * `fmap` is required. + * + * @par Exception safety + * Strong guarantee for `inp`. `out` is modified during execution; in case of + * failure it may be left partially updated. + * + * @par Example + * @code + * using Tree = mrcpp::FunctionTree<3,double>; + * Tree fout(mra), fin(mra); + * // ... build/project fin ... + * + * auto square = [](double x){ return x*x; }; + * mrcpp::map<3>(1e-6, fout, fin, square); // fout ≈ (fin)^2 + * @endcode + */ +template +void map(double prec, + FunctionTree &out, + FunctionTree &inp, + FMap fmap, + int maxIter = -1, + bool absPrec = false); } // namespace mrcpp diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp index 1b57b10d2..d6fd1b1c1 100644 --- a/src/treebuilders/multiply.cpp +++ b/src/treebuilders/multiply.cpp @@ -23,39 +23,6 @@ * */ -/** - * @file multiply.cpp - * @brief Adaptive algebra on multiresolution (MW) function trees: product, - * square, power, (componentwise) dot, and inner products. 
- * - * @details - * This module implements a family of adaptive build routines that produce - * a new #mrcpp::FunctionTree from algebraic combinations of one or more input - * trees. The build is driven by the multiresolution refinement loop - * (TreeBuilder + Adaptor + Calculator): - * - * - On the current output grid, local contributions are computed by a - * Calculator (e.g. MultiplicationCalculator, SquareCalculator, PowerCalculator). - * - A refinement Adaptor (WaveletAdaptor by default, or MultiplicationAdaptor - * when useMaxNorms is enabled) decides whether to split nodes based on - * requested precision. - * - The refine–recompute process repeats until the target precision is met - * or the iteration limit is reached. - * - * Precision semantics: - * - Relative precision (absPrec = false): split while |d| is not small - * relative to the function norm. - * - Absolute precision (absPrec = true): split while |d| is above a fixed - * absolute threshold. - * - * Notes: - * - All routines assume the output tree starts with an empty grid (no coeffs). - * The grid is grown adaptively unless otherwise stated. - * - The input and output trees must belong to compatible MRAs. - * - Some routines can optionally use max-norm estimates from inputs to guide - * refinement (useMaxNorms). - */ - #include #include "MultiplicationAdaptor.h" @@ -77,28 +44,6 @@ namespace mrcpp { -/** - * @brief Adaptive product of two MW functions with an overall scalar factor. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (double or ComplexDouble). - * - * @param[in] prec Target build precision. - * @param[out] out Output function tree to construct. - * @param[in] c Scalar prefactor multiplying inp_a * inp_b. - * @param[in] inp_a First input tree. - * @param[in] inp_b Second input tree. - * @param[in] maxIter Max refinement iterations (-1 means unbounded). - * @param[in] absPrec If true: absolute precision; else relative. 
- * @param[in] useMaxNorms If true: use MultiplicationAdaptor with local - * max-norm estimates from inputs for split checks. - * @param[in] conjugate If true: apply complex conjugation to inp_b during multiplication. - * - * @details - * Builds out = c * inp_a * (conjugate ? conj(inp_b) : inp_b) on an adaptively - * refined grid. If useMaxNorms is true, each input tree contributes local - * estimates (makeMaxSquareNorms) to scale the precision per node. - */ template void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp_a, FunctionTree &inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { FunctionTreeVector tmp_vec; @@ -107,26 +52,6 @@ void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate); } -/** - * @brief Adaptive product of several MW functions (with per-input scalars). - * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type. - * - * @param[in] prec Target build precision. - * @param[out] out Output function tree to construct. - * @param[in] inp Vector of inputs (scalar, tree) pairs. - * @param[in] maxIter Max refinement iterations (-1 means unbounded). - * @param[in] absPrec If true: absolute precision; else relative. - * @param[in] useMaxNorms Use norm-based adaptor when true. - * @param[in] conjugate Conjugate all trees except the first (if complex). - * - * @details - * Builds out = (Π_k a_k * f_k) where each (a_k, f_k) is the k-th pair. - * If conjugate is true, all factors except the first are conjugated in the - * complex case. When useMaxNorms is true, #mrcpp::MultiplicationAdaptor - * scales the split threshold by input-node max norms to improve targeting. 
- */ template void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { for (auto i = 0; i < inp.size(); i++) @@ -162,12 +87,6 @@ void multiply(double prec, FunctionTree &out, FunctionTreeVector &in print::separator(10, ' '); } -/** - * @brief Convenience overload: product of a list of trees (unit coefficients). - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - */ template void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { FunctionTreeVector inp_vec; @@ -175,15 +94,6 @@ void multiply(double prec, FunctionTree &out, std::vector void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter, bool absPrec, bool conjugate) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); @@ -209,14 +119,6 @@ void square(double prec, FunctionTree &out, FunctionTree &inp, int m print::separator(10, ' '); } -/** - * @brief Adaptive power: out = inp^p (real exponent p). - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * - * @warning Conjugated inputs are not supported here. - */ template void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter, bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); @@ -243,24 +145,6 @@ void power(double prec, FunctionTree &out, FunctionTree &inp, double print::separator(10, ' '); } -/** - * @brief Adaptive componentwise dot product of two function vectors. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * - * @param[in] prec Target build precision for the per-component products. - * @param[out] out Output tree holding the sum over component products. - * @param[in] inp_a First vector of (scalar, tree) pairs. - * @param[in] inp_b Second vector of (scalar, tree) pairs. - * @param[in] maxIter Max refinement iterations per component product. 
- * @param[in] absPrec Absolute vs relative precision. - * - * @details - * Computes out = Σ_d (a_d f_d) · (b_d g_d) by first forming per-component - * products on grids compatible with @p out, then summing these contributions - * on the fixed union grid (addition step uses a fixed grid, not adaptive). - */ template void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter, bool absPrec) { if (inp_a.size() != inp_b.size()) MSG_ABORT("Input length mismatch"); @@ -282,19 +166,6 @@ void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, clear(tmp_vec, true); } -/** - * @brief Inner product ⟨bra|ket⟩ on compressed MW trees. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type of bra. - * @tparam U Coefficient type of ket. - * @tparam V Return type (double or ComplexDouble). - * - * @details - * Works directly on compressed representation: scaling coefficients on roots - * and wavelet coefficients on all nodes. Orthonormality across scales makes - * this efficient: only overlapping nodes contribute. - */ template V dot(FunctionTree &bra, FunctionTree &ket) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible"); @@ -322,26 +193,6 @@ V dot(FunctionTree &bra, FunctionTree &ket) { return result; } -/** - * @brief Absolute inner product proxy based on node norms. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * - * @param[in] bra First input function. - * @param[in] ket Second input function. - * @param[in] exact If true, requires ket's grid to include bra's grid and - * uses absolute coefficients per node. If false, uses an - * approximate product of node norms and root-node norms. - * - * @returns Value proportional to the absolute inner product. - * - * @details - * With exact = true, the routine converts to interpolating coefficients, - * takes absolute values, and accumulates exact contributions node by node. 
- * With exact = false, it avoids per-coefficient access and approximates the - * product via node norms; disjoint functions yield zero. - */ template double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Incompatible MRA"); diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h index 316066483..75e9cf316 100644 --- a/src/treebuilders/multiply.h +++ b/src/treebuilders/multiply.h @@ -25,18 +25,125 @@ #pragma once +/** + * @file + * @brief High-level algebra on multiresolution function trees. + * + * @details + * This header declares scalar and field operations on + * @ref mrcpp::FunctionTree objects: + * - continuous inner products (dot products), + * - pointwise products of two or many trees, + * - powers/squares (element-wise). + * + * Unless stated otherwise, functions honor a target accuracy parameter + * `prec` (see each overload). Implementations typically refine/coarsen + * grids adaptively using wavelet norms and multiresolution estimates until + * the requested tolerance is met (or a `maxIter` cap is reached). + * + * ### Precision semantics + * - `prec` is interpreted as a **relative** tolerance in an L2-like sense + * by default; set `absPrec=true` to treat it as an **absolute** tolerance. + * - `maxIter < 0` means “iterate as needed”; otherwise it limits refinement + * passes (the function may exit early with a looser error). + * + * ### Conjugation semantics (complex trees) + * When `T` is complex, some overloads accept `conjugate=true` to apply + * complex conjugation to the first factor (bra–ket convention), yielding + * products like \f$f \cdot \overline{g}\f$ or \f$|f|^2\f$. 
+ */ + #include "trees/FunctionTreeVector.h" namespace mrcpp { + template class RepresentableFunction; template class FunctionTree; -template () * std::declval())> V dot(FunctionTree &bra, FunctionTree &ket); +/** + * @brief Continuous inner product \f$\langle \text{bra} \mid \text{ket} \rangle\f$ over \f$\mathbb{R}^D\f$. + * + * @tparam D Spatial dimension. + * @tparam T Scalar type of the bra tree (e.g., `double`, `ComplexDouble`). + * @tparam U Scalar type of the ket tree. + * @tparam V Return type deduced as `decltype(T{} * U{})`. + * + * @param bra Multiresolution function tree acting as the bra. + * @param ket Multiresolution function tree acting as the ket. + * @return The scalar inner product value. For complex types, the bra is + * conjugated (i.e., \f$\int \overline{bra(x)}\,ket(x)\,dx\f$). + * + * @pre Both trees must be compatible (same MRA/grid conventions). + * @note Implementations usually reconstruct to consistent representations + * before integration; they may refine adaptively to ensure accuracy. + * @warning For poorly overlapping/aliased grids, the routine may refine + * meshes internally, which can be expensive. + */ +template () * std::declval())> +V dot(FunctionTree &bra, FunctionTree &ket); -template void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter = -1, bool absPrec = false); +/** + * @brief Contract two vectors of trees into a scalar field: + * \f$out(x) = \sum_i a_i(x)\,\overline{b_i(x)}\f$ (complex) or \f$\sum_i a_i(x)\,b_i(x)\f$ (real). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type (`double` or `ComplexDouble`). + * @param prec Target accuracy for the constructed field (see “Precision semantics” above). + * @param out Output scalar field tree receiving the contraction. + * @param inp_a Vector of factor trees \f$\{a_i\}\f$. + * @param inp_b Vector of factor trees \f$\{b_i\}\f$; must have the same size and compatible grids as `inp_a`. 
+ * @param maxIter Maximum refinement passes; `-1` for unlimited. + * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * + * @details + * Builds the *pointwise* contraction of two equally sized collections of trees, + * summing products component-wise. This is often used to assemble densities + * or overlaps distributed over space. + * + * The routine adaptively refines `out` to meet `prec`. Input nodes may be + * transiently reconstructed to compatible representations. + */ +template +void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter = -1, bool absPrec = false); -template double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact = false); +/** + * @brief Fast contraction based on node norms (cheap estimate/upper bound). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param bra First tree. + * @param ket Second tree. + * @param exact If `true`, request exact inner product instead of a norm-based estimate (implementation-dependent). + * @return A scalar quantity derived from node-wise norms; commonly used + * as a quick upper bound or cheap similarity measure. + * + * @note When `exact=true`, implementations may fall back to the same + * evaluation as @ref dot(bra, ket). If exact evaluation is not + * available, `exact` may be ignored. + */ +template +double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact = false); +/** + * @brief Pointwise product of two trees with a scalar prefactor: + * \f$out \leftarrow c \cdot a \cdot (\mathrm{conj}\,b \text{ if requested})\f$. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param prec Target accuracy for `out`. + * @param out Output tree receiving the product. + * @param c Scalar prefactor applied to the product. + * @param inp_a First factor. + * @param inp_b Second factor. + * @param maxIter Maximum refinement passes; `-1` for unlimited. 
+ * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * @param useMaxNorms If `true`, use max-norm heuristics to guide refinement (may be faster, slightly more conservative). + * @param conjugate If `true` and `T` is complex, conjugate the **first** factor (bra–ket convention). + * + * @details + * Produces an adaptively refined tree such that the representation error of + * the pointwise product does not exceed `prec` under the chosen policy. + */ template void multiply(double prec, FunctionTree &out, @@ -48,14 +155,76 @@ void multiply(double prec, bool useMaxNorms = false, bool conjugate = false); +/** + * @brief Pointwise product of an arbitrary number of trees: + * \f$out \leftarrow \prod_{i} f_i\f$ (optional conjugation of the first factor). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param prec Target accuracy for `out`. + * @param out Output tree receiving the product. + * @param inp List of input tree pointers (non-null, compatible MRAs). + * @param maxIter Maximum refinement passes; `-1` for unlimited. + * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * @param useMaxNorms If `true`, enable max-norm driven refinement. + * @param conjugate If `true` and `T` is complex, conjugate the **first** factor only. + * + * @note The algorithm typically multiplies factors incrementally with + * intermediate refinement; ordering can affect performance. + */ template void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); +/** + * @brief Pointwise product of a vector of trees: + * \f$out \leftarrow \prod_{i} f_i\f$ (optional conjugation of the first factor). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param prec Target accuracy for `out`. + * @param out Output tree receiving the product. + * @param inp Vector wrapper containing input trees (and possibly per-tree scalars). 
+ * @param maxIter Maximum refinement passes; `-1` for unlimited. + * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * @param useMaxNorms If `true`, enable max-norm driven refinement. + * @param conjugate If `true` and `T` is complex, conjugate the **first** factor only. + */ template void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false); -template void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter = -1, bool absPrec = false); +/** + * @brief Element-wise power: \f$out(x) = \big(inp(x)\big)^{p}\f$. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param prec Target accuracy for `out`. + * @param out Output tree. + * @param inp Input tree. + * @param p Real exponent. + * @param maxIter Maximum refinement passes; `-1` for unlimited. + * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * + * @warning For real `T`, negative bases with non-integer `p` are undefined. + * For complex `T`, the principal branch is typically used. + */ +template +void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter = -1, bool absPrec = false); -template void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false); +/** + * @brief Element-wise square. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * @param prec Target accuracy for `out`. + * @param out Output tree. + * @param inp Input tree. + * @param maxIter Maximum refinement passes; `-1` for unlimited. + * @param absPrec If `true`, interpret `prec` as absolute tolerance. + * @param conjugate If `true` and `T` is complex, compute squared magnitude: + * \f$out = inp \cdot \overline{inp}\f$; otherwise compute + * \f$out = inp \cdot inp\f$. 
+ */ +template +void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/project.cpp b/src/treebuilders/project.cpp index e24750f95..099cbf389 100644 --- a/src/treebuilders/project.cpp +++ b/src/treebuilders/project.cpp @@ -23,36 +23,6 @@ * */ -/** - * @file project.cpp - * @brief Projection of analytic (scalar or vector) functions onto a - * multiwavelet (MW) basis on an adaptively refined grid. - * - * @details - * This module builds a MW representation of an analytic function by - * adaptively refining the grid and computing (scale-/wavelet-) coefficients - * until a user-prescribed tolerance is achieved. - * - * ### Algorithm (adaptive projection) - * 1. Start from the current grid in @p out (should be empty or root-only). - * 2. On the current leaves, compute MW coefficients using - * ProjectionCalculator (quadrature in the scaling basis). - * 3. Use WaveletAdaptor to decide where to refine: - * - **Relative precision** (default): stop when local wavelet norms - * drop below `prec * ||f||_node`. - * - **Absolute precision** (`absPrec = true`): stop when local wavelet - * norms drop below `prec`. - * 4. Repeat until convergence or `maxIter` is reached. - * 5. Perform final MW transforms (TopDown/BottomUp as needed) and compute - * the tree square-norm for bookkeeping. - * - * The projection accounts for non-unit world-box scaling through - * a per-dimension scaling factor passed to ProjectionCalculator. - * - * @note The functions here operate on templated dimension @p D (1,2,3) - * and coefficient type @p T (double or ComplexDouble). - */ - #include "project.h" #include "ProjectionCalculator.h" #include "TreeBuilder.h" @@ -66,29 +36,6 @@ namespace mrcpp { -/** - * @brief Project a lambda/std::function onto the MW basis (convenience overload). 
- * - * Wraps the callable into an AnalyticFunction and delegates to the - * RepresentableFunction overload. - * - * @tparam D Spatial dimension (1,2,3). - * @tparam T Coefficient type (double or ComplexDouble). - * - * @param[in] prec Target precision (relative by default, see @p absPrec). - * @param[out] out Output function tree to be built (should contain only empty roots). - * @param[in] func Callable \f$f:\mathbb{R}^D \to T\f$ returning values at coordinates. - * @param[in] maxIter Maximum refinement iterations (-1 = no bound). - * @param[in] absPrec Use absolute (true) or relative (false, default) thresholding. - * - * @details - * This is syntactic sugar for quickly projecting a user-provided callable. - * The adaptive procedure, grid policy, and stopping criteria are identical - * to the main projection overload below. - * - * @note The current grid in @p out is honored and extended; it is not cleared. - * For a fresh build, ensure @p out has only root nodes and no coefficients. - */ template void project(double prec, FunctionTree &out, @@ -99,33 +46,6 @@ void project(double prec, mrcpp::project(prec, out, inp, maxIter, absPrec); } -/** - * @brief Project a RepresentableFunction onto the MW basis, adaptive grid. - * - * @tparam D Spatial dimension (1,2,3). - * @tparam T Coefficient type (double or ComplexDouble). - * - * @param[in] prec Target precision (relative by default, see @p absPrec). - * @param[out] out Output function tree to be built (should contain only empty roots). - * @param[in] inp Analytic/representable function to project. - * @param[in] maxIter Maximum number of refinement iterations (-1 = unbounded). - * @param[in] absPrec Use absolute (true) or relative (false) thresholding. - * - * @details - * - Builds a WaveletAdaptor with precision policy (relative/absolute). - * - Creates a ProjectionCalculator configured with world-box scaling - * factors to ensure correct physical rescaling of integrals. 
- * - Uses TreeBuilder to iterate: - * compute coefs → test refinement → split nodes → repeat. - * - Finalizes with a BottomUp MW transform and tree norm accumulation. - * - * @par Precision semantics - * - **Relative** (`absPrec=false`): local wavelet norm compared to local function norm. - * - **Absolute** (`absPrec=true`): local wavelet norm compared to @p prec directly. - * - * @warning The output tree @p out must be compatible (same MRA/world box) - * with any other trees you later combine it with. - */ template void project(double prec, FunctionTree &out, @@ -150,24 +70,6 @@ void project(double prec, print::separator(10, ' '); } -/** - * @brief Project a vector of analytic functions (component-wise), adaptive grid. - * - * @tparam D Spatial dimension (1,2,3). - * @tparam T Coefficient type (double or ComplexDouble). - * - * @param[in] prec Target precision (relative by default, see @p absPrec). - * @param[out] out Output vector of trees (size must match @p func). - * @param[in] func Vector of component callables \f$f_j:\mathbb{R}^D \to T\f$. - * @param[in] maxIter Maximum refinement iterations (-1 = unbounded). - * @param[in] absPrec Use absolute (true) or relative (false) thresholding. - * - * @details - * Projects each component independently with the same precision policy and - * refinement limits, storing the result in the corresponding entry of @p out. - * - * @throws MSG_ABORT if @p out.size() != @p func.size(). 
- */ template void project(double prec, FunctionTreeVector &out, @@ -202,4 +104,4 @@ template void project<1, ComplexDouble>(double prec, FunctionTreeVector<1, Compl template void project<2, ComplexDouble>(double prec, FunctionTreeVector<2, ComplexDouble> &out, std::vector &r)>> inp, int maxIter, bool absPrec); template void project<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &out, std::vector &r)>> inp, int maxIter, bool absPrec); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/treebuilders/project.h b/src/treebuilders/project.h index f9e070ef2..ba3466412 100644 --- a/src/treebuilders/project.h +++ b/src/treebuilders/project.h @@ -25,12 +25,119 @@ #pragma once +/** + * @file + * @brief Projection helpers to expand analytic/representable functions on + * multiresolution bases (function trees). + * + * @details + * These overloads build or refine an output @ref FunctionTree (or a vector of + * trees) so that the supplied function(s) are represented to within a target + * precision. The projection is adaptive: nodes are split where the estimated + * local error exceeds the tolerance, and coefficients are (re)computed only + * where needed. + * + * **Precision semantics** + * - If @p absPrec is `false` (default), @p prec is interpreted as a + * *relative* tolerance with respect to a suitable global/aggregate norm of + * the function (typical L²-relative stopping criterion). + * - If @p absPrec is `true`, @p prec is treated as an *absolute* tolerance + * for local/node-wise thresholds. + * + * **Iteration control** + * - @p maxIter limits the number of refinement passes. Use `-1` for the + * default behavior (iterate until the tolerance is reached or the internal + * refiner deems the grid converged). + * + * **Preconditions and side effects** + * - @p out is modified in-place (grid may be refined/coarsened; coefficients + * are (re)computed). 
+ * - The @p RepresentableFunction or callable provided by the user must be + * well-defined on the domain of @p out’s @ref MultiResolutionAnalysis. + * + * @note Implementations typically perform, per node: + * 1) evaluate the input function on the node’s quadrature/stencil, + * 2) compute scaling/wavelet coefficients, + * 3) estimate local error and decide on further splitting, + * 4) stop when global/local criteria satisfy @p prec or @p maxIter is hit. + */ + #include "MRCPP/mrcpp_declarations.h" #include "trees/FunctionTreeVector.h" #include namespace mrcpp { -template void project(double prec, FunctionTree &out, RepresentableFunction &inp, int maxIter = -1, bool absPrec = false); -template void project(double prec, FunctionTree &out, std::function &r)> func, int maxIter = -1, bool absPrec = false); -template void project(double prec, FunctionTreeVector &out, std::vector &r)>> func, int maxIter = -1, bool absPrec = false); -} // namespace mrcpp + +/** + * @brief Project a @ref RepresentableFunction onto an output function tree. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type (e.g., `double`, `ComplexDouble`). + * + * @param prec Target tolerance (relative by default, see @p absPrec). + * @param out Destination @ref FunctionTree; refined and filled in-place. + * @param inp Analytic / representable function to project. + * @param maxIter Maximum refinement passes (`-1` = default/unlimited). + * @param absPrec If `true`, interpret @p prec as an absolute tolerance. + * + * @details + * Builds an adaptive multiresolution representation of @p inp in @p out. + * Existing content of @p out may be reused and further refined. + */ +template +void project(double prec, + FunctionTree &out, + RepresentableFunction &inp, + int maxIter = -1, + bool absPrec = false); + +/** + * @brief Project a user-supplied callable onto an output function tree. + * + * @tparam D Spatial dimension. 
+ * @tparam T Coefficient scalar type (e.g., `double`, `ComplexDouble`). + * + * @param prec Target tolerance (relative by default, see @p absPrec). + * @param out Destination @ref FunctionTree; refined and filled in-place. + * @param func Callable (e.g., lambda) mapping @ref Coord to @p T. + * @param maxIter Maximum refinement passes (`-1` = default/unlimited). + * @param absPrec If `true`, interpret @p prec as an absolute tolerance. + * + * @details + * Equivalent to the @ref RepresentableFunction overload, but accepts any + * `std::function&)>` (or compatible lambda) as the source. + */ +template +void project(double prec, + FunctionTree &out, + std::function &r)> func, + int maxIter = -1, + bool absPrec = false); + +/** + * @brief Project multiple callables into a vector of function trees. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient scalar type (e.g., `double`, `ComplexDouble`). + * + * @param prec Target tolerance (relative by default, see @p absPrec). + * @param out Destination @ref FunctionTreeVector; each entry refined / + * filled in-place. It is expected to have the same length as + * @p func (one tree per callable). + * @param func Collection of callables, each mapping @ref Coord to @p T. + * @param maxIter Maximum refinement passes (`-1` = default/unlimited). + * @param absPrec If `true`, interpret @p prec as an absolute tolerance. + * + * @details + * Applies the single-tree callable projection to each element, pairing + * `out[i]` with `func[i]`. All trees should share a compatible + * @ref MultiResolutionAnalysis. 
+ */ +template +void project(double prec, + FunctionTreeVector &out, + std::vector &r)>> func, + int maxIter = -1, + bool absPrec = false); + +} // namespace mrcpp \ No newline at end of file From 61569f6bff53944cb2c263a8ad04a417e5d5397e Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 09:51:02 +0300 Subject: [PATCH 13/51] Update TreeBuilder.h --- src/treebuilders/TreeBuilder.h | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/treebuilders/TreeBuilder.h b/src/treebuilders/TreeBuilder.h index 8f45cf0e9..8e89590fe 100644 --- a/src/treebuilders/TreeBuilder.h +++ b/src/treebuilders/TreeBuilder.h @@ -55,24 +55,24 @@ namespace mrcpp { template class TreeBuilder final { public: - /** - * @brief Adaptive build: iterate (calc → split) up to @p maxIter times. - * - * @param[in,out] tree Target tree to (re)build/refine. - * @param[in,out] calculator Calculator used to fill coefficients/norms on the current grid. - * @param[in,out] adaptor Refinement policy deciding which nodes to split. - * @param[in] maxIter Upper bound on calc/split passes (use a small integer; non-positive means 0 passes). - * - * @details - * The method performs: - * - an initial @ref calc pass, - * - up to `maxIter` refinement passes, each performing: - * - `split(tree, adaptor, /*passCoefs=*/true)` - * - `calc(tree, calculator)` - * - any calculator post-processing hooks. - * - * Implementations typically stop early when `split` returns 0 (no new nodes). - */ +/** + * @brief Adaptive build: iterate (calc → split) up to @p maxIter times. + * + * @param[in,out] tree Target tree to (re)build/refine. + * @param[in,out] calculator Calculator used to fill coefficients/norms on the current grid. + * @param[in,out] adaptor Refinement policy deciding which nodes to split. + * @param[in] maxIter Upper bound on calc/split passes (use a small integer; non-positive means 0 passes). 
+ * + * @details + * The method performs: + * - an initial calc pass, + * - up to @p maxIter refinement passes, each performing: + * - split(tree, adaptor, passCoefs=true) + * - calc(tree, calculator) + * - any calculator post-processing hooks. + * + * Implementations typically stop early when split returns 0 (no new nodes). + */ void build(MWTree &tree, TreeCalculator &calculator, TreeAdaptor &adaptor, From 889d436cd5d1a4a284d806ae03a40aa0131ec568 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 10:09:59 +0300 Subject: [PATCH 14/51] updated .h in treebuilders folder --- src/treebuilders/AdditionCalculator.h | 98 +++++++- src/treebuilders/AnalyticAdaptor.h | 58 ++++- src/treebuilders/add.h | 111 ++++++++- src/treebuilders/apply.h | 345 ++++++++++++++++++++++++-- 4 files changed, 583 insertions(+), 29 deletions(-) diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h index 9223f1ae6..b820810cd 100644 --- a/src/treebuilders/AdditionCalculator.h +++ b/src/treebuilders/AdditionCalculator.h @@ -24,46 +24,130 @@ */ #pragma once +/** + * @file AdditionCalculator.h + * @brief Node-wise accumulator used during adaptive construction to sum + * multiresolution (MW) functions with optional conjugation. + * + * @details + * This header defines #mrcpp::AdditionCalculator, a lightweight + * #mrcpp::TreeCalculator that, for each target node, fetches the + * corresponding node from every input function in a + * #mrcpp::FunctionTreeVector and accumulates a weighted sum of their + * coefficients. No refinement policy is implemented here; pair this + * calculator with a #mrcpp::TreeBuilder and a suitable #mrcpp::TreeAdaptor. + * + * Complex handling: + * - For complex `T`, each term uses either the raw coefficients or their + * complex conjugate according to the XOR of the input tree's own + * `conjugate()` flag and the calculator-wide `conj` flag. 
+ */ + +#include // std::is_same +#include // std::conj #include "TreeCalculator.h" #include "trees/FunctionTreeVector.h" namespace mrcpp { -template class AdditionCalculator final : public TreeCalculator { +/** + * @class AdditionCalculator + * @brief Node-wise accumulator for adaptive sums of multiresolution functions. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (e.g., `double`, `ComplexDouble`). + * + * @details + * For each target node \p node_o (identified by its NodeIndex), this calculator + * gathers the corresponding node from every input function in a + * #mrcpp::FunctionTreeVector and accumulates the weighted coefficients into + * \p node_o: + * + * \f[ + * \mathbf{c}_o \;=\; \sum_i \alpha_i \,\mathbf{c}_i . + * \f] + * + * If \p T is complex, optional conjugation is applied according to the XOR of + * the per-tree conjugation flag and the calculator-wide @ref conj flag; i.e. + * a term uses \f$\overline{\mathbf{c}_i}\f$ iff exactly one of the two flags is set. + * + * This class performs **no grid refinement** or transforms; it only writes + * coefficients, marks presence, and updates node norms. It is intended to be + * used inside the adaptive loop driven by #mrcpp::TreeBuilder together with an + * appropriate adaptor. + */ +template +class AdditionCalculator final : public TreeCalculator { public: + /** + * @brief Construct an addition calculator over a set of input trees. + * + * @param[in] inp Vector of (coefficient, tree) pairs to be summed. + * @param[in] conjugate Global conjugation toggle for complex types. For + * each input tree, the effective conjugation applied + * is `tree.conjugate() XOR conjugate`. + * + * @note All input trees are assumed to share an MRA compatible with the + * output tree provided to the builder. + */ AdditionCalculator(const FunctionTreeVector &inp, bool conjugate = false) : sum_vec(inp) , conj(conjugate) {} private: + /// Vector of weighted input trees to sum. 
FunctionTreeVector sum_vec; + /// Global conjugation toggle for complex accumulation (see ctor docs). bool conj; + /** + * @brief Accumulate coefficients for a single output node. + * + * @param[in,out] node_o Target node whose coefficients are overwritten + * by the weighted sum of matching input nodes. + * + * @details + * Steps: + * 1. Zero \p node_o coefficients. + * 2. For each entry \f$(\alpha_i, f_i)\f$ in @ref sum_vec: + * - Fetch (and create if needed) the input node with the same index as \p node_o. + * - Accumulate \f$\alpha_i \cdot \mathbf{c}_i\f$ (or its conjugate for complex, + * following the XOR rule) into \p node_o. + * 3. Mark coefficients present and update norms. + * + * No transforms are performed here; coefficients are assumed to be in the + * same representation across all trees. + */ void calcNode(MWNode &node_o) override { node_o.zeroCoefs(); const NodeIndex &idx = node_o.getNodeIndex(); T *coefs_o = node_o.getCoefs(); + for (int i = 0; i < this->sum_vec.size(); i++) { T c_i = get_coef(this->sum_vec, i); FunctionTree &func_i = get_func(this->sum_vec, i); - // This generates missing nodes + + // This generates the node if missing in func_i const MWNode &node_i = func_i.getNode(idx); const T *coefs_i = node_i.getCoefs(); int n_coefs = node_i.getNCoefs(); + if constexpr (std::is_same::value) { - if (func_i.conjugate() xor conj) { - for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); } + const bool use_conj = (func_i.conjugate() xor conj); + if (use_conj) { + for (int j = 0; j < n_coefs; j++) coefs_o[j] += c_i * std::conj(coefs_i[j]); } else { - for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; } + for (int j = 0; j < n_coefs; j++) coefs_o[j] += c_i * coefs_i[j]; } } else { - for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; } + for (int j = 0; j < n_coefs; j++) coefs_o[j] += c_i * coefs_i[j]; } } + node_o.setHasCoefs(); node_o.calcNorms(); } }; -} // namespace mrcpp +} // namespace 
mrcpp \ No newline at end of file diff --git a/src/treebuilders/AnalyticAdaptor.h b/src/treebuilders/AnalyticAdaptor.h index 3e9ca0613..a7359a2e9 100644 --- a/src/treebuilders/AnalyticAdaptor.h +++ b/src/treebuilders/AnalyticAdaptor.h @@ -30,15 +30,69 @@ namespace mrcpp { -template class AnalyticAdaptor final : public TreeAdaptor { +/** + * @class AnalyticAdaptor + * @brief Refinement policy that consults an analytic (representable) function. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient/value type (e.g., `double`, `ComplexDouble`). + * + * @details + * This adaptor requests refinement of a node when the provided analytic + * function is **not yet visible** at the node's current scale and is **not + * identically zero** on the node's cell. Concretely: + * + * - If `func->isVisibleAtScale(scale, kp1)` returns **true**, the node is + * considered sufficiently resolved at this scale → **do not split**. + * - Else, if `func->isZeroOnInterval(lb, ub)` returns **true**, the function + * vanishes on the cell → **do not split**. + * - Otherwise, the feature likely requires more resolution → **split**. + * + * The visibility test passes `k+1` (polynomial order plus one, via `getKp1()`) + * as the number of quadrature points hinted to the analytic oracle.
+ * + * ### Requirements on the analytic function + * The `RepresentableFunction` passed in must implement: + * - `bool isVisibleAtScale(int scale, int nQuadPts) const;` + * - `bool isZeroOnInterval(const double* lower, const double* upper) const;` + * + * ### Typical usage + * @code{.cpp} + * AnalyticFunction<3,double> f(...); // implements the required interface + * AnalyticAdaptor<3,double> adapt(f, mra.getMaxScale()); + * TreeBuilder<3,double> builder; + * DefaultCalculator<3,double> calc; + * builder.build(tree, calc, adapt, -1); // maxIter: unbounded + * @endcode + */ +template +class AnalyticAdaptor final : public TreeAdaptor { public: + /** + * @brief Construct an analytic-driven adaptor. + * @param f Analytic (representable) function used as refinement oracle. + * @param ms Maximum allowed scale for splitting (forwarded to TreeAdaptor). + */ AnalyticAdaptor(const RepresentableFunction &f, int ms) : TreeAdaptor(ms) , func(&f) {} private: + /// Pointer to the refinement oracle (not owned). const RepresentableFunction *func; + /** + * @brief Decide whether a node should be split. + * + * @param node Candidate node to test. + * @return `true` if refinement is requested; `false` otherwise. + * + * @details + * Uses the two-step logic described in the class documentation: + * 1) skip split if visible at current scale, + * 2) skip split if identically zero on the node's interval, + * 3) otherwise split. + */ bool splitNode(const MWNode &node) const override { int scale = node.getScale(); int nQuadPts = node.getKp1(); @@ -50,4 +104,4 @@ template class AnalyticAdaptor final : public TreeAdaptor +void add(double prec, + FunctionTree &out, + T a, + FunctionTree &tree_a, + T b, + FunctionTree &tree_b, + int maxIter = -1, + bool absPrec = false, + bool conjugate = false); + +/** + * @brief Adaptive linear combination from a vector of (coefficient, tree) pairs. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type. 
+ * + * @param[in] prec Target build precision (relative by default; see @p absPrec). + * @param[out] out Output tree to construct. + * @param[in] inp Vector of pairs \f$(\alpha_k, f_k)\f$ (type `FunctionTreeVector`). + * @param[in] maxIter Maximum refinement iterations; negative means unbounded. + * @param[in] absPrec If true, interpret @p prec as absolute; else relative. + * @param[in] conjugate If true and @p T is complex, apply complex conjugation + * to all trees except the first one during accumulation. + * + * @details + * Builds + * \f[ + * \text{out} \leftarrow \sum_k \alpha_k\, g_k, + * \f] + * where \f$g_k = \overline{f_k}\f$ if @p conjugate is true (and \f$k>0\f$ in the + * complex case), otherwise \f$g_k = f_k\f$. The grid is refined adaptively + * to satisfy @p prec. + */ +template +void add(double prec, + FunctionTree &out, + FunctionTreeVector &inp, + int maxIter = -1, + bool absPrec = false, + bool conjugate = false); + +/** + * @brief Convenience overload: adaptive sum of a list of trees with unit weights. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type. + * + * @param[in] prec Target build precision (relative by default; see @p absPrec). + * @param[out] out Output tree to construct. + * @param[in] inp List of tree pointers; each term is taken with weight 1. + * @param[in] maxIter Maximum refinement iterations; negative means unbounded. + * @param[in] absPrec If true, interpret @p prec as absolute; else relative. + * @param[in] conjugate If true and @p T is complex, apply complex conjugation + * to all trees except the first during accumulation. + * + * @details + * Equivalent to the `FunctionTreeVector` overload with all coefficients set to 1. 
+ */ template -void add(double prec, FunctionTree &out, T a, FunctionTree &tree_a, T b, FunctionTree &tree_b, int maxIter = -1, bool absPrec = false, bool conjugate = false); -template void add(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false); -template void add(double prec, FunctionTree &out, std::vector *> &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false); +void add(double prec, + FunctionTree &out, + std::vector *> &inp, + int maxIter = -1, + bool absPrec = false, + bool conjugate = false); } // namespace mrcpp diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h index fa5a43661..abdb030f6 100644 --- a/src/treebuilders/apply.h +++ b/src/treebuilders/apply.h @@ -24,6 +24,28 @@ */ #pragma once +/** + * @file apply.h + * @brief Adaptive application of convolution/derivative operators to + * multiresolution (MW) functions and composite (multi-component) functions. + * + * @details + * This header declares a family of routines that: + * - apply **separable convolution operators** (near-/far-field or full) to MW trees, + * - apply **derivative operators** to scalar or vector fields, + * - compute **divergence** of vector fields, and + * - compute **gradients**. + * + * Overloads exist for scalar MW trees (`FunctionTree`) and for composite + * multi-component fields (`CompFunction`). For composite variants a + * 4×4 complex **metric** can be supplied to define the componentwise inner + * product / mixing; by default the identity metric is used. + * + * Precision and adaptivity: + * - `prec` is the target build precision used by the adaptive refinement loop. + * - `absPrec = false` → relative criterion; `true` → absolute threshold. + * - `maxIter < 0` removes the iteration cap. 
+ */ #include "trees/FunctionTreeVector.h" #include "utils/CompFunction.h" @@ -36,24 +58,313 @@ template class FunctionTree; template class DerivativeOperator; template class ConvolutionOperator; +/** + * @brief Default 4×4 complex metric (identity). + * + * @details + * Used by composite-function overloads to define component coupling / inner + * product when applying operators. The default is the identity, i.e., no + * cross-component mixing. + */ constexpr ComplexDouble defaultMetric [4][4] ={{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}; -template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter = -1, bool absPrec = false); -template void apply(double prec, CompFunction &out, ConvolutionOperator &oper, const CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, int maxIter = -1, bool absPrec = false); -template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, FunctionTreeVector &precTrees, int maxIter = -1, bool absPrec = false); -template void apply(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, FunctionTreeVector *precTrees, ComplexDouble (*metric)[4] = nullptr, int maxIter = -1, bool absPrec = false); -template void apply_far_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter = -1, bool absPrec = false); -template void apply_far_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, int maxIter = -1, bool absPrec = false); -template void apply_near_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter = -1, bool absPrec = false); -template void apply_near_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, int maxIter = -1, bool absPrec = false); -template void apply(FunctionTree 
&out, DerivativeOperator &oper, FunctionTree &inp, int dir = -1); -template void apply(CompFunction &out, DerivativeOperator &oper, CompFunction &inp, int dir = -1, const ComplexDouble (*metric)[4] = defaultMetric); -template void divergence(FunctionTree &out, DerivativeOperator &oper, FunctionTreeVector &inp); -template void divergence(CompFunction &out, DerivativeOperator &oper, FunctionTreeVector *inp, const ComplexDouble (*metric)[4] = defaultMetric); -template void divergence(FunctionTree &out, DerivativeOperator &oper, std::vector *> &inp); -template void divergence(CompFunction &out, DerivativeOperator &oper, std::vector *> *inp, const ComplexDouble (*metric)[4] = defaultMetric); -template FunctionTreeVector gradient(DerivativeOperator &oper, FunctionTree &inp); -std::vector*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, const ComplexDouble (*metric)[4] = defaultMetric); +/** + * @name Convolution application (scalar FunctionTree) + * @{ + */ + +/** + * @brief Apply a separable convolution operator adaptively. + * + * @tparam D Spatial dimension (1, 2, or 3). + * @tparam T Coefficient type (e.g., double, ComplexDouble). + * + * @param[in] prec Target precision for the adaptive build. + * @param[out] out Output function tree (built/extended adaptively). + * @param[in] oper Convolution operator to apply. + * @param[in] inp Input function tree. + * @param[in] maxIter Maximum refinement iterations (-1 = unbounded). + * @param[in] absPrec Use absolute (true) or relative (false) precision. + */ +template +void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, + FunctionTree &inp, int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply a convolution operator with **per-node precision modulation**. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] prec Base precision. + * @param[out] out Output function tree. + * @param[in] oper Convolution operator. + * @param[in] inp Input function tree. 
+ * @param[in] precTrees Vector of trees used to modulate local precision + * (e.g., via node-wise scaling factors). + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. + */ +template +void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, + FunctionTree &inp, FunctionTreeVector &precTrees, + int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply only the **far-field** contribution of a convolution operator. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] prec Target precision. + * @param[out] out Output function tree. + * @param[in] oper Convolution operator (far-field path will be used). + * @param[in] inp Input function tree. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. + */ +template +void apply_far_field(double prec, FunctionTree &out, ConvolutionOperator &oper, + FunctionTree &inp, int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply only the **near-field** contribution of a convolution operator. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] prec Target precision. + * @param[out] out Output function tree. + * @param[in] oper Convolution operator (near-field path will be used). + * @param[in] inp Input function tree. + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. + */ +template +void apply_near_field(double prec, FunctionTree &out, ConvolutionOperator &oper, + FunctionTree &inp, int maxIter = -1, bool absPrec = false); + +/** @} */ + +/** + * @name Convolution application (composite CompFunction) + * @{ + */ + +/** + * @brief Apply a convolution operator to a composite function with a metric. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. + * @param[out] out Output composite function. + * @param[in] oper Convolution operator. 
+ * @param[in] inp Input composite function. + * @param[in] metric Optional 4×4 complex metric (defaults to identity). + * @param[in] maxIter Maximum refinement iterations (-1 = unbounded). + * @param[in] absPrec Absolute vs. relative precision. + * + * @note Components can be coupled via @p metric during accumulation. + */ +template +void apply(double prec, CompFunction &out, ConvolutionOperator &oper, + const CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, + int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply a convolution operator to a composite function with + * precision-modulating trees and optional metric. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type used in @p precTrees. + * + * @param[in] prec Base precision. + * @param[out] out Output composite function. + * @param[in] oper Convolution operator. + * @param[in] inp Input composite function. + * @param[in] precTrees Optional per-node precision modulators (may be nullptr). + * @param[in] metric Optional 4×4 complex metric (may be nullptr → identity). + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. + */ +template +void apply(double prec, CompFunction &out, ConvolutionOperator &oper, + CompFunction &inp, FunctionTreeVector *precTrees, + ComplexDouble (*metric)[4] = nullptr, int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply only the **far-field** part to a composite function. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. + * @param[out] out Output composite function. + * @param[in] oper Convolution operator. + * @param[in] inp Input composite function. + * @param[in] metric Optional 4×4 complex metric (defaults to identity). + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. 
+ */ +template +void apply_far_field(double prec, CompFunction &out, ConvolutionOperator &oper, + CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, + int maxIter = -1, bool absPrec = false); + +/** + * @brief Apply only the **near-field** part to a composite function. + * + * @tparam D Spatial dimension. + * + * @param[in] prec Target precision. + * @param[out] out Output composite function. + * @param[in] oper Convolution operator. + * @param[in] inp Input composite function. + * @param[in] metric Optional 4×4 complex metric (defaults to identity). + * @param[in] maxIter Maximum refinement iterations. + * @param[in] absPrec Absolute vs. relative precision. + */ +template +void apply_near_field(double prec, CompFunction &out, ConvolutionOperator &oper, + CompFunction &inp, const ComplexDouble (*metric)[4] = defaultMetric, + int maxIter = -1, bool absPrec = false); + +/** @} */ + +/** + * @name Derivative application + * @{ + */ + +/** + * @brief Apply a derivative operator to a scalar MW function. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[out] out Output tree (derivative result). + * @param[in] oper Derivative operator. + * @param[in] inp Input function. + * @param[in] dir Application direction (0..D-1). If negative, use the + * operator’s internal direction. + */ +template +void apply(FunctionTree &out, DerivativeOperator &oper, + FunctionTree &inp, int dir = -1); + +/** + * @brief Apply a derivative operator to a composite function with a metric. + * + * @tparam D Spatial dimension. + * + * @param[out] out Output composite function. + * @param[in] oper Derivative operator. + * @param[in] inp Input composite function. + * @param[in] dir Application direction (0..D-1). If negative, use operator’s default. + * @param[in] metric Optional 4×4 complex metric (defaults to identity). 
+ */ +template +void apply(CompFunction &out, DerivativeOperator &oper, + CompFunction &inp, int dir = -1, + const ComplexDouble (*metric)[4] = defaultMetric); + +/** @} */ + +/** + * @name Divergence + * @{ + */ + +/** + * @brief Divergence of a vector field given as separate component trees. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[out] out Output scalar field (divergence). + * @param[in] oper Derivative operator (used per direction). + * @param[in] inp Vector of component trees (size D expected). + */ +template +void divergence(FunctionTree &out, DerivativeOperator &oper, + FunctionTreeVector &inp); + +/** + * @brief Divergence of a composite vector field with metric. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type used by the composite. + * + * @param[out] out Output scalar composite function (divergence). + * @param[in] oper Derivative operator (used per direction). + * @param[in] inp Pointer to vector of component composite functions. + * @param[in] metric Optional 4×4 complex metric. + */ +template +void divergence(CompFunction &out, DerivativeOperator &oper, + FunctionTreeVector *inp, + const ComplexDouble (*metric)[4] = defaultMetric); + +/** + * @brief Divergence of a vector field given as a raw list of component pointers. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[out] out Output scalar field (divergence). + * @param[in] oper Derivative operator. + * @param[in] inp Vector of pointers to component trees (size D expected). + */ +template +void divergence(FunctionTree &out, DerivativeOperator &oper, + std::vector *> &inp); + +/** + * @brief Divergence for composite fields given as raw component pointers with metric. + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type used by components. + * + * @param[out] out Output scalar composite function. + * @param[in] oper Derivative operator. 
+ * @param[in] inp Pointer to vector of component tree pointers. + * @param[in] metric Optional 4×4 complex metric. + */ +template +void divergence(CompFunction &out, DerivativeOperator &oper, + std::vector *> *inp, + const ComplexDouble (*metric)[4] = defaultMetric); + +/** @} */ + +/** + * @name Gradient + * @{ + */ + +/** + * @brief Gradient of a scalar field (returns D component trees). + * + * @tparam D Spatial dimension. + * @tparam T Coefficient type. + * + * @param[in] oper Derivative operator (used per direction). + * @param[in] inp Input scalar field. + * @return Vector of D component trees with directional derivatives. + */ +template +FunctionTreeVector gradient(DerivativeOperator &oper, FunctionTree &inp); + +/** + * @brief Gradient (3D) for composite fields, returning heap-allocated components. + * + * @param[in] oper 3D derivative operator. + * @param[in] inp Input composite function. + * @param[in] metric Optional 4×4 complex metric (defaults to identity). + * @return Vector of pointers to newly allocated component composite functions + * representing the gradient. The caller owns and must delete them. 
+ */ +std::vector*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, + const ComplexDouble (*metric)[4] = defaultMetric); // clang-format on -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From aba6c0b4304aeca126b20b05436763c79ceeeca9 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 10:53:01 +0300 Subject: [PATCH 15/51] updated folder operators only doxygen in .h --- src/operators/ABGVOperator.cpp | 73 ++------------ src/operators/BSOperator.cpp | 84 ++-------------- src/operators/CartesianConvolution.cpp | 101 ++------------------ src/operators/ConvolutionOperator.cpp | 114 ++++------------------ src/operators/DerivativeConvolution.cpp | 79 --------------- src/operators/HeatOperator.cpp | 95 ++---------------- src/operators/HelmholtzKernel.cpp | 66 ------------- src/operators/HelmholtzOperator.cpp | 58 +---------- src/operators/IdentityConvolution.cpp | 60 ------------ src/operators/MWOperator.cpp | 122 ++++-------------------- src/operators/OperatorStatistics.cpp | 79 +-------------- src/operators/PHOperator.cpp | 45 +-------- src/operators/PoissonKernel.cpp | 63 ------------ src/operators/PoissonOperator.cpp | 70 +------------- src/operators/TimeEvolutionOperator.cpp | 122 ++---------------------- 15 files changed, 71 insertions(+), 1160 deletions(-) diff --git a/src/operators/ABGVOperator.cpp b/src/operators/ABGVOperator.cpp index 8a65aa1c1..e992376e1 100644 --- a/src/operators/ABGVOperator.cpp +++ b/src/operators/ABGVOperator.cpp @@ -33,97 +33,36 @@ namespace mrcpp { -/** - * # ABGV finite-difference(-like) operator on an MRA - * - * This operator implements a family of first-derivative stencils controlled by two - * boundary parameters \p a and \p b (see below). The operator is **assembled once** - * as an `OperatorTree` in the multiresolution basis of the provided MRA and can then - * be applied repeatedly to vectors/functions defined on the same MRA. 
- * - * ## Boundary parameters - * The pair `(a,b)` selects a particular linear combination of forward/backward bias: - * - * - `a = 0.0`, `b = 0.0` → strictly local “center” difference (bandwidth 0) - * - `a = 0.5`, `b = 0.5` → semi-local **central** difference (bandwidth 1) - * - `a = 1.0`, `b = 0.0` → semi-local **forward** difference (bandwidth 1) - * - `a = 0.0`, `b = 1.0` → semi-local **backward** difference (bandwidth 1) - * - * Any non-zero `a` or `b` increases the operator’s bandwidth to 1 (one-ring coupling - * between neighboring nodes at each scale), which the `BandWidthAdaptor` enforces. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra The multiresolution analysis that defines basis, scales, and domain. - * @param a Left boundary parameter controlling asymmetry at the “minus” side. - * @param b Right boundary parameter controlling asymmetry at the “plus” side. - * - * @note The operator is built at the **root scale** of the provided MRA, and its - * internal representation (raw expansion) is cached for later applications. - */ template ABGVOperator::ABGVOperator(const MultiResolutionAnalysis &mra, double a, double b) : DerivativeOperator(mra, mra.getRootScale()) { initialize(a, b); } -/** - * @brief Internal construction routine: builds a bandwidth-adapted OperatorTree. - * - * Steps (high level): - * 1. **Bandwidth decision** — if either \p a or \p b is non-zero, set bandwidth = 1, - * otherwise 0. This determines how many neighbor interactions the operator will keep. - * 2. **Calculator** — instantiate `ABGVCalculator` with the MRA’s scaling basis and (a,b). - * The calculator knows how to evaluate local operator blocks (stencil entries) in - * the chosen basis. - * 3. **Adaptor** — create a `BandWidthAdaptor(bw, maxScale)` to prune any far-off - * couplings beyond the requested bandwidth across all scales. - * 4. 
**Tree build** — use `TreeBuilder<2>` (matrix builder) to assemble an `OperatorTree` - * from root to finest scale with tolerance `MachineZero` and adaptor-controlled sparsity. - * 5. **Finalize** — trigger norm computation and set up an operator-node cache for fast - * application; then store the finished tree in `raw_exp` and initialize the expansion. - * - * @param a Left boundary parameter. - * @param b Right boundary parameter. - * - * @details - * - `calcSquareNorm()` performs a pass that also ensures the internal transform state is - * consistent (it may trigger lazy transforms). We time this step for diagnostics. - * - `setupOperNodeCache()` precomputes/cache-friendly structures for repeated operator - * application (e.g., fast traversal, block reuse). - * - `initOperExp(1)` finalizes the operator’s internal expansion (single component here). - */ template void ABGVOperator::initialize(double a, double b) { - // --- (1) Decide operator bandwidth from boundary parameters ------------------------- - int bw = 0; // 0 = strictly local, 1 = nearest-neighbor coupling + int bw = 0; if (std::abs(a) > MachineZero) bw = 1; if (std::abs(b) > MachineZero) bw = 1; - // --- (2) Access the operator MRA ---------------------------------------------------- auto oper_mra = this->getOperatorMRA(); - // --- (3) Prepare builder, calculator, and bandwidth adaptor ------------------------- - TreeBuilder<2> builder; // <2> means: building a 2-index object (matrix/operator) + TreeBuilder<2> builder; ABGVCalculator calculator(oper_mra.getScalingBasis(), a, b); BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); - // --- (4) Assemble the operator tree ------------------------------------------------- - // MachineZero: force exact assembly within floating point epsilon (no thresholding). 
auto o_tree = std::make_unique(oper_mra, MachineZero); - builder.build(*o_tree, calculator, adaptor, -1 /* build all scales */); + builder.build(*o_tree, calculator, adaptor, -1); - // --- (5) Finalize and cache --------------------------------------------------------- Timer trans_t; - o_tree->calcSquareNorm(); // also ensures internal transforms are ready - o_tree->setupOperNodeCache(); // allocate and fill fast-access caches + o_tree->calcSquareNorm(); + o_tree->setupOperNodeCache(); print::time(10, "Time transform", trans_t); print::separator(10, ' '); - // Keep the assembled operator as our raw expansion and finalize its use this->raw_exp.push_back(std::move(o_tree)); - this->initOperExp(1); // single-operator expansion component + this->initOperExp(1); } -// Explicit template instantiations for 1D, 2D, and 3D operators. template class ABGVOperator<1>; template class ABGVOperator<2>; template class ABGVOperator<3>; diff --git a/src/operators/BSOperator.cpp b/src/operators/BSOperator.cpp index 59919794a..da736ef7e 100644 --- a/src/operators/BSOperator.cpp +++ b/src/operators/BSOperator.cpp @@ -23,41 +23,6 @@ * */ -/** - * @file BSOperator.cpp - * @brief Assembly of a smooth multiresolution derivative operator (“BS” operator). - * - * ## Purpose - * Build and cache a derivative operator in the multiresolution basis of a given - * #mrcpp::MultiResolutionAnalysis. This operator is intended for **smooth** - * functions; for non-smooth or discontinuous data, prefer #mrcpp::ABGVOperator. - * - * ## What happens here - * 1. The constructor stores the requested derivative **order** (1, 2, or 3) and - * delegates to `initialize()`. - * 2. 
`initialize()`: - * - chooses a (small) **bandwidth** (nearest-neighbor coupling, `bw = 1`), - * - creates a #mrcpp::BSCalculator that provides the local operator blocks - * in the MRA scaling basis for the selected derivative order, - * - wraps assembly with a #mrcpp::BandWidthAdaptor to enforce sparsity - * across all scales, - * - uses #mrcpp::TreeBuilder to assemble an #mrcpp::OperatorTree, - * - finalizes the operator (computes norms, builds per-node caches), - * - registers the resulting tree in the base #mrcpp::DerivativeOperator, - * and initializes the internal operator expansion for fast application. - * - * ## Notes - * - The chosen bandwidth (`bw = 1`) yields a compact stencil (nearest neighbors). - * - `calcSquareNorm()` is invoked once to precompute norms; this can aid later - * conditioning/thresholding steps that use these norms. - * - `setupOperNodeCache()` prepares per-node data needed for efficient - * application of the operator during transforms/apply calls. - * - * ## Performance/usage - * After construction, applying the operator to MR coefficient vectors is cheap - * and can be repeated many times. The build cost is paid once per (MRA, order). - */ - #include "BSOperator.h" #include "treebuilders/BSCalculator.h" #include "treebuilders/BandWidthAdaptor.h" @@ -67,17 +32,6 @@ namespace mrcpp { -/** - * @brief Construct a smooth (“BS”) multiresolution derivative operator. - * - * @param mra Multiresolution analysis that defines the domain, basis, and scales. - * @param order Derivative order (supported: 1, 2, or 3). - * - * The operator is anchored at the MRA’s **root scale** (via the base - * #mrcpp::DerivativeOperator constructor) and immediately assembled by - * calling `initialize()`. The internal representation is stored as an - * #mrcpp::OperatorTree and cached for fast application. 
- */ template BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) : DerivativeOperator(mra, mra.getRootScale()) { @@ -85,53 +39,29 @@ BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) initialize(); } -/** - * @brief Build and cache the “BS” derivative operator. - * - * **Assembly pipeline** - * 1. Select operator bandwidth `bw = 1` (nearest-neighbor coupling). - * 2. Query the operator MRA (`getOperatorMRA()`), which carries the scaling - * basis and max scale. - * 3. Instantiate: - * - #mrcpp::BSCalculator with the scaling basis and the requested derivative - * order (generates local operator blocks), - * - #mrcpp::BandWidthAdaptor with `(bw, maxScale)` to enforce sparsity, - * - #mrcpp::TreeBuilder to assemble the global #mrcpp::OperatorTree. - * 4. Build into a fresh `OperatorTree(oper_mra, MachineZero)`: - * - `MachineZero` is used as a numerical floor for tree entries. - * 5. Finalize: - * - `calcSquareNorm()` precomputes norms (useful for later compression/metrics), - * - `setupOperNodeCache()` creates per-node caches for fast application. - * 6. Store the assembled tree in `raw_exp` (owned by the base class) and call - * `initOperExp(1)` to finalize the expansion with a single raw operator. 
- */ template void BSOperator::initialize() { - int bw = 1; // Operator bandwidth: nearest-neighbor coupling + int bw = 1; auto oper_mra = this->getOperatorMRA(); - TreeBuilder<2> builder; // 2: binary tree arity in 1D blocks + TreeBuilder<2> builder; BSCalculator calculator(oper_mra.getScalingBasis(), this->order); - BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); // enforce sparsity across scales + BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); - // Assemble the operator tree with numerical floor MachineZero auto o_tree = std::make_unique(oper_mra, MachineZero); - builder.build(*o_tree, calculator, adaptor, -1 /* all levels */); + builder.build(*o_tree, calculator, adaptor, -1); - // Finalize and cache per-node data for fast application Timer trans_t; - o_tree->calcSquareNorm(); // precompute norms (once) - o_tree->setupOperNodeCache(); // build caches for fast apply + o_tree->calcSquareNorm(); + o_tree->setupOperNodeCache(); print::time(10, "Time transform", trans_t); print::separator(10, ' '); - // Register this raw operator with the base class and initialize expansion this->raw_exp.push_back(std::move(o_tree)); this->initOperExp(1); } -// Explicit instantiations template class BSOperator<1>; template class BSOperator<2>; template class BSOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/CartesianConvolution.cpp b/src/operators/CartesianConvolution.cpp index 6c60c7926..d33067a53 100644 --- a/src/operators/CartesianConvolution.cpp +++ b/src/operators/CartesianConvolution.cpp @@ -47,72 +47,6 @@ namespace mrcpp { -/** - * @class CartesianConvolution - * @brief 3D separable convolution operator assembled from a 1D Gaussian expansion. - * - * ### What this class builds - * We construct a rank-`R` separable operator (with `R = kernel.size()`) that can - * later be combined into Cartesian components (x, y, z). 
Internally we build - * **three batches** of 1D operator trees from the same 1D Gaussian expansion, - * corresponding to polynomial prefactors of degree 0, 1, and 2 multiplying the - * Gaussian (i.e., monomials \f$x^0, x^1, x^2\f$ on the line). These three - * batches are stored back-to-back in `raw_exp` and can be mapped onto the - * (x, y, z) axes in any order using #setCartesianComponents. - * - * This pattern is useful for assembling vector kernels whose Cartesian - * components differ only by the polynomial factor in each axis (e.g., kernels - * proportional to \f$(1,\,x,\,x^2)\f$ times a radial Gaussian, or for forming - * gradients / moments where different axes pick different polynomial orders). - * - * ### Precision knobs - * - `prec` is the user‐requested overall build precision for the operator. - * - We derive two internal precisions: - * - `o_prec = prec` for operator assembly; - * - `k_prec = prec / 10` for fitting/projection of the 1D kernel, slightly - * tighter so that the overall composition meets the requested tolerance. - * - * ### Memory layout of the built batches - * After construction, `raw_exp` contains `3 * R` operator trees in this order: - * ``` - * block 0: monomial power {0} for all R terms (indices 0 ... R-1) - * block 1: monomial power {1} for all R terms (indices R ... 2R-1) - * block 2: monomial power {2} for all R terms (indices 2R ... 3R-1) - * ``` - * The method #setCartesianComponents selects one of these three blocks per axis. - */ - -/** - * @brief Construct a Cartesian convolution operator on an MRA with a 1D Gaussian expansion. - * - * @param[in] mra 3D multiresolution analysis defining basis/domain/scales. - * @param[in,out] kernel 1D Gaussian expansion \f$ \sum_{r=1}^R g_r(x) \f$ used to - * generate the separable operator factors. Its length - * determines the separation rank \f$R\f$. 
- * **Note:** This function temporarily modifies the - * monomial power of each Gaussian term and restores it - * across the three assembly passes. - * @param[in] prec Target build precision for the operator. - * - * @details - * **Assembly recipe (done three times):** - * 1. For every term in the input 1D Gaussian expansion, set its monomial - * power to `{0}`, then call `initialize(...)` to build and append one - * operator tree per term (rank-`R` block). - * 2. Repeat with monomial power `{1}` to build the second block (indices - * `R ... 2R-1`). - * 3. Repeat with monomial power `{2}` to build the third block (indices - * `2R ... 3R-1`). - * - * After these three passes, we call `initOperExp(R)` to declare that downstream - * separable composition will have rank \f$R\f$ (each axis picks one block). - * - * **Why powers {0,1,2}?** - * Many Cartesian tensor kernels (e.g., derivatives, moments, or vector fields) - * differ by low-order polynomial prefactors along each coordinate. Prebuilding - * the families \f$\{0,1,2\}\f$ provides flexible combinations via - * #setCartesianComponents without having to rebuild for each axis. - */ CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra, GaussExp<1> &kernel, double prec) @@ -122,55 +56,32 @@ CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra // Configure precision: operator vs. 
kernel fit this->setBuildPrec(prec); - auto o_prec = prec; // Operator assembly precision - auto k_prec = prec / 10.0; // Kernel fitting precision (tighter on purpose) + auto o_prec = prec; // Operator assembly precision + auto k_prec = prec / 10.0; // Kernel fitting precision - // --- Batch 0: monomial power {0} (constant prefactor) --- + // Batch 0: monomial power {0} for (auto &k : kernel) k->setPow({0}); this->initialize(kernel, k_prec, o_prec); - // --- Batch 1: monomial power {1} (linear prefactor) --- + // Batch 1: monomial power {1} for (auto &k : kernel) k->setPow({1}); this->initialize(kernel, k_prec, o_prec); - // --- Batch 2: monomial power {2} (quadratic prefactor) --- + // Batch 2: monomial power {2} for (auto &k : kernel) k->setPow({2}); this->initialize(kernel, k_prec, o_prec); - // Tell the separable framework we will later combine per-axis using rank = sep_rank + // Declare separable rank this->initOperExp(this->sep_rank); Printer::setPrintLevel(oldlevel); } -/** - * @brief Choose which prebuilt monomial block (0,1,2) to use for each Cartesian axis. - * - * @param[in] x Block index used for the x-axis (0 → power{0}, 1 → power{1}, 2 → power{2}) - * @param[in] y Block index used for the y-axis (same convention as above) - * @param[in] z Block index used for the z-axis (same convention as above) - * - * @details - * This function **does not** rebuild; it only wires the already constructed - * 1D operator trees into the separable 3D operator slots. For separation rank - * \f$R\f$, each block occupies a contiguous range of \f$R\f$ entries: - * - * - Block `x`: indices `[x*R, x*R + R - 1]` become the x-factors; - * - Block `y`: indices `[y*R, y*R + R - 1]` become the y-factors; - * - Block `z`: indices `[z*R, z*R + R - 1]` become the z-factors. - * - * You may reuse the same block on multiple axes if the physics warrants it - * (e.g., isotropic components), or select different ones to form vector/tensor - * kernels with distinct Cartesian prefactors. 
- * - * @warning Valid block indices are 0, 1, or 2. No bounds checking is performed here. - */ void CartesianConvolution::setCartesianComponents(int x, int y, int z) { int x_shift = x * this->sep_rank; int y_shift = y * this->sep_rank; int z_shift = z * this->sep_rank; - // Fill the separable operator slots (rank index i, axis 0/1/2) with the chosen blocks. for (int i = 0; i < this->sep_rank; i++) this->assign(i, 0, this->raw_exp[x_shift + i].get()); for (int i = 0; i < this->sep_rank; i++) this->assign(i, 1, this->raw_exp[y_shift + i].get()); for (int i = 0; i < this->sep_rank; i++) this->assign(i, 2, this->raw_exp[z_shift + i].get()); diff --git a/src/operators/ConvolutionOperator.cpp b/src/operators/ConvolutionOperator.cpp index e51521fef..494fbb20a 100644 --- a/src/operators/ConvolutionOperator.cpp +++ b/src/operators/ConvolutionOperator.cpp @@ -47,26 +47,6 @@ namespace mrcpp { -/** - * @brief Construct a separable D-dimensional convolution operator from a 1D Gaussian expansion. - * - * The input kernel is a 1D Gaussian expansion (sum of Gauss terms). The implementation - * projects each 1D Gaussian to a 1D function tree and then uses cross-correlations to - * lift it into a 2D operator block; the full D-dimensional operator is assembled as a - * separable product of these 1D blocks. The final separable rank equals kernel.size(). - * - * @tparam D Spatial dimension of the target operator. - * @param mra Multiresolution analysis defining the D-dimensional domain/basis. - * @param kernel 1D Gaussian expansion whose terms become the separable factors. - * @param prec Target build precision for the operator (used for both kernel - * projection and operator assembly with a small safety split). - * - * @details - * Internally we choose `k_prec = prec / 10` (stricter) for fitting each 1D kernel term, - * and `o_prec = prec` for assembling/operatorization, to keep the composed error within - * the requested tolerance. 
After all factors are built, `initOperExp(kernel.size())` - * finalizes the separable structure. - */ template ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, @@ -75,29 +55,14 @@ ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mr int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - auto o_prec = prec; // precision for operator assembly (2D blocks, transforms) - auto k_prec = prec / 10.0; // stricter precision for 1D kernel projection + auto o_prec = prec; + auto k_prec = prec / 10.0; initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); // separable rank = number of kernel terms + this->initOperExp(kernel.size()); Printer::setPrintLevel(oldlevel); } -/** - * @brief Construct a convolution operator with explicit root scale and reach. - * - * This variant allows overriding the default operator root scale and reach (stencil - * half-width in levels). The rest of the pipeline is identical to the other ctor: - * build 1D kernel function trees, lift to 2D operator blocks by cross-correlation, - * transform/collapse, then finalize the separable expansion. - * - * @param mra D-dimensional MRA. - * @param kernel 1D Gaussian expansion (rank = kernel.size()). - * @param prec Target build precision; we use `k_prec = prec / 100` here to be extra - * conservative when the reach is user-controlled. - * @param root Operator root scale. - * @param reach Operator reach (levels outward from root). Negative = auto from box. 
- */ template ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, @@ -109,67 +74,39 @@ ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mr this->setBuildPrec(prec); auto o_prec = prec; - auto k_prec = prec / 100.0; // even tighter kernel fit when reach is custom + auto k_prec = prec / 100.0; initialize(kernel, k_prec, o_prec); this->initOperExp(kernel.size()); Printer::setPrintLevel(oldlevel); } -/** - * @brief Core build routine: from 1D Gaussian terms → 1D function trees → 2D operator blocks. - * - * Steps per Gaussian term: - * 1) **Rescaling for D-dimensional separability**: adjust the coefficient so that the product - * of D identical 1D factors yields the original 1D amplitude in D-D composition. - * Concretely: `coef ← sign(coef) * |coef|^{1/D}`. - * 2) **Projection to a 1D function tree** (@ref FunctionTree): build an empty grid sized for - * narrow Gaussians (build_grid), then project the analytic Gaussian into the tree with - * requested kernel precision `k_prec` (project). - * 3) **Lifting to a 2D operator**: create a @ref CrossCorrelationCalculator from the 1D tree, - * then use @ref TreeBuilder to expand a 2D operator tree through cross-correlations - * (effectively computing the correlation between basis functions along one axis). - * 4) **Wavelet transform & caching**: bottom-up transform, compute norms, and set up node cache. - * - * The produced 2D blocks are stored into `raw_exp`. Higher-level code composes D-D separable - * operators from these blocks (e.g., via @ref MWOperator’s machinery). - * - * @param kernel 1D Gaussian expansion (input rank). - * @param k_prec Precision for kernel projection to 1D trees. - * @param o_prec Precision for operator building / assembly. 
- */ template void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) { - // Build the auxiliary 1D MRA for the kernel and fetch the D-D operator MRA auto k_mra = this->getKernelMRA(); auto o_mra = this->getOperatorMRA(); - TreeBuilder<2> builder; // builds 2D operator trees from calculators - OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); // controls assembly precision / scale cap + TreeBuilder<2> builder; + OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); for (int i = 0; i < kernel.size(); i++) { - // --- (1) Adjust coefficient for separable D-fold composition --- auto *k_func = kernel.getFunc(i).copy(); - // Raise absolute coefficient to 1/D and reapply sign to preserve signed kernels k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), k_func->getCoef())); - // --- (2) Project analytic Gaussian to a 1D function tree --- FunctionTree<1> k_tree(k_mra); - mrcpp::build_grid(k_tree, *k_func); // Prepare an empty grid (fine where Gaussian is narrow) - mrcpp::project(k_prec, k_tree, *k_func); // Fit the Gaussian into the 1D multiresolution basis - delete k_func; // No longer needed; k_tree holds the discretization + mrcpp::build_grid(k_tree, *k_func); + mrcpp::project(k_prec, k_tree, *k_func); + delete k_func; - // --- (3) Lift to a 2D operator via cross-correlation --- CrossCorrelationCalculator calculator(k_tree); auto o_tree = std::make_unique(o_mra, o_prec); - builder.build(*o_tree, calculator, adaptor, -1); // Dense 2D operator block in MW format + builder.build(*o_tree, calculator, adaptor, -1); - // --- (4) Transform, normalize, and cache for application --- Timer trans_t; - o_tree->mwTransform(BottomUp); // move to MW (scaling+wavelet) representation efficiently - o_tree->calcSquareNorm(); // useful for diagnostics / thresholding - o_tree->setupOperNodeCache(); // enable fast repeated applications + o_tree->mwTransform(BottomUp); + o_tree->calcSquareNorm(); + o_tree->setupOperNodeCache(); 
print::time(10, "Time transform", trans_t); print::separator(10, ' '); @@ -177,27 +114,13 @@ void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, doub } } -/** - * @brief Build a 1D MRA used to discretize the kernel factors. - * - * The kernel MRA mirrors the scaling family used by the D-D operator MRA: - * - If the operator uses an interpolating basis of order s, the kernel basis is - * chosen as InterpolatingBasis with order `2*s + 1`. - * - If Legendre, we similarly pick a LegendreBasis of order `2*s + 1`. - * - * The box extent (reach) is derived from the D-D world box unless an explicit - * operator reach was set. The same uniform scaling factor is used. - * - * @return A standalone 1D @ref MultiResolutionAnalysis matching the operator’s scaling family. - */ template MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); - // Choose a kernel basis compatible with the operator basis. int type = basis.getScalingType(); - int kern_order = 2 * basis.getScalingOrder() + 1; // (2s+1) ensures adequate quadrature/correlation support + int kern_order = 2 * basis.getScalingOrder() + 1; ScalingBasis *kern_basis = nullptr; if (type == Interpol) { @@ -208,20 +131,16 @@ MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { MSG_ABORT("Invalid scaling type"); } - // Kernel root = operator root; reach defaults to the maximum box extent if negative. int root = this->oper_root; - int reach = this->oper_reach + 1; // +1 because the 1D kernel must cover neighbors used by correlations + int reach = this->oper_reach + 1; if (reach < 0) { for (int i = 0; i < D; i++) { if (box.size(i) > reach) reach = box.size(i); } } - // Build a 1D bounding box centered at zero: - // levels from -reach to +reach (total 2*reach) at the operator root scale. 
auto start_l = std::array{-reach}; auto tot_l = std::array{2 * reach}; - // Uniform scaling factor (operators are implemented for uniform scales only) auto sf = std::array{box.getScalingFactor(0)}; BoundingBox<1> kern_box(root, start_l, tot_l, sf); @@ -230,9 +149,8 @@ MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { return kern_mra; } -// Explicit template instantiations for the supported dimensionalities. template class ConvolutionOperator<1>; template class ConvolutionOperator<2>; template class ConvolutionOperator<3>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/DerivativeConvolution.cpp b/src/operators/DerivativeConvolution.cpp index a89f580e0..3f0fa2003 100644 --- a/src/operators/DerivativeConvolution.cpp +++ b/src/operators/DerivativeConvolution.cpp @@ -29,106 +29,28 @@ namespace mrcpp { -/** - * @class DerivativeConvolution - * @brief Separable convolution operator that approximates a (first) derivative - * using a differentiated Gaussian kernel. - * - * @tparam D Spatial dimension of the target operator. - * - * @details - * This implementation is a thin wrapper around @ref ConvolutionOperator that: - * - **Chooses the kernel**: a *single* derivative-of-Gaussian (DoG) term whose - * width is set by a requested build precision. - * - **Projects the 1D kernel** to a function tree at a tight tolerance. - * - **Lifts** the 1D kernel into a D-dimensional operator via cross-correlation - * (separable assembly) and prepares it for application in the multiwavelet basis. - * - * The resulting operator is bandwidth-limited and numerically stable, offering a - * controlled approximation to a spatial derivative. It is mainly for validation - * and experimentation; for production derivatives consider @ref ABGVOperator - * (cusps/discontinuities) or @ref BSOperator (smooth functions). - */ - -/** - * @brief Construct a derivative-convolution operator on the default root/reach. 
- * - * @param mra D-dimensional @ref MultiResolutionAnalysis defining basis/domain. - * @param prec Target build precision that controls kernel width and assembly thresholds. - * - * @details - * Steps performed here: - * 1. **Silence verbose output** during operator build by temporarily lowering the - * global print level (restored upon exit). - * 2. **Record build precision** via @c setBuildPrec(prec). This precision is later - * available from @ref ConvolutionOperator::getBuildPrec. - * 3. **Split tolerances** into: - * - @c k_prec = prec/10.0 for *kernel projection* (tighter; DoG is narrow), - * - @c o_prec = prec for *operator assembly* (adequate once kernel is accurate). - * 4. **Create the kernel**: a single-term @ref DerivativeKernel parametrized by - * @c k_prec, which internally chooses the Gaussian exponent consistent with the - * requested accuracy. - * 5. **Assemble the operator** by calling @ref ConvolutionOperator::initialize, - * which projects the kernel to a 1D function tree, lifts it to operator trees - * via cross-correlation, transforms to the MW domain, and caches nodes. - * - * The operator rank equals the number of 1D kernel terms; for this kernel it is 1. - */ template DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec) : ConvolutionOperator(mra) { - // Keep the build quiet; restore the previous level at the end of scope. int oldlevel = Printer::setPrintLevel(0); - // Store build precision on the base class for later diagnostics/inspection. this->setBuildPrec(prec); - - // Operator-assembly tolerance: used while expanding/lifting the kernel to operator trees. double o_prec = prec; - - // Kernel-projection tolerance: tighter than operator assembly to resolve a narrow DoG. double k_prec = prec / 10.0; - // A single differentiated Gaussian tuned by k_prec. DerivativeKernel kernel(k_prec); - - // Build the separable operator blocks from the 1D kernel. 
this->initialize(kernel, k_prec, o_prec); - // Restore previous print level. Printer::setPrintLevel(oldlevel); } -/** - * @brief Construct a derivative-convolution operator with explicit root and reach. - * - * @param mra D-dimensional @ref MultiResolutionAnalysis. - * @param prec Target build precision that controls kernel width and thresholds. - * @param root Operator root level (coarsest active scale). - * @param reach Operator reach (half-width in levels). Negative => auto-detected. - * - * @details - * This overload is identical in spirit to the simpler constructor, but allows - * **explicit control of the active scale window**: - * - Use when benchmarking, debugging, or composing multiple operators whose - * supports must be constrained not to overlap. - * - * Implementation notes mirror the first ctor with one change: - * - The kernel projection is made *even tighter*: @c k_prec = prec/100.0, - * which helps when the operator is restricted to a narrower scale window - * (ensuring the DoG is still faithfully represented). - */ template DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - - // Operator-assembly tolerance (same rationale as the other ctor). double o_prec = prec; - - // Very tight kernel-projection tolerance for explicit windowing. 
double k_prec = prec / 100.0; DerivativeKernel kernel(k_prec); @@ -137,7 +59,6 @@ DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis Printer::setPrintLevel(oldlevel); } -/* Explicit template instantiations */ template class DerivativeConvolution<1>; template class DerivativeConvolution<2>; template class DerivativeConvolution<3>; diff --git a/src/operators/HeatOperator.cpp b/src/operators/HeatOperator.cpp index 5d31391ba..25742bb81 100644 --- a/src/operators/HeatOperator.cpp +++ b/src/operators/HeatOperator.cpp @@ -23,127 +23,44 @@ * */ -/** - * @file HeatOperator.cpp - * @brief Implementation of a separable convolution operator that realizes the - * \(D\)-dimensional heat semigroup via a single-term Gaussian kernel. - * - * @details - * The continuous heat propagator at time \(t>0\) is the convolution with - * \f[ - * K_t(\mathbf x) - * = - * \frac{1}{(4\pi t)^{D/2}} - * \exp\!\left(-\frac{\lVert\mathbf x\rVert^2}{4t}\right). - * \f] - * In MRCPP, separable operators are assembled from 1D Gaussian building blocks. - * This implementation: - * - constructs a @ref HeatKernel whose single 1D Gaussian has exponent - * \f$\beta = 1/(4t)\f$ and coefficient \f$\alpha = (\beta/\pi)^{D/2}\f$, - * - projects that kernel to a 1D function tree, - * - lifts it to an operator tree by cross-correlation, - * - transforms/caches the operator in the multiwavelet domain, - * - and exposes it as a @ref ConvolutionOperator acting in \(D\) dimensions. - * - * Two constructors are provided: a default one (using the operator's default - * root/reach) and one tailored for periodic boundary conditions (PBC) with an - * explicit scale window @p root/@p reach. - */ - #include "HeatOperator.h" #include "HeatKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Build a heat propagator \(e^{t\Delta}\) as a separable convolution. - * - * @tparam D Spatial dimension of the operator (1, 2, or 3). 
- * - * @param[in] mra D-dimensional @ref MultiResolutionAnalysis that defines both - * the computational domain and the scaling basis. - * @param[in] t Diffusion time; determines the kernel width - * (\f$\beta = 1/(4t)\f$). Must be strictly positive. - * @param[in] prec Target build precision for assembling the operator. - * - * @details - * Steps performed: - * 1. The requested build precision is recorded via @c setBuildPrec(prec). - * 2. Two tolerances are chosen: - * - @c k_prec = @p prec / 10 for the 1D kernel projection (tighter), - * - @c o_prec = @p prec for the operator assembly. - * 3. A @ref HeatKernel is instantiated with exponent \f$1/(4t)\f$ and - * amplitude chosen to match \f$(4\pi t)^{-D/2}\f$ upon separable assembly. - * 4. @ref ConvolutionOperator::initialize is called to: - * - project the kernel to a 1D function tree, - * - lift it to an operator tree via cross-correlation, - * - transform and cache the operator in the MW domain. - * 5. @ref initOperExp is called to finalize the separable expansion (rank 1). - * - * @note Smaller @p t \(\Rightarrow\) narrower Gaussian \(\Rightarrow\) more demanding - * resolution (consider tightening @p prec and/or extending operator reach). - * @warning Passing non-positive @p t yields a meaningless kernel; callers must - * ensure @p t > 0. - */ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec) : ConvolutionOperator(mra) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; // Operator-assembly tolerance - double k_prec = prec / 10.0; // Kernel-projection tolerance (tighter) + double o_prec = prec; + double k_prec = prec / 10.0; HeatKernel kernel(t); this->initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); // single-term expansion (rank = 1) + this->initOperExp(kernel.size()); Printer::setPrintLevel(oldlevel); } -/** - * @brief Build a heat propagator with an explicit operator scale window (PBC use). 
- * - * @tparam D Spatial dimension of the operator (1, 2, or 3). - * - * @param[in] mra D-dimensional @ref MultiResolutionAnalysis. - * @param[in] t Diffusion time (\f$t>0\f$). - * @param[in] prec Target build precision. - * @param[in] root Root (coarsest) scale the operator is attached to. - * @param[in] reach Bandwidth at the root scale (useful for PBC/domain tiling). - * - * @details - * This overload mirrors the default constructor but confines the operator to a - * specific scale window, which is particularly useful for periodic boundary - * conditions and domain-decomposition setups. - * - * Implementation differences vs. the default constructor: - * - The base @ref ConvolutionOperator is constructed with (@p root, @p reach). - * - @c k_prec is chosen even tighter ( @p prec / 100.0 ) to robustly capture - * the narrow Gaussian under potentially coarser scale constraints. - * - * @note The @p reach parameter controls the operator bandwidth measured in - * levels at @p root; see @ref MWOperator for details on scale windows - * and bandwidth semantics. 
- */ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; // Operator-assembly tolerance - double k_prec = prec / 100.0; // Very tight kernel-projection tolerance + double o_prec = prec; + double k_prec = prec / 100.0; HeatKernel kernel(t); this->initialize(kernel, k_prec, o_prec); - this->initOperExp(kernel.size()); // single-term expansion (rank = 1) + this->initOperExp(kernel.size()); Printer::setPrintLevel(oldlevel); } -/* Explicit template instantiations */ template class HeatOperator<1>; template class HeatOperator<2>; template class HeatOperator<3>; diff --git a/src/operators/HelmholtzKernel.cpp b/src/operators/HelmholtzKernel.cpp index a75a42970..d36e97720 100644 --- a/src/operators/HelmholtzKernel.cpp +++ b/src/operators/HelmholtzKernel.cpp @@ -23,30 +23,6 @@ * */ -/** - * @file HelmholtzKernel.cpp - * @brief Gaussian expansion of the 3D screened Coulomb / Helmholtz kernel. - * - * @details - * This file implements a separable Gaussian approximation to the radial 3D - * Helmholtz kernel on a finite interval \f$[r_\text{min}, r_\text{max}]\f$. - * The expansion has the form - * \f[ - * K_\mu(r) \;\approx\; \sum_{m=1}^{M} \beta_m\, e^{-\alpha_m r^2}, - * \f] - * where \f$M\f$ (the separation rank) and the parameters \f$\{\alpha_m,\beta_m\}\f$ - * are chosen by truncating and sampling an integral representation with a uniform - * trapezoidal rule in a logarithmic variable \f$s\f$. The resulting coefficients - * depend on: - * - the screening parameter \f$\mu > 0\f$, - * - a target relative accuracy \f$\varepsilon\f$, - * - a radial domain \f$[r_\text{min}, r_\text{max}]\f$. 
- * - * Internally, the interval is rescaled to \f$[r_\text{min}/r_\text{max}, 1]\f$ - * to keep the step-size heuristics well-conditioned; the generated Gaussian - * parameters are then rescaled back to the original units. - */ - #include "HelmholtzKernel.h" #include @@ -56,77 +32,35 @@ namespace mrcpp { -/** - * @class HelmholtzKernel - * @brief Builds a 1D Gaussian expansion that approximates the 3D Helmholtz kernel. - * - * @details - * The constructor discretizes an auxiliary integral over a log-scaled variable - * \f$s\fin[s_1,s_2]\f$ using a uniform step \f$h\f$ derived from the requested - * tolerance \f$\varepsilon\f$. For each quadrature node it produces a single - * Gaussian term with exponent \f$\alpha_m\f$ and weight \f$\beta_m\f$. Endpoints - * receive the trapezoidal half-weights. - * - * Rescaling: - * - Define \f$r_0 = r_\text{min}/r_\text{max}\f$ and \f$r_1 = r_\text{max}\f$. - * - Work on \f$[r_0,1]\f$, then map back by multiplying - * \f$\alpha \leftarrow \alpha / r_1^2\f$ and \f$\beta \leftarrow \beta / r_1\f$. - * - * Rank control: - * - The number of exponentials is \f$M = \lceil (s_2 - s_1)/h \rceil + 1\f$. - * - If \f$M > \texttt{MaxSepRank}\f$ the constructor aborts, signaling that the - * requested accuracy on the given domain would require too large a rank. - * - * @param mu Screening parameter \f$\mu > 0\f$. - * @param epsilon Target relative accuracy \f$\varepsilon \in (0,1)\f$. - * @param r_min Minimal radius of the approximation interval (strictly positive). - * @param r_max Maximal radius of the approximation interval (\f$r_\text{max} > r_\text{min}\f$). - * - * @note - * This routine assumes the standard MRCPP constants \c pi and \c root_pi are available - * in the \c mrcpp namespace and that \c MaxSepRank bounds the admissible separation rank. 
- */ HelmholtzKernel::HelmholtzKernel(double mu, double epsilon, double r_min, double r_max) : GaussExp<1>() { - // Rescale the interval to [r0, 1] and precompute scaled mu const double r0 = r_min / r_max; const double r1 = r_max; const double mu_tilde = mu * r1; - // Truncation window [s1, s2] giving ~epsilon relative error - // The heuristic t = max(-2.5 ln eps, 5) balances tails for practical eps const long double t = std::max((-2.5L * std::log(epsilon)), 5.0L); const double s1 = -std::log(4.0L * t / (mu_tilde * mu_tilde)) / 2.0L; const double s2 = std::log(t / (r0 * r0)) / 2.0L; - // Trapezoidal step size h from an empirical fit versus log10(epsilon) const double h = 1.0 / (0.20L - 0.47L * std::log10(epsilon)); const int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1.0); if (n_exp > MaxSepRank) MSG_ABORT("Maximum separation rank exceeded."); - // Uniform trapezoidal quadrature in s; endpoints get half-weight. for (int i = 0; i < n_exp; ++i) { const double s = s1 + h * i; - // Intermediate quantities (written explicitly for clarity) - // temp = -2 s - // temp2 = - (mu_tilde^2) e^{-2 s} / 4 + s - // beta ~ h * 2/sqrt(pi) * exp(temp2) const double temp = -2.0 * s; const double temp2 = - (mu_tilde * mu_tilde) * std::exp(temp) / 4.0 + s; double beta = h * (2.0 / root_pi) * std::exp(temp2); double alpha = std::exp(2.0L * s); - // Rescale back to the original radial units alpha *= 1.0 / (r1 * r1); beta *= 1.0 / r1; - // Trapezoidal half-weights at the endpoints if (i == 0 || i == (n_exp - 1)) beta *= 0.5; - // Append the 1D Gaussian term exp(-alpha r^2) with prefactor beta GaussFunc<1> gFunc(alpha, beta); this->append(gFunc); } diff --git a/src/operators/HelmholtzOperator.cpp b/src/operators/HelmholtzOperator.cpp index bb8b83c66..d50752d2d 100644 --- a/src/operators/HelmholtzOperator.cpp +++ b/src/operators/HelmholtzOperator.cpp @@ -23,43 +23,13 @@ * */ -/** - * @file HelmholtzOperator.cpp - * @brief Definition of a separable 3D convolution operator approximating 
the Helmholtz/Yukawa kernel. - * - * @details - * This file implements @ref mrcpp::HelmholtzOperator, a convenience convolution operator - * in three spatial dimensions that applies a Gaussian expansion of the radial kernel - * \f$ e^{-\mu r}/r \f$. The expansion is built by @ref mrcpp::HelmholtzKernel and - * lifted into a separable multiwavelet operator, which can then be applied along the - * Cartesian directions. - */ - #include "HelmholtzOperator.h" #include "HelmholtzKernel.h" #include "utils/Printer.h" +#include namespace mrcpp { -/** - * @brief Construct a 3D Helmholtz (Yukawa) convolution operator. - * - * @param mra The 3D @ref MultiResolutionAnalysis that defines the domain and basis. - * @param mu Screening parameter \f$\mu>0\f$ of the Yukawa kernel. - * @param prec Build precision; controls kernel- and operator-assembly tolerances and, - * indirectly, the separation rank of the Gaussian expansion. - * - * @details - * - Chooses separate tolerances for the kernel projection (@c k_prec = prec/10) and - * the operator assembly (@c o_prec = prec). - * - Estimates the admissible radial interval \f$[r_{\min}, r_{\max}]\f$ from @p mra via - * @ref MultiResolutionAnalysis::calcMinDistance and @ref MultiResolutionAnalysis::calcMaxDistance. - * - Builds a @ref HelmholtzKernel on that interval with the requested accuracy, then - * calls @ref ConvolutionOperator::initialize to form the separable operator blocks - * and caches them via @ref MWOperator::initOperExp. - * - * @note The printer level is temporarily reduced during build to keep output concise. - */ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double mu, double prec) : ConvolutionOperator<3>(mra) { int oldlevel = Printer::setPrintLevel(0); @@ -77,31 +47,6 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub Printer::setPrintLevel(oldlevel); } -/** - * @brief Construct a 3D Helmholtz convolution operator with explicit root and reach. 
- * - * @param mra The 3D @ref MultiResolutionAnalysis. - * @param mu Screening parameter \f$\mu>0\f$. - * @param prec Build precision (as above). - * @param root Operator root scale (coarsest level for the operator support). - * @param reach Operator reach (half-width in levels). For periodic domains this - * sets the operator bandwidth at @p root. - * - * @details - * - Uses a tighter kernel-projection tolerance (@c k_prec = prec/100) while keeping - * the operator-assembly tolerance at @c o_prec = prec. - * - Estimates \f$[r_{\min}, r_{\max}]\f$ as in the other constructor, then adjusts - * @c r_max to reflect periodic worlds by scaling with the relative root shift and - * the chosen @p reach: - * \f[ - * r_{\max} \leftarrow r_{\max}\, 2^{-(\text{oper\_root} - \text{MRA.root})} - * \times \big( 2\,\text{reach} + 1 \big). - * \f] - * - Builds the @ref HelmholtzKernel and initializes the separable operator. - * - * @note This overload is intended for periodic boundary conditions or scenarios - * where the operator must be confined to a specific scale window. 
- */ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double mu, double prec, int root, int reach) : ConvolutionOperator<3>(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); @@ -112,7 +57,6 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); - // Adjust r_max for periodic world auto rel_root = this->oper_root - this->MRA.getRootScale(); r_max *= std::pow(2.0, -rel_root); r_max *= (2.0 * this->oper_reach) + 1.0; diff --git a/src/operators/IdentityConvolution.cpp b/src/operators/IdentityConvolution.cpp index 7ebf89e47..ede580ef9 100644 --- a/src/operators/IdentityConvolution.cpp +++ b/src/operators/IdentityConvolution.cpp @@ -23,49 +23,12 @@ * */ -/** - * @file IdentityConvolution.cpp - * @brief Implementation of a separable convolution operator that approximates the identity - * via a narrow Gaussian kernel (delta-approximation). - * - * @details - * This translation unit defines the templated constructors of - * @ref mrcpp::IdentityConvolution, a convenience @ref ConvolutionOperator that uses a - * single-term Gaussian kernel to approximate the Dirac delta distribution: - * \f[ - * \delta(x) \;\approx\; \alpha\,e^{-\beta x^2}. - * \f] - * The associated D-dimensional operator is assembled separably (tensor-product form) - * following MRCPP’s multiwavelet machinery. The build precision controls the kernel - * narrowness and the tolerances used during projection/assembly. - */ - #include "IdentityConvolution.h" #include "IdentityKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Construct an identity-like convolution operator on the default scale window. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra D-dimensional @ref MultiResolutionAnalysis that defines the domain and basis. - * @param prec Target build precision controlling the closeness to the delta function. 
- * - * @details - * Internally the constructor: - * - Stores @p prec as the build precision. - * - Uses split tolerances: - * - @c k_prec = prec/10.0 for accurate projection of the narrow Gaussian kernel. - * - @c o_prec = prec for operator assembly. - * - Builds a single-term @ref IdentityKernel at @c k_prec and calls - * @ref ConvolutionOperator::initialize to lift it into separable operator blocks. - * - Finalizes with @ref MWOperator::initOperExp for bookkeeping/caching. - * - * A tighter @p prec yields a narrower Gaussian (better delta approximation) but - * increases the required resolution and operator bandwidth in practice. - */ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec) : ConvolutionOperator(mra) { @@ -82,29 +45,6 @@ IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } -/** - * @brief Construct an identity-like convolution operator with explicit root and reach (PBC-ready). - * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra D-dimensional @ref MultiResolutionAnalysis. - * @param prec Target build precision controlling the closeness to the delta function. - * @param root Operator root level (coarsest scale at which the operator is defined). - * @param reach Operator half-bandwidth at @p root (relevant for periodic boundary conditions). - * - * @details - * This overload confines the operator to a specific scale window, which is useful for - * periodic boundary conditions or when coupling multiple operators with controlled support. - * Compared to the default constructor, the kernel projection tolerance is chosen even - * tighter (@c k_prec = prec/100.0) to ensure faithful representation on restricted scale - * ranges; operator assembly uses @c o_prec = prec. - * - * Steps: - * 1. Record @p prec via @ref ConvolutionOperator::setBuildPrec. - * 2. Create a single-term @ref IdentityKernel at @c k_prec. - * 3. 
Initialize separable operator blocks (@ref ConvolutionOperator::initialize) - * within the user-specified scale window (@p root, @p reach). - * 4. Call @ref MWOperator::initOperExp. - */ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { diff --git a/src/operators/MWOperator.cpp b/src/operators/MWOperator.cpp index f8699e1e8..21ce43540 100644 --- a/src/operators/MWOperator.cpp +++ b/src/operators/MWOperator.cpp @@ -23,25 +23,6 @@ * */ -/** - * @file MWOperator.cpp - * @brief Common utilities for multiwavelet (MW) operators: term assignment, - * component access, bandwidth analysis, and operator-domain MRA setup. - * - * @details - * The templated @ref mrcpp::MWOperator provides infrastructure shared by concrete - * MW operators: - * - organizing a separated operator expansion into per-dimension @ref OperatorTree - * components, - * - computing effective bandwidths across scales, and - * - constructing the 2D operator-domain @ref MultiResolutionAnalysis used by - * operator trees (for D-dimensional function spaces). - * - * The functions implemented here are thin, performance-oriented utilities that - * avoid modifying operator semantics. They are used by higher-level operators - * such as convolution- and derivative-based classes. - */ - #include "MWOperator.h" #include "trees/BandWidth.h" #include "utils/Printer.h" @@ -51,22 +32,8 @@ using namespace Eigen; namespace mrcpp { -/** - * @brief Initialize the separated operator expansion with @p M terms. - * - * @tparam D Spatial dimension of the target space. - * @param M Number of separated terms to activate from @c raw_exp. - * - * @details - * Allocates an @c oper_exp array of size @p M, each entry holding @c D pointers - * to @ref OperatorTree components (one per Cartesian direction). - * By default, an *isotropic* operator is formed by assigning the first @p M raw - * terms to **all** directions. 
- * - * @pre @c raw_exp has at least @p M terms; otherwise the function aborts. - * @note You can override individual components later via @ref assign(). - */ -template void MWOperator::initOperExp(int M) { +template +void MWOperator::initOperExp(int M) { if (this->raw_exp.size() < M) MSG_ABORT("Incompatible raw expansion"); this->oper_exp.clear(); for (int m = 0; m < M; m++) { @@ -74,55 +41,28 @@ template void MWOperator::initOperExp(int M) { otrees.fill(nullptr); this->oper_exp.push_back(otrees); } - - // Sets up an isotropic operator with the first M raw terms in all directions for (int i = 0; i < M; i++) for (int d = 0; d < D; d++) assign(i, d, this->raw_exp[i].get()); } -/** - * @brief Mutable access to a specific separated component. - * - * @tparam D Spatial dimension. - * @param i Term index in the separated expansion. - * @param d Cartesian direction index (0..D-1). - * @return Reference to the requested @ref OperatorTree. - * - * @throws If indices are out of bounds or the component is null. - */ -template OperatorTree &MWOperator::getComponent(int i, int d) { +template +OperatorTree &MWOperator::getComponent(int i, int d) { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds"); if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component"); return *this->oper_exp[i][d]; } -/** - * @brief Const access to a specific separated component. - * - * @tparam D Spatial dimension. - * @param i Term index in the separated expansion. - * @param d Cartesian direction index (0..D-1). - * @return Const reference to the requested @ref OperatorTree. - * - * @throws If indices are out of bounds or the component is null. 
- */ -template const OperatorTree &MWOperator::getComponent(int i, int d) const { +template +const OperatorTree &MWOperator::getComponent(int i, int d) const { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds"); if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component"); return *this->oper_exp[i][d]; } -/** - * @brief Get the maximum effective bandwidth at a given depth. - * - * @tparam D Spatial dimension. - * @param depth Tree depth (scale) at which to query the bandwidth. If negative, - * the maximum over all depths is returned. - * @return Non-negative maximum bandwidth, or -1 if @p depth is invalid. - */ -template int MWOperator::getMaxBandWidth(int depth) const { +template +int MWOperator::getMaxBandWidth(int depth) const { int maxWidth = -1; if (depth < 0) { maxWidth = *std::max_element(this->band_max.begin(), this->band_max.end()); @@ -132,30 +72,15 @@ template int MWOperator::getMaxBandWidth(int depth) const { return maxWidth; } -/** - * @brief Clear cached @ref BandWidth information in all operator components. - * - * @tparam D Spatial dimension. - */ -template void MWOperator::clearBandWidths() { +template +void MWOperator::clearBandWidths() { for (auto &i : this->oper_exp) for (int d = 0; d < D; d++) i[d]->clearBandWidth(); } -/** - * @brief Compute effective bandwidths at all scales for all components. - * - * @tparam D Spatial dimension. - * @param prec Numerical precision used to estimate bandwidths. - * - * @details - * For each @ref OperatorTree component, this calls @ref OperatorTree::calcBandWidth - * and records the @em maximum effective width across components for every depth. - * Results are stored in @c band_max and summarized to the log at print level 20. 
- */ -template void MWOperator::calcBandWidths(double prec) { +template +void MWOperator::calcBandWidths(double prec) { int maxDepth = 0; - // First compute BandWidths and find depth of the deepest component for (auto &i : this->oper_exp) { for (int d = 0; d < D; d++) { OperatorTree &oTree = *i[d]; @@ -167,13 +92,12 @@ template void MWOperator::calcBandWidths(double prec) { } this->band_max = std::vector(maxDepth + 1, -1); - // Find the largest effective bandwidth at each scale for (auto &i : this->oper_exp) { for (int d = 0; d < D; d++) { const OperatorTree &oTree = *i[d]; const BandWidth &bw = oTree.getBandWidth(); - for (int n = 0; n <= bw.getDepth(); n++) { // scale loop - for (int j = 0; j < 4; j++) { // component loop + for (int n = 0; n <= bw.getDepth(); n++) { + for (int j = 0; j < 4; j++) { int w = bw.getWidth(n, j); if (w > this->band_max[n]) this->band_max[n] = w; } @@ -185,19 +109,8 @@ template void MWOperator::calcBandWidths(double prec) { println(20, std::endl); } -/** - * @brief Build the 2D operator-domain MRA used by operator trees. - * - * @tparam D Spatial dimension of the *function* domain. - * @return A @ref MultiResolutionAnalysis<2> describing the operator lattice. - * - * @details - * Operator trees live on a 2D lattice (row/column), even when acting on - * D-dimensional function spaces. The lattice extents are determined from the - * operator's root level and reach, and it reuses the function-space scaling - * basis (uniform scaling is assumed). 
- */ -template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { +template +MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); @@ -209,8 +122,6 @@ template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() cons } auto l = std::array{}; auto nbox = std::array{reach, reach}; - // Zero in argument since operators are only implemented - // for uniform scaling factor auto sf = std::array{box.getScalingFactor(0), box.getScalingFactor(0)}; BoundingBox<2> oper_box(this->oper_root, l, nbox, sf); @@ -218,7 +129,6 @@ template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() cons return oper_mra; } -/* Explicit template instantiations */ template class MWOperator<1>; template class MWOperator<2>; template class MWOperator<3>; diff --git a/src/operators/OperatorStatistics.cpp b/src/operators/OperatorStatistics.cpp index 66aef71fe..f6261a50b 100644 --- a/src/operators/OperatorStatistics.cpp +++ b/src/operators/OperatorStatistics.cpp @@ -23,24 +23,6 @@ * */ -/** - * @file OperatorStatistics.cpp - * @brief Implementation of lightweight counters and summaries used during - * multiwavelet operator application. - * - * @details - * This module aggregates per-thread counters while applying operators - * to multiwavelet nodes. It records: - * - Number of *g*-nodes (source nodes) computed. - * - Number of *f*-nodes (destination nodes) where an operator was applied. - * - Number of *generalized* destination nodes (as reported by MWNode::isGenNode()). - * - A small 8×8 histogram of applications by component pair (ft, gt), - * where `ft` and `gt` are component bitfields. - * - * Thread-local storage is used to avoid contention in hot loops; use - * flushNodeCounters() to accumulate into totals and reset local counters. 
- */ - #include "OperatorStatistics.h" #include "trees/MWNode.h" @@ -48,17 +30,6 @@ using namespace Eigen; namespace mrcpp { -/** - * @brief Construct an empty statistics object with per-thread accumulators. - * - * @details - * Allocates: - * - @c totCompCount: global 8×8 histogram (zero-initialized). - * - Per-thread scalar counters (@c fCount, @c gCount, @c genCount). - * - Per-thread 8×8 component histograms (@c compCount[i]). - * - * The number of threads is discovered via mrcpp_get_max_threads(). - */ OperatorStatistics::OperatorStatistics() : nThreads(mrcpp_get_max_threads()) , totFCount(0) @@ -86,9 +57,6 @@ OperatorStatistics::OperatorStatistics() } } -/** - * @brief Destroy statistics and free all dynamically allocated arrays. - */ OperatorStatistics::~OperatorStatistics() { for (int i = 0; i < this->nThreads; i++) { delete this->compCount[i]; } delete[] this->compCount; @@ -98,16 +66,6 @@ OperatorStatistics::~OperatorStatistics() { delete totCompCount; } -/** - * @brief Accumulate all per-thread counters into totals and reset locals. - * - * @details - * After this call: - * - @c totFCount, @c totGCount, and @c totGenCount are increased by the - * sums over all threads. - * - @c totCompCount is incremented by each thread-local 8×8 histogram. - * - All per-thread counters/histograms are reset to zero. - */ void OperatorStatistics::flushNodeCounters() { for (int i = 0; i < this->nThreads; i++) { this->totFCount += this->fCount[i]; @@ -121,36 +79,12 @@ void OperatorStatistics::flushNodeCounters() { } } -/** - * @brief Increment the *g*-node usage counter for the current thread. - * - * @tparam D Spatial dimension of the node. - * @tparam T Coefficient type. - * @param gNode Source node being processed (unused for counting). - * - * @note The thread index is obtained via mrcpp_get_thread_num(). 
- */ template void OperatorStatistics::incrementGNodeCounters(const MWNode &gNode) { int thread = mrcpp_get_thread_num(); this->gCount[thread]++; } -/** - * @brief Increment the *f*-node application counters for the current thread. - * - * @tparam D Spatial dimension of the node. - * @tparam T Coefficient type. - * @param fNode Destination node to which an operator is applied. - * @param ft Destination component bitfield. - * @param gt Source component bitfield. - * - * @details - * Increments: - * - Per-thread @c fCount. - * - Per-thread component histogram at entry (ft, gt). - * - Per-thread @c genCount if @c fNode.isGenNode() is true. - */ template void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int ft, int gt) { int thread = mrcpp_get_thread_num(); @@ -159,16 +93,6 @@ void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int f if (fNode.isGenNode()) { this->genCount[thread]++; } } -/** - * @brief Print a human-readable summary of accumulated totals. - * - * @param o Output stream. - * @return Reference to @p o to allow chaining. - * - * @details - * The output includes total counts for g-nodes, f-nodes, generalized nodes, - * and the aggregated 8×8 (ft, gt) component histogram. 
- */ std::ostream &OperatorStatistics::print(std::ostream &o) const { o << std::setw(8); o << "*OperatorFunc statistics: " << std::endl << std::endl; @@ -179,7 +103,6 @@ std::ostream &OperatorStatistics::print(std::ostream &o) const { return o; } -/* ---- Explicit template instantiations for supported node types ---- */ template void OperatorStatistics::incrementFNodeCounters<1, double>(const MWNode<1, double> &fNode, int ft, int gt); template void OperatorStatistics::incrementFNodeCounters<2, double>(const MWNode<2, double> &fNode, int ft, int gt); template void OperatorStatistics::incrementFNodeCounters<3, double>(const MWNode<3, double> &fNode, int ft, int gt); @@ -193,4 +116,4 @@ template void OperatorStatistics::incrementGNodeCounters<1, ComplexDouble>(const template void OperatorStatistics::incrementGNodeCounters<2, ComplexDouble>(const MWNode<2, ComplexDouble> &gNode); template void OperatorStatistics::incrementGNodeCounters<3, ComplexDouble>(const MWNode<3, ComplexDouble> &gNode); -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/operators/PHOperator.cpp b/src/operators/PHOperator.cpp index 54632e15a..7c17b22ef 100644 --- a/src/operators/PHOperator.cpp +++ b/src/operators/PHOperator.cpp @@ -23,24 +23,6 @@ * */ -/** - * @file PHOperator.cpp - * @brief Implementation of a derivative operator assembled via PHCalculator. - * - * @details - * This module builds a single-component multiwavelet operator that approximates - * a spatial derivative of order 1 or 2. Construction proceeds by: - * 1) creating a @ref PHCalculator tailored to the current scaling basis and - * requested derivative order, - * 2) expanding to an @ref OperatorTree with a @ref TreeBuilder and a simple - * @ref BandWidthAdaptor (bandwidth = 1), - * 3) transforming/caching the operator for efficient application. - * - * The class derives from @ref DerivativeOperator and uses the MRA’s root scale - * by default. 
The operator is stored as a single separable component and exposed - * through the common @ref MWOperator interface. - */ - #include "PHOperator.h" #include "treebuilders/BandWidthAdaptor.h" #include "treebuilders/PHCalculator.h" @@ -50,18 +32,6 @@ namespace mrcpp { -/** - * @brief Construct a PH-based derivative operator. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra MultiResolutionAnalysis defining the domain and basis. - * @param order Derivative order (supported: 1 or 2). - * - * @details - * Initializes the base @ref DerivativeOperator at the MRA root scale and - * triggers internal assembly via @ref initialize(). The resulting expansion - * contains a single operator block (rank-1 in the separable sense). - */ template PHOperator::PHOperator(const MultiResolutionAnalysis &mra, int order) : DerivativeOperator(mra, mra.getRootScale(), -10) { @@ -69,18 +39,6 @@ PHOperator::PHOperator(const MultiResolutionAnalysis &mra, int order) initialize(); } -/** - * @brief Assemble the operator tree for the requested derivative order. - * - * @details - * - Creates a @ref PHCalculator using the MRA’s scaling basis and the stored - * derivative order. - * - Uses a @ref BandWidthAdaptor with bandwidth 1 and the MRA’s maximum scale. - * - Builds an @ref OperatorTree with @ref TreeBuilder, computes its squared - * norm, and prepares the node cache for application. - * - Stores the built tree as a single raw term and initializes the operator - * expansion with @ref initOperExp(1). 
- */ template void PHOperator::initialize() { auto o_mra = this->getOperatorMRA(); @@ -89,7 +47,7 @@ template void PHOperator::initialize() { auto &basis = this->MRA.getScalingBasis(); PHCalculator calculator(basis, this->order); - int bw = 1; // Operator bandwidth + int bw = 1; int max_scale = this->MRA.getMaxScale(); BandWidthAdaptor adaptor(bw, max_scale); @@ -106,7 +64,6 @@ template void PHOperator::initialize() { this->initOperExp(1); } -/* Explicit template instantiations */ template class PHOperator<1>; template class PHOperator<2>; template class PHOperator<3>; diff --git a/src/operators/PoissonKernel.cpp b/src/operators/PoissonKernel.cpp index f21461aa9..42a1b5457 100644 --- a/src/operators/PoissonKernel.cpp +++ b/src/operators/PoissonKernel.cpp @@ -23,50 +23,6 @@ * */ -/** - * @file PoissonKernel.cpp - * @brief Builds a Gaussian expansion approximation of the 3D Poisson kernel. - * - * @details - * This implementation constructs a separated approximation to the radial - * Poisson kernel - * \f[ - * \frac{1}{\lvert \mathbf r \rvert} - * \f] - * on a finite annulus \f$ r \in [r_{\min},\, r_{\max}] \f$ by means of a - * finite sum of Gaussians - * \f[ - * \frac{1}{r} \;\approx\; \sum_{m=1}^{M} \beta_m \, e^{-\alpha_m r^2}, - * \f] - * where the coefficients \f$ \{\alpha_m,\beta_m\} \f$ are obtained by - * truncating and discretizing (via the trapezoidal rule) a suitable integral - * representation of \f$ 1/r \f$ in logarithmic variables. The truncation - * bounds \f$[s_1, s_2]\f$ and the step \f$h\f$ are chosen to meet a requested - * relative accuracy \c epsilon on the normalized interval \f$[r_{\min}/r_{\max},\,1]\f$, - * after which the expansion is rescaled back to \f$[r_{\min},\,r_{\max}]\f$. - * - * ### Inputs - * - \c epsilon: Target relative error for the expansion (heuristic, affects - * the truncation window and step size). - * - \c r_min, \c r_max: Inner/outer radii that define the interval of validity. - * - * ### Algorithm sketch - * 1. 
Normalize the domain to \f$[r_0, 1]\f$ with \f$r_0 = r_{\min}/r_{\max}\f$ and set - * \f$r_1 = r_{\max}\f$ for subsequent rescaling. - * 2. Determine auxiliary parameters \f$t_1, t_2\f$ such that the tails of the - * integral representation are below \c epsilon. - * 3. Convert tails to truncation limits \f$s_1, s_2\f$ in logarithmic coordinates. - * 4. Choose trapezoidal step size \f$h\f$ as a function of \c epsilon and compute - * the number of terms \f$M\f$. - * 5. Form nodes \f$s_i = s_1 + i h\f$ and corresponding Gaussian parameters - * \f$\alpha_i, \beta_i\f$ (with endpoint halving for the trapezoid rule). - * 6. Rescale \f$\alpha_i, \beta_i\f$ from the normalized interval back to - * \f$[r_{\min}, r_{\max}]\f$ and append each term to the @ref GaussExp. - * - * The resulting expansion length is capped by \c MaxSepRank; exceeding this - * limit aborts construction. - */ - #include "PoissonKernel.h" #include @@ -76,25 +32,6 @@ namespace mrcpp { -/** - * @brief Construct a Gaussian expansion of the 3D Poisson kernel on \f$[r_{\min}, r_{\max}]\f$. - * - * @param epsilon Target relative accuracy for the expansion (heuristic). - * @param r_min Minimum radius of the interval of validity (\f$>0\f$). - * @param r_max Maximum radius of the interval of validity (\f$> r_{\min}\f$). - * - * @details - * The method chooses truncation limits \f$s_1, s_2\f$ and a step size \f$h\f$ - * for a trapezoidal discretization so that the contribution of neglected tails - * is below \c epsilon in the normalized variable. Each quadrature node yields - * one Gaussian term. Endpoint weights are halved, as per the trapezoidal rule. - * - * The final expansion is rescaled to the physical interval by the mappings - * \f$ \alpha \leftarrow \alpha / r_{\max}^2 \f$ and \f$ \beta \leftarrow \beta / r_{\max} \f$, - * ensuring that the approximation targets the original (unscaled) radius. - * - * @note If the number of terms exceeds @c MaxSepRank, construction aborts. 
- */ PoissonKernel::PoissonKernel(double epsilon, double r_min, double r_max) : GaussExp<1>() { // Constructed on [rMin/rMax, 1.0], then rescaled to [rMin, rMax] diff --git a/src/operators/PoissonOperator.cpp b/src/operators/PoissonOperator.cpp index b12885d11..ccff16c5b 100644 --- a/src/operators/PoissonOperator.cpp +++ b/src/operators/PoissonOperator.cpp @@ -23,60 +23,19 @@ * */ -/** - * @file PoissonOperator.cpp - * @brief Definition of a separable convolution operator that approximates the 3D Poisson kernel. - * - * The operator is assembled from a one–dimensional Gaussian expansion of \f$1/r\f$ - * (see @ref PoissonKernel). Each 1D term is projected to a function tree and lifted - * to a 2D operator block by cross-correlation; the full 3D operator is built as a - * separable product and cached for efficient application. - */ - #include "PoissonOperator.h" #include "PoissonKernel.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @class PoissonOperator - * @brief Convolution operator approximating the 3D Poisson kernel \f$1/\lvert \mathbf r\rvert\f$. - * - * @details - * The kernel is approximated on a bounded radial interval by a finite Gaussian expansion - * \f[ - * \frac{1}{r} \approx \sum_{m=1}^{M} \beta_m\, e^{-\alpha_m r^2}, - * \f] - * which enables a separated representation amenable to fast multiwavelet application - * along Cartesian axes. Construction proceeds by: - * 1) choosing a target build precision to set the effective kernel width, - * 2) computing a validity interval \f$[r_{\min}, r_{\max}]\f$ from the MRA, - * 3) generating the Gaussian terms via @ref PoissonKernel, and - * 4) projecting and lifting each term into operator blocks before caching. - */ - -/** - * @brief Build a Poisson operator on the default root/reach of the provided MRA. - * - * @param mra Three–dimensional @ref MultiResolutionAnalysis defining domain and basis. - * @param prec Target build precision (heuristic closeness to \f$1/r\f$); smaller ⇒ tighter kernel. 
- * - * @details - * - Uses @c k_prec = prec/10 for kernel projection and @c o_prec = prec for operator assembly. - * - The radial interval is inferred from @p mra: - * - \f$ r_{\min} = \text{MRA.calcMinDistance}(k\_prec) \f$ (resolution-limited), - * - \f$ r_{\max} = \text{MRA.calcMaxDistance}() \f$ (domain-limited). - * - Constructs a @ref PoissonKernel on \f$[r_{\min}, r_{\max}]\f$, initializes internal - * operator trees, and prepares caches for application. - */ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec) : ConvolutionOperator<3>(mra) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; // operator-assembly tolerance - double k_prec = prec / 10.0; // kernel-projection tolerance + double o_prec = prec; + double k_prec = prec / 10.0; double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); @@ -87,37 +46,16 @@ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double p Printer::setPrintLevel(oldlevel); } -/** - * @brief Build a Poisson operator with explicit scale window (root/reach), e.g. for PBC-style setups. - * - * @param mra Three–dimensional @ref MultiResolutionAnalysis. - * @param prec Target build precision (heuristic closeness to \f$1/r\f$). - * @param root Operator root level (coarsest scale where the operator lives). - * @param reach Operator reach in levels (half-width around @p root); negative ⇒ auto-detect. - * - * @details - * - Uses a tighter kernel projection tolerance @c k_prec = prec/100 and @c o_prec = prec - * for assembling the operator blocks. - * - The base radial extent is obtained from the MRA; then \f$ r_{\max} \f$ is rescaled - * to reflect the selected operator scale window (periodic-world style adjustment): - * \f[ - * r_{\max} \leftarrow r_{\max} \, 2^{-(\text{oper\_root} - \text{MRA.getRootScale()})} - * \, \bigl( 2\,\text{oper\_reach} + 1 \bigr). 
- * \f] - * - Constructs and initializes the Gaussian expansion accordingly and prepares - * the operator components and caches. - */ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec, int root, int reach) : ConvolutionOperator<3>(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); - double o_prec = prec; // operator-assembly tolerance - double k_prec = prec / 100.0; // very tight kernel-projection tolerance + double o_prec = prec; + double k_prec = prec / 100.0; double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); - // Adjust r_max to the chosen operator scale window (e.g., periodic-world bandwidth) auto rel_root = this->oper_root - this->MRA.getRootScale(); r_max *= std::pow(2.0, -rel_root); r_max *= (2.0 * this->oper_reach) + 1.0; diff --git a/src/operators/TimeEvolutionOperator.cpp b/src/operators/TimeEvolutionOperator.cpp index dd14badb3..cda02e812 100644 --- a/src/operators/TimeEvolutionOperator.cpp +++ b/src/operators/TimeEvolutionOperator.cpp @@ -23,29 +23,7 @@ * */ -/** - * @file TimeEvolutionOperator.cpp - * @brief Construction of (real/imaginary) parts of the Schrödinger time-evolution - * operator in the multiwavelet framework. - * - * The implementation builds a separable, multi-resolution representation of the - * free-particle time-evolution semigroup - * \f[ - * U(t) = e^{\, i t \Delta} - * \f] - * (or its real/imaginary part), using cross-correlation calculators and - * precomputed power integrals \f$ \widetilde J_m \f$ (see @ref JpowerIntegrals). - * Two build modes are provided: - * - **Adaptive** down to a fixed scale \f$N=18\f$, bounding the number of - * power integrals. - * - **Uniform** down to a user-specified finest scale. - * - * Assembly follows the standard operator pipeline: - * projection/lifting → multiwavelet transform → cache/init of operator blocks. 
- */ - #include "TimeEvolutionOperator.h" -//#include "MRCPP/MWOperators" #include "core/InterpolatingBasis.h" #include "core/LegendreBasis.h" @@ -77,23 +55,6 @@ namespace mrcpp { -/** - * @brief Uniform constructor. - * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra Target @ref MultiResolutionAnalysis defining domain and basis. - * @param prec Build precision for assembly and pruning. - * @param time Time parameter \f$ t \f$ of the semigroup. - * @param finest_scale Uniform build depth (finest level) of the operator tree. - * @param imaginary If `true`, build the imaginary part; otherwise, the real part. - * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ to retain. - * - * @details - * Builds a **uniform** operator down to @p finest_scale. Internally sets up a - * @ref SchrodingerEvolution_CrossCorrelation calculator and calls - * the uniform @ref initialize(double,int,bool,int) overload. The operator - * expansion is finalized via @ref initOperExp(1). - */ template TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, @@ -101,73 +62,38 @@ TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis int finest_scale, bool imaginary, int max_Jpower) - : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well -{ + : ConvolutionOperator(mra, mra.getRootScale(), -10) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType()); this->cross_correlation = &cross_correlation; - initialize(time, finest_scale, imaginary, max_Jpower); // will go outside of the constructor in future + initialize(time, finest_scale, imaginary, max_Jpower); - this->initOperExp(1); // Important to finalize component mapping + this->initOperExp(1); Printer::setPrintLevel(oldlevel); } -/** - * @brief Adaptive constructor. 
- * - * @tparam D Spatial dimension (1, 2, or 3). - * @param mra Target @ref MultiResolutionAnalysis. - * @param prec Build precision. - * @param time Time parameter \f$ t \f$ of the semigroup. - * @param imaginary If `true`, build the imaginary part; otherwise, the real part. - * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ to retain. - * - * @details - * Builds an **adaptive** operator down to a fixed scale \f$N=18\f$, which keeps the number - * of necessary power integrals bounded. The assembly uses a - * @ref TimeEvolution_CrossCorrelationCalculator fed by a per-scale - * map of @ref JpowerIntegrals. - */ template TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, bool imaginary, int max_Jpower) - : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well -{ + : ConvolutionOperator(mra, mra.getRootScale(), -10) { int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType()); this->cross_correlation = &cross_correlation; - initialize(time, imaginary, max_Jpower); // will go outside of the constructor in future + initialize(time, imaginary, max_Jpower); - this->initOperExp(1); // Important to finalize component mapping + this->initOperExp(1); Printer::setPrintLevel(oldlevel); } -/** - * @brief Adaptive build: create real or imaginary part of the operator. - * - * @param time Time parameter \f$ t \f$. - * @param imaginary If `true`, build the imaginary part; otherwise, the real part. - * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. - * - * @details - * Builds **adaptively** down to scale \f$ N = 18 \f$. For each scale - * \f$ n=0,\dots,N+1 \f$ a corresponding @ref JpowerIntegrals object is created - * with parameter \f$ a = t\,4^n \f$. 
The operator is assembled using a - * @ref TimeEvolution_CrossCorrelationCalculator and finalized by multiwavelet - * transform, rough-scale noise removal, square-norm evaluation, and cache setup. - * - * @note The fixed depth ensures a bounded number of power integrals while building. - * Future work aims to compute only the power integrals actually needed during build. - */ template void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_Jpower) { int N = 18; @@ -185,11 +111,9 @@ void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_J mrcpp::TreeBuilder<2> builder; builder.build(*o_tree, calculator, adaptor, N); - // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(BottomUp); o_tree->removeRoughScaleNoise(); - // o_tree->clearSquareNorm(); // does not affect printing o_tree->calcSquareNorm(); o_tree->setupOperNodeCache(); @@ -201,27 +125,11 @@ void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_J for (int n = 0; n <= N + 1; n++) delete J[n]; } -/** - * @brief Uniform build: create real or imaginary part of the operator. - * - * @param time Time parameter \f$ t \f$. - * @param finest_scale Finest (uniform) scale to which the operator tree is constructed. - * @param imaginary If `true`, build the imaginary part; otherwise, the real part. - * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. - * - * @details - * Builds **uniformly** down to @p finest_scale using a @ref SplitAdaptor. - * A threshold of \f$ \text{prec}/1000 \f$ is used while creating - * @ref JpowerIntegrals for scales \f$ n=0,\dots,N+1 \f$ with - * \f$ a = t\,4^n \f$. The resulting @ref CornerOperatorTree is then transformed, - * squared-normed, and cached for later application. 
- */ template void TimeEvolutionOperator::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) { double o_prec = this->build_prec; auto o_mra = this->getOperatorMRA(); - // Setup uniform tree builder TreeBuilder<2> builder; SplitAdaptor<2> uniform(o_mra.getMaxScale(), true); @@ -232,9 +140,8 @@ void TimeEvolutionOperator::initialize(double time, int finest_scale, bool im TimeEvolution_CrossCorrelationCalculator calculator(J, this->cross_correlation, imaginary); auto o_tree = std::make_unique(o_mra, o_prec); - builder.build(*o_tree, calculator, uniform, N); // Expand 1D kernel into 2D operator + builder.build(*o_tree, calculator, uniform, N); - // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(BottomUp); o_tree->calcSquareNorm(); @@ -247,19 +154,6 @@ void TimeEvolutionOperator::initialize(double time, int finest_scale, bool im for (int n = 0; n <= N + 1; n++) delete J[n]; } -/** - * @brief Semi-uniform build (prototype; not ready for production). - * - * @param time Time parameter \f$ t \f$. - * @param imaginary If `true`, build the imaginary part; otherwise, the real part. - * @param max_Jpower Maximum number of power-integral terms \f$ \widetilde J_m \f$ per scale. - * - * @details - * Starts with a small uniform prefix of the operator tree and continues adaptively - * down to \f$ N = 18 \f$. **Not implemented**—kept as a placeholder for future work. - * - * @warning This method deliberately aborts at runtime. 
- */ template void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) { MSG_ERROR("Not implemented yet method."); @@ -284,7 +178,6 @@ void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imagina OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); builder.build(*o_tree, calculator, adaptor, 13); - // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(mrcpp::BottomUp); o_tree->removeRoughScaleNoise(); @@ -298,7 +191,6 @@ void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imagina for (int n = 0; n <= N + 1; n++) delete J[n]; } -/* Explicit template instantiations */ template class TimeEvolutionOperator<1>; template class TimeEvolutionOperator<2>; template class TimeEvolutionOperator<3>; From 70b8232031d09606d9bcfc656c6ececadf53e7be Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 11:37:24 +0300 Subject: [PATCH 16/51] udated folder function doxygen documentation only in .h and cleaned .cpp files --- src/functions/BoysFunction.cpp | 78 +------- src/functions/GaussExp.cpp | 234 +----------------------- src/functions/GaussFunc.cpp | 148 --------------- src/functions/GaussPoly.cpp | 148 +-------------- src/functions/Gaussian.cpp | 167 +++-------------- src/functions/JpowerIntegrals.cpp | 116 +----------- src/functions/LegendrePoly.cpp | 125 +------------ src/functions/Polynomial.cpp | 175 +----------------- src/functions/RepresentableFunction.cpp | 88 +-------- src/functions/function_utils.cpp | 176 ++---------------- src/functions/special_functions.cpp | 66 +------ 11 files changed, 70 insertions(+), 1451 deletions(-) diff --git a/src/functions/BoysFunction.cpp b/src/functions/BoysFunction.cpp index a23e59d1c..4aca11a71 100644 --- a/src/functions/BoysFunction.cpp +++ b/src/functions/BoysFunction.cpp @@ -23,96 +23,26 @@ * */ -/** - * @file BoysFunction.cpp - * - * @brief Numerically evaluates the Boys function - * \f[ - * F_n(x) \;=\; 
\int_{0}^{1} t^{2n}\,e^{-x\,t^2}\,dt - * \f] - * by projecting the integrand onto an adaptive multiresolution basis and then - * integrating the resulting @ref FunctionTree. - * - * Design overview - * --------------- - * 1) The class derives from @ref RepresentableFunction in 1D so that it can be - * used wherever MRCPP expects a function object with `evalf`. - * 2) Given an input coordinate `r`, we interpret `x = r[0]` and define the - * integrand - * g_x(t) = e^{-x t^2} · t^{2n}, t ∈ [0,1]. - * 3) We build a 1D @ref FunctionTree using an @ref MRA configured with an - * interpolating scaling basis (order 13 by default here), and call - * `project(prec, tree, f)` which adaptively refines the tree so that the - * projection error is below `prec`. - * 4) Finally we call `tree.integrate()` to integrate the projected function on - * [0,1], which is the desired value F_n(x). - * - * Notes - * ----- - * - The basis choice (`InterpolatingBasis(13)`) is a trade-off: sufficiently - * smooth to capture Gaussians well, while keeping stencil sizes reasonable. - * - The adaptive projection concentrates resolution where the integrand has - * structure (e.g., for large x near t=0 the function is sharply peaked). - * - Printing is muted during evaluation to keep the call side quiet. - */ - #include "BoysFunction.h" -#include "core/InterpolatingBasis.h" // basis used in the MRA -#include "treebuilders/project.h" // adaptive projection into a FunctionTree -#include "trees/FunctionTree.h" // hierarchical representation + integrate() +#include "core/InterpolatingBasis.h" +#include "treebuilders/project.h" +#include "trees/FunctionTree.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Construct a BoysFunction evaluator. - * - * @param n Non-negative integer order in @f$F_n(x)@f$ (power @f$t^{2n}@f$). - * @param p Target projection precision (controls adaptive refinement). - * - * Internals: - * - `order` stores @p n. 
- * - `prec` stores the target accuracy threshold used by `project`. - * - `MRA` is initialised over a default 1D bounding box with an - * interpolating basis of order 13; this MRA is reused per evaluation. - */ BoysFunction::BoysFunction(int n, double p) : RepresentableFunction<1, double>() , order(n) , prec(p) , MRA(BoundingBox<1>(), InterpolatingBasis(13)) {} -/** - * @brief Evaluate @f$F_n(x)@f$ at the requested abscissa. - * - * @param r Coordinate container; the one and only component is @f$x=r[0]@f$. - * @return The value of @f$F_n(x)=\int_0^1 t^{2n} e^{-x t^2}\,dt@f$. - * - * Algorithm: - * 1) Silence the printer and remember the old verbosity. - * 2) Capture `x` and `n` and form a lambda `f(t)` representing the integrand - * on @f$t\in[0,1]@f$. We compute `t_2 = t^2`, `t_2n = (t^2)^n`, and return - * `exp(-x * t_2) * t_2n`. For `n=0`, `t_2n` is set to 1 for speed/stability. - * 3) Build a fresh `FunctionTree<1,double>` bound to the stored `MRA`. - * 4) Call `project(prec, tree, f)` to approximate `f` within the tolerance - * `prec` by adaptively refining nodes where needed. - * 5) Call `tree.integrate()` to obtain the integral over [0,1]. - * 6) Restore the printer level and return the result. - * - * Accuracy remarks: - * - The achieved error depends on `prec`, the basis order, and the behaviour - * of the integrand (large x leads to rapid decay, which is well captured - * by the multiresolution approach). - */ double BoysFunction::evalf(const Coord<1> &r) const { - // Temporarily mute verbose output from the projection/integration. int oldlevel = Printer::setPrintLevel(0); int n = this->order; double x = r[0]; - // Integrand g_x(t) = exp(-x * t^2) * (t^2)^n over t in [0,1]. - // Written in terms of t^2 to reduce pow() evaluation count. 
auto f = [x, n](const Coord<1> &t) -> double { double t_2 = t[0] * t[0]; double xt_2 = x * t_2; @@ -121,12 +51,10 @@ double BoysFunction::evalf(const Coord<1> &r) const { return std::exp(-xt_2) * t_2n; }; - // Build an adaptive representation of f on [0,1] and integrate it. FunctionTree<1, double> tree(this->MRA); mrcpp::project<1, double>(this->prec, tree, f); double result = tree.integrate(); - // Restore previous verbosity and return. Printer::setPrintLevel(oldlevel); return result; } diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp index 219267ee5..24b517fd0 100644 --- a/src/functions/GaussExp.cpp +++ b/src/functions/GaussExp.cpp @@ -23,34 +23,6 @@ * */ -/** - * @file GaussExp.cpp - * - * @brief Implementation of @c GaussExp, a small container for a linear - * combination (expansion) of Cartesian Gaussian primitives and/or - * Gaussian–polynomial terms. The class offers: - * - basic construction/assignment and memory ownership of terms, - * - pointwise evaluation, - * - algebra (sum/product by distributing over terms), - * - norm and normalization helpers, - * - crude visibility/screening support, - * - Coulomb energy (specialized for D=3), - * - periodification helper. - * - * Design notes - * ------------ - * - The expansion holds owning pointers to @c Gaussian (base type), and - * concrete terms are either @c GaussFunc (pure Gaussian) or - * @c GaussPoly (Gaussian times a Cartesian polynomial). - * - Operations that combine expansions rely on @c dynamic_cast to handle the - * two concrete term types and produce a @c GaussPoly when multiplying. - * - @b Ownership: this class allocates copies on insert/append and frees them - * in the destructor; copy constructor and assignment perform deep copies. - * - Screening: @c screening is a scalar that configures per-term screening - * (e.g., via “n standard deviations”); negative values can be used as a - * disabled flag (see @c setScreen). Each term also receives the screen state. 
- */ - #include "GaussExp.h" #include @@ -67,32 +39,12 @@ using namespace Eigen; namespace mrcpp { -/** @brief Default screening parameter (in “number of standard deviations”). - * - * Each dimensional specialization gets its own static. Positive means enabled - * by default; see @ref setScreen to flip the sign and propagate to terms. - */ template double GaussExp::defaultScreening = 10.0; -/** - * @brief Construct an expansion with a fixed number of (empty) slots. - * - * @param nTerms Number of terms (initial capacity). - * @param prec Unused here (historical signature compatibility). - * - * The vector is filled with @c nullptr placeholders; actual terms must be - * installed via @ref setFunc or @ref append before use. - */ template GaussExp::GaussExp(int nTerms, double /*prec*/) { for (int i = 0; i < nTerms; i++) { this->funcs.push_back(nullptr); } } -/** - * @brief Deep-copy constructor. - * - * Clones each term by calling its virtual @c copy() (polymorphic copy). - * The @c screening flag/value is copied as well. - */ template GaussExp::GaussExp(const GaussExp &gexp) { screening = gexp.screening; for (unsigned int i = 0; i < gexp.size(); i++) { @@ -101,9 +53,6 @@ template GaussExp::GaussExp(const GaussExp &gexp) { } } -/** - * @brief Destructor: deletes all owned terms (if any) and nulls pointers. - */ template GaussExp::~GaussExp() { for (int i = 0; i < size(); i++) { if (this->funcs[i] != nullptr) { @@ -113,16 +62,8 @@ template GaussExp::~GaussExp() { } } -/** - * @brief Deep-copy assignment (strong exception safety not guaranteed). - * - * Existing terms are discarded; the right-hand side is cloned term by term. - * The @c screening parameter is @b not overwritten (commented line preserves - * current object’s screening), so only structure/terms are copied. 
- */ template GaussExp &GaussExp::operator=(const GaussExp &gexp) { if (&gexp == this) return *this; - // screening = gexp.screening; this->funcs.clear(); for (unsigned int i = 0; i < gexp.size(); i++) { if (gexp.funcs[i] == nullptr) { @@ -135,25 +76,12 @@ template GaussExp &GaussExp::operator=(const GaussExp &gexp) { return *this; } -/** - * @brief Pointwise evaluation: sum of all term evaluations at @p r. - * - * @param r D-dimensional coordinate. - * @return Σ_i term_i(r). - */ template double GaussExp::evalf(const Coord &r) const { double val = 0.0; for (int i = 0; i < this->size(); i++) { val += this->getFunc(i).evalf(r); } return val; } -/** - * @brief Quick “visibility” test at a given scale and sample count. - * - * @details Returns @c false if any term is not visible (fails its own - * visibility criterion); only if all are visible does it return @c true. - * This is a conservative conjunction useful for pruning. - */ template bool GaussExp::isVisibleAtScale(int scale, int nPts) const { for (unsigned int i = 0; i < this->size(); i++) { if (not this->getFunc(i).isVisibleAtScale(scale, nPts)) { return false; } @@ -161,12 +89,6 @@ template bool GaussExp::isVisibleAtScale(int scale, int nPts) const { return true; } -/** - * @brief Check whether the expansion is identically zero on [lb,ub]^D. - * - * @details Returns @c false if any term says it is non-zero on the box; - * otherwise returns @c true. Used for quick region elimination. - */ template bool GaussExp::isZeroOnInterval(const double *lb, const double *ub) const { for (unsigned int i = 0; i < this->size(); i++) { if (not this->getFunc(i).isZeroOnInterval(lb, ub)) { return false; } @@ -174,14 +96,6 @@ template bool GaussExp::isZeroOnInterval(const double *lb, const doub return true; } -/** - * @brief Install a @c GaussPoly term into slot @p i, scaling its coefficient. - * - * @param i Slot index (0-based). - * @param g Source Gaussian–polynomial term (copied). 
- * @param c Extra scalar factor applied multiplicatively to the stored term’s - * existing coefficient (so final coef = c * g.coef()). - */ template void GaussExp::setFunc(int i, const GaussPoly &g, double c) { if (i < 0 or i > (this->size() - 1)) { MSG_ERROR("Index out of bounds!"); @@ -193,11 +107,6 @@ template void GaussExp::setFunc(int i, const GaussPoly &g, double this->funcs[i]->setCoef(c * coef); } -/** - * @brief Install a pure @c GaussFunc term into slot @p i, scaling its coefficient. - * - * Same semantics as the GaussPoly overload. - */ template void GaussExp::setFunc(int i, const GaussFunc &g, double c) { if (i < 0 or i > (this->size() - 1)) { MSG_ERROR("Index out of bounds!"); @@ -209,17 +118,11 @@ template void GaussExp::setFunc(int i, const GaussFunc &g, double this->funcs[i]->setCoef(c * coef); } -/** - * @brief Append a new term by polymorphic copy. - */ template void GaussExp::append(const Gaussian &g) { Gaussian *gp = g.copy(); this->funcs.push_back(gp); } -/** - * @brief Append all terms from another expansion (deep copies). - */ template void GaussExp::append(const GaussExp &g) { for (int i = 0; i < g.size(); i++) { Gaussian *gp = g.getFunc(i).copy(); @@ -227,11 +130,6 @@ template void GaussExp::append(const GaussExp &g) { } } -/** - * @brief Differentiate each term with respect to coordinate @p dir and return a new expansion. - * - * @param dir Axis index (0..D-1). - */ template GaussExp GaussExp::differentiate(int dir) const { assert(dir >= 0 and dir < D); GaussExp result; @@ -239,12 +137,6 @@ template GaussExp GaussExp::differentiate(int dir) const { return result; } -/** - * @brief Termwise concatenation (sum) with another expansion. - * - * @details Produces an expansion containing all terms from @c *this followed - * by all terms from @p g, by cloning. Coefficients remain unchanged. 
- */ template GaussExp GaussExp::add(GaussExp &g) { int nsum = this->size() + g.size(); GaussExp sum = GaussExp(nsum); @@ -262,9 +154,6 @@ template GaussExp GaussExp::add(GaussExp &g) { return sum; } -/** - * @brief Concatenate with a single term @p g (at the end). - */ template GaussExp GaussExp::add(Gaussian &g) { int nsum = this->size() + 1; GaussExp sum = GaussExp(nsum); @@ -273,14 +162,6 @@ template GaussExp GaussExp::add(Gaussian &g) { return sum; } -/** - * @brief Product of two expansions by distributivity. - * - * @details For each pair of terms, multiply them (Gaussian×Gaussian or - * Gaussian×GaussPoly) to produce a @c GaussPoly term which is appended to the - * result. Type dispatch is handled via @c dynamic_cast and throws on unknown - * runtime types. - */ template GaussExp GaussExp::mult(GaussExp &gexp) { GaussExp result; for (int i = 0; i < this->size(); i++) { @@ -313,9 +194,6 @@ template GaussExp GaussExp::mult(GaussExp &gexp) { return result; } -/** - * @brief Multiply the expansion by a single @c GaussFunc term (distribute over terms). - */ template GaussExp GaussExp::mult(GaussFunc &g) { GaussExp result; int nTerms = this->size(); @@ -333,9 +211,6 @@ template GaussExp GaussExp::mult(GaussFunc &g) { return result; } -/** - * @brief Multiply the expansion by a single @c GaussPoly term (distribute over terms). - */ template GaussExp GaussExp::mult(GaussPoly &g) { int nTerms = this->size(); GaussExp result(nTerms); @@ -353,43 +228,28 @@ template GaussExp GaussExp::mult(GaussPoly &g) { return result; } -/** - * @brief Return a copy of the expansion scaled by constant @p d. - */ template GaussExp GaussExp::mult(double d) { GaussExp prod = *this; for (int i = 0; i < this->size(); i++) prod.funcs[i]->multConstInPlace(d); return prod; } -/** - * @brief In-place scaling of all term coefficients by @p d. 
- */ template void GaussExp::multInPlace(double d) { for (int i = 0; i < this->size(); i++) this->funcs[i]->multConstInPlace(d); } -/** - * @brief Compute \f$\| \sum_i f_i \|_2^2\f$ via self-terms plus cross terms. - * - * @details First sum each term’s squared norm, then add the double products - * (2× overlap) between distinct terms. To ensure closed form overlaps, terms - * are materialized as @c GaussFunc and @c calcOverlap is used internally. - */ template double GaussExp::calcSquareNorm() const { - /* computing the squares */ double norm = 0.0; for (int i = 0; i < this->size(); i++) { double nc = this->funcs[i]->calcSquareNorm(); norm += nc; } - /* computing the double products */ for (int i = 0; i < this->size(); i++) { - GaussExp funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc + GaussExp funcs_i = getFunc(i).asGaussExp(); for (int fi = 0; fi < funcs_i.size(); fi++) { GaussFunc &func_i = static_cast &>(funcs_i.getFunc(fi)); for (int j = i + 1; j < this->size(); j++) { - GaussExp funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc + GaussExp funcs_j = getFunc(j).asGaussExp(); for (int fj = 0; fj < funcs_j.size(); fj++) { GaussFunc &func_j = static_cast &>(funcs_j.getFunc(fj)); double overlap = func_i.calcOverlap(func_j); @@ -401,12 +261,6 @@ template double GaussExp::calcSquareNorm() const { return norm; } -/** - * @brief Normalize the expansion so that @c calcSquareNorm() == 1. - * - * @details Scales each term’s coefficient by 1/||f||, where - * @c ||f|| = sqrt(calcSquareNorm()). - */ template void GaussExp::normalize() { double norm = std::sqrt(this->calcSquareNorm()); for (int i = 0; i < this->size(); i++) { @@ -415,24 +269,11 @@ template void GaussExp::normalize() { } } -/** - * @brief Set the per-term screening parameter (e.g., n standard deviations). - * - * @details Stores @p nStdDev locally and forwards to each term so that they - * can precompute their own screening envelopes (e.g., bounding radii). 
- */ template void GaussExp::calcScreening(double nStdDev) { screening = nStdDev; for (int i = 0; i < this->size(); i++) { this->funcs[i]->calcScreening(nStdDev); } } -/** - * @brief Enable or disable screening for this expansion and all terms. - * - * @param screen If true, make @c screening positive; if false, make it negative. - * The sign convention can be used by downstream code as a quick - * toggle. Each term receives @c setScreen(screen) as well. - */ template void GaussExp::setScreen(bool screen) { if (screen) { this->screening = std::abs(this->screening); @@ -442,57 +283,11 @@ template void GaussExp::setScreen(bool screen) { for (int i = 0; i < this->size(); i++) { this->funcs[i]->setScreen(screen); } } -// ----------------------------------------------------------------------------- -// Project-to-wavelets routine (legacy) -// ----------------------------------------------------------------------------- -// The routine below shows how to compute scaling and wavelet coefficients by -// projecting each term separately and expanding to nD via tensor products. -// It is currently commented out (relies on MWNode internals), but the steps -// are left as documentation for future restoration. 
-/* -template -void GaussExp::calcWaveletCoefs(MWNode &node) { - static const int tDim = 1 << D; - const ScalingBasis &sf = node.getMWTree().getScalingFunctions(); - MatrixXd &scaling = node.getMWTree().getTmpScalingCoefs(); - VectorXd &tmpvec = node.getMWTree().getTmpScalingVector(); - int kp1 = node.getKp1(); - int kp1_d = node.getKp1_d(); - int inpos = kp1_d - kp1; - int scale = node.getNodeIndex().scale() + 1; - node.allocCoefs(); - for (int child = 0; child < tDim; child++) { - int l[D]; - node.calcChildTranslation(child, l); - for (int n = 0; n < this->size(); n++) { - if (this->getFunc(n).checkScreen(scale, l)) { - continue; - } - sf.calcScalingCoefs(this->getFunc(n), scale, l, scaling); - tmpvec.segment(inpos, kp1) = scaling.col(0); - math_utils::tensorExpandCoefs(D, 0, kp1, kp1_d, scaling, tmpvec); - node.getCoefs().segment(child * kp1_d, kp1_d) += tmpvec; - } - } - node.mwTransform(Compression); - node.setHasCoefs(); - node.calcNorms(); -} -*/ - -/** - * @brief Configure the global default screening parameter for all new instances. - * - * @param screen Non-negative value; throws if negative. - */ template void GaussExp::setDefaultScreening(double screen) { if (screen < 0) { MSG_ERROR("Screening constant cannot be negative!"); } defaultScreening = screen; } -/** - * @brief Pretty-printer listing the terms (order and parameters). - */ template std::ostream &GaussExp::print(std::ostream &o) const { o << "Gaussian expansion: " << size() << " terms" << std::endl; for (int i = 0; i < size(); i++) { @@ -502,31 +297,18 @@ template std::ostream &GaussExp::print(std::ostream &o) const { return o; } -/** - * @brief Coulomb self-energy placeholder for general D. - * - * @note For D≠3 this is not implemented. - */ template double GaussExp::calcCoulombEnergy() const { NOT_IMPLEMENTED_ABORT } -/** - * @brief Coulomb repulsion energy for D=3 including self-interaction once. 
- * - * @details Loops over pairs (i≤j), expands any composite terms to pure - * Gaussians, and accumulates @c 2*overlap for i double GaussExp<3>::calcCoulombEnergy() const { double energy = 0.0; for (int i = 0; i < this->size(); i++) { - GaussExp<3> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc + GaussExp<3> funcs_i = getFunc(i).asGaussExp(); for (int fi = 0; fi < funcs_i.size(); fi++) { GaussFunc<3> &func_i = static_cast &>(funcs_i.getFunc(fi)); for (int j = i; j < this->size(); j++) { - GaussExp<3> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc + GaussExp<3> funcs_j = getFunc(j).asGaussExp(); for (int fj = 0; fj < funcs_j.size(); fj++) { GaussFunc<3> &func_j = static_cast &>(funcs_j.getFunc(fj)); double c = 2.0; @@ -539,13 +321,6 @@ template <> double GaussExp<3>::calcCoulombEnergy() const { return energy; } -/** - * @brief Build a periodified expansion by summing periodic images of each term. - * - * @param period Period vector per axis (Lx, Ly, Lz for D=3). - * @param nStdDev Controls the width/number of included images (screening). - * @return A new @c GaussExp whose terms include periodic replicas of the input. - */ template GaussExp GaussExp::periodify(const std::array &period, double nStdDev) const { GaussExp out_exp; for (const auto &gauss : *this) { @@ -555,7 +330,6 @@ template GaussExp GaussExp::periodify(const std::array return out_exp; } -// Explicit template instantiations for common dimensions template class GaussExp<1>; template class GaussExp<2>; template class GaussExp<3>; diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp index 8753c3357..bcf75b3e5 100644 --- a/src/functions/GaussFunc.cpp +++ b/src/functions/GaussFunc.cpp @@ -23,32 +23,6 @@ * */ -/** - * @file GaussFunc.cpp - * - * @brief Implementation of @c GaussFunc, a single Cartesian Gaussian (possibly - * multiplied by a coordinate power) in D dimensions. 
- * - * Model - * ----- - * A term has the form - * f(r) = c * Π_{d=0}^{D-1} (x_d - R_d)^{p_d} * exp( -α_d (x_d - R_d)^2 ), - * with scalar coefficient c, center R, exponents α (per axis), and integer powers p - * (Cartesian angular momenta). Many operations here are separable over dimensions. - * - * Highlights - * ---------- - * - @ref evalf computes the value with optional screening (box truncation). - * - @ref calcSquareNorm uses 1D closed forms and multiplies across axes. - * - @ref differentiate returns a @ref GaussPoly (Gaussian times polynomial), - * using the product rule on (x-R)^p * exp(-α (x-R)^2). - * - @ref mult multiplies two @c GaussFunc into a @c GaussPoly by “completing the square” - * (handled by @c GaussPoly::multPureGauss) and then combining the two polynomials - * created by shifting to the new center. - * - @ref calcCoulombEnergy (D=3 specialization) uses Boys F_0 and assumes isotropic - * exponents for both Gaussians. - */ - #include #include "BoysFunction.h" @@ -65,26 +39,11 @@ using namespace Eigen; namespace mrcpp { -/** - * @brief Polymorphic deep copy (virtual constructor idiom). - * @return Newly allocated @c GaussFunc with identical parameters. - */ template Gaussian *GaussFunc::copy() const { auto *gauss = new GaussFunc(*this); return gauss; } -/** - * @brief Pointwise evaluation of the Gaussian (with optional polynomial factor). - * - * Steps - * ----- - * 1) If screening is enabled, immediately return 0 if any coordinate lies outside - * the precomputed box [A[d], B[d]]. - * 2) Accumulate q2 = Σ α_d (x_d - R_d)^2 (the exponent argument), - * and p2 = Π (x_d - R_d)^{p_d} (the Cartesian polynomial). - * 3) Return c * p2 * exp(-q2). 
- */ template double GaussFunc::evalf(const Coord &r) const { if (this->getScreen()) { for (int d = 0; d < D; d++) { @@ -106,13 +65,6 @@ template double GaussFunc::evalf(const Coord &r) const { return this->coef * p2 * std::exp(-q2); } -/** - * @brief 1D evaluation of the d-th component only (factor for separable product). - * - * This returns (x - R_d)^{p_d} * exp(-α_d (x - R_d)^2) times @c coef if d==0 - * (the overall scalar is stored redundantly only on one axis when factoring). - * Screening is applied on that axis if enabled. - */ template double GaussFunc::evalf1D(double r, int d) const { if (this->getScreen()) { if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; } @@ -133,16 +85,6 @@ template double GaussFunc::evalf1D(double r, int d) const { return result; } -/** - * @brief Squared L2 norm ||f||^2 = ∫ |f|^2 d r (separable product of 1D integrals). - * - * For one axis (drop subscript d for brevity): - * ∫ (x-R)^{2p} exp(-2α (x-R)^2) dx - * = sqrt(pi / (2α)) * [(2p-1)!!] / (2α)^p, - * which is implemented via a simple descending product: - * sq_norm = Π_{i odd from (2p-1) down to 1} i / (2α). - * The D-dimensional norm is the product over axes, multiplied by coef^2. - */ template double GaussFunc::calcSquareNorm() const { double norm = 1.0; for (int d = 0; d < D; d++) { @@ -163,28 +105,12 @@ template double GaussFunc::calcSquareNorm() const { return norm * this->coef * this->coef; } -/** - * @brief Convert a single @c GaussFunc into a length-1 @c GaussExp. - * - * Useful when operations expect an expansion (e.g., in norm cross-terms). - */ template GaussExp GaussFunc::asGaussExp() const { GaussExp gexp; gexp.append(*this); return gexp; } -/** - * @brief Derivative along axis @p dir, returning a @c GaussPoly. 
- * - * In 1D: - * d/dx [(x-R)^p e^{-α(x-R)^2}] = - * p (x-R)^{p-1} e^{-α...} + (x-R)^p * (-2α)(x-R) e^{-α...} - * = [ p (x-R)^{p-1} - 2α (x-R)^{p+1} ] e^{-α...} - * - * We therefore create a new polynomial of degree (p+1) with two nonzero - * coefficients at (p-1) and (p+1). Other axes carry over unchanged. - */ template GaussPoly GaussFunc::differentiate(int dir) const { GaussPoly result(*this); int oldPow = this->getPower(dir); @@ -197,21 +123,6 @@ template GaussPoly GaussFunc::differentiate(int dir) const { return result; } -/** - * @brief In-place multiplication by another @c GaussFunc with the SAME center. - * - * Preconditions - * ------------- - * - The two Gaussians must share identical centers in every axis. - * - * Effect - * ------ - * - Exponents add: α_new = α_lhs + α_rhs. - * - Powers add: p_new = p_lhs + p_rhs. - * - Coefficients multiply: c_new = c_lhs * c_rhs. - * - * This keeps the center unchanged and avoids creating polynomials. - */ template void GaussFunc::multInPlace(const GaussFunc &rhs) { GaussFunc &lhs = *this; for (int d = 0; d < D; d++) { @@ -232,22 +143,6 @@ template void GaussFunc::multInPlace(const GaussFunc &rhs) { this->setPow(newPow); } -/** @brief Multiply two GaussFuncs - * @param[in] this: Left hand side of multiply - * @param[in] rhs: Right hand side of multiply - * @returns New GaussPoly - * - * Algorithm - * --------- - * 1) “Complete the square”: the product of two Gaussians is a (shifted) Gaussian - * with combined exponent and a new center (weighted by exponents). This part is - * delegated to @c GaussPoly::multPureGauss, which sets the new Gaussian envelope - * (position, exponents, and a prefactor). - * 2) Each original polynomial factor (x-R)^p is re-expressed relative to the new - * center R_new: (x-R) = (x-R_new) + (R_new - R). We therefore have two polynomials - * per axis; they are multiplied to obtain the combined polynomial for that axis. - * 3) Multiply in the original scalar coefficients c_lhs * c_rhs. 
- */ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { GaussFunc &lhs = *this; GaussPoly result; @@ -263,28 +158,15 @@ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { return result; } -/** @brief Multiply GaussFunc by scalar (returns a copy with scaled coefficient). */ template GaussFunc GaussFunc::mult(double c) { GaussFunc g = *this; g.coef *= c; return g; } -/** - * @brief Pretty-printer for a Gaussian term. - * - * Prints: - * - Coef - * - Exp: either a single value if all α_d are equal, or all components. - * - Pos: center coordinates - * - Pow: integer powers per axis - */ template std::ostream &GaussFunc::print(std::ostream &o) const { auto is_array = details::are_all_equal(this->getExp()); - // If all of the values in the exponential are the same only - // one is printed, else, all of them are printed. - o << "Coef : " << this->getCoef() << std::endl; if (!is_array) { o << "Exp : "; @@ -301,43 +183,14 @@ template std::ostream &GaussFunc::print(std::ostream &o) const { return o; } -/** @brief Compute Coulomb repulsion energy between two GaussFuncs - * @param[in] this: Left hand GaussFunc - * @param[in] rhs: Right hand GaussFunc - * @returns Coulomb energy - * - * @note Both Gaussians must be normalized to unit charge - * \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct! - * - * General D is not implemented here; see the D=3 specialization below. - */ template double GaussFunc::calcCoulombEnergy(const GaussFunc &gf) const { NOT_IMPLEMENTED_ABORT; } -/** - * @brief Coulomb energy for 3D isotropic Gaussians using Boys F_0. - * - * Preconditions - * ------------- - * - Both Gaussians must have isotropic exponents (α_x = α_y = α_z), verified via - * @c details::are_all_equal<3>. - * - * Formula - * ------- - * With exponents p and q, α = p q / (p + q), separation R = |R_p - R_q|, - * the Coulomb interaction is: - * E = sqrt( 4 α / π ) * F_0( α R^2 ), - * where F_0 is the order-zero Boys function. 
The code constructs a @c BoysFunction(0) - * and evaluates it at α R^2. - */ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const { - // Checking if the elements in each exponent are constant if (!details::are_all_equal<3>(this->getExp()) or !details::are_all_equal<3>(gf.getExp())) NOT_IMPLEMENTED_ABORT; - // If they are constant the 0th element are assigned a value - // and the Coulomb Energy can be calculated auto p = this->getExp()[0]; auto q = gf.getExp()[0]; @@ -360,7 +213,6 @@ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const return std::sqrt(4.0 * alpha / pi) * boysFac; } -// Explicit template instantiations template class GaussFunc<1>; template class GaussFunc<2>; template class GaussFunc<3>; diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp index 9c18d420b..bfe78d349 100644 --- a/src/functions/GaussPoly.cpp +++ b/src/functions/GaussPoly.cpp @@ -37,26 +37,10 @@ using namespace Eigen; namespace mrcpp { -/** @returns New GaussPoly object - * @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$ - * @param[in] alpha: Coefficient, \f$ \alpha e^{-r^2} \f$ - * @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ - * @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$ - * - * High-level: - * ----------- - * GaussPoly represents a separable polynomial-times-Gaussian: - * f(x) = coef * Π_d Poly_d(x_d - pos[d]) * exp( -alpha[d] (x_d - pos[d])^2 ). - * The per-axis polynomials are stored as pointers (Polynomial* poly[d]). - * Here we allocate those polynomials (if a non-zero power is requested), - * using the degree from `power[d]` passed to the Gaussian base ctor. - */ template GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const std::array &power) : Gaussian(beta, alpha, pos, power) { for (auto d = 0; d < D; d++) { - // If overall 'power' array is not the all-zero sentinel, create a poly - // of the requested degree for this axis. 
Otherwise leave pointer null. if (power != std::array{}) { this->poly[d] = new Polynomial(this->power[d]); } else { @@ -65,9 +49,6 @@ GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const st } } -/** @brief Anisotropic exponent ctor (per-axis beta). - * Same allocation logic for the per-axis polynomials as above. - */ template GaussPoly::GaussPoly(const std::array &beta, double alpha, @@ -83,21 +64,12 @@ GaussPoly::GaussPoly(const std::array &beta, } } -/** @brief Copy-construct with deep copies of the per-axis polynomials. */ template GaussPoly::GaussPoly(const GaussPoly &gp) : Gaussian(gp) { for (int d = 0; d < D; d++) { poly[d] = new Polynomial(gp.getPoly(d)); } } -/** @brief Construct a GaussPoly from a GaussFunc (monomial×Gaussian). - * - * Effect: - * ------- - * For each axis d, we create a polynomial of degree equal to the monomial power - * in that dimension, and set it to the *monomial basis* e_d(t) = t^{power[d]} - * (i.e., coefficient vector with a single 1 at index = power[d]). - */ template GaussPoly::GaussPoly(const GaussFunc &gf) : Gaussian(gf) { @@ -105,31 +77,20 @@ GaussPoly::GaussPoly(const GaussFunc &gf) int order = this->getPower(d); poly[d] = new Polynomial(order); VectorXd coefs = VectorXd::Zero(order + 1); - coefs[order] = 1.0; // t^{order} + coefs[order] = 1.0; poly[d]->setCoefs(coefs); - // poly[d]->unsetBounds(); } } -/** @brief Delete owned Polynomial objects. */ template GaussPoly::~GaussPoly() { for (int i = 0; i < D; i++) { delete poly[i]; } } -/** @brief Virtual clone (deep copy). */ template Gaussian *GaussPoly::copy() const { auto *gauss = new GaussPoly(*this); return gauss; } -/** @brief Exact L2 norm squared by expanding to GaussExp and summing overlaps. - * - * Algorithm: - * ---------- - * 1) Expand this GaussPoly into a sum of GaussFunc terms (asGaussExp()). - * 2) Sum ⟨g_i | g_j⟩ over all pairs using the analytic overlap routine - * function_utils::calc_overlap (Obara–Saika recurrences). 
- */ template double GaussPoly::calcSquareNorm() const { GaussExp this_exp = this->asGaussExp(); double norm = 0.0; @@ -143,13 +104,6 @@ template double GaussPoly::calcSquareNorm() const { return norm; } -/** @brief Evaluate f(r) = coef * Π_d poly_d(r_d - pos[d]) * exp(-Σ_d alpha[d](r_d-pos[d])^2) - * - * Notes: - * ------ - * - Optional *screening*: if enabled, points outside the [A,B] box give 0. - * - The polynomial is evaluated in *shifted* coordinate q = r_d - pos[d]. - */ template double GaussPoly::evalf(const Coord &r) const { if (this->getScreen()) { for (int d = 0; d < D; d++) { @@ -158,62 +112,25 @@ template double GaussPoly::evalf(const Coord &r) const { } double q2 = 0.0, p2 = 1.0; for (int d = 0; d < D; d++) { - // assert(this->poly[d]->getCheckBounds() == false); double q = r[d] - this->pos[d]; - q2 += this->alpha[d] * q * q; // accumulate quadratic exponent - p2 *= poly[d]->evalf(r[d] - this->pos[d]); // polynomial factor in dim d + q2 += this->alpha[d] * q * q; + p2 *= poly[d]->evalf(r[d] - this->pos[d]); } return this->coef * p2 * std::exp(-q2); } -/** @brief Evaluate the *1D* factor in dimension d at coordinate r. - * - * Implementation detail: - * ---------------------- - * For efficiency, only dimension d=0 gets the *full* global coefficient. - * Other dimensions return the pure 1D factor with amplitude 1.0. This is a - * deliberate convention to avoid taking the d-th root of the coefficient when - * forming tensor products; callers multiply across dims and obtain the correct - * full amplitude once (from d==0). - */ template double GaussPoly::evalf1D(const double r, int d) const { - // NOTE! - // This function evaluation will give the first dimension the full coef - // amplitude, leaving all other directions with amplitude 1.0. This is to - // avoid expensive d-root evaluation when distributing the amplitude - // equally to all dimensions. 
- if (this->getScreen()) { if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; } } - // assert(this->poly[d]->getCheckBounds() == false); double q2 = 0.0, p2 = 1.0; double q = (r - this->pos[d]); q2 += q * q; p2 *= poly[d]->evalf(q); - if (d == 0) { p2 *= this->coef; } // apply global amplitude once + if (d == 0) { p2 *= this->coef; } return p2 * std::exp(-this->alpha[d] * q2); } -/** @brief Expand a polynomial×Gaussian into a sum of pure Gaussians (GaussExp). - * - * Idea: - * ----- - * Each per-axis polynomial Poly_d(t) = Σ_{k=0}^{p_d} c_{d,k} t^k can be viewed - * as a linear combination of *monomial* GaussFuncs: (x-pos[d])^k * exp(-α_d t^2). - * The D-dimensional product of polynomials expands into a tensor product of - * monomials across dimensions. This routine enumerates all combinations of - * powers (k_0,...,k_{D-1}), multiplies coefficients Π_d c_{d,k_d}, and emits - * corresponding GaussFunc terms into a GaussExp. - * - * Implementation: - * --------------- - * - nTerms = Π_d (power[d] + 1). - * - fillCoefPowVector(...) recursively builds: - * * `coefs[i]` = Π_d c_{d, pow_d(i)} * global coef - * * `power[i]` = array of the per-axis monomial powers - * - For each nonzero coefficient, create GaussFunc(alpha, coef, pos, pow). - */ template GaussExp GaussPoly::asGaussExp() const { std::array pow; std::array pos; @@ -243,25 +160,14 @@ template GaussExp GaussPoly::asGaussExp() const { return gexp; } -/** @brief Analytic derivative (not implemented for GaussPoly). */ template GaussPoly GaussPoly::differentiate(int dir) const { NOT_IMPLEMENTED_ABORT; } -/** @brief In-place product with another GaussPoly (not implemented). */ template void GaussPoly::multInPlace(const GaussPoly &rhs) { NOT_IMPLEMENTED_ABORT; } -/** @brief Recursive helper: enumerate all power combinations; collect coefficients. - * - * Version 1: temporary raw `int pow[D]` buffer. 
- * - * On the recursion leaf (dir==0 processed), allocate a new array `newPow[d]` - * storing the tuple of powers; compute the scalar coefficient as: - * coef = global_coef * Π_d poly_d->coefs[ pow[d] ] - * and push both into the output vectors. - */ template void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector &power, int pow[D], int dir) const { dir--; @@ -283,7 +189,6 @@ void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector accumulator. */ template void GaussPoly::fillCoefPowVector(std::vector &coefs, std::vector &power, @@ -308,54 +213,21 @@ void GaussPoly::fillCoefPowVector(std::vector &coefs, } } -/** @brief Product of two GaussPoly (symbolic) — currently not implemented. */ template GaussPoly GaussPoly::mult(const GaussPoly &rhs) { NOT_IMPLEMENTED_ABORT; - /* - GaussPoly &lhs = *this; - GaussPoly result; - result.multPureGauss(lhs, rhs); - for (int d = 0; d < D; d++) { - double newPos = result.getPos()[d]; - int lhsPow = lhs.getPower(d); - Polynomial lhsPoly(lhsPow); - lhsPoly.clearCoefs(); - for (int p = 0; p <= lhsPow; p++) { - Polynomial tmpPoly(newPos - lhs.getPos()[d], p); - tmpPoly *= lhs.getPolyCoefs(d)[p]; - lhsPoly += tmpPoly; - } - - int rhsPow = rhs.getPower(d); - Polynomial rhsPoly(rhsPow); - rhsPoly.clearCoefs(); - for (int p = 0; p <= rhsPow; p++) { - Polynomial tmpPoly(newPos - rhs.getPos()[d], p); - tmpPoly *= rhs.getPolyCoefs(d)[p]; - rhsPoly += tmpPoly; - } - Polynomial newPoly = lhsPoly * rhsPoly; - result.setPoly(d, newPoly); - } - result.setCoef(result.getCoef() * lhs.getCoef() * rhs.getCoef()); - return result; - */ } -/** @brief Multiply by a scalar (returns a copy). */ template GaussPoly GaussPoly::mult(double c) { GaussPoly g = *this; g.coef *= c; return g; } -/** @brief Set polynomial degree *and* allocate a new Polynomial in dim d. 
*/ template void GaussPoly::setPow(int d, int pow) { if (poly[d] != nullptr) { delete poly[d]; } poly[d] = new Polynomial(pow); } -/** @brief Set polynomial degrees in all dims and allocate new polynomials. */ template void GaussPoly::setPow(const std::array &pow) { for (int d = 0; d < D; d++) { if (poly[d] != nullptr) { delete poly[d]; } @@ -363,25 +235,14 @@ template void GaussPoly::setPow(const std::array &pow) { } } -/** @brief Replace the polynomial in a given dimension and update degree. - * - * Ownership: - * ---------- - * This class owns its per-axis Polynomial pointers. We take a *copy* of the - * passed polynomial to keep ownership consistent and update power[d] to match - * the new polynomial order. - */ template void GaussPoly::setPoly(int d, Polynomial &poly) { if (this->poly[d] != nullptr) { delete this->poly[d]; } this->poly[d] = new Polynomial(poly); this->power[d] = poly.getOrder(); } -/** @brief Pretty-print parameters, including per-axis polynomial coefficients. */ template std::ostream &GaussPoly::print(std::ostream &o) const { auto is_array = details::are_all_equal(this->getExp()); - - // If all exponents are identical, print a single value; else print the array. o << "Coef : " << this->getCoef() << std::endl; if (!is_array) { o << "Exp : "; @@ -399,7 +260,6 @@ template std::ostream &GaussPoly::print(std::ostream &o) const { return o; } -// Explicit template instantiations template class GaussPoly<1>; template class GaussPoly<2>; template class GaussPoly<3>; diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp index 5dc4d25e1..915731990 100644 --- a/src/functions/Gaussian.cpp +++ b/src/functions/Gaussian.cpp @@ -23,29 +23,6 @@ * */ -/** - * Implementation notes (high-level) - * --------------------------------- - * This file implements generic (templated) functionality shared by all - * Gaussian-like primitives in D dimensions, i.e. @ref Gaussian. 
- * - * Key responsibilities: - * - Store and initialize the parameters of a Cartesian Gaussian: - * * alpha[d] : per-axis exponents β_d (>0) - * * coef : scalar prefactor α - * * pos[d] : center coordinates R_d - * * power[d] : polynomial powers p_d ∈ {0,1,2,...} - * - Compose two Gaussians into a *pure* Gaussian by completing the square - * (multPureGauss), leaving the polynomial factors to higher layers. - * - Build cheap screening boxes / visibility tests to avoid unnecessary - * work when projecting to grids/trees (calcScreening, checkScreen, - * isVisibleAtScale, isZeroOnInterval). - * - Provide utility evaluations on batches of points (evalf over matrices). - * - Compute overlaps by expanding (if needed) into GaussFunc terms and - * using the Obara–Saika 1D recurrences (via function_utils). - * - Create semi-periodic images of a Gaussian inside a unit cell (periodify). - */ - #include #include "Gaussian.h" @@ -64,17 +41,15 @@ namespace mrcpp { * Constructors / init state * *---------------------------*/ -/** @brief Isotropic-constructor: fill all D exponents with the same value @p a. */ template Gaussian::Gaussian(double a, double c, const Coord &r, const std::array &p) - : screen(false) // screening disabled by default - , coef(c) // scalar amplitude - , power(p) // Cartesian powers - , pos(r) { // center - this->alpha.fill(a); // isotropic exponent β_d = a ∀ d + : screen(false) + , coef(c) + , power(p) + , pos(r) { + this->alpha.fill(a); } -/** @brief Anisotropic-constructor: exponents are provided per axis. */ template Gaussian::Gaussian(const std::array &a, double c, const Coord &r, const std::array &p) : screen(false) @@ -86,39 +61,21 @@ Gaussian::Gaussian(const std::array &a, double c, const Coord & /*----------------------------------------------------* * Multiply two *pure* Gaussians (no polynomial part) * *----------------------------------------------------*/ -/** - * @brief Complete-the-square product of two Gaussians into this object. 
- * - * Given - * G_L(x) = exp[-Σ_d α_L(d) (x_d - R_L(d))^2], - * G_R(x) = exp[-Σ_d α_R(d) (x_d - R_R(d))^2], - * their product is a *single* Gaussian - * G_P(x) = C · exp[-Σ_d α_P(d) (x_d - R_P(d))^2], - * where - * α_P(d) = α_L(d) + α_R(d), - * μ(d) = α_L(d) α_R(d) / α_P(d) (reduced exponent), - * R_P(d) = [α_L(d) R_L(d) + α_R(d) R_R(d)] / α_P(d), - * C = exp[-Σ_d μ(d) (R_L(d) - R_R(d))^2]. - * - * The polynomial prefactors (if any) are handled elsewhere (e.g. GaussFunc→GaussPoly). - */ -template void Gaussian::multPureGauss(const Gaussian &lhs, const Gaussian &rhs) { +template void Gaussian::multPureGauss(const Gaussian &lhs, const Gaussian &rhs) { auto newAlpha = std::array{}; auto mju = std::array{}; for (auto d = 0; d < D; d++) { - newAlpha[d] = lhs.alpha[d] + rhs.alpha[d]; // α_P = α_L + α_R - mju[d] = (lhs.alpha[d] * rhs.alpha[d]) / newAlpha[d]; // μ = α_L α_R / (α_L + α_R) + newAlpha[d] = lhs.alpha[d] + rhs.alpha[d]; + mju[d] = (lhs.alpha[d] * rhs.alpha[d]) / newAlpha[d]; } auto newPos = std::array{}; auto relPos = std::array{}; double newCoef = 1.0; for (int d = 0; d < D; d++) { - // Center of the product (weighted by exponents) newPos[d] = (lhs.alpha[d] * lhs.pos[d] + rhs.alpha[d] * rhs.pos[d]) / newAlpha[d]; - relPos[d] = lhs.pos[d] - rhs.pos[d]; // R_L - R_R - // Normalization factor from completing the square + relPos[d] = lhs.pos[d] - rhs.pos[d]; newCoef *= std::exp(-mju[d] * std::pow(relPos[d], 2.0)); } setExp(newAlpha); @@ -129,77 +86,44 @@ template void Gaussian::multPureGauss(const Gaussian &lhs, const G /*--------------------------------------------* * Screening boxes and quick-visibility tests * *--------------------------------------------*/ -/** - * @brief Build an axis-aligned bounding box [A,B] that captures - * ~erf coverage based on nStdDev standard deviations. - * - * For each dimension d, the 1D Gaussian has variance σ_d^2 = 1/(2 α_d). - * We choose bounds R_d ± nStdDev * σ_d. 
Setting @c screen=true enables - * fast culling in eval and tree projection. - */ + template void Gaussian::calcScreening(double nStdDev) { assert(nStdDev > 0); if (not this->isBounded()) { - // Lazy-allocate bounds arrays if needed this->bounded = true; this->A = new double[D]; this->B = new double[D]; } for (int d = 0; d < D; d++) { - double limit = std::sqrt(nStdDev / this->alpha[d]); // nStdDev * σ_d where σ_d = 1/sqrt(2 α_d) + double limit = std::sqrt(nStdDev / this->alpha[d]); this->A[d] = this->pos[d] - limit; this->B[d] = this->pos[d] + limit; } screen = true; } -/** - * @brief Tile-level screening: test whether a node box [a,b] at scale n - * lies entirely outside this Gaussian’s screening box. - * - * The physical length of a dyadic tile at scale n is 2^{-n}. The tile's - * coordinate bounds are computed from its integer translations l[d]. - * If the tile is completely outside [A,B] on any axis, return true - * (i.e., we can skip processing that tile). - */ template bool Gaussian::checkScreen(int n, const int *l) const { if (not getScreen()) { return false; } - double length = std::pow(2.0, -n); // tile size + double length = std::pow(2.0, -n); const double *A = this->getLowerBounds(); const double *B = this->getUpperBounds(); for (int d = 0; d < D; d++) { - double a = length * l[d]; // tile lower bound in dim d - double b = length * (l[d] + 1); // tile upper bound in dim d - if (a > B[d] or b < A[d]) { return true; } // entirely outside -> culled + double a = length * l[d]; + double b = length * (l[d] + 1); + if (a > B[d] or b < A[d]) { return true; } } return false; } -/** - * @brief Heuristic visibility test vs. resolution scale and quadrature count. - * - * A Gaussian of standard deviation σ should not be represented at - * resolutions finer than ~σ (no additional information). We compare the - * scale against a heuristic derived from σ and the number of quadrature points. 
- */ template bool Gaussian::isVisibleAtScale(int scale, int nQuadPts) const { for (auto &alp : this->alpha) { - double stdDeviation = std::pow(2.0 * alp, -0.5); // σ = 1/√(2α) + double stdDeviation = std::pow(2.0 * alp, -0.5); auto visibleScale = static_cast(-std::floor(std::log2(nQuadPts * 0.5 * stdDeviation))); - // If requested 'scale' is *finer* (smaller length) than what this σ supports, hide it. if (scale < visibleScale) return false; } - return true; } -/** - * @brief Quick zero test on an interval: returns true if the Gaussian - * is negligible on [a,b] (component-wise), using ±5σ rule. - * - * If the interval is completely outside [R-5σ, R+5σ] on any axis, we - * consider the function zero there for practical purposes. - */ template bool Gaussian::isZeroOnInterval(const double *a, const double *b) const { for (int i = 0; i < D; i++) { double stdDeviation = std::pow(2.0 * this->alpha[i], -0.5); @@ -213,16 +137,7 @@ template bool Gaussian::isZeroOnInterval(const double *a, const doubl /*---------------------------------------------* * Batch evaluation (matrix of points → values) * *---------------------------------------------*/ -/** - * @brief Evaluate the *separable* 1D factors on a batch of points. - * - * @param[in] points Matrix (N×D) of coordinates; column d contains the d-th coordinate of all N points. - * @param[out] values Matrix (N×D) to be filled with per-axis factors: - * values(i,d) = g_d( points(i,d) ). - * - * Note: this does not multiply across dimensions; higher-level code can - * combine the columns (e.g., by product) if the full D-D value is needed. 
- */ + template void Gaussian::evalf(const MatrixXd &points, MatrixXd &values) const { assert(points.cols() == D); assert(points.cols() == values.cols()); @@ -235,14 +150,8 @@ template void Gaussian::evalf(const MatrixXd &points, MatrixXd &value /*--------------------------------------* * Convenience: maximum standard dev σ * *--------------------------------------*/ -/** - * @brief Return the maximum standard deviation across axes. - * - * For isotropic exponents, that is 1/√(2 α). For anisotropic, compute - * σ_d = 1/√(2 α_d) per axis and return max_d σ_d. Used in periodification. - */ -template double Gaussian::getMaximumStandardDiviation() const { +template double Gaussian::getMaximumStandardDiviation() const { if (details::are_all_equal(this->getExp())) { auto exponent = this->getExp()[0]; return 1.0 / std::sqrt(2.0 * exponent); @@ -257,16 +166,10 @@ template double Gaussian::getMaximumStandardDiviation() const { /*-------------------------* * Overlap ⟨G|G'⟩ utilities * *-------------------------*/ -/** - * @brief General overlap by expanding both sides into @ref GaussFunc terms - * (if needed) and summing pairwise 1D Obara–Saika products. - * - * The helper function_utils::calc_overlap(GaussFunc,GaussFunc) performs the - * per-dimension recursion and multiplies contributions across D. 
- */ + template double Gaussian::calcOverlap(const Gaussian &inp) const { - const auto &bra_exp = this->asGaussExp(); // Make sure all entries are GaussFunc - const auto &ket_exp = inp.asGaussExp(); // Make sure all entries are GaussFunc + const auto &bra_exp = this->asGaussExp(); + const auto &ket_exp = inp.asGaussExp(); double S = 0.0; for (int i = 0; i < bra_exp.size(); i++) { @@ -282,43 +185,24 @@ template double Gaussian::calcOverlap(const Gaussian &inp) const { /*-----------------------------* * Semi-periodic “image” clone * *-----------------------------*/ -/** - * @brief Build a semi-periodic expansion by replicating this Gaussian on a - * Cartesian lattice so that most of the mass (≈erf coverage) lies - * within a single unit cell. - * - * @param period Period vector (cell lengths per axis). - * @param nStdDev Number of σ to keep around the central copy (default 4.0). - * @returns A @ref GaussExp consisting of translated copies. - * - * Algorithm: - * 1) Fold the original center into the principal cell [0,period). - * 2) Estimate the number of neighbor cells needed so that ±nStdDev·σ fits. - * 3) Generate all translations in the (2N+1)^D cube around the folded center. - * 4) Copy and shift the Gaussian for each translation and append to result. - */ + template GaussExp Gaussian::periodify(const std::array &period, double nStdDev) const { GaussExp gauss_exp; auto pos_vec = std::vector>(); auto x_std = nStdDev * this->getMaximumStandardDiviation(); - // This lambda computes how many neighbor cells are needed (per axis) - // so that the ±x_std window is covered by translated images. auto neighbooring_cells = [period, x_std](auto pos) { auto needed_cells_vec = std::vector(); for (auto i = 0; i < D; i++) { auto upper_bound = pos[i] + x_std; auto lower_bound = pos[i] - x_std; - (void)lower_bound; // not used explicitly; retained for clarity - // Minimal number of positive cell steps so that [pos-x_std, pos+x_std] is inside coverage. 
+ (void)lower_bound; needed_cells_vec.push_back(std::ceil(upper_bound / period[i])); } - return *std::max_element(needed_cells_vec.begin(), needed_cells_vec.end()); }; - // Fold starting position into the principal cell auto startpos = this->getPos(); for (auto d = 0; d < D; d++) { @@ -326,11 +210,9 @@ template GaussExp Gaussian::periodify(const std::array if (startpos[d] < 0) startpos[d] += period[d]; } - // Symmetric image range: from -N to +N cells in each dimension auto nr_cells_upp_and_down = neighbooring_cells(startpos); for (auto d = 0; d < D; d++) { startpos[d] -= nr_cells_upp_and_down * period[d]; } - // Generate a (2N+1)^D Cartesian product of offsets auto tmp_pos = startpos; std::vector v(2 * nr_cells_upp_and_down + 1); std::iota(v.begin(), v.end(), 0.0); @@ -338,16 +220,13 @@ template GaussExp Gaussian::periodify(const std::array for (auto &c : cart) { for (auto i = 0; i < D; i++) c[i] *= period[i]; } - // Shift coordinates by the starting corner for (auto &c : cart) std::transform(c.begin(), c.end(), tmp_pos.begin(), c.begin(), std::plus()); - // Convert vectors to mrcpp::Coord for (auto &c : cart) { mrcpp::Coord pos; std::copy_n(c.begin(), D, pos.begin()); pos_vec.push_back(pos); } - // Create the translated copies for (auto &pos : pos_vec) { auto *gauss = this->copy(); gauss->setPos(pos); diff --git a/src/functions/JpowerIntegrals.cpp b/src/functions/JpowerIntegrals.cpp index 6d04e6ba0..5aaf05d13 100644 --- a/src/functions/JpowerIntegrals.cpp +++ b/src/functions/JpowerIntegrals.cpp @@ -24,148 +24,48 @@ */ #include "JpowerIntegrals.h" -#include // std::find_if_not +#include namespace mrcpp { -/** - * # Class purpose - * Computes, stores, and provides indexed access to the sequence of - * “power integrals” \( J_m(l) \) for a range of integer shifts \( l \). 
- * - * In this implementation each sequence \(\{J_m(l)\}_{m=0}^{M}\) is produced by - * a **three–term recurrence** seeded by a closed form for \(J_0(l)\): - * - * - Seed: - * \f[ - * J_0(l) - * = \tfrac{1}{4}\,e^{-i\pi/4}\,\frac{1}{\sqrt{\pi a}}\, - * \exp\!\Big( \tfrac{i\,l^2}{4a} \Big) - * \f] - * - Parameters: - * \f[ - * \beta = \tfrac{i}{2a}, \qquad \alpha = l\,\beta - * \f] - * - Recurrence (implemented below): - * \f[ - * J_{m+2} - * = \frac{\alpha\,J_{m+1} + \frac{m}{m+2}\,\beta\,J_{m}}{m+3}, - * \qquad m=0,1,\dots - * \f] - * - * The class builds these sequences for all integer \( l \) in the - * symmetric range \([-(2^n-1), \dots, -1, 0, \dots, 2^n-1]\), - * where `n = scaling` and `N = 2^n`. Internally, results are stored as - * `std::vector>` (one vector per shift \(l\)). - * - * ## Parameters - * - `a` : real positive parameter in the Gaussian-like kernel (see seed). - * - `scaling` : defines the number of integer shifts as \(N=2^{\text{scaling}}\). - * - `M` : the highest power index — sequences contain \(J_0,\dots,J_M\). - * - `threshold` : magnitude cutoff used by `crop()` to trim negligible tail values. - * (Note: the current constructor does **not** call `crop()`. You - * may call it manually after construction if you want trimming.) - * - * ## Indexing - * The operator `operator[](int index)` accepts the natural range - * \([-(2^n-1), \dots, 2^n-1]\). Negative indices are transparently - * mapped to the underlying zero-based container. 
- */ JpowerIntegrals::JpowerIntegrals(double a, int scaling, int M, double threshold) { this->scaling = scaling; - int N = 1 << scaling; // N = 2^scaling shifts on the positive side (including 0) - // Store sequences for l = 0,1,...,N-1 + int N = 1 << scaling; for (int l = 0; l < N; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold)); - // And for l = -(N-1),...,-1 (append after the non-negative ones) for (int l = 1 - N; l < 0; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold)); } -/** - * @brief Random–access to the vector of \f$ \{J_m(l)\}_{m=0}^{M} \f$ for a given shift @p index. - * - * @param index Integer shift \(l\) in \([-(2^n-1), \dots, 2^n-1]\). - * Negative inputs are internally wrapped to the layout used - * by the `integrals` storage. - * @return Reference to the vector `J` containing `[J_0, J_1, ..., J_M]` for that \(l\). - * - * @note This is a non-const overload returning a mutable reference; callers - * can modify the stored sequence if needed. - */ std::vector> &JpowerIntegrals::operator[](int index) { - if (index < 0) index += integrals.size(); // wrap negative l to the back half of the container + if (index < 0) index += integrals.size(); return integrals[index]; } -/** - * @brief Build the sequence \f$ \{J_m(l)\}_{m=0}^{M} \f$ using the closed-form seed and recurrence. - * - * @param l Integer shift parameter. - * @param a Positive real parameter from the analytic form. - * @param M Highest power index to compute (inclusive). - * @param threshold Magnitude threshold (currently not used inside this routine). - * @return Vector of length \f$ M+1 \f$ with entries \f$ [J_0, J_1, \dots, J_M] \f$. - * - * Implementation notes: - * - We store an initial dummy 0 followed by \(J_0\) so that the recurrence - * can read the two previous entries uniformly; we erase the dummy before return. - * - Complex constants: - * * `i` is introduced through `std::complex` literals (`std::complex_literals`). 
- * * \f$ \beta = i/(2a) \f$, \f$ \alpha = l \beta \f$. - * - Numerical behavior: - * * The recurrence is simple and linear; for large |m| or extreme `a` you may - * see accumulation of round-off; consider `crop()` afterwards if you know - * the tail becomes negligible for your use-case. - */ std::vector> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double /*threshold*/) { using namespace std::complex_literals; - // Seed J0(l) = (1/4) e^{-iπ/4} / sqrt(π a) * exp( i l^2 / (4 a) ) std::complex J_0 = 0.25 * std::exp(-0.25i * M_PI) / std::sqrt(M_PI * a) * std::exp(0.25i * static_cast(l * l) / a); - // β = i/(2a) and α = l β std::complex beta(0, 0.5 / a); auto alpha = static_cast(l) * beta; - // Work buffer: prepend a dummy zero so that J.back() = J_m, J[J.size()-2] = J_{m-1} - // After the loop we drop the dummy, leaving [J_0, J_1, ..., J_M]. std::vector> J = {0.0, J_0}; - // Three-term recurrence: - // J_{m+2} = (α J_{m+1} + (m/(m+2)) β J_m) / (m+3), for m = 0..M-1 for (int m = 0; m < M; m++) { - std::complex term1 = J[J.size() - 1] * alpha; // α J_{m+1} - std::complex term2 = J[J.size() - 2] * beta * static_cast(m) / static_cast(m + 2); // (m/(m+2)) β J_m - std::complex last = (term1 + term2) / static_cast(m + 3); // divide by (m+3) - J.push_back(last); // append J_{m+2} + std::complex term1 = J[J.size() - 1] * alpha; + std::complex term2 = J[J.size() - 2] * beta * static_cast(m) / static_cast(m + 2); + std::complex last = (term1 + term2) / static_cast(m + 3); + J.push_back(last); } - // Remove the initial dummy zero so the vector starts with J_0 J.erase(J.begin()); return J; } -/** - * @brief Trim a sequence by removing small-magnitude values from its tail. - * - * @param J The sequence \f$ [J_0, J_1, \dots] \f$ to be cropped in-place. - * @param threshold Elements with both |real| and |imag| < threshold are considered negligible. 
- * - * Details: - * - Traverses from the end until it finds the first element whose real/imag - * magnitude is **not** negligible and erases everything past that point. - * - Use this to keep only the “significant” prefix \f$ J_0,\dots,J_{m^\*} \f$ - * if you know the tail rapidly vanishes for your parameters. - * - * @warning The constructor does not call this automatically. If you want - * trimmed sequences, call `crop(...)` after construction. - */ void JpowerIntegrals::crop(std::vector> &J, double threshold) { - // Predicate: element is negligible if both real and imaginary parts are below threshold auto isNegligible = [threshold](const std::complex &c) { return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; }; - // Erase the trailing run of negligible entries J.erase(std::find_if_not(J.rbegin(), J.rend(), isNegligible).base(), J.end()); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/LegendrePoly.cpp b/src/functions/LegendrePoly.cpp index 4f95af763..6629cd2f8 100644 --- a/src/functions/LegendrePoly.cpp +++ b/src/functions/LegendrePoly.cpp @@ -23,27 +23,6 @@ * */ -/* - * File overview - * ------------- - * Implements LegendrePoly, a Polynomial subclass that builds the (shifted/scaled) - * Legendre polynomial of a given order k using the standard three-term recurrence. - * - * Key ideas: - * - Coefficients for P_k on the canonical interval [-1, 1] are computed once - * (in the Polynomial's coefficient storage) by combining cached lower orders. - * - The resulting polynomial is then *affinely transformed* by an internal - * mapping x ↦ N·x + L via Polynomial::translate(l) and Polynomial::dilate(n), - * so that users obtain a Legendre polynomial defined on the transformed domain. - * - A lightweight cache (ObjectCache) avoids recomputing lower - * orders repeatedly when constructing higher ones. - * - * Extras: - * - firstDerivative(x) returns both P_k(x) and P'_k(x) evaluated at x (w.r.t. 
the - * current affine mapping). - * - secondDerivative(x) is declared but not implemented (calls NOT_IMPLEMENTED_ABORT). - */ - #include "LegendrePoly.h" #include "core/ObjectCache.h" #include "utils/Printer.h" @@ -52,96 +31,46 @@ using namespace Eigen; namespace mrcpp { -using LegendreCache = ObjectCache; // Cache of LegendrePoly objects keyed by order +using LegendreCache = ObjectCache; -/** @brief Construct the order-k Legendre polynomial and apply an affine transform. - * - * @param k Polynomial order (degree). - * @param n Dilation factor applied after construction (see Polynomial::dilate). - * @param l Translation applied before dilation (see Polynomial::translate). - * - * Details: - * - The raw Legendre polynomial P_k is constructed on [-1, 1] using the standard - * recurrence: - * P_0(q) = 1, - * P_1(q) = q, - * P_k(q) = ((2k-1) q P_{k-1}(q) - (k-1) P_{k-2}(q)) / k, k ≥ 2. - * Here q is the canonical variable on [-1, 1]. - * - Lower-order polynomials P_{k-1}, P_{k-2} are fetched (or created once and - * cached) through LegendreCache to avoid recomputation. - * - After coefficients are set, the base interval [-1, 1] is recorded via - * setBounds, then the polynomial is translated by l and dilated by n, - * effectively producing P_k(N·x + L) in the Polynomial base class, where - * N and L are the stored affine parameters. - */ LegendrePoly::LegendrePoly(int k, double n, double l) : Polynomial(k) { - // Ensure lower orders are cached: creating P_k requires P_{k-1} and P_{k-2}. - // We preload P_{k-1} so the subsequent compute can fetch both from cache. 
LegendreCache &Cache = LegendreCache::getInstance(); if (k >= 1) { if (not Cache.hasId(k - 1)) { auto *lp = new LegendrePoly(k - 1); - // Rough memory accounting: 2*(k+1) doubles (heuristic) for the cache Cache.load(k - 1, lp, 2 * sizeof(double) * (k + 1)); } } - // Compute P_k on the canonical domain [-1, 1] computeLegendrePolynomial(k); - // Record canonical bounds for sanity checks in eval/derivatives double a = -1.0; double b = 1.0; setBounds(&a, &b); - // Apply affine map x ↦ N·x + L via translate(l) then dilate(n) translate(l); dilate(n); } -/** @brief Populate this->coefs with the coefficients of P_k on [-1,1]. - * - * Implements the standard three-term Legendre recurrence in coefficient space: - * P_0(q) = 1, - * P_1(q) = q, - * P_k(q) = ((2k-1) q P_{k-1}(q) - (k-1) P_{k-2}(q)) / k, for k ≥ 2. - * - * Coefficient layout: - * - The Polynomial base stores coefficients in ascending powers: - * coefs[j] corresponds to q^j. - * - To form P_k, we combine cached P_{k-1} and P_{k-2} term-by-term. - * - * Edge cases: - * - k=0 and k=1 are assigned explicitly. - */ void LegendrePoly::computeLegendrePolynomial(int k) { assert(this->size() >= k); if (k == 0) { - // P_0(q) = 1 this->coefs[0] = 1.0; } else if (k == 1) { - // P_1(q) = q this->coefs[0] = 0.0; this->coefs[1] = 1.0; } else { - // Fetch lower-order polynomials from the cache LegendreCache &Cache = LegendreCache::getInstance(); - LegendrePoly &Lm1 = Cache.get(k - 1); // P_{k-1} - LegendrePoly &Lm2 = Cache.get(k - 2); // P_{k-2} + LegendrePoly &Lm1 = Cache.get(k - 1); + LegendrePoly &Lm2 = Cache.get(k - 2); auto K = (double)k; - // Constant term (j=0): - // coef0 = -(k-1)/k * (coef from P_{k-2} at j=0) double cm2_0 = Lm2.getCoefs()[0]; this->coefs[0] = -(K - 1.0) * cm2_0 / K; - // Remaining terms (j=1..k): - // For j ≤ k-2, coef_j = ((2k-1)/k) * coef_{j-1}(P_{k-1}) - ((k-1)/k) * coef_j(P_{k-2}) - // For j = k-1 or k, the P_{k-2} contribution vanishes (index out of range), - // so only the first term remains. 
for (int j = 1; j < k + 1; j++) { double cm1_jm1 = Lm1.getCoefs()[j - 1]; if (j <= k - 2) { @@ -154,23 +83,6 @@ void LegendrePoly::computeLegendrePolynomial(int k) { } } -/** @brief Evaluate P_k(x) and its first derivative at x (w.r.t. the current affine map). - * - * @param x Point of evaluation (in the *external* variable). - * @return Vector2d { P_k(x), d/dx P_k(x) }. - * - * Details: - * - Bounds check (via outOfBounds) uses the base interval (set to [-1,1] and - * then transformed by the affine map stored in the Polynomial base). - * - Internally we evaluate in the mapped coordinate q = N·x + L. - * - Uses a forward recursion to accumulate both value and derivative following - * the Legendre three-term recurrence: - * y_i(q) = ((2i-1) q y_{i-1} - (i-1) y_{i-2}) / i - * dy_i(q) = ((2i-1) q dy_{i-1} - (i-1) dy_{i-2} + (2i-1) y_{i-1}) / i - * (the last term is ∂/∂q of (2i-1) q y_{i-1}). - * - The returned derivative is with respect to the external variable x, taking - * into account the internal affine mapping (via the Polynomial base members). 
- */ Vector2d LegendrePoly::firstDerivative(double x) const { double c1, c2, c4, ym, yp, y; double dy, dyp, dym; @@ -179,47 +91,39 @@ Vector2d LegendrePoly::firstDerivative(double x) const { MSG_ABORT("Argument out of bounds: " << x << " [" << this->A[0] << ", " << this->B[0] << "]"); } - // Affine map from external x to internal q double q = this->N * x + this->L; Vector2d val; int order = getOrder(); - // P_0(q) = 1, P'_0(q) = 0 if (order == 0) { val(0) = 1.0; val(1) = 0.0; return val; } - // P_1(q) = q; derivative follows the affine mapping stored in the base if (order == 1) { val(0) = q; - val(1) = this->N * 1.0 + this->L; // as implemented in the original code + val(1) = this->N * 1.0 + this->L; return val; } - // Initialize recurrence for i=2..order - y = q; // y = P_1 - dy = 1.0; // dy = d/dq P_1 - yp = 1.0; // yp = P_0 - dyp = 0.0;// dyp = d/dq P_0 + y = q; + dy = 1.0; + yp = 1.0; + dyp = 0.0; for (int i = 2; i < order + 1; i++) { c1 = (double)i; - c2 = c1 * 2.0 - 1.0; // (2i-1) - c4 = c1 - 1.0; // (i-1) + c2 = c1 * 2.0 - 1.0; + c4 = c1 - 1.0; - // Rotate "previous" states ym = y; - // Value recurrence: y = P_i y = (c2 * q * y - c4 * yp) / c1; - // Shift lower-order values yp = ym; - // Derivative recurrence in q dym = dy; dy = (c2 * q * dy - c4 * dyp + c2 * yp) / c1; dyp = dym; @@ -230,15 +134,6 @@ Vector2d LegendrePoly::firstDerivative(double x) const { return val; } -/** @brief Evaluate P_k(x) together with first and second derivatives (not implemented). - * - * @param x Point of evaluation. - * @return Vector3d { P_k(x), P'_k(x), P''_k(x) }. - * - * @note This routine currently calls NOT_IMPLEMENTED_ABORT. The code that follows - * shows the intended structure (value/first/second derivative recurrences), - * but it is not active. Keep as-is to reflect current behavior. 
- */ Vector3d LegendrePoly::secondDerivative(double x) const { NOT_IMPLEMENTED_ABORT; double c1, c2, c4, ym, yp, y, d2y; diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp index 1fcbf0341..e0b0d708d 100644 --- a/src/functions/Polynomial.cpp +++ b/src/functions/Polynomial.cpp @@ -23,30 +23,6 @@ * */ -/** - * Implementation notes for Polynomial - * ----------------------------------- - * This file implements a univariate polynomial P expressed in an *affine* - * internal coordinate q = N*x - L, where: - * - N is a dilation (scale) factor, - * - L is a translation shift (stored with sign to match the internal form). - * - * Coefficients are stored in ascending powers of q: coefs[k] multiplies q^k. - * Many operations (evaluation, algebra, derivatives, integrals) are performed - * with respect to q but expose an API in terms of the external variable x. - * - * Bounding: - * The base class (RepresentableFunction) holds optional lower/upper bounds - * in the *q*-domain. Helper functions `getScaledLowerBound()` / - * `getScaledUpperBound()` convert those bounds to the *x*-domain via the - * inverse affine map x = (q + L)/N. Evaluation outside the bounds yields 0. - * - * Algebra: - * Addition and multiplication require the same affine map (same N and L). - * We check that before combining coefficient vectors to avoid mixing - * different coordinate systems. - */ - #include #include "Polynomial.h" @@ -57,18 +33,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief Construct a zero-valued polynomial of order @p k with optional bounds. - * - * @param k Maximum power (order). The polynomial will have (k+1) coefficients. - * @param a (Optional) pointer to lower bound in x; forwarded to base class. - * @param b (Optional) pointer to upper bound in x; forwarded to base class. - * - * Details - * ------- - * - Initializes the affine map to identity: N = 1, L = 0 (so q = x). - * - Allocates a coefficient vector of length k+1, initialized to zero. 
- * - Bounds are stored by the base class; they affect evalf() and integration. - */ Polynomial::Polynomial(int k, const double *a, const double *b) : RepresentableFunction<1, double>(a, b) { assert(k >= 0); @@ -77,19 +41,6 @@ Polynomial::Polynomial(int k, const double *a, const double *b) this->coefs = VectorXd::Zero(k + 1); } -/** @brief Construct the expanded monomial (x - c)^k (up to scaling) with optional bounds. - * - * @param c Shift in the monomial center (i.e., builds coefficients of (x - c)^k). - * @param k Order of the monomial. - * @param a Optional lower bound; forwarded to base. - * @param b Optional upper bound; forwarded to base. - * - * Details - * ------- - * - Uses binomial coefficients to expand (x - c)^k into the internal q = x - * basis (N = 1, L = 0). - * - coefs[i] = binom(k, i) * (-c)^(k - i). - */ Polynomial::Polynomial(double c, int k, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -98,14 +49,6 @@ Polynomial::Polynomial(double c, int k, const double *a, const double *b) for (int i = 0; i <= k; i++) { this->coefs[i] *= std::pow(c, k - i); } } -/** @brief Construct from a coefficient vector (ascending powers in q) with optional bounds. - * - * @param c Coefficients for q^0, q^1, ..., q^k. - * @param a Optional lower bound; forwarded to base. - * @param b Optional upper bound; forwarded to base. - * - * Initializes affine map to identity (N=1, L=0) and copies coefficients. - */ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -113,7 +56,6 @@ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) setCoefs(c); } -/** @brief Copy constructor (deep copy), including bounds and affine map. 
*/ Polynomial::Polynomial(const Polynomial &poly) : RepresentableFunction<1>(poly) { this->N = poly.N; @@ -121,10 +63,6 @@ Polynomial::Polynomial(const Polynomial &poly) this->coefs = poly.coefs; } -/** @brief Copy assignment (deep copy), including bounds and affine map. - * - * Copies base part, then affine parameters N,L and coefficient vector. - */ Polynomial &Polynomial::operator=(const Polynomial &poly) { RepresentableFunction<1>::operator=(poly); this->N = poly.N; @@ -133,19 +71,6 @@ Polynomial &Polynomial::operator=(const Polynomial &poly) { return *this; } -/** @brief Evaluate the polynomial at external x, honoring bounds. - * - * @param x Point of evaluation in external coordinates. - * @return P(x) if within bounds, otherwise 0. - * - * Implementation - * -------------- - * - If bounded, quickly reject x outside the mapped interval. - * - Evaluate in the internal coordinate q = N*x - L using a simple - * power-accumulation loop: - * y = sum_k coefs[k] * q^k. - * (xp accumulates q^k without recomputing powers.) - */ double Polynomial::evalf(double x) const { if (isBounded()) { if (x < this->getScaledLowerBound()) return 0.0; @@ -155,55 +80,33 @@ double Polynomial::evalf(double x) const { double y = 0.0; for (int k = 0; k < getOrder() + 1; k++) { y += (xp * this->coefs[k]); - xp *= this->N * x - this->L; // advance q^k -> q^(k+1) + xp *= this->N * x - this->L; } return y; } -/** @brief Lower bound in external x-space, derived from the internal bound via x = (q + L)/N. - * - * Preconditions: polynomial must be bounded (otherwise errors). - */ double Polynomial::getScaledLowerBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->A[0] + this->L)); } -/** @brief Upper bound in external x-space, derived from the internal bound via x = (q + L)/N. - * - * Preconditions: polynomial must be bounded (otherwise errors). 
- */ double Polynomial::getScaledUpperBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->B[0] + this->L)); } -/** @brief Normalize the polynomial in L2 on its current (bounded) domain. - * - * Details - * ------- - * - Computes squared norm via innerProduct(*this). - * - Scales coefficients by 1/sqrt(norm). - * - If unbounded or norm < 0, aborts with an error. - */ void Polynomial::normalize() { double sqNorm = calcSquareNorm(); if (sqNorm < 0.0) MSG_ABORT("Cannot normalize polynomial"); (*this) *= 1.0 / std::sqrt(sqNorm); } -/** @brief Compute squared L2 norm on current bounds, or -1 if unbounded. */ double Polynomial::calcSquareNorm() { double sqNorm = -1.0; if (isBounded()) { sqNorm = this->innerProduct(*this); } return sqNorm; } -/** @brief Effective order = highest index i with |coefs[i]| > MachineZero. - * - * Note: This ignores trailing coefficients numerically equal to zero, - * and can be lower than (coefs.size()-1). - */ int Polynomial::getOrder() const { int n = 0; for (int i = 0; i < this->coefs.size(); i++) { @@ -212,24 +115,11 @@ int Polynomial::getOrder() const { return n; } -/** @brief In-place scale: P(x) ← c * P(x). */ Polynomial &Polynomial::operator*=(double c) { this->coefs = c * this->coefs; return *this; } -/** @brief In-place product P(x) ← P(x) * Q(x) (same affine map required). - * - * Preconditions - * ------------- - * - Both polynomials must share identical (N, L) so they represent functions - * in the same internal coordinate q. Otherwise we error out. - * - * Implementation - * -------------- - * - Standard coefficient convolution yielding degree(P)+degree(Q). - * - Affine parameters are left unchanged. 
- */ Polynomial &Polynomial::operator*=(const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -246,7 +136,6 @@ Polynomial &Polynomial::operator*=(const Polynomial &Q) { return P; } -/** @brief Return Q(x) = c * P(x). */ Polynomial Polynomial::operator*(double c) const { const Polynomial &P = *this; Polynomial Q(P); @@ -254,11 +143,6 @@ Polynomial Polynomial::operator*(double c) const { return Q; } -/** @brief Return R(x) = P(x) * Q(x) (same affine map required). - * - * Returns an unbounded polynomial that inherits the affine map and - * coefficients from the in-place logic. - */ Polynomial Polynomial::operator*(const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -267,23 +151,16 @@ Polynomial Polynomial::operator*(const Polynomial &Q) const { return R; } -/** @brief In-place sum: P(x) ← P(x) + Q(x). (Same affine map required.) */ Polynomial &Polynomial::operator+=(const Polynomial &Q) { this->addInPlace(1.0, Q); return *this; } -/** @brief In-place difference: P(x) ← P(x) - Q(x). (Same affine map required.) */ Polynomial &Polynomial::operator-=(const Polynomial &Q) { this->addInPlace(-1.0, Q); return *this; } -/** @brief In-place fused add: P(x) ← P(x) + c * Q(x). (Same affine map required.) - * - * Chooses the max order among P and Q and adds coefficients component-wise, - * padding with zeros where needed. - */ void Polynomial::addInPlace(double c, const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -301,7 +178,6 @@ void Polynomial::addInPlace(double c, const Polynomial &Q) { P.setCoefs(newCoefs); } -/** @brief Return R(x) = P(x) + c * Q(x), leaving operands unchanged. 
*/ Polynomial Polynomial::add(double c, const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -310,7 +186,6 @@ Polynomial Polynomial::add(double c, const Polynomial &Q) const { return R; } -/** @brief Return Q(x) = dP/dx (external derivative). */ Polynomial Polynomial::calcDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -318,18 +193,6 @@ Polynomial Polynomial::calcDerivative() const { return Q; } -/** @brief In-place derivative: P(x) ← dP/dx. - * - * Implementation - * -------------- - * - Works on the internal representation in q = N*x - L: - * d/dx [ Σ a_i q^i ] = Σ i*a_i q^(i-1) * dq/dx = N * Σ i*a_i q^(i-1). - * - Since the current storage uses q-powers, we first form Σ i*a_i q^(i-1) - * in coefficient space. The factor N is embedded in the affine mapping - * (via evaluation), and the polynomial’s coefficient update matches the - * intended external derivative semantics given how evalf() builds q. - * - The code mirrors the existing convention (keeping N,L intact). - */ void Polynomial::calcDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -339,7 +202,6 @@ void Polynomial::calcDerivativeInPlace() { P.setCoefs(newCoefs); } -/** @brief Return the indefinite integral Q(x) = ∫ P(x) dx with zero constant. */ Polynomial Polynomial::calcAntiDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -347,40 +209,17 @@ Polynomial Polynomial::calcAntiDerivative() const { return Q; } -/** @brief In-place antiderivative: P(x) ← ∫ P(x) dx, integration constant = 0. - * - * Implementation - * -------------- - * - In q-space: ∫ (Σ a_i q^i) dq = Σ a_i/(i+1) q^(i+1) + C. - * - For external x, dx = dq / N; the factor 1/N is accounted for when - * integrating over x in Polynomial::integrate(), not in coefficient - * construction here. We thus store the q-antiderivative coefficients. 
- */ void Polynomial::calcAntiDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); const VectorXd &oldCoefs = P.getCoefs(); VectorXd newCoefs = VectorXd::Zero(P_order + 2); - newCoefs[0] = 0.0; // integration constant + newCoefs[0] = 0.0; newCoefs[1] = oldCoefs[0]; for (int i = 2; i < newCoefs.size(); i++) { newCoefs[i] = 1.0 / i * oldCoefs[i - 1]; } P.setCoefs(newCoefs); } -/** @brief Analytic definite integral ∫_a^b P(x) dx, honoring bounds if present. - * - * @param a Optional external lower limit (overrides internal bound if tighter). - * @param b Optional external upper limit (overrides internal bound if tighter). - * @return The integral value over max(lower bounds) to min(upper bounds). - * - * Details - * ------- - * - If polynomial is bounded, the domain is intersected with [a,b]. - * - Builds the (q-based) antiderivative and evaluates it at the endpoints - * transformed to the q-domain by the affine map. The Jacobian dx = dq/N - * yields a prefactor 1/N (“sfac”). - * - If the final [lb,ub] is empty, returns 0. - */ double Polynomial::integrate(const double *a, const double *b) const { double lb = -DBL_MAX, ub = DBL_MAX; if (this->isBounded()) { @@ -399,16 +238,6 @@ double Polynomial::integrate(const double *a, const double *b) const { return sfac * (antidiff.evalf(ub) - antidiff.evalf(lb)); } -/** @brief Inner product ⟨P,Q⟩ over P’s current bounded domain. - * - * @param Q Polynomial to multiply with. - * @return ∫ P(x) Q(x) dx over P’s bounds. - * - * Details - * ------- - * - Requires that P is bounded; Q is multiplied algebraically in q-space. - * - The product polynomial inherits P’s bounds; we then call integrate(). 
- */ double Polynomial::innerProduct(const Polynomial &Q) const { const Polynomial &P = *this; if (not P.isBounded()) MSG_ERROR("Unbounded polynomial"); diff --git a/src/functions/RepresentableFunction.cpp b/src/functions/RepresentableFunction.cpp index d5f2283e1..9d0dc0300 100644 --- a/src/functions/RepresentableFunction.cpp +++ b/src/functions/RepresentableFunction.cpp @@ -23,57 +23,11 @@ * */ -/** - * # RepresentableFunction (implementation) - * - * A lightweight base providing **optional rectangular bounds** for - * D-dimensional functions used across MRCPP. Derived classes supply the - * actual function evaluation; this class only manages: - * - * - whether a function is **bounded** or **unbounded**; - * - storage and lifetime of lower/upper bounds `A[d]`, `B[d]`; - * - cheap **containment tests** via @ref outOfBounds. - * - * ## Interval semantics - * Bounds are interpreted as a Cartesian product of **half-open intervals**: - * - * \f[ - * \prod_{d=0}^{D-1} [A_d,\; B_d) - * \f] - * - * so a point is considered out of bounds if **any** coordinate is - * `< A_d` or `>= B_d`. This convention is important for tessellations, - * avoiding double counting on shared faces. - * - * ## Ownership and copying - * - Bounds are stored in dynamically allocated arrays `A` and `B` when the - * function is bounded. The destructor frees them. - * - The **copy constructor** performs a deep copy of the bounds. - * - The **assignment operator** in this base intentionally **does not** - * copy bounds (a documented “no-op” that returns `*this`). If you need to - * copy bounds, use the copy constructor instead, or call `setBounds()`. - * - * Derived functors can call `outOfBounds()` prior to expensive evaluations to - * fast-return zeros outside the active box. - */ - #include "RepresentableFunction.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Construct with optional bounds. 
- * - * @param a Pointer to the lower bounds array of length D, or `nullptr` for - * an unbounded function. - * @param b Pointer to the upper bounds array of length D, or `nullptr` for - * an unbounded function. - * - * If either pointer is `nullptr`, the function is marked **unbounded** and no - * memory is allocated. Otherwise, both arrays are deep-copied and the function - * is marked **bounded**. Each dimension is validated to satisfy `a[d] ≤ b[d]`. - */ template RepresentableFunction::RepresentableFunction(const double *a, const double *b) { if (a == nullptr or b == nullptr) { @@ -92,12 +46,6 @@ RepresentableFunction::RepresentableFunction(const double *a, const double } } -/** - * @brief Copy-construct from another function, including its bounds. - * - * Deep-copies the bounds if @p func is bounded; otherwise keeps the new - * function unbounded. - */ template RepresentableFunction::RepresentableFunction(const RepresentableFunction &func) { if (func.isBounded()) { @@ -115,25 +63,12 @@ RepresentableFunction::RepresentableFunction(const RepresentableFunction RepresentableFunction & RepresentableFunction::operator=(const RepresentableFunction &func) { return *this; } -/** - * @brief Destructor releases bound storage if allocated. - */ template RepresentableFunction::~RepresentableFunction() { if (this->isBounded()) { @@ -144,16 +79,6 @@ RepresentableFunction::~RepresentableFunction() { this->B = nullptr; } -/** - * @brief Set (or overwrite) bounds. - * - * @param a Lower bounds array of length D (must be non-null). - * @param b Upper bounds array of length D (must be non-null). - * - * - If the function was previously unbounded, storage for `A` and `B` is - * allocated and the function becomes bounded. - * - Each dimension is validated to have `a[d] ≤ b[d]`. 
- */ template void RepresentableFunction::setBounds(const double *a, const double *b) { if (a == nullptr or b == nullptr) { MSG_ERROR("Invalid arguments"); } @@ -169,16 +94,6 @@ void RepresentableFunction::setBounds(const double *a, const double *b) { } } -/** - * @brief Check whether a point is outside the active bounds. - * - * @param r D-tuple (coordinate) to test. - * @return `true` if unambiguously out of bounds, `false` otherwise. - * - * Semantics: if the function is **unbounded**, this always returns `false`. - * If bounded, it returns `true` when **any** coordinate violates the - * half-open interval in that dimension: `r[d] < A[d]` or `r[d] >= B[d]`. - */ template bool RepresentableFunction::outOfBounds(const Coord &r) const { if (not isBounded()) { return false; } @@ -189,7 +104,6 @@ bool RepresentableFunction::outOfBounds(const Coord &r) const { return false; } -/* Explicit template instantiations used in MRCPP. */ template class RepresentableFunction<1, double>; template class RepresentableFunction<2, double>; template class RepresentableFunction<3, double>; @@ -197,4 +111,4 @@ template class RepresentableFunction<1, ComplexDouble>; template class RepresentableFunction<2, ComplexDouble>; template class RepresentableFunction<3, ComplexDouble>; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp index e866bd1af..0551a3cc4 100644 --- a/src/functions/function_utils.cpp +++ b/src/functions/function_utils.cpp @@ -23,101 +23,19 @@ * */ -/** - * @file function_utils.cpp - * - * @brief Overlap integrals for (possibly polynomially weighted) Cartesian - * Gaussian primitives using the Obara–Saika 1D recurrence. - * - * Overview - * -------- - * - `calc_overlap(GaussFunc, GaussFunc)` computes the D-dimensional - * overlap of two separable Cartesian Gaussians by factoring the integral - * into a product of 1D overlaps along each Cartesian axis. 
- * - The core 1D overlap - * \f[ - * S_{p_a p_b}(x_a,x_b;c_a,c_b) = - * \int_{-\infty}^{+\infty}\!(x-x_a)^{p_a}(x-x_b)^{p_b} - * e^{-c_a (x-x_a)^2}e^{-c_b (x-x_b)^2}\,dx - * \f] - * is evaluated by `ObaraSaika_ab`, a compact implementation of - * the Obara–Saika recurrence relations. - * - * Notation (1D) - * ------------- - * - Exponents: \f$c_a, c_b > 0\f$. - * - Powers (angular momenta per axis): \f$p_a, p_b \in \mathbb{N}_0\f$. - * - Centers: \f$x_a, x_b \in \mathbb{R}\f$. - * - Composite quantities: - * \f[ - * p = c_a + c_b,\quad - * \mu = \frac{c_a c_b}{p},\quad - * X_{AB} = x_a - x_b,\quad - * X_P = \frac{c_a x_a + c_b x_b}{p},\quad - * X_{PA} = X_P - x_a,\quad X_{PB} = X_P - x_b. - * \f] - * - Spherical–spherical overlap seed: - * \f[ - * S_{00} = \sqrt{\frac{\pi}{p}}\;\exp(-\mu X_{AB}^2). - * \f] - * - * Recurrence (sketch) - * ------------------- - * Let \f$S_{ij}\f$ denote the overlap with powers \f$(i,j)\f$. - * The code constructs the first “row” \f$S_{0j}\f$ for \f$j=0..p_b\f$ - * via the \f$X_{PB}\f$ recursion, then generates entries with \f$i>0\f$ - * using relations involving \f$X_{AB}\f$ and \f$X_{PA}\f$. - * Entries are packed into a 1D array `s_coeff` using a simple linear map. - * - * Limits - * ------ - * - `s_coeff` has fixed size 64; the code comment suggests support up to - * combined angular momenta roughly \f$p_a \le 20, p_b \le 20\f$ - * (so that `power_b + 2*power_a` stays within the array). - */ - #include "function_utils.h" namespace mrcpp { -// Forward declaration of the 1D core routine (defined below). namespace function_utils { double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b); } // namespace function_utils -/** - * @brief D-dimensional overlap of two separable Cartesian Gaussians. - * - * The D-dimensional overlap factorizes into a product of 1D overlaps - * along each coordinate axis. Each 1D factor is computed by the - * Obara–Saika recurrence (`ObaraSaika_ab`). 
- * - * Mathematically: - * \f[ - * \langle \mathbf{a} | \mathbf{b} \rangle - * = - * c_a c_b \prod_{d=1}^{D} - * \int_{-\infty}^{+\infty} - * (x_d - A_d)^{p_{a,d}} - * (x_d - B_d)^{p_{b,d}} - * e^{-\alpha_d (x_d - A_d)^2} - * e^{-\beta_d (x_d - B_d)^2}\,dx_d, - * \f] - * where `getPower()[d] = p_{*,d}`, `getPos()[d] = A_d or B_d`, - * `getExp()[d] = α_d or β_d`, and `getCoef()` multiplies at the end. - * - * @tparam D Dimensionality (1,2,3,...). - * @param a First Gaussian primitive (powers, position, exponents, coefficient). - * @param b Second Gaussian primitive (powers, position, exponents, coefficient). - * @return Overlap integral value. - */ template double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { double S = 1.0; - - // Multiply 1D overlaps across all Cartesian axes for (int d = 0; d < D; d++) { S *= ObaraSaika_ab( a.getPower()[d], b.getPower()[d], @@ -125,122 +43,54 @@ double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b a.getExp()[d], b.getExp()[d] ); } - - // Global prefactor from the two primitives S *= a.getCoef() * b.getCoef(); return S; } -/** - * @brief 1D Obara–Saika recurrence for Cartesian Gaussian overlap. - * - * Computes - * \f[ - * S_{ij} = - * \int_{-\infty}^{+\infty} - * (x-x_a)^i (x-x_b)^j - * e^{-c_a (x-x_a)^2} - * e^{-c_b (x-x_b)^2}\,dx, - * \f] - * returning the value for \f$i = \texttt{power\_a}\f$ and - * \f$j = \texttt{power\_b}\f$. 
- * - * Parameters - * ---------- - * @param power_a \f$p_a\f$ (non-negative integer power about center @p pos_a) - * @param power_b \f$p_b\f$ (non-negative integer power about center @p pos_b) - * @param pos_a \f$x_a\f$ (center of the first Gaussian) - * @param pos_b \f$x_b\f$ (center of the second Gaussian) - * @param expo_a \f$c_a\f$ (exponent of the first Gaussian) - * @param expo_b \f$c_b\f$ (exponent of the second Gaussian) - * - * Implementation notes - * -------------------- - * - Forms the composite exponent \f$p=c_a+c_b\f$ and reduced exponent - * \f$\mu = c_a c_b / p\f$. - * - Computes the “product center” \f$X_P = (c_a x_a + c_b x_b)/p\f$ - * and shift distances \f$X_{PA}=X_P-x_a\f$, \f$X_{PB}=X_P-x_b\f$. - * - Seeds the recurrence with the spherical–spherical overlap - * \f$S_{00} = \sqrt{\pi/p}\,\exp(-\mu (x_a-x_b)^2)\f$. - * - Builds the first row \f$S_{0j}\f$ for \f$j=0..p_b\f$ using the - * forward recurrence in \f$j\f$ (involving \f$X_{PB}\f$ and \f$p\f$). - * - Extends to \f$i>0\f$ by recurrences that couple \f$S_{i0}\f$, \f$S_{i1}\f$ - * to previously computed entries and the shifts \f$X_{AB}=x_a-x_b\f$, - * \f$X_{PA}\f$. - * - * Storage - * ------- - * - Coefficients are stored in a flat array `s_coeff` with a simple linear - * indexing that appends new entries as they are generated: - * - indices 0..power_b : `S_{0,0}, S_{0,1}, ..., S_{0,power_b}` - * - then pairs `(S_{1,0}, S_{1,1})`, `(S_{2,0}, S_{2,1})`, ... - * - The last needed value is at index `power_b + 2*power_a`. - * - * @return The requested overlap value `S_{power_a, power_b}`. 
- */ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) { int i, j; double expo_p, mu, pos_p, x_ab, x_pa, x_pb, s_00; - // Maximum size comment from original author: - // "The highest angular momentum combination is l=20 for a and b simultaneously" - // With a flat buffer of length 64, the required index (= power_b + 2*power_a) - // must be < 64. Keep powers within this bound. double s_coeff[64]; - // ---- Composite quantities and seed S_00 ---- - expo_p = expo_a + expo_b; // p = c_a + c_b - mu = expo_a * expo_b / (expo_a + expo_b); // μ = c_a c_b / p - pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p;// X_P - x_ab = pos_a - pos_b; // X_AB - x_pa = pos_p - pos_a; // X_PA - x_pb = pos_p - pos_b; // X_PB + expo_p = expo_a + expo_b; + mu = expo_a * expo_b / (expo_a + expo_b); + pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; + x_ab = pos_a - pos_b; + x_pa = pos_p - pos_a; + x_pb = pos_p - pos_b; s_00 = pi / expo_p; - s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); // S_{00} + s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); - // ---- First row: S_{0,j} for j=0..power_b ---- - s_coeff[0] = s_00; // S_{0,0} - s_coeff[1] = x_pb * s_00; // S_{0,1} + s_coeff[0] = s_00; + s_coeff[1] = x_pb * s_00; j = 1; - // Recurrence in j: - // S_{0,j+1} = X_PB * S_{0,j} + (j / (2p)) * S_{0,j-1} while (j < power_b) { s_coeff[j + 1] = x_pb * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); j++; } - // ---- Bootstrap first two entries with i > 0: S_{1,0}, S_{1,1} ---- - // Relations: - // S_{1,0} = S_{0,1} - X_AB * S_{0,0} - // S_{1,1} = X_PA * S_{1,0} + (j/(2p)) * S_{0,j} with j = power_b - s_coeff[j + 1] = s_coeff[j] - x_ab * s_coeff[j - 1]; // S_{1,0} - s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); // S_{1,1} + s_coeff[j + 1] = s_coeff[j] - x_ab * s_coeff[j - 1]; + s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); i = 1; - // ---- General i>0 step: 
append (S_{i+1,0}, S_{i+1,1}) for i=1..power_a-1 ---- while (i < power_a) { - int i_l = j + 2 * i + 1; // index for S_{i+1,0} - int i_r = j + 2 * i + 2; // index for S_{i+1,1} + int i_l = j + 2 * i + 1; + int i_r = j + 2 * i + 2; - // S_{i+1,0} = S_{i,1} - X_AB * S_{i,0} s_coeff[i_l] = s_coeff[i_l - 1] - x_ab * s_coeff[i_l - 2]; - - // S_{i+1,1} = X_PA * S_{i,1} + (j * S_{i,0} + i * S_{i-1,0}) / (2p) - // (the packed indexing below matches these dependencies) s_coeff[i_r] = x_pa * s_coeff[i_r - 2] + (j * s_coeff[i_r - 3] + i * s_coeff[i_r - 4]) / (2.0 * expo_p); i++; } - // The requested entry is S_{power_a, power_b} at index power_b + 2*power_a. return s_coeff[power_b + 2 * power_a]; } -// ---- Explicit template instantiations for common dimensions ---- template double function_utils::calc_overlap<1>(const GaussFunc<1> &a, const GaussFunc<1> &b); template double function_utils::calc_overlap<2>(const GaussFunc<2> &a, const GaussFunc<2> &b); template double function_utils::calc_overlap<3>(const GaussFunc<3> &a, const GaussFunc<3> &b); diff --git a/src/functions/special_functions.cpp b/src/functions/special_functions.cpp index 74c9ed750..73eff21db 100644 --- a/src/functions/special_functions.cpp +++ b/src/functions/special_functions.cpp @@ -27,79 +27,17 @@ namespace mrcpp { -/** - * @brief Analytic solution of the free-particle Schrödinger equation on ℝ at time @p t. - * - * This implements the standard Gaussian wave packet propagation (free particle, \f$\hbar=1\f$, mass \f$m=\tfrac12\f$ - * so that the free propagator denominator becomes \f$4it+\sigma\f$ as used below). Given an initial - * Gaussian of width parameter \f$\sigma>0\f$ centered at \f$x_0\f$ at time \f$t=0\f$, - * the wave function at time \f$t\f$ is - * - * \f[ - * \psi(x,t) - * = - * \sqrt{\frac{\sigma}{\,\sigma + 4\, i\, t\,}} - * \exp\!\left( - * -\,\frac{(x - x_0)^2}{\,\sigma + 4\, i\, t\,} - * \right), - * \f] - * - * which disperses in time and acquires a complex phase. 
- * - * #### Parameters - * - @param x Real-space coordinate \f$x \in \mathbb{R}\f$. - * - @param x0 Initial center \f$x_0\f$ of the Gaussian at \f$t=0\f$. - * - @param t Time \f$t \in \mathbb{R}\f$ (can be positive or negative). - * - @param sigma Width parameter \f$\sigma>0\f$ of the initial Gaussian. - * - * #### Returns - * The complex-valued wave function \f$\psi(x,t)\f$ at the requested space-time point. - * - * #### Notes - * - For @p t = 0, this reduces to \f$\psi(x,0)=\exp\!\big(-\tfrac{(x-x_0)^2}{\sigma}\big)\f$. - * - The branch of the complex square root is the principal branch via `std::sqrt(std::complex)`. - * - Numerical behavior near large \f$|t|\f$: the modulus decays like \f$|\sigma/(\sigma+4it)|^{1/2}\f$, - * while the phase is dominated by the complex denominator; standard `std::complex` arithmetic handles this. - * - This function assumes consistent physical units so that the closed form above applies directly. - */ std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma) { - std::complex i(0.0, 1.0); // imaginary unit i - std::complex denom = sigma + 4.0 * t * i; // σ + 4 i t + std::complex i(0.0, 1.0); + std::complex denom = sigma + 4.0 * t * i; std::complex exponent = -((x - x0) * (x - x0)) / denom; return std::sqrt(sigma) / std::sqrt(denom) * std::exp(exponent); } -/** - * @brief Smooth, compactly supported "bump" function on the interval \f$(a,b)\f$. - * - * Defines a non-negative \f$C^\infty\f$ function - * \f[ - * g_{a,b}(x) = - * \begin{cases} - * \exp\!\Big( -\,\dfrac{b-a}{(x-a)(b-x)} \Big), & a < x < b,\\[6pt] - * 0, & \text{otherwise}, - * \end{cases} - * \f] - * which vanishes to **all orders** at the endpoints \f$a\f$ and \f$b\f$. - * - * #### Parameters - * - @param x Real-space coordinate \f$x \in \mathbb{R}\f$. - * - @param a Left endpoint (must satisfy \f$aa\f$). - * - * #### Returns - * - \f$g_{a,b}(x)\f$ if \f$a < x < b\f$, and `0.0` otherwise. 
- * - * #### Numerical remarks - * - Near the endpoints, \f$(x-a)(b-x)\to 0^+\f$ and the exponent \f$-\frac{b-a}{(x-a)(b-x)}\f$ becomes large - * and negative, so the value safely underflows toward 0; this is expected and preserves smooth compact support. - * - If `a >= b`, the definition yields the zero function for all `x`. - */ double smooth_compact_function(double x, double a, double b) { if (a < x && x < b) { - // Equivalent to: exp( - (b-a) / ((x-a)(b-x)) ) return std::exp((a - b) / ((x - a) * (b - x))); } return 0.0; From 5b57471cfea509305d265d41227157a2e2dc37e6 Mon Sep 17 00:00:00 2001 From: Christian Tantardini Date: Fri, 31 Oct 2025 11:54:21 +0300 Subject: [PATCH 17/51] updated folder core doxygen documetation only in .h files and removed from .cpp files --- src/core/CrossCorrelation.cpp | 98 +---------- src/core/CrossCorrelationCache.cpp | 67 -------- src/core/FilterCache.cpp | 76 +-------- src/core/GaussQuadrature.cpp | 161 ++---------------- src/core/InterpolatingBasis.cpp | 63 ++----- src/core/LegendreBasis.cpp | 52 +----- src/core/MWFilter.cpp | 131 +------------- src/core/ObjectCache.cpp | 96 ----------- src/core/QuadratureCache.cpp | 86 +--------- src/core/ScalingBasis.cpp | 82 +-------- .../SchrodingerEvolution_CrossCorrelation.cpp | 42 +---- 11 files changed, 40 insertions(+), 914 deletions(-) diff --git a/src/core/CrossCorrelation.cpp b/src/core/CrossCorrelation.cpp index fd4ce23c8..149ba63fe 100644 --- a/src/core/CrossCorrelation.cpp +++ b/src/core/CrossCorrelation.cpp @@ -23,53 +23,6 @@ * */ -/* - * File purpose (high level): - * -------------------------- - * This implementation provides the CrossCorrelation class used to load and - * store *cross-correlation coefficient matrices* for multiwavelet filters. - * Two families of filters are supported (as encoded by `type`): - * - Interpolatory (prefix "I_") - * - Legendre (prefix "L_") - * - * Given an integer "order" k (poly order), we define K = k + 1. 
The class - * expects to find two binary files that contain the left and right cross - * correlation blocks: - * /

_c_left_ - * /

_c_right_ - * where

is "I" or "L" depending on the family. The directory is - * discovered via `details::find_filters()`. - * - * Each file stores K*K rows, and each row contains 2*K doubles. The data are - * read into two Eigen matrices: - * Left : (K*K) x (2K) - * Right : (K*K) x (2K) - * - * Notes on indexing and sizes: - * - K = order + 1 - * - The (K*K) rows represent a flattened 2D (i,j) index; i,j = 0..K-1. - * - Each row has 2K columns; the "2K" arises from the two-sided support - * of the correlation stencil (negative and positive offsets). - * - * Error handling: - * - The code uses MRCPP's messaging macros (MSG_ABORT / MSG_ERROR) to - * report invalid input or missing files. - * - * Endianness / portability: - * - Files are read as raw binary `double`. They must be produced on an - * architecture with compatible endianness and `double` layout. - */ - -/* - * - * - * \date Jul 18, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø - * - * \breif - */ - #include "CrossCorrelation.h" #include @@ -84,20 +37,11 @@ using namespace Eigen; namespace mrcpp { -// ---------------------------------------------------------------------------- -// Constructor: CrossCorrelation(int k, int t) -// Creates an object for filter family `t` (see CrossCorrelation.h for the -// enum/type codes) and polynomial order `k`. It validates the order, validates -// the family, discovers the filter library directory, composes the filenames, -// and immediately loads the binary data into `Left` and `Right`. -// ---------------------------------------------------------------------------- CrossCorrelation::CrossCorrelation(int k, int t) : type(t) , order(k) { - // Sanity check on order. `MaxOrder` is a library constant limiting k. if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order: " << this->order); - // Validate filter family (currently Interpol or Legendre are accepted). 
switch (this->type) { case (Interpol): case (Legendre): @@ -106,29 +50,16 @@ CrossCorrelation::CrossCorrelation(int k, int t) MSG_ERROR("Unknown filter type: " << this->type); } - // Locate the directory holding precomputed filter/correlation files. - // `details::find_filters()` returns the absolute path to that directory. setCCCPaths(details::find_filters()); - - // Load binary matrices Left and Right from disk into Eigen::MatrixXd. readCCCBin(); } -// ---------------------------------------------------------------------------- -// Constructor: CrossCorrelation(int t, const MatrixXd& L, const MatrixXd& R) -// Directly construct a CrossCorrelation from matrices already in memory. -// `order` is inferred from the number of columns: 2K columns → K = order+1. -// Ensures the Left/Right shapes are compatible and the family type is valid. -// No file I/O is performed here. -// ---------------------------------------------------------------------------- CrossCorrelation::CrossCorrelation(int t, const MatrixXd &L, const MatrixXd &R) : type(t) , order(L.cols() / 2 - 1) { - // Derive order from matrix width (2K columns → order = K - 1). if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order, " << this->order); if (R.cols() != L.cols()) MSG_ABORT("Right and Left cross correlation have different order!"); - // Validate family. switch (this->type) { case (Interpol): case (Legendre): @@ -137,18 +68,10 @@ CrossCorrelation::CrossCorrelation(int t, const MatrixXd &L, const MatrixXd &R) MSG_ERROR("Unknown filter type: " << this->type); } - // Shallow copies into class members (Eigen handles the allocation). this->Left = L; this->Right = R; } -// ---------------------------------------------------------------------------- -// setCCCPaths: Compose the on-disk file paths for the left/right matrices. -// Input: `lib` is the directory returned by `details::find_filters()`. 
-// The filenames follow the convention: -// Interpol: I_c_left_, I_c_right_ -// Legendre: L_c_left_, L_c_right_ -// ---------------------------------------------------------------------------- void CrossCorrelation::setCCCPaths(const std::string &lib) { switch (this->type) { case (Interpol): @@ -164,51 +87,34 @@ void CrossCorrelation::setCCCPaths(const std::string &lib) { } } -// ---------------------------------------------------------------------------- -// readCCCBin: Open the two binary files and load them into Eigen matrices. -// File structure: -// - Let K = order + 1. -// - Each file contains K*K consecutive rows. -// - Each row stores 2*K doubles (contiguous), representing one stencil line. -// Post-processing: -// - Any absolute value < MachinePrec is zeroed to improve sparsity/readability. -// - Matrices are resized to (K*K) x (2*K). -// ---------------------------------------------------------------------------- void CrossCorrelation::readCCCBin() { - // Open both files in binary mode; abort if either is missing. std::ifstream L_fis(this->L_path.c_str(), std::ios::binary); std::ifstream R_fis(this->R_path.c_str(), std::ios::binary); if (not L_fis) MSG_ABORT("Could not open cross correlation: " << this->L_path); if (not R_fis) MSG_ABORT("Could not open cross correlation: " << this->R_path); - // Derive matrix dimensions from order. int K = this->order + 1; this->Left = MatrixXd::Zero(K * K, 2 * K); this->Right = MatrixXd::Zero(K * K, 2 * K); - // Temporary row buffers for reading a single row (2K doubles) at a time. double dL[2 * K]; double dR[2 * K]; - // Loop over all K*K rows and fill both Left and Right matrices. for (int i = 0; i < K * K; i++) { - // Read one row for Left and one row for Right (raw binary doubles). L_fis.read((char *)dL, sizeof(double) * 2 * K); R_fis.read((char *)dR, sizeof(double) * 2 * K); - // Copy into Eigen matrices with small-value cleanup. 
for (int j = 0; j < 2 * K; j++) { - if (std::abs(dL[j]) < MachinePrec) dL[j] = 0.0; // numerical zeroing + if (std::abs(dL[j]) < MachinePrec) dL[j] = 0.0; if (std::abs(dR[j]) < MachinePrec) dR[j] = 0.0; this->Left(i, j) = dL[j]; this->Right(i, j) = dR[j]; } } - // Close streams (RAII would also close on destruction, but explicit is clear). L_fis.close(); R_fis.close(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/CrossCorrelationCache.cpp b/src/core/CrossCorrelationCache.cpp index 975135a0d..83438f545 100644 --- a/src/core/CrossCorrelationCache.cpp +++ b/src/core/CrossCorrelationCache.cpp @@ -23,28 +23,6 @@ * */ -/* - * File purpose (high level) - * ------------------------- - * This file implements a small, thread-safe cache for CrossCorrelation - * objects, parameterized on the filter family (template parameter T). - * - * Motivation: - * CrossCorrelation(order, type) loads two dense (K*K)×(2K) matrices - * from binary files. Loading them repeatedly is expensive. This cache - * stores one instance per (order, type) and returns references to it. - * - * Template parameter T: - * - Must be one of the family tags (e.g. Interpol, Legendre). - * - The explicit instantiations at the end fix T to these two values. - * - * Concurrency: - * - The cache uses MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK to guard the - * critical section that performs the initial load and insertion. - * - Once loaded, subsequent get() calls read from the cache without - * reloading (fast path). - */ - #include "CrossCorrelationCache.h" #include "utils/Printer.h" @@ -54,13 +32,6 @@ using namespace Eigen; namespace mrcpp { -/* - * Constructor - * ----------- - * Initialize the runtime 'type' field from the compile-time template - * parameter T, and validate that it matches a known family. - * If T is invalid, emit an error. 
- */ template CrossCorrelationCache::CrossCorrelationCache() { switch (T) { case (Interpol): @@ -74,22 +45,6 @@ template CrossCorrelationCache::CrossCorrelationCache() { } } -/* - * load(order) - * ----------- - * Ensure that a CrossCorrelation for the given 'order' exists in the cache. - * If not present, construct it and insert it with a memory budget hint. - * - * Steps: - * 1) Acquire OpenMP lock (thread-safe insertion). - * 2) If key 'order' is absent, allocate a new CrossCorrelation(order, type). - * 3) Compute a crude memory footprint 'memo' for cache accounting: - * - getLMatrix().size() returns (#rows * #cols) - * - Multiply by 2 because we also store a Right matrix of same size - * - Multiply by sizeof(double) to get bytes - * 4) Insert into the underlying ObjectCache keyed by 'order'. - * 5) Release lock. - */ template void CrossCorrelationCache::load(int order) { MRCPP_SET_OMP_LOCK(); if (not hasId(order)) { @@ -100,23 +55,11 @@ template void CrossCorrelationCache::load(int order) { MRCPP_UNSET_OMP_LOCK(); } -/* - * get(order) - * ---------- - * Return a reference to the cached CrossCorrelation for 'order'. - * If missing, it will be loaded on-demand (calling load()). - */ template CrossCorrelation &CrossCorrelationCache::get(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order); } -/* - * getLMatrix(order) - * ----------------- - * Convenience accessor: returns a const reference to the Left matrix - * for the requested 'order', auto-loading it if necessary. 
- */ template const Eigen::MatrixXd &CrossCorrelationCache::getLMatrix(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order).getLMatrix(); @@ -192,16 +135,6 @@ template const Eigen::MatrixXd &CrossCorrelationCache::getRMatrix(int return ObjectCache::get(order).getRMatrix(); } -/* - * Explicit template instantiations - * -------------------------------- - * Build concrete cache types for the known families: - * - CrossCorrelationCache - * - CrossCorrelationCache - * - * This ensures the compiler generates code for these two variants in - * this translation unit, so users can link against them. - */ template class CrossCorrelationCache; template class CrossCorrelationCache; diff --git a/src/core/FilterCache.cpp b/src/core/FilterCache.cpp index 3af5d5ddb..a233217d3 100644 --- a/src/core/FilterCache.cpp +++ b/src/core/FilterCache.cpp @@ -23,35 +23,6 @@ * */ -/* - * File purpose (high level) - * ------------------------- - * This file implements a small, thread-safe cache for MWFilter objects - * (multiwavelet filter banks) keyed by polynomial order. The cache is - * parameterized by filter family via the template parameter T (e.g., - * Interpol or Legendre). - * - * Motivation: - * Loading/constructing MWFilter(order, type) may involve I/O and setup. - * Reusing the same filter for repeated calls is faster and reduces memory - * churn. This cache ensures a single instance per (order, type). - * - * Concurrency: - * - Uses MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK to serialize the - * first-time construction and insertion into the cache. - * - After an entry exists, get() returns it without reloading. - * - * Memory accounting: - * - A rough memory footprint (in bytes) is computed as - * f->getFilter().size() * sizeof(double) - * and passed to the base ObjectCache for bookkeeping/eviction policy. - * - * Template parameter T: - * - Must be a valid family tag (Interpol or Legendre). 
- * - The explicit instantiations at the end of the file make sure code for - * these two variants is emitted by the compiler. - */ - #include "FilterCache.h" #include "utils/Printer.h" @@ -61,14 +32,6 @@ using namespace Eigen; namespace mrcpp { -/* - * Constructor - * ----------- - * Determine the runtime 'type' field from the compile-time template parameter T. - * If T is not a recognized family, emit an error. Valid values are: - * - Interpol : interpolatory multiwavelet family - * - Legendre : Legendre multiwavelet family - */ template FilterCache::FilterCache() { switch (T) { case (Interpol): @@ -82,20 +45,6 @@ template FilterCache::FilterCache() { } } -/* - * load(order) - * ----------- - * Ensure that an MWFilter for the given 'order' exists in the cache. If not, - * construct it and insert it along with a memory estimate. - * - * Steps: - * 1) Acquire OpenMP lock to prevent concurrent insertions. - * 2) Check presence via hasId(order). If absent: - * - Allocate MWFilter(order, type). - * - Compute 'memo' as (#elements) * sizeof(double). - * - Insert into base ObjectCache keyed by 'order'. - * 3) Release the lock. - */ template void FilterCache::load(int order) { MRCPP_SET_OMP_LOCK(); if (not hasId(order)) { @@ -106,40 +55,17 @@ template void FilterCache::load(int order) { MRCPP_UNSET_OMP_LOCK(); } -/* - * get(order) - * ---------- - * Retrieve a reference to the cached MWFilter for 'order'; if it doesn't - * exist yet, load() is called lazily. The reference is owned by the cache. - */ template MWFilter &FilterCache::get(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order); } -/* - * getFilterMatrix(order) - * ---------------------- - * Convenience accessor: returns a const reference to the underlying filter - * matrix for the requested 'order'. Triggers lazy load if necessary. 
- * - * Notes: - * - MWFilter::getFilter() is expected to return an Eigen::MatrixXd (or - * compatible type) containing the filter taps laid out as used elsewhere - * in MRCPP. - */ template const MatrixXd &FilterCache::getFilterMatrix(int order) { if (not hasId(order)) { load(order); } return ObjectCache::get(order).getFilter(); } -/* - * Explicit template instantiations - * -------------------------------- - * Instantiate the cache for the two standard families so clients can link - * against these symbols without needing to compile this TU with their T. - */ template class FilterCache; template class FilterCache; -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/GaussQuadrature.cpp b/src/core/GaussQuadrature.cpp index edf5c1958..c29eb1c82 100644 --- a/src/core/GaussQuadrature.cpp +++ b/src/core/GaussQuadrature.cpp @@ -23,41 +23,6 @@ * */ -/* - * Overview - * -------- - * This file implements Gauss-Legendre quadrature on an arbitrary interval [A,B], - * optionally replicated across a number of equal sub-intervals ("intervals"). - * - * Key members of GaussQuadrature (see header): - * - order : number of Gauss nodes per sub-interval. - * - intervals : number of equal sub-intervals tiling [A,B]. - * - A,B : lower/upper bounds of the total integration interval. - * - npts : total number of nodes = order * intervals. - * - unscaledRoots : size 'order' nodes on [-1,1] (canonical Gauss-Legendre). - * - unscaledWeights : size 'order' weights for [-1,1]. - * - roots, weights : size 'npts' nodes/weights mapped onto [A,B] with - * equal replication across 'intervals' pieces. - * - * Construction logic: - * - Compute unscaled roots/weights on [-1,1] via Newton's method - * applied to Legendre polynomials (calcGaussPtsWgts). - * - Map them onto [A,B] (or each sub-interval) and scale the weights - * appropriately (calcScaledPtsWgts). 
- * - * Integration helpers: - * - integrate() overloads for 1D/2D/3D perform tensor-product quadrature - * using the prepared (roots,weights). The general ND version is sketched - * but intentionally not implemented here. - * - * Notes on accuracy/stability: - * - The Newton iteration uses standard initial guesses based on cosines, - * converging rapidly for moderate 'order'. The maximum stable order is - * limited by MaxGaussOrder (configured in MRCPP). - * - For composite quadrature (intervals > 1), each sub-interval reuses - * the same unscaled rule after an affine mapping. - */ - #include "GaussQuadrature.h" #include "MRCPP/constants.h" #include "MRCPP/macros.h" @@ -68,21 +33,6 @@ using namespace Eigen; namespace mrcpp { -/** Constructor for Gauss-Legendre quadrature. - * - * \param k Polynominal order (number of nodes per sub-interval) - * \param a Lower bound of validity (A) - * \param b Upper bound of validity (B) - * \param inter Number of sub-intervals to divide |a-b| into (>=1) - * - * Steps: - * 1) Store parameters and validate input (order bounds, a=1). - * 2) Allocate vectors for: - * - global nodes/weights (size npts = order*intervals), - * - canonical nodes/weights on [-1,1] (size order). - * 3) Compute canonical Gauss nodes/weights on [-1,1] (calcGaussPtsWgts). - * 4) Map/scale them to the composite interval [A,B] (calcScaledPtsWgts). 
- */ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { this->order = k; this->A = a; @@ -97,28 +47,17 @@ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { this->npts = this->order * this->intervals; - // Global (composite) rule on [A,B], replicated across 'intervals' blocks this->roots = VectorXd::Zero(this->npts); this->weights = VectorXd::Zero(this->npts); - // Canonical (single-block) rule on [-1,1] this->unscaledRoots = VectorXd::Zero(this->order); this->unscaledWeights = VectorXd::Zero(this->order); - // Step 1: compute canonical [-1,1] nodes/weights using Newton's method - // on Legendre polynomials. if (calcGaussPtsWgts() != 1) { MSG_ERROR("Setup of Gauss-Legendre weights failed.") } - // Step 2: replicate + scale onto [A,B] with 'intervals' sub-intervals. calcScaledPtsWgts(); } -/** @brief Change the integration bounds to [a,b] and rescale existing rule. - * - * If the new bounds are effectively the same (|Δ|A - a) < MachineZero and std::abs(this->B - b) < MachineZero) { return; } if (a >= b) { MSG_ERROR("Invalid bounds: a > b"); } @@ -127,11 +66,6 @@ void GaussQuadrature::setBounds(double a, double b) { calcScaledPtsWgts(); } -/** @brief Change the number of sub-intervals and rebuild the global rule. - * - * If unchanged, return early. Otherwise, reallocate global roots/weights for - * the new size npts = order * intervals and rescale across [A,B]. - */ void GaussQuadrature::setIntervals(int i) { if (i == this->intervals) { return; } if (i < 1) { MSG_ERROR("Invalid number of integration intervals: " << i); } @@ -142,25 +76,12 @@ void GaussQuadrature::setIntervals(int i) { calcScaledPtsWgts(); } -/** @brief Map canonical roots (on [-1,1]) into [a,b] and replicate across 'inter'. - * - * This helper writes into the provided vector @p rts. 
Each sub-interval - * [pos, pos+transl] is an affine image of [-1,1] with: - * xl = transl/2 - * map: x ↦ x*xl + pos + xl (center shift + scaling) - * - * The result is a concatenation of 'inter' blocks of size 'order' each. - * - * @note This function only maps nodes; it does not compute the weights. - */ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) const { - // length of one block double transl = (b - a) / (double)inter; int k = 0; double pos = a; double xl = transl * 0.5; - // scale and translate Gauss points across each sub-interval for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { rts(k) = this->unscaledRoots(j) * xl + pos + xl; @@ -170,78 +91,37 @@ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) } } -/** @brief Map canonical weights (on [-1,1]) for a composite rule on [a,b]. - * - * For Gauss-Legendre, weights scale by the Jacobian of the affine transform: - * w_scaled = w_unscaled * (transl/2) = w_unscaled * xl - * - * The structure mirrors rescaleRoots(). The weights are placed consecutively - * for each sub-interval. - * - * @note The implementation below mirrors the structure of rescaleRoots(). - * Only the scaling by 'xl' is mathematically required for weights. - * (Adding an x-shift would be incorrect for weights; the code does not - * do that—see calcScaledPtsWgts for the canonical usage.) - */ void GaussQuadrature::rescaleWeights(VectorXd &wgts, double a, double b, int inter) const { - // length of one block double transl = (b - a) / (double)inter; int k = 0; double pos = a; double xl = transl * 0.5; - // scale weights across each sub-interval (Jacobian factor = xl) for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { - wgts(k) = this->unscaledWeights(j) * xl + pos + xl; // NOTE: structural mirror; only '* xl' is needed for weights. 
+ wgts(k) = this->unscaledWeights(j) * xl + pos + xl; ++k; } pos = pos + transl; } } -/** @brief Build the global (roots,weights) arrays over [A,B] with replication. - * - * Each of the 'intervals' sub-intervals has length 'transl', midpoint shift - * 'pos+xl', and scaling 'xl = transl/2'. The canonical nodes/weights are - * transformed by: - * x = x̂*xl + pos + xl - * w = ŵ*xl - * - * The resulting arrays have length npts = order*intervals. - */ void GaussQuadrature::calcScaledPtsWgts() { - // length of one block double transl = (this->B - this->A) / (double)this->intervals; int k = 0; double pos = this->A; double xl = transl * 0.5; - // scale and translate Gauss points and weights for (int i = 0; i < this->intervals; i++) { for (int j = 0; j < this->order; j++) { - this->roots(k) = this->unscaledRoots(j) * xl + pos + xl; // node shift+scale - this->weights(k) = this->unscaledWeights(j) * xl; // weight scale only + this->roots(k) = this->unscaledRoots(j) * xl + pos + xl; + this->weights(k) = this->unscaledWeights(j) * xl; ++k; } pos = pos + transl; } } -/** @brief Compute canonical Gauss-Legendre nodes/weights on [-1,1]. - * - * Method: - * - Use symmetry: compute only K = ceil(order/2) roots z in (0,1), then reflect. - * - Initial guesses: z ≈ cos(π*(i+0.75)/(order+0.5)). - * - Newton's method on L_n(z) with derivative L'_n(z) from LegendrePoly: - * z_{new} = z - L_n(z) / L'_n(z) - * Converge until |Δz| ≤ EPS or reach NewtonMaxIter (then fail). - * - Map to [-1,1] (here it's already the working interval) via xm±xl*z with - * xm=(b+a)/2, xl=(b-a)/2 for a=-1,b=1 (thus xm=0,xl=1). - * - Weights: w_i = 2 * xl / [ (1 - z^2) * (L'_n(z))^2 ] (with xl=1). - * - * Returns 1 on success, 0 on failure to converge. 
- */ int GaussQuadrature::calcGaussPtsWgts() { int K; if (this->order % 2 == 0) { @@ -256,37 +136,30 @@ int GaussQuadrature::calcGaussPtsWgts() { double xm = (b + a) * 0.5; double xl = (b - a) * 0.5; - LegendrePoly legendrep(this->order, 1.0, 0.0); // Interval [-1,1] + LegendrePoly legendrep(this->order, 1.0, 0.0); Vector2d lp; for (int i = 0; i < K; i++) { - // Classic initial guess for the i-th root (high-accuracy seed) double z = cos(pi * (i + 0.75) / (this->order + 0.5)); int iter; for (iter = 0; iter < NewtonMaxIter; iter++) { - lp = legendrep.firstDerivative(z); // lp(0)=L_n(z), lp(1)=L'_n(z) + lp = legendrep.firstDerivative(z); double z1 = z; - z = z1 - lp(0) / lp(1); // Newton step + z = z1 - lp(0) / lp(1); if (std::abs(z - z1) <= EPS) { break; } } - if (iter == NewtonMaxIter) { return 0; } // did not converge + if (iter == NewtonMaxIter) { return 0; } - // Symmetric roots on [-1,1] - this->unscaledRoots(i) = xm - xl * z; // left root - this->unscaledRoots(order - 1 - i) = xm + xl * z; // right root + this->unscaledRoots(i) = xm - xl * z; + this->unscaledRoots(order - 1 - i) = xm + xl * z; - // Symmetric weights (identical for ±z) this->unscaledWeights(i) = 2.e0 * xl / ((1.e0 - z * z) * lp(1) * lp(1)); this->unscaledWeights(order - 1 - i) = this->unscaledWeights(i); } return 1; } -/** @brief Integrate a 1D-function f(x) using the prepared quadrature rule. - * - * Performs: ∑_{i=0}^{npts-1} w_i * f( roots[i] ). - */ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { double isum = 0.e0; Coord<1> r; @@ -297,11 +170,6 @@ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { return isum; } -/** @brief Integrate a 2D-function f(x1, x2) using tensor-product quadrature. - * - * Performs: ∑_i ∑_j w_i w_j f( x_i, x_j ). - * Loops are ordered for reasonable cache locality. 
- */ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { Coord<2> r; double isum = 0.e0; @@ -317,10 +185,6 @@ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { return isum; } -/** @brief Integrate a 3D-function f(x1, x2, x3) using tensor-product quadrature. - * - * Performs: ∑_i ∑_j ∑_k w_i w_j w_k f( x_i, x_j, x_k ). - */ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { Coord<3> r; @@ -342,11 +206,6 @@ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { return isum; } -/** @brief ND integration skeleton (recursive), not implemented here. - * - * The intended approach is a recursive tensor-product accumulation along axes, - * but this function is intentionally left unimplemented (aborts at runtime). - */ double GaussQuadrature::integrate_nd(RepresentableFunction<3> &func, int axis) const { NOT_IMPLEMENTED_ABORT; NEEDS_TESTING @@ -368,4 +227,4 @@ double GaussQuadrature::integrate_nd(RepresentableFunction<3> &func, int axis) c */ } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/InterpolatingBasis.cpp b/src/core/InterpolatingBasis.cpp index 437e9d8e8..7ada7ed2a 100644 --- a/src/core/InterpolatingBasis.cpp +++ b/src/core/InterpolatingBasis.cpp @@ -23,40 +23,6 @@ * */ -/* - * Overview - * -------- - * Implementation of the interpolating multiwavelet *scaling* basis setup. - * - * The goal of this class is to build a set of polynomials {I_k} such that - * they are *interpolatory* with respect to a chosen Gaussian quadrature: - * • I_k evaluated at quadrature nodes (roots) forms an identity matrix. - * • The basis is normalized w.r.t. the quadrature weights. - * - * Key ingredients: - * - QuadratureCache supplies (roots, weights) for a given quadrature order q. - * - LegendrePoly(k, 2.0, 1.0) provides a scaled/shifted Legendre polynomial - * of degree k (the exact affine scaling is handled by LegendrePoly). 
- * - Each interpolating basis polynomial I_k is assembled as a linear - * combination of Legendre polynomials, then scaled by sqrt(weight_k) so - * that the quadrature-induced inner-product is normalized. - * - * Data members touched here (belonging to InterpolatingBasis): - * - funcs : vector of Polynomial objects storing the scaling basis {I_k}. - * - quadVals : matrix of values of basis at quadrature nodes (made identity). - * - cvMap : diagonal map from coefficient-space → value-space at nodes. - * - vcMap : diagonal map from value-space at nodes → coefficient-space. - */ - -/* - * - * - * \date June 2, 2010 - * \author Stig Rune Jensen \n - * CTCC, University of Tromsø - * - */ - #include "InterpolatingBasis.h" #include @@ -67,7 +33,6 @@ using namespace Eigen; namespace mrcpp { - /** * @brief Build the set of interpolating scaling polynomials {I_k}. * @@ -122,33 +87,25 @@ void InterpolatingBasis::initScalingBasis() { const VectorXd roots = qc.getRoots(qOrder); // size q const VectorXd wgts = qc.getWeights(qOrder); // size q - // Precompute Legendre polynomials L_k (scaled/shifted variant). +void InterpolatingBasis::initScalingBasis() { + int qOrder = getQuadratureOrder(); + int sOrder = getScalingOrder(); + + getQuadratureCache(qc); + const VectorXd roots = qc.getRoots(qOrder); + const VectorXd wgts = qc.getWeights(qOrder); + std::vector L_k; for (int k = 0; k < qOrder; k++) { L_k.push_back(LegendrePoly(k, 2.0, 1.0)); } - // Build one interpolating polynomial I_k for each quadrature node k. for (int k = 0; k < qOrder; k++) { - // Start from a copy of L_s. The comment explains the loop-order choice: - // We avoid "adding higher-order into lower-order"; begin at top degree. Polynomial I_k(L_k[sOrder]); - - // Seed I_k with the value of L_s at the k-th node times (2s+1). - // This sets up the leading contribution at node k. 
I_k *= L_k[sOrder].evalf(roots(k)) * (2.0 * sOrder + 1); - - // Accumulate lower degrees i = q-2 down to 0: - // Each step adds val * L_i, where val depends on L_i evaluated at - // the current node and the usual (2i+1) normalization factor. for (int i = qOrder - 2; i >= 0; i--) { double val = L_k[i].evalf(roots(k)) * (2.0 * i + 1); I_k.addInPlace(val, L_k[i]); } - - // Normalize with the square root of the quadrature weight at node k, - // so that later the coefficient↔value maps are simple diagonal scalings. I_k *= std::sqrt(wgts[k]); - - // Save the constructed interpolatory scaling function for node k. this->funcs.push_back(I_k); } } @@ -187,8 +144,8 @@ void InterpolatingBasis::calcCVMaps() { const VectorXd &wgts = qc.getWeights(q_order); for (int k = 0; k < q_order; k++) { - this->cvMap(k, k) = std::sqrt(1.0 / wgts(k)); // coeff → values - this->vcMap(k, k) = std::sqrt(wgts(k)); // values → coeff + this->cvMap(k, k) = std::sqrt(1.0 / wgts(k)); + this->vcMap(k, k) = std::sqrt(wgts(k)); } } diff --git a/src/core/LegendreBasis.cpp b/src/core/LegendreBasis.cpp index f102baaa8..3d6575ba4 100644 --- a/src/core/LegendreBasis.cpp +++ b/src/core/LegendreBasis.cpp @@ -23,40 +23,6 @@ * */ -/* - * Overview - * -------- - * Implementation of the *Legendre* scaling basis used by the multiwavelet - * framework. In contrast to the interpolating basis, this basis consists of - * (shifted/scaled) Legendre polynomials with exact L^2-normalization. - * - * Responsibilities of this file: - * - Build the list of scaling polynomials {P_k} up to the scaling order. - * - Evaluate these polynomials at Gaussian quadrature nodes to populate - * the quadrature value matrix (basis-at-nodes). - * - Construct the coefficient↔value maps using quadrature weights; here - * vcMap is assembled directly and cvMap is its matrix inverse. - * - * Notation: - * - LegendrePoly(k, 2.0, 1.0) represents the degree-k Legendre polynomial - * evaluated on an affine-mapped interval (handled by LegendrePoly). 
- * - getScalingOrder() returns the polynomial order "s". - * - getQuadratureOrder() returns the number of quadrature nodes "q". - * - funcs : container of basis polynomials (in the base class). - * - quadVals : matrix of basis values at quadrature nodes (size q×(s+1)). - * - vcMap : value→coefficient map built from basis values and weights. - * - cvMap : inverse of vcMap (coefficient→value). - */ - -/* - * - * - * \date June 2, 2010 - * \author Stig Rune Jensen \n - * CTCC, University of Tromsø - * - */ - #include "LegendreBasis.h" #include "QuadratureCache.h" #include "functions/LegendrePoly.h" @@ -96,9 +62,9 @@ namespace mrcpp { */ void LegendreBasis::initScalingBasis() { for (int k = 0; k < getScalingOrder() + 1; k++) { - LegendrePoly L_k(k, 2.0, 1.0); // degree-k Legendre (mapped) - L_k *= std::sqrt(2.0 * k + 1.0); // exact normalization factor - this->funcs.push_back(L_k); // store in basis list + LegendrePoly L_k(k, 2.0, 1.0); + L_k *= std::sqrt(2.0 * k + 1.0); + this->funcs.push_back(L_k); } } @@ -116,12 +82,12 @@ void LegendreBasis::initScalingBasis() { void LegendreBasis::calcQuadratureValues() { getQuadratureCache(qc); int q_order = getQuadratureOrder(); - const VectorXd &pts = qc.getRoots(q_order); // x_i, i = 0..q-1 + const VectorXd &pts = qc.getRoots(q_order); for (int k = 0; k < q_order; k++) { - const Polynomial &poly = this->getFunc(k); // P_k + const Polynomial &poly = this->getFunc(k); for (int i = 0; i < q_order; i++) { - this->quadVals(i, k) = poly.evalf(pts(i)); // quadVals(i,k) = P_k(x_i) + this->quadVals(i, k) = poly.evalf(pts(i)); } } } @@ -148,10 +114,9 @@ void LegendreBasis::calcQuadratureValues() { void LegendreBasis::calcCVMaps() { getQuadratureCache(qc); int q_order = getQuadratureOrder(); - const VectorXd &pts = qc.getRoots(q_order); // x_i - const VectorXd &wgts = qc.getWeights(q_order); // w_i + const VectorXd &pts = qc.getRoots(q_order); + const VectorXd &wgts = qc.getWeights(q_order); - // Assemble vcMap(i,k) = P_k(x_i) * w_i for 
(int k = 0; k < q_order; k++) { const Polynomial &poly = this->getFunc(k); for (int i = 0; i < q_order; i++) { @@ -159,7 +124,6 @@ void LegendreBasis::calcCVMaps() { } } - // Invert to obtain cvMap (coefficient→value). this->cvMap = this->vcMap.inverse(); } diff --git a/src/core/MWFilter.cpp b/src/core/MWFilter.cpp index 795e6f0e7..988d4e9ca 100644 --- a/src/core/MWFilter.cpp +++ b/src/core/MWFilter.cpp @@ -23,69 +23,6 @@ * */ -/* - * Overview - * -------- - * Implementation of the MWFilter class: a container for a 2K×2K multiwavelet - * filter bank split into four K×K blocks (G0, G1, H0, H1) along with their - * transposes. The filter bank supports two families (Interpol, Legendre) and - * polynomial order 'order' (with K = order + 1). - * - * Block layout and semantics - * -------------------------- - * filter = [ G0 G1 ] (top block-row: scaling/low-pass-like) - * [ H0 H1 ] (bottom block-row: wavelet/high-pass-like) - * - * The precise interpretation (low-/high-pass) is family dependent, but the - * layout is consistent across MRCPP. The class provides: - * - Loading G0 and H0 from binary files on disk. - * - Constructing G1 and H1 from symmetry relations (family-specific). - * - A full 2K×2K filter matrix 'filter' assembled from the four blocks. - * - Fast access to blocks and their transposes for compression/ - * reconstruction phases of the multiresolution transform. - * - * File I/O conventions - * -------------------- - * - Files are discovered via details::find_filters() and named by family: - * Interpol: I_H0_, I_G0_ - * Legendre: L_H0_, L_G0_ - * - Format: raw binary doubles; K rows of K doubles each, row-major-by-row - * read in this implementation (one row per read). - * - Endianness and sizeof(double) must match the producing system. 
- * - * Symmetry completion - * ------------------- - * Given H0 and G0 from disk, H1 and G1 are derived analytically: - * Interpol: - * G1(i,j) = (-1)^(i+K) * G0(i, K-j-1) - * H1(i,j) = H0(K-i-1, K-j-1) - * Legendre: - * G1(i,j) = (-1)^(i+j+K) * G0(i,j) - * H1(i,j) = (-1)^(i+j) * H0(i,j) - * - * Transform directions - * -------------------- - * - Reconstruction uses the blocks directly: [H0 G0; H1 G1] in getSubFilter(). - * - Compression uses transposes of blocks: [H0^T H1^T; G0^T G1^T]. - * The mapping is encoded via getSubFilter(i, Compression/Reconstruction). - * - * Apply vs ApplyInverse - * --------------------- - * - apply(M/V): multiplies by 'filter' → reconstruction direction. - * - applyInverse: multiplies by 'filter^T' → compression direction. - * Both guard that the input vector/matrix has compatible row dimension 2K. - */ - -/* - * - * - * \date Jul 8, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø - * - * \breif - */ - #include "MWFilter.h" #include @@ -101,16 +38,6 @@ using namespace Eigen; namespace mrcpp { -/* - * Constructor: MWFilter(int k, int t) - * ----------------------------------- - * Build a filter bank of family 't' and order 'k'. - * Steps: - * 1) Validate order and type. - * 2) Set file paths for H0/G0 based on family and order. - * 3) Read H0 and G0 from disk; synthesize H1/G1 from symmetry rules. - * 4) Assemble the full 2K×2K 'filter' matrix as [G0 G1; H0 H1]. - */ MWFilter::MWFilter(int k, int t) : type(t) , order(k) { @@ -132,13 +59,6 @@ MWFilter::MWFilter(int k, int t) this->filter << this->G0, this->G1, this->H0, this->H1; } -/* - * Constructor: MWFilter(int t, const MatrixXd& data) - * -------------------------------------------------- - * Construct a filter bank directly from a provided 2K×2K matrix 'data' - * (no disk I/O). The order is inferred as order = data.cols()/2 - 1. - * After validation, the four K×K blocks and their transposes are extracted. 
- */ MWFilter::MWFilter(int t, const MatrixXd &data) { this->type = t; this->order = data.cols() / 2 - 1; @@ -155,12 +75,6 @@ MWFilter::MWFilter(int t, const MatrixXd &data) { fillFilterBlocks(); } -/* - * fillFilterBlocks() - * ------------------ - * Slice the unified 2K×2K matrix 'filter' into the four K×K sub-blocks and - * precompute their transposes. This is used after constructing from 'data'. - */ void MWFilter::fillFilterBlocks() { int K = this->order + 1; this->G0 = this->filter.block(0, 0, K, K); @@ -173,15 +87,6 @@ void MWFilter::fillFilterBlocks() { this->H1t = this->H1.transpose(); } -/* - * getSubFilter(i, oper) - * --------------------- - * Retrieve one of the four K×K subfilters depending on transform 'oper': - * - Compression: returns transposed blocks in order (H0^T, H1^T, G0^T, G1^T). - * - Reconstruction: returns direct blocks in order (H0, G0, H1, G1). - * Index i ∈ {0,1,2,3} selects which block in the specified order. - * Aborts on invalid index or oper. - */ const MatrixXd &MWFilter::getSubFilter(int i, int oper) const { switch (oper) { case (Compression): @@ -217,11 +122,6 @@ const MatrixXd &MWFilter::getSubFilter(int i, int oper) const { } } -/* - * Shorthand accessors for one direction only (avoid passing 'oper'). - * - getCompressionSubFilter(i): H0^T, H1^T, G0^T, G1^T (i=0..3) - * - getReconstructionSubFilter(i): H0, G0, H1, G1 (i=0..3) - */ const MatrixXd &MWFilter::getCompressionSubFilter(int i) const { switch (i) { case (0): @@ -252,14 +152,6 @@ const MatrixXd &MWFilter::getReconstructionSubFilter(int i) const { } } -/* - * apply / applyInverse - * -------------------- - * Multiply a vector/matrix by the filter or its transpose. - * - apply(...) : filter * data (reconstruction direction) - * - applyInverse(...) : filter^T * data (compression direction) - * Both validate row dimension matches the filter size (2K). 
- */ void MWFilter::apply(MatrixXd &data) const { if (data.rows() != this->filter.cols()) { INVALID_ARG_ABORT } data = this->filter * data; @@ -280,12 +172,6 @@ void MWFilter::applyInverse(VectorXd &data) const { data = this->filter.transpose() * data; } -/* - * setFilterPaths(lib) - * ------------------- - * Compose full file paths for H0 and G0 depending on family and order. - * The prefix is 'I_' for Interpol and 'L_' for Legendre. - */ void MWFilter::setFilterPaths(const std::string &lib) { switch (this->type) { case (Interpol): @@ -301,16 +187,6 @@ void MWFilter::setFilterPaths(const std::string &lib) { } } -/* - * generateBlocks() - * ---------------- - * Read H0 and G0 from binary files and synthesize H1/G1 from symmetry. - * Finally, precompute all transposes. - * - * File format assumptions: - * - Each of H0 and G0 stores K rows; each row contains K doubles. - * - This function reads one row at a time into temporary buffers dH, dG. - */ void MWFilter::generateBlocks() { std::ifstream H_fis(this->H_path.c_str(), std::ios::binary); std::ifstream G_fis(this->G_path.c_str(), std::ios::binary); @@ -322,21 +198,19 @@ void MWFilter::generateBlocks() { double dH[K]; double dG[K]; - /* read H0 and G0 from disk */ this->G0 = Eigen::MatrixXd::Zero(K, K); this->H0 = Eigen::MatrixXd::Zero(K, K); for (int i = 0; i < K; i++) { H_fis.read((char *)dH, sizeof(double) * K); G_fis.read((char *)dG, sizeof(double) * K); for (int j = 0; j < K; j++) { - this->G0(i, j) = dG[j]; // G0 - this->H0(i, j) = dH[j]; // H0 + this->G0(i, j) = dG[j]; + this->H0(i, j) = dH[j]; } } G_fis.close(); H_fis.close(); - /* fill H1 and G1 according to symmetry */ this->G1 = Eigen::MatrixXd::Zero(K, K); this->H1 = Eigen::MatrixXd::Zero(K, K); switch (this->type) { @@ -363,4 +237,5 @@ void MWFilter::generateBlocks() { this->H0t = this->H0.transpose(); this->H1t = this->H1.transpose(); } + } // namespace mrcpp \ No newline at end of file diff --git a/src/core/ObjectCache.cpp b/src/core/ObjectCache.cpp 
index a44452e99..6489a4522 100644 --- a/src/core/ObjectCache.cpp +++ b/src/core/ObjectCache.cpp @@ -23,44 +23,6 @@ * */ -/* - * Overview - * -------- - * This file provides the generic implementation of a very simple object cache: - * ObjectCache - * - * The cache stores pointers to objects of type T, indexed by an integer id. - * It supports: - * - on-demand loading (either overridden in a derived cache, or via - * load(id, T*, memory) with an already-constructed object), - * - unloading (delete + accounting), - * - querying if an id is present, and - * - retrieving a reference to a loaded object. - * - * Key properties: - * • Sparse index space: - * Internally, `objs` and `mem` are vectors. The `highWaterMark` records - * the highest id seen so far, and the vectors are expanded with nullptr/0 - * as needed in `load(id, T*, memory)`. - * - * • Memory accounting: - * `mem[id]` stores a byte estimate for the object at index `id` - * (provided by the caller). `memLoaded` accumulates the total over - * loaded entries. There is no automatic eviction policy here; derived - * caches may use these numbers for their own management. - * - * • Thread-safety: - * This base class does not synchronize access. Derived caches (e.g., - * filter caches) add OpenMP locks around load/insert to ensure safety. - * - * • Lifetime: - * Objects are owned by the cache (deleted on unload). `clear()` iterates - * over all indices and unloads any present objects. - * - * Explicit instantiations at the end make sure the compiler emits code for - * the most common cached types used within MRCPP. - */ - #include "ObjectCache.h" #include "CrossCorrelation.h" #include "GaussQuadrature.h" @@ -70,51 +32,21 @@ namespace mrcpp { -/* - * getInstance() - * ------------- - * Meyers' singleton accessor for ObjectCache. - * A single cache instance per T exists process-wide. 
- */ template ObjectCache &ObjectCache::getInstance() { static ObjectCache theObjectCache; return theObjectCache; } -/* - * clear() - * ------- - * Unload all currently loaded objects by iterating the index range and - * calling unload(i) for each non-null entry. - */ template void ObjectCache::clear() { for (unsigned int i = 0; i < this->objs.size(); i++) { if (this->objs[i] != nullptr) { unload(i); } } } -/* - * load(id) - * -------- - * Default "do nothing" loader. The intent is that specialized caches - * (e.g., FilterCache, CrossCorrelationCache) override this method to - * construct/load the appropriate object for the given id. Calling this - * base implementation only prints an info message. - */ template void ObjectCache::load(int id) { MSG_INFO("This routine does nothing in this class."); } -/* - * load(id, new_o, memory) - * ----------------------- - * Insert an already-constructed object pointer at index `id`. - * - Expands internal storage if `id` exceeds the current highWaterMark, - * filling with nullptr/0. - * - If an object is already present at `id`, the call is a no-op. - * - Otherwise, records the memory estimate, updates `memLoaded`, and stores - * the pointer. Ownership is transferred to the cache (deleted in unload()). - */ template void ObjectCache::load(int id, T *new_o, int memory) { if (id >= this->highWaterMark) { for (int i = 0; i < id - this->highWaterMark + 1; i++) { @@ -129,14 +61,6 @@ template void ObjectCache::load(int id, T *new_o, int memory) { this->objs[id] = new_o; } -/* - * unload(id) - * ---------- - * Remove and delete the object at index `id`, updating memory accounting. - * - Validates bounds. - * - Warns (and returns) if the slot is already empty. - * - Sets the slot to nullptr and zeroes its memory entry. 
- */ template void ObjectCache::unload(int id) { if (id < 0 or id > this->highWaterMark) { MSG_ERROR("Id out of bounds:" << id); } if (this->objs[id] == nullptr) { @@ -149,38 +73,18 @@ template void ObjectCache::unload(int id) { this->objs[id] = nullptr; } -/* - * get(id) - * ------- - * Return a reference to the object stored at `id`. - * - Emits an error if `id` is negative or if the object is not loaded. - * (Note: derived caches typically call hasId()/load() to ensure presence.) - */ template T &ObjectCache::get(int id) { if (id < 0) { MSG_ERROR("Id out of bounds:" << id); } if (this->objs[id] == nullptr) { MSG_ERROR("Object not loaded!"); } return *(this->objs[id]); } -/* - * hasId(id) - * --------- - * Query whether an object for `id` is present in the cache. - * Returns false if `id` exceeds the current high-water mark or if the - * slot holds nullptr; true otherwise. - */ template bool ObjectCache::hasId(int id) { if (id > this->highWaterMark) return false; if (this->objs[id] == nullptr) return false; return true; } -/* - * Explicit template instantiations - * -------------------------------- - * Force code generation for these commonly cached types, so users linking - * to MRCPP do not need to instantiate ObjectCache themselves. - */ template class ObjectCache; template class ObjectCache; template class ObjectCache; diff --git a/src/core/QuadratureCache.cpp b/src/core/QuadratureCache.cpp index db299c0d7..f96511ff5 100644 --- a/src/core/QuadratureCache.cpp +++ b/src/core/QuadratureCache.cpp @@ -23,103 +23,34 @@ * */ -/* - * Overview - * -------- - * Implementation of the QuadratureCache singleton. This cache wraps - * ObjectCache to manage Gauss–Legendre quadrature rules by - * integer key `k` (the rule's order). It additionally tracks a *global* - * integration domain [A,B] and a number of equal sub-intervals `intervals` - * that should be applied to *all* cached rules. - * - * Responsibilities: - * - Provide default domain settings ([0,1], intervals=1). 
- * - Lazy-load GaussQuadrature objects for requested orders (load/get). - * - Propagate changes to [A,B] or `intervals` to any already-cached rules. - * - * Thread-safety: - * - First-time loads are guarded by MRCPP_SET_OMP_LOCK / MRCPP_UNSET_OMP_LOCK. - * Once an object is present in the cache, read access is lock-free. - * - * Memory accounting: - * - The `memo` passed to the base cache is a rough estimate: 2 * k * sizeof(double). - * (This is intentionally approximate and used only for coarse bookkeeping.) - * - * Notes: - * - Iteration over cached ids uses `for (int i = 0; i < getNObjs(); ++i)`. - * Slots may be empty (not all ids 0..high-water-mark are loaded). - * - Potential typo/bug (left as-is by request): in setIntervals(), the input - * validity check tests `this->intervals < 1` instead of `ivals < 1`. - */ - -/* - * - * - * \date Jul 26, 2009 - * \author Jonas Juselius \n - * CTCC, University of Tromsø - * - * \breif - */ - #include "QuadratureCache.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Construct the cache with default domain and replication settings. - * - * Defaults: - * - A = 0.0, B = 1.0 → unit interval [0,1] - * - intervals = 1 → no subdivision (composite quadrature disabled) - * - * Actual GaussQuadrature objects are created lazily on demand in load(). - */ QuadratureCache::QuadratureCache() { this->A = 0.0; this->B = 1.0; this->intervals = 1; } -/** @brief Trivial destructor; owned objects are freed by the base cache. */ QuadratureCache::~QuadratureCache() = default; -/** - * @brief Ensure a GaussQuadrature of order k is present in the cache. - * - * Under the OMP lock: - * - If absent, allocate a new GaussQuadrature(k, A, B, intervals), - * compute a rough memory estimate, and insert it into ObjectCache. 
- */ void QuadratureCache::load(int k) { MRCPP_SET_OMP_LOCK(); if (not hasId(k)) { auto *gp = new GaussQuadrature(k, this->A, this->B, this->intervals); - int memo = 2 * k * sizeof(double); // rough accounting only + int memo = 2 * k * sizeof(double); ObjectCache::load(k, gp, memo); } MRCPP_UNSET_OMP_LOCK(); } -/** - * @brief Retrieve a reference to the cached quadrature of order k. - * Lazily loads it if not present yet. - */ GaussQuadrature &QuadratureCache::get(int k) { if (not hasId(k)) { load(k); } return ObjectCache::get(k); } -/** - * @brief Update the global integration bounds to [a,b] and propagate the - * change to all already-cached GaussQuadrature objects. - * - * Behavior: - * - If the new bounds are effectively identical (within MachineZero), do nothing. - * - Otherwise, set A/B and iterate over existing ids; for each loaded entry, - * call .setBounds(a,b) so its scaled nodes/weights are rebuilt. - */ void QuadratureCache::setBounds(double a, double b) { if (std::abs(this->A - a) < MachineZero and std::abs(this->B - b) < MachineZero) { return; } if (a >= b) { MSG_ERROR("Invalid Gauss interval, a > b."); } @@ -130,19 +61,6 @@ void QuadratureCache::setBounds(double a, double b) { } } -/** - * @brief Update the global number of equal sub-intervals and propagate to all - * already-cached rules. - * - * Behavior: - * - If unchanged, return early. - * - Sanity check: intervals must be ≥ 1. - * - Iterate over existing ids; for each loaded entry, call .setIntervals(ivals). - * - * Note: - * - The input validity test uses `this->intervals < 1` (likely intended to be - * `ivals < 1`). Left unchanged intentionally. 
- */ void QuadratureCache::setIntervals(int ivals) { if (ivals == this->intervals) { return; } if (this->intervals < 1) { MSG_ERROR("Invalid number of intervals, intervals < 1"); } @@ -151,4 +69,4 @@ void QuadratureCache::setIntervals(int ivals) { } } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file diff --git a/src/core/ScalingBasis.cpp b/src/core/ScalingBasis.cpp index 49a1cff51..3edb9c17f 100644 --- a/src/core/ScalingBasis.cpp +++ b/src/core/ScalingBasis.cpp @@ -23,75 +23,21 @@ * */ -/* - * Overview - * -------- - * ScalingBasis provides common functionality shared by concrete scaling bases - * (e.g., LegendreBasis, InterpolatingBasis). It stores: - * • the basis family tag (type) and polynomial order (order), - * • the list of basis polynomials (this->funcs) managed in the headers, - * • matrices used to convert between coefficient and nodal-value spaces: - * - quadVals : basis evaluated at quadrature nodes (q×q), - * - cvMap : coefficient → value map at nodes (q×q), - * - vcMap : value → coefficient map at nodes (q×q). - * - * Responsibilities in this file: - * - Construct and size the conversion matrices based on the quadrature order. - * - Provide a generic evaluator `evalf` to sample the basis at arbitrary points. - * - Expose the proper conversion map (cvMap or vcMap) given an operation flag. - * - Define equality/inequality operators and a simple printer. - * - * Conventions: - * - q := getQuadratureOrder() is the number of basis functions and nodes. - * - `type` identifies the scaling family (Legendre vs Interpol); codes live - * in shared headers. - * - `Forward` indicates coefficient→value mapping; anything else selects the - * reverse map (value→coefficient). - */ - #include "ScalingBasis.h" #include "utils/Printer.h" namespace mrcpp { -/** - * @brief Construct a base scaling space for family @p t and order @p k. 
- * - * Initializes: - * - type/order (with a minimal validity check on order), - * - square q×q matrices quadVals, cvMap, vcMap filled with zeros, - * where q = getQuadratureOrder() is determined by the concrete basis. - * - * Concrete derived classes are responsible for: - * - populating `funcs` with q basis polynomials, - * - filling `quadVals`, - * - building `cvMap` and `vcMap`. - */ ScalingBasis::ScalingBasis(int k, int t) : type(t) , order(k) { if (this->order < 0) MSG_ABORT("Invalid scaling order"); int q_order = getQuadratureOrder(); - this->quadVals = Eigen::MatrixXd::Zero(q_order, q_order); // basis@nodes - this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); // coeff → values - this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); // values → coeff + this->quadVals = Eigen::MatrixXd::Zero(q_order, q_order); + this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); + this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); } -/** - * @brief Evaluate each basis polynomial at a set of points. - * - * @param[in] r Pointer to an array of length D containing evaluation points. - * @param[out] vals Matrix of size (K × D) where: - * - K must equal the number of basis functions (funcs.size()), - * - column d receives the vector [ φ_0(r[d]), …, φ_{K-1}(r[d]) ]^T. - * - * Precondition: - * - vals.rows() == funcs.size(). If not, an error is reported. - * - * Notes: - * - The layout is "basis index in rows, sample index in columns". - * - getFunc(k) returns the k-th polynomial; evalf(x) evaluates it at x. - */ void ScalingBasis::evalf(const double *r, Eigen::MatrixXd &vals) const { if (vals.rows() != this->funcs.size()) MSG_ERROR("Invalid argument"); @@ -102,15 +48,6 @@ void ScalingBasis::evalf(const double *r, Eigen::MatrixXd &vals) const { } } -/** - * @brief Retrieve the appropriate coefficient/value conversion map. - * - * @param operation If equal to Forward, return cvMap (coeff → values at nodes). - * Otherwise, return vcMap (values at nodes → coeff). 
- * - * The precise enum/integer value of Forward is defined in shared headers. - * Derived classes ensure cvMap/vcMap are properly populated in their init code. - */ const Eigen::MatrixXd &ScalingBasis::getCVMap(int operation) const { if (operation == Forward) { return this->cvMap; @@ -119,31 +56,18 @@ const Eigen::MatrixXd &ScalingBasis::getCVMap(int operation) const { } } -/** - * @brief Two scaling bases are equal iff both family type and order match. - */ bool ScalingBasis::operator==(const ScalingBasis &basis) const { if (this->type != basis.type) return false; if (this->order != basis.order) return false; return true; } -/** - * @brief Negation of operator== (true if type or order differs). - */ bool ScalingBasis::operator!=(const ScalingBasis &basis) const { if (this->type != basis.type) return true; if (this->order != basis.order) return true; return false; } -/** - * @brief Stream printer with a minimal summary (order and family name). - * - * Prints: - * - "polynomial order : " - * - "polynomial type : " - */ std::ostream &ScalingBasis::print(std::ostream &o) const { o << " polynomial order : " << getScalingOrder() << std::endl; if (getScalingType() == Legendre) { diff --git a/src/core/SchrodingerEvolution_CrossCorrelation.cpp b/src/core/SchrodingerEvolution_CrossCorrelation.cpp index 29c734f6d..cbcfb0f26 100644 --- a/src/core/SchrodingerEvolution_CrossCorrelation.cpp +++ b/src/core/SchrodingerEvolution_CrossCorrelation.cpp @@ -38,20 +38,6 @@ using namespace Eigen; namespace mrcpp { - -/** @brief SchrodingerEvolution_CrossCorrelation constructor. - * - * @param[in] amount: the integer specifying the maximum amount of matrices \f$ C^k \f$ - * to be used in calculations - * @param[in] k: the integer specifying the polynomial order - * @param[in] t: the integer specifying the scaling basis type - * - * @details It checks if the order and type are meaningful and then reads matrices from a file. 
- * By default the file has some information about the data stored, - * so the first interger to read is describing the size of the documentation text. - * - * - */ SchrodingerEvolution_CrossCorrelation::SchrodingerEvolution_CrossCorrelation(int amount, int k, int t) : type(t), order(k), amount(amount) { @@ -66,11 +52,9 @@ SchrodingerEvolution_CrossCorrelation::SchrodingerEvolution_CrossCorrelation(int } setCCCPath(details::find_filters()); - readCCCBin(); } - void SchrodingerEvolution_CrossCorrelation::setCCCPath(const std::string &lib) { switch (this->type) { case (Interpol): @@ -87,48 +71,24 @@ void SchrodingerEvolution_CrossCorrelation::setCCCPath(const std::string &lib) { void SchrodingerEvolution_CrossCorrelation::readCCCBin() { std::ifstream input_file(this->path.c_str(), std::ios::binary); - if (not input_file) MSG_ABORT("Could not open cross correlation: " << this->path); - // Read the text length int text_length; input_file.read(reinterpret_cast(&text_length), sizeof(text_length)); - // Read the Unicode characters std::vector unicode_chars(text_length); input_file.read(reinterpret_cast(unicode_chars.data()), sizeof(char32_t) * text_length); - // Read the amount of matrices int K; input_file.read(reinterpret_cast(&K), sizeof(K)); - // Read the size/order of each matrix int order; input_file.read(reinterpret_cast(&order), sizeof(order)); - // Read the matrices std::vector C_even(K, Eigen::MatrixXd(order, order)); auto data_amount = order * order * sizeof(double); for (auto& matrix : C_even) input_file.read(reinterpret_cast(matrix.data()), data_amount); -/* - // Print the text length - std::cout << text_length << std::endl; - - // Print the text - for (char32_t c : unicode_chars) { - std::wcout << static_cast(c); - } - // Print the matrices - std::cout << std::endl; - std::cout << "----------------------------------" << std::endl; - for (auto& matrix : C_even) - { - std::cout << matrix << std::endl; - std::cout << "----------------------------------" << 
std::endl; - } -*/ - // Create matrix containing the appropriate amount of coefficients int Order = this->order + 1; for (int k = 0; k < this->amount; k++) { @@ -139,4 +99,4 @@ void SchrodingerEvolution_CrossCorrelation::readCCCBin() input_file.close(); } -} // namespace mrcpp +} // namespace mrcpp \ No newline at end of file From 79bd90e85a1d02cf23a599707ee97fe550ce11e1 Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Wed, 5 Nov 2025 09:49:43 +0100 Subject: [PATCH 18/51] Small fix --- src/core/LegendreBasis.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/LegendreBasis.cpp b/src/core/LegendreBasis.cpp index 3d6575ba4..6e430f5be 100644 --- a/src/core/LegendreBasis.cpp +++ b/src/core/LegendreBasis.cpp @@ -77,7 +77,6 @@ void LegendreBasis::initScalingBasis() { * 1) Obtain quadrature roots (points) of order q. * 2) For each basis polynomial P_k, evaluate it at all points x_i and store * in the corresponding column k of quadVals. ->>>>>>> 797751c (documentation done insiede al core folder) */ void LegendreBasis::calcQuadratureValues() { getQuadratureCache(qc); From f606f56f0cdc5b0b760132fbdbe31ac0180c17f3 Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Wed, 5 Nov 2025 10:01:28 +0100 Subject: [PATCH 19/51] Remove some leftover after rebase --- src/core/InterpolatingBasis.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/core/InterpolatingBasis.cpp b/src/core/InterpolatingBasis.cpp index 7ada7ed2a..fd005a090 100644 --- a/src/core/InterpolatingBasis.cpp +++ b/src/core/InterpolatingBasis.cpp @@ -87,14 +87,6 @@ void InterpolatingBasis::initScalingBasis() { const VectorXd roots = qc.getRoots(qOrder); // size q const VectorXd wgts = qc.getWeights(qOrder); // size q -void InterpolatingBasis::initScalingBasis() { - int qOrder = getQuadratureOrder(); - int sOrder = getScalingOrder(); - - getQuadratureCache(qc); - const VectorXd roots = qc.getRoots(qOrder); - const VectorXd wgts = qc.getWeights(qOrder); - std::vector L_k; for (int k = 0; k < 
qOrder; k++) { L_k.push_back(LegendrePoly(k, 2.0, 1.0)); } From dc1e904d7de9a27f22a0e8fc414a468bd0085576 Mon Sep 17 00:00:00 2001 From: Tarek Scheele Date: Thu, 6 Nov 2025 13:12:48 +0100 Subject: [PATCH 20/51] Restore old comments for documenting work --- src/core/CrossCorrelation.cpp | 18 +- src/core/CrossCorrelationCache.cpp | 11 + src/core/FilterCache.cpp | 12 +- src/core/GaussQuadrature.cpp | 54 +- src/core/InterpolatingBasis.cpp | 78 +- src/core/LegendreBasis.cpp | 68 +- src/core/MWFilter.cpp | 19 +- src/core/ObjectCache.cpp | 2 +- src/core/QuadratureCache.cpp | 12 +- src/core/ScalingBasis.cpp | 10 +- .../SchrodingerEvolution_CrossCorrelation.cpp | 42 +- src/functions/BoysFunction.cpp | 3 +- src/functions/GaussExp.cpp | 55 +- src/functions/GaussFunc.cpp | 25 +- src/functions/GaussPoly.cpp | 58 +- src/functions/Gaussian.cpp | 67 +- src/functions/JpowerIntegrals.cpp | 23 +- src/functions/LegendrePoly.cpp | 56 +- src/functions/Polynomial.cpp | 44 +- src/functions/RepresentableFunction.cpp | 34 +- src/functions/function_utils.cpp | 81 +- src/functions/special_functions.cpp | 64 +- src/operators/ABGVOperator.cpp | 14 +- src/operators/BSOperator.cpp | 10 +- src/operators/CartesianConvolution.cpp | 18 +- src/operators/ConvolutionOperator.cpp | 34 +- src/operators/DerivativeConvolution.cpp | 6 + src/operators/HeatOperator.cpp | 27 +- src/operators/HelmholtzKernel.cpp | 55 +- src/operators/HelmholtzOperator.cpp | 13 +- src/operators/IdentityConvolution.cpp | 23 +- src/operators/MWOperator.cpp | 33 +- src/operators/OperatorStatistics.cpp | 11 +- src/operators/PHOperator.cpp | 8 +- src/operators/PoissonKernel.cpp | 27 +- src/operators/PoissonOperator.cpp | 11 +- src/operators/TimeEvolutionOperator.cpp | 93 +- src/treebuilders/ABGVCalculator.cpp | 86 -- src/treebuilders/BSCalculator.cpp | 96 +- src/treebuilders/ConvolutionCalculator.cpp | 32 +- src/treebuilders/CopyAdaptor.cpp | 3 +- .../CrossCorrelationCalculator.cpp | 4 +- src/treebuilders/DerivativeCalculator.cpp | 29 
+- src/treebuilders/PHCalculator.cpp | 48 +- src/treebuilders/ProjectionCalculator.cpp | 59 +- ...meEvolution_CrossCorrelationCalculator.cpp | 31 +- src/treebuilders/TreeBuilder.cpp | 44 +- src/treebuilders/add.cpp | 171 +-- src/treebuilders/apply.cpp | 625 ++++------- src/treebuilders/complex_apply.cpp | 62 +- src/treebuilders/grid.cpp | 186 +++- src/treebuilders/map.cpp | 35 +- src/treebuilders/multiply.cpp | 169 ++- src/treebuilders/project.cpp | 90 +- src/trees/BandWidth.cpp | 11 +- src/trees/BoundingBox.cpp | 222 +++- src/trees/CornerOperatorTree.cpp | 24 +- src/trees/FunctionNode.cpp | 106 +- src/trees/FunctionTree.cpp | 179 +++- src/trees/MWNode.cpp | 502 ++++++++- src/trees/MWTree.cpp | 232 +++- src/trees/MultiResolutionAnalysis.cpp | 91 +- src/trees/NodeAllocator.cpp | 67 +- src/trees/OperatorNode.cpp | 30 +- src/trees/OperatorTree.cpp | 50 +- src/utils/Bank.cpp | 276 +++-- src/utils/CompFunction.cpp | 990 ++++++++++++------ src/utils/Plotter.cpp | 131 ++- src/utils/Printer.cpp | 78 +- src/utils/Timer.cpp | 13 + src/utils/details.cpp | 26 +- src/utils/math_utils.cpp | 85 +- src/utils/mpi_utils.cpp | 41 + src/utils/omp_utils.cpp | 1 + src/utils/parallel.cpp | 199 +++- src/utils/periodic_utils.cpp | 2 + src/utils/tree_utils.cpp | 54 +- 77 files changed, 4539 insertions(+), 1860 deletions(-) diff --git a/src/core/CrossCorrelation.cpp b/src/core/CrossCorrelation.cpp index 149ba63fe..8b04cf18d 100644 --- a/src/core/CrossCorrelation.cpp +++ b/src/core/CrossCorrelation.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 18, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "CrossCorrelation.h" #include @@ -41,7 +51,6 @@ CrossCorrelation::CrossCorrelation(int k, int t) : type(t) , order(k) { if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order: " << this->order); - switch (this->type) { case (Interpol): case (Legendre): @@ -51,6 +60,7 @@ CrossCorrelation::CrossCorrelation(int k, 
int t) } setCCCPaths(details::find_filters()); + readCCCBin(); } @@ -59,7 +69,6 @@ CrossCorrelation::CrossCorrelation(int t, const MatrixXd &L, const MatrixXd &R) , order(L.cols() / 2 - 1) { if (this->order < 1 or this->order > MaxOrder) MSG_ABORT("Invalid cross correlation order, " << this->order); if (R.cols() != L.cols()) MSG_ABORT("Right and Left cross correlation have different order!"); - switch (this->type) { case (Interpol): case (Legendre): @@ -97,14 +106,11 @@ void CrossCorrelation::readCCCBin() { int K = this->order + 1; this->Left = MatrixXd::Zero(K * K, 2 * K); this->Right = MatrixXd::Zero(K * K, 2 * K); - double dL[2 * K]; double dR[2 * K]; - for (int i = 0; i < K * K; i++) { L_fis.read((char *)dL, sizeof(double) * 2 * K); R_fis.read((char *)dR, sizeof(double) * 2 * K); - for (int j = 0; j < 2 * K; j++) { if (std::abs(dL[j]) < MachinePrec) dL[j] = 0.0; if (std::abs(dR[j]) < MachinePrec) dR[j] = 0.0; @@ -117,4 +123,4 @@ void CrossCorrelation::readCCCBin() { R_fis.close(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/CrossCorrelationCache.cpp b/src/core/CrossCorrelationCache.cpp index 83438f545..6b0595858 100644 --- a/src/core/CrossCorrelationCache.cpp +++ b/src/core/CrossCorrelationCache.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 18, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "CrossCorrelationCache.h" #include "utils/Printer.h" @@ -65,6 +75,7 @@ template const Eigen::MatrixXd &CrossCorrelationCache::getLMatrix(int return ObjectCache::get(order).getLMatrix(); } + /** @brief Fetches the cross correlation coefficients. 
* * @param[in] order: Dimension of \f$ V_0 \subset L^2(\mathbb R) \f$ minus one, diff --git a/src/core/FilterCache.cpp b/src/core/FilterCache.cpp index a233217d3..e6841bd7d 100644 --- a/src/core/FilterCache.cpp +++ b/src/core/FilterCache.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 8, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "FilterCache.h" #include "utils/Printer.h" @@ -68,4 +78,4 @@ template const MatrixXd &FilterCache::getFilterMatrix(int order) { template class FilterCache; template class FilterCache; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/GaussQuadrature.cpp b/src/core/GaussQuadrature.cpp index c29eb1c82..9c30823f8 100644 --- a/src/core/GaussQuadrature.cpp +++ b/src/core/GaussQuadrature.cpp @@ -23,6 +23,9 @@ * */ +/* + */ + #include "GaussQuadrature.h" #include "MRCPP/constants.h" #include "MRCPP/macros.h" @@ -33,6 +36,13 @@ using namespace Eigen; namespace mrcpp { +/** Constructor for Gauss-Legendre quadrature. 
+ * + * \param order Polynominal order + * \param a Lower bound of validity + * \param b Upper bound of validity + * \param intervals Number of intervals to divde |a-b| into + */ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { this->order = k; this->A = a; @@ -44,17 +54,13 @@ GaussQuadrature::GaussQuadrature(int k, double a, double b, int inter) { } if (a >= b) { MSG_ERROR("Invalid Gauss interval, a > b."); } if (this->intervals < 1) { MSG_ERROR("Invalid number of intervals, intervals < 1"); } - this->npts = this->order * this->intervals; - this->roots = VectorXd::Zero(this->npts); this->weights = VectorXd::Zero(this->npts); - this->unscaledRoots = VectorXd::Zero(this->order); this->unscaledWeights = VectorXd::Zero(this->order); - + // set up unscaled Gauss points and weights ( interval ]-1,1[) if (calcGaussPtsWgts() != 1) { MSG_ERROR("Setup of Gauss-Legendre weights failed.") } - calcScaledPtsWgts(); } @@ -76,12 +82,18 @@ void GaussQuadrature::setIntervals(int i) { calcScaledPtsWgts(); } +/** Calculate scaled distribution of roots for Gauss-Legendre + * quadrature on on ]a,b[. The number of quadrature points on the interval + * is scale*(order+1). + */ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) const { + // length of one block double transl = (b - a) / (double)inter; int k = 0; double pos = a; double xl = transl * 0.5; + // scale and translate Gauss points and weights for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { rts(k) = this->unscaledRoots(j) * xl + pos + xl; @@ -91,12 +103,18 @@ void GaussQuadrature::rescaleRoots(VectorXd &rts, double a, double b, int inter) } } +/** Calculate scaled distribution of weights for Gauss-Legendre + * quadrature on on ]a,b[. The number of quadrature points on the interval + * is scale*(order+1). 
+ */ void GaussQuadrature::rescaleWeights(VectorXd &wgts, double a, double b, int inter) const { + // length of one block double transl = (b - a) / (double)inter; int k = 0; double pos = a; double xl = transl * 0.5; + // scale and translate Gauss points and weights for (int i = 0; i < inter; i++) { for (int j = 0; j < this->order; j++) { wgts(k) = this->unscaledWeights(j) * xl + pos + xl; @@ -106,12 +124,18 @@ void GaussQuadrature::rescaleWeights(VectorXd &wgts, double a, double b, int int } } +/** Calculate scaled distribution of points and weights for Gauss-Legendre + * quadrature on on ]a,b[. The number of quadrature points on the interval + * is scale*(order+1). + */ void GaussQuadrature::calcScaledPtsWgts() { + // length of one block double transl = (this->B - this->A) / (double)this->intervals; int k = 0; double pos = this->A; double xl = transl * 0.5; + // scale and translate Gauss points and weights for (int i = 0; i < this->intervals; i++) { for (int j = 0; j < this->order; j++) { this->roots(k) = this->unscaledRoots(j) * xl + pos + xl; @@ -122,6 +146,14 @@ void GaussQuadrature::calcScaledPtsWgts() { } } +/** Calulate distribution of points and weights for Guass-Legendre quadrature on + * ]-1,1[. + * + * Find quadrature points and weights by solving for the roots of + * Legendre polynomials using Newtons method. Using double precison the + * maximum stable order is currently set to 13. Return 1 on success, 0 on failure. 
+ * + */ int GaussQuadrature::calcGaussPtsWgts() { int K; if (this->order % 2 == 0) { @@ -136,7 +168,7 @@ int GaussQuadrature::calcGaussPtsWgts() { double xm = (b + a) * 0.5; double xl = (b - a) * 0.5; - LegendrePoly legendrep(this->order, 1.0, 0.0); + LegendrePoly legendrep(this->order, 1.0, 0.0); // Interval [-1,1] Vector2d lp; for (int i = 0; i < K; i++) { @@ -160,6 +192,7 @@ int GaussQuadrature::calcGaussPtsWgts() { return 1; } +/** Integrate a 1D-function f(x) using quadrature */ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { double isum = 0.e0; Coord<1> r; @@ -170,6 +203,7 @@ double GaussQuadrature::integrate(RepresentableFunction<1> &func) const { return isum; } +/** Integrate a 2D-function f(x1, x2) using quadrature */ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { Coord<2> r; double isum = 0.e0; @@ -185,6 +219,7 @@ double GaussQuadrature::integrate(RepresentableFunction<2> &func) const { return isum; } +/** Integrate a 3D-function f(x1, x2, x3) using quadrature */ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { Coord<3> r; @@ -206,6 +241,11 @@ double GaussQuadrature::integrate(RepresentableFunction<3> &func) const { return isum; } +/** Integrate a ND-function f(x1,...), allowing for different + * quadrature in each dimension. + * + * This function has been implemented using a recursive algorithm. 
+ */ double GaussQuadrature::integrate_nd(RepresentableFunction<3> &func, int axis) const { NOT_IMPLEMENTED_ABORT; NEEDS_TESTING @@ -227,4 +267,4 @@ double GaussQuadrature::integrate_nd(RepresentableFunction<3> &func, int axis) c */ } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/InterpolatingBasis.cpp b/src/core/InterpolatingBasis.cpp index fd005a090..3a371b5ee 100644 --- a/src/core/InterpolatingBasis.cpp +++ b/src/core/InterpolatingBasis.cpp @@ -23,6 +23,15 @@ * */ +/* + * + * + * \date June 2, 2010 + * \author Stig Rune Jensen \n + * CTCC, University of Tromsø + * + */ + #include "InterpolatingBasis.h" #include @@ -33,9 +42,10 @@ using namespace Eigen; namespace mrcpp { -/** - * @brief Build the set of interpolating scaling polynomials {I_k}. - * + + +/** @brief Initialise interpolating scaling basis. + * * @details Fills * std::vector \b funcs * declared in the base class @@ -55,44 +65,27 @@ namespace mrcpp { * Here \f$ k \f$ is \b order declared in the base class. * * @note These interpolating scaling functions are defined on the unit interval \f$ (0, 1) \f$. - - * Procedure (for quadrature order q and scaling order s): - * 1) Fetch Gaussian quadrature nodes (roots) and weights (wgts) of order q. - * 2) Precompute Legendre polynomials L_0, L_1, …, L_{q-1} (scaled/shifted - * variant via LegendrePoly(k, 2.0, 1.0)). - * 3) For each quadrature node k: - * a) Start from a copy of L_s (highest degree used for stability). - * b) Scale it so that I_k(roots[k]) accumulates the desired unit - * contribution. The factor (2*i+1) is the standard Legendre - * normalization multiplier that appears in expansions / projections. - * c) Accumulate lower-degree Legendre polynomials down to degree 0, - * with coefficients proportional to L_i(roots[k]) * (2*i+1). - * d) Finally, scale I_k by sqrt(wgts[k]) to make the quadrature-based - * normalization diagonal and simple (see calcCVMaps()). - * 4) Store I_k into this->funcs. 
+ * * - * Remarks: - * - The outer loop is over nodes k, producing one cardinal/interpolatory - * polynomial per node. - * - The inner loop goes from high to low degree (q-2 … 0). The comment in - * the code notes that adding higher-order polys into lower-order ones is - * numerically undesirable, hence the chosen order of accumulation. + * */ void InterpolatingBasis::initScalingBasis() { - int qOrder = getQuadratureOrder(); // number of quadrature points (q) - int sOrder = getScalingOrder(); // polynomial "scaling order" (s) + int qOrder = getQuadratureOrder(); + int sOrder = getScalingOrder(); // sOrder = qOrder - 1 - // Obtain quadrature nodes and weights of order q. getQuadratureCache(qc); - const VectorXd roots = qc.getRoots(qOrder); // size q - const VectorXd wgts = qc.getWeights(qOrder); // size q + const VectorXd roots = qc.getRoots(qOrder); + const VectorXd wgts = qc.getWeights(qOrder); std::vector L_k; for (int k = 0; k < qOrder; k++) { L_k.push_back(LegendrePoly(k, 2.0, 1.0)); } for (int k = 0; k < qOrder; k++) { + // Can't add higher-order polynomials to lower-order ones, so I + // changed the order of the loop Polynomial I_k(L_k[sOrder]); I_k *= L_k[sOrder].evalf(roots(k)) * (2.0 * sOrder + 1); + for (int i = qOrder - 2; i >= 0; i--) { double val = L_k[i].evalf(roots(k)) * (2.0 * i + 1); I_k.addInPlace(val, L_k[i]); @@ -102,33 +95,22 @@ void InterpolatingBasis::initScalingBasis() { } } -/** - * @brief Fill the matrix of basis values at quadrature nodes. - * - * For an *interpolating* basis, evaluating basis polynomial I_k at node k' - * yields δ_{k,k'}. Therefore, the quadrature value matrix is just the identity. + +/** @brief In Progress by Evgueni... + * * - * Implementation detail: - * - Only the diagonal entries are set to 1; all other entries remain 0 - * (matrix presumed zero-initialized elsewhere). 
+ * */ void InterpolatingBasis::calcQuadratureValues() { int q_order = getQuadratureOrder(); for (int k = 0; k < q_order; k++) { this->quadVals(k, k) = 1.0; } } -/** - * @brief Build coefficient↔value diagonal maps using quadrature weights. - * - * The maps relate coefficient vectors in the interpolatory basis to vectors - * of point-values at quadrature nodes, under the quadrature-induced inner - * product: - * - * - cvMap: coefficient → value map at nodes (scales by sqrt(1/w_k)) - * - vcMap: value → coefficient map at nodes (scales by sqrt(w_k)) + +/** @brief In Progress by Evgueni... + * * - * With the construction in initScalingBasis(), these maps are diagonal and - * inverse of each other. + * */ void InterpolatingBasis::calcCVMaps() { int q_order = getQuadratureOrder(); diff --git a/src/core/LegendreBasis.cpp b/src/core/LegendreBasis.cpp index 6e430f5be..1fe198d09 100644 --- a/src/core/LegendreBasis.cpp +++ b/src/core/LegendreBasis.cpp @@ -23,6 +23,15 @@ * */ +/* + * + * + * \date June 2, 2010 + * \author Stig Rune Jensen \n + * CTCC, University of Tromsø + * + */ + #include "LegendreBasis.h" #include "QuadratureCache.h" #include "functions/LegendrePoly.h" @@ -31,15 +40,9 @@ using namespace Eigen; namespace mrcpp { -/** - * @brief Initialize the Legendre scaling basis {P_k}_{k=0..s}. - * - * For each degree k up to the scaling order, construct a (shifted/scaled) - * Legendre polynomial and multiply by sqrt(2k+1) to achieve exact L^2 - * normalization on the reference interval used by LegendrePoly. - * - * Effects: - * - Appends each normalized polynomial to this->funcs. + +/** @brief Initialise Legendre scaling basis. + * * @details Fills * std::vector \b funcs * declared in the base class @@ -59,24 +62,21 @@ namespace mrcpp { * Here \f$ k \f$ is \b order declared in the base class. * * @note These Legendre scaling functions are defined on the unit interval \f$ (0, 1) \f$. 
+ * */ void LegendreBasis::initScalingBasis() { for (int k = 0; k < getScalingOrder() + 1; k++) { LegendrePoly L_k(k, 2.0, 1.0); - L_k *= std::sqrt(2.0 * k + 1.0); + L_k *= std::sqrt(2.0 * k + 1.0); // exact normalization this->funcs.push_back(L_k); } } -/** - * @brief Fill the matrix of basis values at Gaussian quadrature points. - * - * quadVals(i, k) := P_k( x_i ), where {x_i} are the q quadrature nodes. + +/** @brief In Progress by Evgueni... + * * - * Steps: - * 1) Obtain quadrature roots (points) of order q. - * 2) For each basis polynomial P_k, evaluate it at all points x_i and store - * in the corresponding column k of quadVals. + * */ void LegendreBasis::calcQuadratureValues() { getQuadratureCache(qc); @@ -85,44 +85,26 @@ void LegendreBasis::calcQuadratureValues() { for (int k = 0; k < q_order; k++) { const Polynomial &poly = this->getFunc(k); - for (int i = 0; i < q_order; i++) { - this->quadVals(i, k) = poly.evalf(pts(i)); - } + for (int i = 0; i < q_order; i++) { this->quadVals(i, k) = poly.evalf(pts(i)); } } } -/** - * @brief Build the coefficient↔value maps using quadrature weights. - * - * For the Legendre basis, we assemble vcMap directly via: - * vcMap(i, k) = P_k( x_i ) * w_i, - * where {w_i} are the quadrature weights. This corresponds to the (discrete) - * projection of the basis onto the quadrature nodes with weighting. - * - * Then we compute cvMap as the matrix inverse of vcMap: - * cvMap = (vcMap)^{-1}. - * - * Interpretation: - * - vcMap : value→coefficient (takes nodal values and produces coefficients) - * - cvMap : coefficient→value (evaluates coefficients back to nodal values) + +/** @brief In Progress by Evgueni... + * * - * Note: - * - Unlike the interpolating basis (where maps are diagonal), for the - * Legendre basis vcMap is dense (q×q) and we invert it numerically. 
+ * */ void LegendreBasis::calcCVMaps() { getQuadratureCache(qc); int q_order = getQuadratureOrder(); - const VectorXd &pts = qc.getRoots(q_order); + const VectorXd &pts = qc.getRoots(q_order); const VectorXd &wgts = qc.getWeights(q_order); for (int k = 0; k < q_order; k++) { const Polynomial &poly = this->getFunc(k); - for (int i = 0; i < q_order; i++) { - this->vcMap(i, k) = poly.evalf(pts(i)) * wgts(i); - } + for (int i = 0; i < q_order; i++) { this->vcMap(i, k) = poly.evalf(pts(i)) * wgts(i); } } - this->cvMap = this->vcMap.inverse(); } diff --git a/src/core/MWFilter.cpp b/src/core/MWFilter.cpp index 988d4e9ca..8ada4f2ca 100644 --- a/src/core/MWFilter.cpp +++ b/src/core/MWFilter.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 8, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "MWFilter.h" #include @@ -198,19 +208,21 @@ void MWFilter::generateBlocks() { double dH[K]; double dG[K]; + /* read H0 and G0 from disk */ this->G0 = Eigen::MatrixXd::Zero(K, K); this->H0 = Eigen::MatrixXd::Zero(K, K); for (int i = 0; i < K; i++) { H_fis.read((char *)dH, sizeof(double) * K); G_fis.read((char *)dG, sizeof(double) * K); for (int j = 0; j < K; j++) { - this->G0(i, j) = dG[j]; - this->H0(i, j) = dH[j]; + this->G0(i, j) = dG[j]; // G0 + this->H0(i, j) = dH[j]; // H0 } } G_fis.close(); H_fis.close(); + /* fill H1 and G1 according to symmetry */ this->G1 = Eigen::MatrixXd::Zero(K, K); this->H1 = Eigen::MatrixXd::Zero(K, K); switch (this->type) { @@ -237,5 +249,4 @@ void MWFilter::generateBlocks() { this->H0t = this->H0.transpose(); this->H1t = this->H1.transpose(); } - -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/ObjectCache.cpp b/src/core/ObjectCache.cpp index 6489a4522..0df714200 100644 --- a/src/core/ObjectCache.cpp +++ b/src/core/ObjectCache.cpp @@ -90,4 +90,4 @@ template class ObjectCache; template class ObjectCache; template class ObjectCache; -} // namespace mrcpp \ No 
newline at end of file +} // namespace mrcpp diff --git a/src/core/QuadratureCache.cpp b/src/core/QuadratureCache.cpp index f96511ff5..ceb7df652 100644 --- a/src/core/QuadratureCache.cpp +++ b/src/core/QuadratureCache.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 26, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "QuadratureCache.h" #include "utils/Printer.h" @@ -69,4 +79,4 @@ void QuadratureCache::setIntervals(int ivals) { } } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/ScalingBasis.cpp b/src/core/ScalingBasis.cpp index 3edb9c17f..99dc83f24 100644 --- a/src/core/ScalingBasis.cpp +++ b/src/core/ScalingBasis.cpp @@ -34,17 +34,15 @@ ScalingBasis::ScalingBasis(int k, int t) if (this->order < 0) MSG_ABORT("Invalid scaling order"); int q_order = getQuadratureOrder(); this->quadVals = Eigen::MatrixXd::Zero(q_order, q_order); - this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); - this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); + this->cvMap = Eigen::MatrixXd::Zero(q_order, q_order); + this->vcMap = Eigen::MatrixXd::Zero(q_order, q_order); } void ScalingBasis::evalf(const double *r, Eigen::MatrixXd &vals) const { if (vals.rows() != this->funcs.size()) MSG_ERROR("Invalid argument"); for (int d = 0; d < vals.cols(); d++) { - for (int k = 0; k < vals.rows(); k++) { - vals(k, d) = getFunc(k).evalf(r[d]); - } + for (int k = 0; k < vals.rows(); k++) { vals(k, d) = getFunc(k).evalf(r[d]); } } } @@ -80,4 +78,4 @@ std::ostream &ScalingBasis::print(std::ostream &o) const { return o; } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/core/SchrodingerEvolution_CrossCorrelation.cpp b/src/core/SchrodingerEvolution_CrossCorrelation.cpp index cbcfb0f26..29c734f6d 100644 --- a/src/core/SchrodingerEvolution_CrossCorrelation.cpp +++ b/src/core/SchrodingerEvolution_CrossCorrelation.cpp @@ -38,6 +38,20 @@ using namespace Eigen; namespace 
mrcpp { + +/** @brief SchrodingerEvolution_CrossCorrelation constructor. + * + * @param[in] amount: the integer specifying the maximum amount of matrices \f$ C^k \f$ + * to be used in calculations + * @param[in] k: the integer specifying the polynomial order + * @param[in] t: the integer specifying the scaling basis type + * + * @details It checks if the order and type are meaningful and then reads matrices from a file. + * By default the file has some information about the data stored, + * so the first interger to read is describing the size of the documentation text. + * + * + */ SchrodingerEvolution_CrossCorrelation::SchrodingerEvolution_CrossCorrelation(int amount, int k, int t) : type(t), order(k), amount(amount) { @@ -52,9 +66,11 @@ SchrodingerEvolution_CrossCorrelation::SchrodingerEvolution_CrossCorrelation(int } setCCCPath(details::find_filters()); + readCCCBin(); } + void SchrodingerEvolution_CrossCorrelation::setCCCPath(const std::string &lib) { switch (this->type) { case (Interpol): @@ -71,24 +87,48 @@ void SchrodingerEvolution_CrossCorrelation::setCCCPath(const std::string &lib) { void SchrodingerEvolution_CrossCorrelation::readCCCBin() { std::ifstream input_file(this->path.c_str(), std::ios::binary); + if (not input_file) MSG_ABORT("Could not open cross correlation: " << this->path); + // Read the text length int text_length; input_file.read(reinterpret_cast(&text_length), sizeof(text_length)); + // Read the Unicode characters std::vector unicode_chars(text_length); input_file.read(reinterpret_cast(unicode_chars.data()), sizeof(char32_t) * text_length); + // Read the amount of matrices int K; input_file.read(reinterpret_cast(&K), sizeof(K)); + // Read the size/order of each matrix int order; input_file.read(reinterpret_cast(&order), sizeof(order)); + // Read the matrices std::vector C_even(K, Eigen::MatrixXd(order, order)); auto data_amount = order * order * sizeof(double); for (auto& matrix : C_even) input_file.read(reinterpret_cast(matrix.data()), 
data_amount); +/* + // Print the text length + std::cout << text_length << std::endl; + + // Print the text + for (char32_t c : unicode_chars) { + std::wcout << static_cast(c); + } + // Print the matrices + std::cout << std::endl; + std::cout << "----------------------------------" << std::endl; + for (auto& matrix : C_even) + { + std::cout << matrix << std::endl; + std::cout << "----------------------------------" << std::endl; + } +*/ + // Create matrix containing the appropriate amount of coefficients int Order = this->order + 1; for (int k = 0; k < this->amount; k++) { @@ -99,4 +139,4 @@ void SchrodingerEvolution_CrossCorrelation::readCCCBin() input_file.close(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/BoysFunction.cpp b/src/functions/BoysFunction.cpp index 4aca11a71..7b9f1ddb5 100644 --- a/src/functions/BoysFunction.cpp +++ b/src/functions/BoysFunction.cpp @@ -42,7 +42,6 @@ double BoysFunction::evalf(const Coord<1> &r) const { int n = this->order; double x = r[0]; - auto f = [x, n](const Coord<1> &t) -> double { double t_2 = t[0] * t[0]; double xt_2 = x * t_2; @@ -59,4 +58,4 @@ double BoysFunction::evalf(const Coord<1> &r) const { return result; } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp index 24b517fd0..a57fe6708 100644 --- a/src/functions/GaussExp.cpp +++ b/src/functions/GaussExp.cpp @@ -41,7 +41,7 @@ namespace mrcpp { template double GaussExp::defaultScreening = 10.0; -template GaussExp::GaussExp(int nTerms, double /*prec*/) { +template GaussExp::GaussExp(int nTerms, double prec) { for (int i = 0; i < nTerms; i++) { this->funcs.push_back(nullptr); } } @@ -64,6 +64,7 @@ template GaussExp::~GaussExp() { template GaussExp &GaussExp::operator=(const GaussExp &gexp) { if (&gexp == this) return *this; + // screening = gexp.screening; this->funcs.clear(); for (unsigned int i = 0; i < gexp.size(); i++) { if 
(gexp.funcs[i] == nullptr) { @@ -210,7 +211,6 @@ template GaussExp GaussExp::mult(GaussFunc &g) { } return result; } - template GaussExp GaussExp::mult(GaussPoly &g) { int nTerms = this->size(); GaussExp result(nTerms); @@ -239,17 +239,19 @@ template void GaussExp::multInPlace(double d) { } template double GaussExp::calcSquareNorm() const { + /* computing the squares */ double norm = 0.0; for (int i = 0; i < this->size(); i++) { double nc = this->funcs[i]->calcSquareNorm(); norm += nc; } + /* computing the double products */ for (int i = 0; i < this->size(); i++) { - GaussExp funcs_i = getFunc(i).asGaussExp(); + GaussExp funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc for (int fi = 0; fi < funcs_i.size(); fi++) { GaussFunc &func_i = static_cast &>(funcs_i.getFunc(fi)); for (int j = i + 1; j < this->size(); j++) { - GaussExp funcs_j = getFunc(j).asGaussExp(); + GaussExp funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc for (int fj = 0; fj < funcs_j.size(); fj++) { GaussFunc &func_j = static_cast &>(funcs_j.getFunc(fj)); double overlap = func_i.calcOverlap(func_j); @@ -283,6 +285,40 @@ template void GaussExp::setScreen(bool screen) { for (int i = 0; i < this->size(); i++) { this->funcs[i]->setScreen(screen); } } +// Calculate the scaling and wavelet coefs of all the children, and do the +// outer product to make the nD-scaling coefs. Since a Gaussian expansion +// is not separable, we have to do the projection term by term. 
+/* +template +void GaussExp::calcWaveletCoefs(MWNode &node) { + static const int tDim = 1 << D; + const ScalingBasis &sf = node.getMWTree().getScalingFunctions(); + MatrixXd &scaling = node.getMWTree().getTmpScalingCoefs(); + VectorXd &tmpvec = node.getMWTree().getTmpScalingVector(); + int kp1 = node.getKp1(); + int kp1_d = node.getKp1_d(); + int inpos = kp1_d - kp1; + int scale = node.getNodeIndex().scale() + 1; + node.allocCoefs(); + for (int child = 0; child < tDim; child++) { + int l[D]; + node.calcChildTranslation(child, l); + for (int n = 0; n < this->size(); n++) { + if (this->getFunc(n).checkScreen(scale, l)) { + continue; + } + sf.calcScalingCoefs(this->getFunc(n), scale, l, scaling); + tmpvec.segment(inpos, kp1) = scaling.col(0); + math_utils::tensorExpandCoefs(D, 0, kp1, kp1_d, scaling, tmpvec); + node.getCoefs().segment(child * kp1_d, kp1_d) += tmpvec; + } + } + node.mwTransform(Compression); + node.setHasCoefs(); + node.calcNorms(); +} +*/ + template void GaussExp::setDefaultScreening(double screen) { if (screen < 0) { MSG_ERROR("Screening constant cannot be negative!"); } defaultScreening = screen; @@ -297,6 +333,11 @@ template std::ostream &GaussExp::print(std::ostream &o) const { return o; } +/** @returns Coulomb repulsion energy between all pairs in GaussExp, including self-interaction + * + * @note Each Gaussian must be normalized to unit charge + * \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct! 
+ */ template double GaussExp::calcCoulombEnergy() const { NOT_IMPLEMENTED_ABORT } @@ -304,11 +345,11 @@ template double GaussExp::calcCoulombEnergy() const { template <> double GaussExp<3>::calcCoulombEnergy() const { double energy = 0.0; for (int i = 0; i < this->size(); i++) { - GaussExp<3> funcs_i = getFunc(i).asGaussExp(); + GaussExp<3> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc for (int fi = 0; fi < funcs_i.size(); fi++) { GaussFunc<3> &func_i = static_cast &>(funcs_i.getFunc(fi)); for (int j = i; j < this->size(); j++) { - GaussExp<3> funcs_j = getFunc(j).asGaussExp(); + GaussExp<3> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc for (int fj = 0; fj < funcs_j.size(); fj++) { GaussFunc<3> &func_j = static_cast &>(funcs_j.getFunc(fj)); double c = 2.0; @@ -334,4 +375,4 @@ template class GaussExp<1>; template class GaussExp<2>; template class GaussExp<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp index bcf75b3e5..28736be58 100644 --- a/src/functions/GaussFunc.cpp +++ b/src/functions/GaussFunc.cpp @@ -143,6 +143,11 @@ template void GaussFunc::multInPlace(const GaussFunc &rhs) { this->setPow(newPow); } +/** @brief Multiply two GaussFuncs + * @param[in] this: Left hand side of multiply + * @param[in] rhs: Right hand side of multiply + * @returns New GaussPoly + */ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { GaussFunc &lhs = *this; GaussPoly result; @@ -158,6 +163,10 @@ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { return result; } +/** @brief Multiply GaussFunc by scalar + * @param[in] c: Scalar to multiply + * @returns New GaussFunc + */ template GaussFunc GaussFunc::mult(double c) { GaussFunc g = *this; g.coef *= c; @@ -167,6 +176,9 @@ template GaussFunc GaussFunc::mult(double c) { template std::ostream &GaussFunc::print(std::ostream &o) const { auto is_array = 
details::are_all_equal(this->getExp()); + // If all of the values in the exponential are the same only + // one is printed, else, all of them are printed. + o << "Coef : " << this->getCoef() << std::endl; if (!is_array) { o << "Exp : "; @@ -183,14 +195,25 @@ template std::ostream &GaussFunc::print(std::ostream &o) const { return o; } +/** @brief Compute Coulomb repulsion energy between two GaussFuncs + * @param[in] this: Left hand GaussFunc + * @param[in] rhs: Right hand GaussFunc + * @returns Coulomb energy + * + * @note Both Gaussians must be normalized to unit charge + * \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct! + */ template double GaussFunc::calcCoulombEnergy(const GaussFunc &gf) const { NOT_IMPLEMENTED_ABORT; } template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const { + // Checking if the elements in each exponent are constant if (!details::are_all_equal<3>(this->getExp()) or !details::are_all_equal<3>(gf.getExp())) NOT_IMPLEMENTED_ABORT; + // If they are constant the 0th element are assigned a value + // and the Coulomb Energy can be calculated auto p = this->getExp()[0]; auto q = gf.getExp()[0]; @@ -216,4 +239,4 @@ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const template class GaussFunc<1>; template class GaussFunc<2>; template class GaussFunc<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp index bfe78d349..0dfeaf2cd 100644 --- a/src/functions/GaussPoly.cpp +++ b/src/functions/GaussPoly.cpp @@ -37,6 +37,12 @@ using namespace Eigen; namespace mrcpp { +/** @returns New GaussPoly object + * @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$ + * @param[in] alpha: Coefficient, \f$ \alpha e^{-r^2} \f$ + * @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ + * @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... 
\f$ + */ template GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const std::array &power) : Gaussian(beta, alpha, pos, power) { @@ -79,6 +85,7 @@ GaussPoly::GaussPoly(const GaussFunc &gf) VectorXd coefs = VectorXd::Zero(order + 1); coefs[order] = 1.0; poly[d]->setCoefs(coefs); + // poly[d]->unsetBounds(); } } @@ -112,6 +119,7 @@ template double GaussPoly::evalf(const Coord &r) const { } double q2 = 0.0, p2 = 1.0; for (int d = 0; d < D; d++) { + // assert(this->poly[d]->getCheckBounds() == false); double q = r[d] - this->pos[d]; q2 += this->alpha[d] * q * q; p2 *= poly[d]->evalf(r[d] - this->pos[d]); @@ -120,9 +128,16 @@ template double GaussPoly::evalf(const Coord &r) const { } template double GaussPoly::evalf1D(const double r, int d) const { + // NOTE! + // This function evaluation will give the first dimension the full coef + // amplitude, leaving all other directions with amplitude 1.0. This is to + // avoid expensive d-root evaluation when distributing the amplitude + // equally to all dimensions. 
+ if (this->getScreen()) { if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; } } + // assert(this->poly[d]->getCheckBounds() == false); double q2 = 0.0, p2 = 1.0; double q = (r - this->pos[d]); q2 += q * q; @@ -215,8 +230,41 @@ void GaussPoly::fillCoefPowVector(std::vector &coefs, template GaussPoly GaussPoly::mult(const GaussPoly &rhs) { NOT_IMPLEMENTED_ABORT; + /* + GaussPoly &lhs = *this; + GaussPoly result; + result.multPureGauss(lhs, rhs); + for (int d = 0; d < D; d++) { + double newPos = result.getPos()[d]; + int lhsPow = lhs.getPower(d); + Polynomial lhsPoly(lhsPow); + lhsPoly.clearCoefs(); + for (int p = 0; p <= lhsPow; p++) { + Polynomial tmpPoly(newPos - lhs.getPos()[d], p); + tmpPoly *= lhs.getPolyCoefs(d)[p]; + lhsPoly += tmpPoly; + } + + int rhsPow = rhs.getPower(d); + Polynomial rhsPoly(rhsPow); + rhsPoly.clearCoefs(); + for (int p = 0; p <= rhsPow; p++) { + Polynomial tmpPoly(newPos - rhs.getPos()[d], p); + tmpPoly *= rhs.getPolyCoefs(d)[p]; + rhsPoly += tmpPoly; + } + Polynomial newPoly = lhsPoly * rhsPoly; + result.setPoly(d, newPoly); + } + result.setCoef(result.getCoef() * lhs.getCoef() * rhs.getCoef()); + return result; + */ } +/** @brief Multiply GaussPoly by scalar + * @param[in] c: Scalar to multiply + * @returns New GaussPoly + */ template GaussPoly GaussPoly::mult(double c) { GaussPoly g = *this; g.coef *= c; @@ -235,6 +283,11 @@ template void GaussPoly::setPow(const std::array &pow) { } } +/** @brief Set polynomial in given dimension + * + * @param[in] d: Cartesian direction + * @param[in] poly: Polynomial to set + */ template void GaussPoly::setPoly(int d, Polynomial &poly) { if (this->poly[d] != nullptr) { delete this->poly[d]; } this->poly[d] = new Polynomial(poly); @@ -243,6 +296,9 @@ template void GaussPoly::setPoly(int d, Polynomial &poly) { template std::ostream &GaussPoly::print(std::ostream &o) const { auto is_array = details::are_all_equal(this->getExp()); + + // If all of the values in the exponential are the same only + 
// one is printed, else, all of them are printed o << "Coef : " << this->getCoef() << std::endl; if (!is_array) { o << "Exp : "; @@ -264,4 +320,4 @@ template class GaussPoly<1>; template class GaussPoly<2>; template class GaussPoly<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp index 915731990..6dbfa7c5b 100644 --- a/src/functions/Gaussian.cpp +++ b/src/functions/Gaussian.cpp @@ -23,6 +23,15 @@ * */ +/** + * + * + * \date May 25, 2010 + * \author Stig Rune Jensen + * CTCC, University of Tromsø + * + */ + #include #include "Gaussian.h" @@ -37,10 +46,6 @@ using namespace Eigen; namespace mrcpp { -/*---------------------------* - * Constructors / init state * - *---------------------------*/ - template Gaussian::Gaussian(double a, double c, const Coord &r, const std::array &p) : screen(false) @@ -58,11 +63,8 @@ Gaussian::Gaussian(const std::array &a, double c, const Coord & , alpha(a) , pos(r) {} -/*----------------------------------------------------* - * Multiply two *pure* Gaussians (no polynomial part) * - *----------------------------------------------------*/ - template void Gaussian::multPureGauss(const Gaussian &lhs, const Gaussian &rhs) { + auto newAlpha = std::array{}; auto mju = std::array{}; for (auto d = 0; d < D; d++) { @@ -83,10 +85,6 @@ template void Gaussian::multPureGauss(const Gaussian &lhs, const G setCoef(newCoef); } -/*--------------------------------------------* - * Screening boxes and quick-visibility tests * - *--------------------------------------------*/ - template void Gaussian::calcScreening(double nStdDev) { assert(nStdDev > 0); if (not this->isBounded()) { @@ -119,8 +117,10 @@ template bool Gaussian::isVisibleAtScale(int scale, int nQuadPts) con for (auto &alp : this->alpha) { double stdDeviation = std::pow(2.0 * alp, -0.5); auto visibleScale = static_cast(-std::floor(std::log2(nQuadPts * 0.5 * stdDeviation))); + if (scale < visibleScale) 
return false; } + return true; } @@ -134,10 +134,6 @@ template bool Gaussian::isZeroOnInterval(const double *a, const doubl return false; } -/*---------------------------------------------* - * Batch evaluation (matrix of points → values) * - *---------------------------------------------*/ - template void Gaussian::evalf(const MatrixXd &points, MatrixXd &values) const { assert(points.cols() == D); assert(points.cols() == values.cols()); @@ -147,11 +143,8 @@ template void Gaussian::evalf(const MatrixXd &points, MatrixXd &value } } -/*--------------------------------------* - * Convenience: maximum standard dev σ * - *--------------------------------------*/ - template double Gaussian::getMaximumStandardDiviation() const { + if (details::are_all_equal(this->getExp())) { auto exponent = this->getExp()[0]; return 1.0 / std::sqrt(2.0 * exponent); @@ -163,13 +156,9 @@ template double Gaussian::getMaximumStandardDiviation() const { } } -/*-------------------------* - * Overlap ⟨G|G'⟩ utilities * - *-------------------------*/ - template double Gaussian::calcOverlap(const Gaussian &inp) const { - const auto &bra_exp = this->asGaussExp(); - const auto &ket_exp = inp.asGaussExp(); + const auto &bra_exp = this->asGaussExp(); // Make sure all entries are GaussFunc + const auto &ket_exp = inp.asGaussExp(); // Make sure all entries are GaussFunc double S = 0.0; for (int i = 0; i < bra_exp.size(); i++) { @@ -182,27 +171,38 @@ template double Gaussian::calcOverlap(const Gaussian &inp) const { return S; } -/*-----------------------------* - * Semi-periodic “image” clone * - *-----------------------------*/ - +/** @brief Generates a GaussExp that is semi-periodic around a unit-cell + * + * @returns Semi-periodic version of a Gaussian around a unit-cell + * @param[in] period: The period of the unit cell + * @param[in] nStdDev: Number of standard deviations covered in each direction. 
Default 4.0 + * + * @details nStdDev = 1, 2, 3 and 4 ensures at least 68.27%, 95.45%, 99.73% and 99.99% of the + * integral is conserved with respect to the integration limits. + * + */ template GaussExp Gaussian::periodify(const std::array &period, double nStdDev) const { GaussExp gauss_exp; auto pos_vec = std::vector>(); auto x_std = nStdDev * this->getMaximumStandardDiviation(); + // This lambda function calculates the number of neighbouring cells + // required to keep at least x_stds of the integral conserved in the + // unit-cell. auto neighbooring_cells = [period, x_std](auto pos) { auto needed_cells_vec = std::vector(); for (auto i = 0; i < D; i++) { auto upper_bound = pos[i] + x_std; auto lower_bound = pos[i] - x_std; - (void)lower_bound; + // number of cells up and down relative to the center of the Gaussian needed_cells_vec.push_back(std::ceil(upper_bound / period[i])); } + return *std::max_element(needed_cells_vec.begin(), needed_cells_vec.end()); }; + // Finding starting position auto startpos = this->getPos(); for (auto d = 0; d < D; d++) { @@ -220,7 +220,9 @@ template GaussExp Gaussian::periodify(const std::array for (auto &c : cart) { for (auto i = 0; i < D; i++) c[i] *= period[i]; } + // Shift coordinates for (auto &c : cart) std::transform(c.begin(), c.end(), tmp_pos.begin(), c.begin(), std::plus()); + // Go from vector to mrcpp::Coord for (auto &c : cart) { mrcpp::Coord pos; std::copy_n(c.begin(), D, pos.begin()); @@ -237,9 +239,6 @@ template GaussExp Gaussian::periodify(const std::array return gauss_exp; } -/*-----------------------------* - * Explicit template instances * - *-----------------------------*/ template class Gaussian<1>; template class Gaussian<2>; template class Gaussian<3>; diff --git a/src/functions/JpowerIntegrals.cpp b/src/functions/JpowerIntegrals.cpp index 5aaf05d13..179f6fcc6 100644 --- a/src/functions/JpowerIntegrals.cpp +++ b/src/functions/JpowerIntegrals.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. 
* * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -24,7 +24,7 @@ */ #include "JpowerIntegrals.h" -#include +#include // std::find_if_not namespace mrcpp { @@ -35,16 +35,18 @@ JpowerIntegrals::JpowerIntegrals(double a, int scaling, int M, double threshold) for (int l = 1 - N; l < 0; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold)); } +/// @brief in progress +/// @param index - integer lying in the interval \f$ [ -2^n + 1, \ldots, 2^n - 1 ] \f$. +/// @return in progress std::vector> &JpowerIntegrals::operator[](int index) { if (index < 0) index += integrals.size(); return integrals[index]; } -std::vector> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double /*threshold*/) { +std::vector> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double threshold) { using namespace std::complex_literals; std::complex J_0 = 0.25 * std::exp(-0.25i * M_PI) / std::sqrt(M_PI * a) * std::exp(0.25i * static_cast(l * l) / a); - std::complex beta(0, 0.5 / a); auto alpha = static_cast(l) * beta; @@ -53,7 +55,7 @@ std::vector> JpowerIntegrals::calculate_J_power_integrals(i for (int m = 0; m < M; m++) { std::complex term1 = J[J.size() - 1] * alpha; std::complex term2 = J[J.size() - 2] * beta * static_cast(m) / static_cast(m + 2); - std::complex last = (term1 + term2) / static_cast(m + 3); + std::complex last = (term1 + term2) / static_cast(m + 3); J.push_back(last); } @@ -61,11 +63,12 @@ std::vector> JpowerIntegrals::calculate_J_power_integrals(i return J; } +/// @details Removes negligible elements in \b J 
until it reaches a considerable value. void JpowerIntegrals::crop(std::vector> &J, double threshold) { - auto isNegligible = [threshold](const std::complex &c) { - return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; - }; + // Lambda function to check if an element is negligible + auto isNegligible = [threshold](const std::complex &c) { return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; }; + // Remove negligible elements from the end of the vector J.erase(std::find_if_not(J.rbegin(), J.rend(), isNegligible).base(), J.end()); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/LegendrePoly.cpp b/src/functions/LegendrePoly.cpp index 6629cd2f8..4dce049b0 100644 --- a/src/functions/LegendrePoly.cpp +++ b/src/functions/LegendrePoly.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -23,6 +23,15 @@ * */ +/* + * + * \date Jul 5, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \brief + */ + #include "LegendrePoly.h" #include "core/ObjectCache.h" #include "utils/Printer.h" @@ -33,8 +42,12 @@ namespace mrcpp { using LegendreCache = ObjectCache; +/** Legendre polynomial constructed on [-1,1] and + * scaled by n and translated by l */ LegendrePoly::LegendrePoly(int k, double n, double l) : Polynomial(k) { + // Since we create Legendre polynomials recursively on [-1,1] + // we cache all lower order polynomials for future use. 
LegendreCache &Cache = LegendreCache::getInstance(); if (k >= 1) { if (not Cache.hasId(k - 1)) { @@ -42,20 +55,17 @@ LegendrePoly::LegendrePoly(int k, double n, double l) Cache.load(k - 1, lp, 2 * sizeof(double) * (k + 1)); } } - computeLegendrePolynomial(k); - double a = -1.0; double b = 1.0; setBounds(&a, &b); - translate(l); dilate(n); } +/** Compute Legendre polynomial coefs on interval [-1,1] */ void LegendrePoly::computeLegendrePolynomial(int k) { assert(this->size() >= k); - if (k == 0) { this->coefs[0] = 1.0; } else if (k == 1) { @@ -67,10 +77,8 @@ void LegendrePoly::computeLegendrePolynomial(int k) { LegendrePoly &Lm2 = Cache.get(k - 2); auto K = (double)k; - double cm2_0 = Lm2.getCoefs()[0]; this->coefs[0] = -(K - 1.0) * cm2_0 / K; - for (int j = 1; j < k + 1; j++) { double cm1_jm1 = Lm1.getCoefs()[j - 1]; if (j <= k - 2) { @@ -83,6 +91,9 @@ void LegendrePoly::computeLegendrePolynomial(int k) { } } +/** Calculate the value of an n:th order Legendre polynominal in x, including + * the first derivative. + */ Vector2d LegendrePoly::firstDerivative(double x) const { double c1, c2, c4, ym, yp, y; double dy, dyp, dym; @@ -95,7 +106,6 @@ Vector2d LegendrePoly::firstDerivative(double x) const { Vector2d val; int order = getOrder(); - if (order == 0) { val(0) = 1.0; val(1) = 0.0; @@ -108,22 +118,17 @@ Vector2d LegendrePoly::firstDerivative(double x) const { return val; } - y = q; + y = q; dy = 1.0; yp = 1.0; dyp = 0.0; - for (int i = 2; i < order + 1; i++) { c1 = (double)i; c2 = c1 * 2.0 - 1.0; c4 = c1 - 1.0; - ym = y; - y = (c2 * q * y - c4 * yp) / c1; - yp = ym; - dym = dy; dy = (c2 * q * dy - c4 * dyp + c2 * yp) / c1; dyp = dym; @@ -134,6 +139,9 @@ Vector2d LegendrePoly::firstDerivative(double x) const { return val; } +/** Calculate the value of an n:th order Legendre polynominal in x, including + * first and second derivatives. 
+ */ Vector3d LegendrePoly::secondDerivative(double x) const { NOT_IMPLEMENTED_ABORT; double c1, c2, c4, ym, yp, y, d2y; @@ -161,28 +169,24 @@ Vector3d LegendrePoly::secondDerivative(double x) const { return val; } - y = q; - dy = 1.e0; + y = q; + dy = 1.e0; d2y = 0.e0; - yp = 1.e0; - dyp = 0.e0; + yp = 1.e0; + dyp = 0.e0; d2yp = 0.e0; - for (int i = 2; i < order + 1; i++) { c1 = (double)i; c2 = c1 * 2.e0 - 1.e0; c4 = c1 - 1.e0; - ym = y; - y = (c2 * x * y - c4 * yp) / c1; + y = (c2 * x * y - c4 * yp) / c1; yp = ym; - dym = dy; - dy = (c2 * x * dy - c4 * dyp + c2 * yp) / c1; + dy = (c2 * x * dy - c4 * dyp + c2 * yp) / c1; dyp = dym; - d2ym = d2y; - d2y = (c2 * x * d2y - c4 * d2yp + c2 * 2.e0 * dyp) / c1; + d2y = (c2 * x * d2y - c4 * d2yp + c2 * 2.e0 * dyp) / c1; d2yp = d2ym; } val(0) = y; diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp index e0b0d708d..c54acc148 100644 --- a/src/functions/Polynomial.cpp +++ b/src/functions/Polynomial.cpp @@ -7,8 +7,8 @@ * This file is part of MRCPP. * * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation, either version 3 of the License, or + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MRCPP is distributed in the hope that it will be useful, @@ -23,6 +23,15 @@ * */ +/** + * + * \date Jun 7, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * + */ + #include #include "Polynomial.h" @@ -33,6 +42,8 @@ using namespace Eigen; namespace mrcpp { +/** Construct polynomial of order zero with given size and bounds. + * Includes default constructor. 
*/ Polynomial::Polynomial(int k, const double *a, const double *b) : RepresentableFunction<1, double>(a, b) { assert(k >= 0); @@ -49,6 +60,7 @@ Polynomial::Polynomial(double c, int k, const double *a, const double *b) for (int i = 0; i <= k; i++) { this->coefs[i] *= std::pow(c, k - i); } } +/** Construct polynomial with given coefficient vector and bounds. */ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -56,6 +68,7 @@ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) setCoefs(c); } +/** Makes a complete copy of the polynomial */ Polynomial::Polynomial(const Polynomial &poly) : RepresentableFunction<1>(poly) { this->N = poly.N; @@ -63,6 +76,7 @@ Polynomial::Polynomial(const Polynomial &poly) this->coefs = poly.coefs; } +/** Copies only the function, not its bounds */ Polynomial &Polynomial::operator=(const Polynomial &poly) { RepresentableFunction<1>::operator=(poly); this->N = poly.N; @@ -71,6 +85,7 @@ Polynomial &Polynomial::operator=(const Polynomial &poly) { return *this; } +/** Evaluate scaled and translated polynomial */ double Polynomial::evalf(double x) const { if (isBounded()) { if (x < this->getScaledLowerBound()) return 0.0; @@ -85,28 +100,35 @@ double Polynomial::evalf(double x) const { return y; } +/** This returns the actual scaled lower bound */ double Polynomial::getScaledLowerBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->A[0] + this->L)); } +/** This returns the actual scaled upper bound */ double Polynomial::getScaledUpperBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->B[0] + this->L)); } +/** Divide by norm of (bounded) polynomial. 
*/ void Polynomial::normalize() { double sqNorm = calcSquareNorm(); if (sqNorm < 0.0) MSG_ABORT("Cannot normalize polynomial"); (*this) *= 1.0 / std::sqrt(sqNorm); } +/** Compute the squared L2-norm of the (bounded) polynomial. + * Unbounded polynomials return -1.0. */ double Polynomial::calcSquareNorm() { double sqNorm = -1.0; if (isBounded()) { sqNorm = this->innerProduct(*this); } return sqNorm; } +/** Returns the order of the highest non-zero coef. + * NB: Not the length of the coefs vector. */ int Polynomial::getOrder() const { int n = 0; for (int i = 0; i < this->coefs.size(); i++) { @@ -115,11 +137,13 @@ int Polynomial::getOrder() const { return n; } +/** Calculate P = c*P */ Polynomial &Polynomial::operator*=(double c) { this->coefs = c * this->coefs; return *this; } +/** Calculate P = P*Q */ Polynomial &Polynomial::operator*=(const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -136,6 +160,7 @@ Polynomial &Polynomial::operator*=(const Polynomial &Q) { return P; } +/** Calculate Q = c*P */ Polynomial Polynomial::operator*(double c) const { const Polynomial &P = *this; Polynomial Q(P); @@ -143,6 +168,8 @@ Polynomial Polynomial::operator*(double c) const { return Q; } +/** Calculate R = P*Q. + * Returns unbounded polynomial. */ Polynomial Polynomial::operator*(const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -151,16 +178,19 @@ Polynomial Polynomial::operator*(const Polynomial &Q) const { return R; } +/** Calculate P = P + Q. */ Polynomial &Polynomial::operator+=(const Polynomial &Q) { this->addInPlace(1.0, Q); return *this; } +/** Calculate P = P - Q. */ Polynomial &Polynomial::operator-=(const Polynomial &Q) { this->addInPlace(-1.0, Q); return *this; } +/** Calculate P = P + c*Q. 
*/ void Polynomial::addInPlace(double c, const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -178,6 +208,8 @@ void Polynomial::addInPlace(double c, const Polynomial &Q) { P.setCoefs(newCoefs); } +/** Calculate R = P + c*Q, with a default c = 1.0. + * Returns unbounded polynomial. */ Polynomial Polynomial::add(double c, const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -186,6 +218,7 @@ Polynomial Polynomial::add(double c, const Polynomial &Q) const { return R; } +/** Calculate Q = dP/dx */ Polynomial Polynomial::calcDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -193,6 +226,7 @@ Polynomial Polynomial::calcDerivative() const { return Q; } +/** Calculate P = dP/dx */ void Polynomial::calcDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -202,6 +236,7 @@ void Polynomial::calcDerivativeInPlace() { P.setCoefs(newCoefs); } +/** Calculate indefinite integral Q = \int dP dx, integration constant set to zero */ Polynomial Polynomial::calcAntiDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -209,6 +244,7 @@ Polynomial Polynomial::calcAntiDerivative() const { return Q; } +/** Calculate indefinite integral P = \int dP dx, integration constant set to zero */ void Polynomial::calcAntiDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -220,6 +256,7 @@ void Polynomial::calcAntiDerivativeInPlace() { P.setCoefs(newCoefs); } +/** Integrate the polynomial P on [a,b] analytically */ double Polynomial::integrate(const double *a, const double *b) const { double lb = -DBL_MAX, ub = DBL_MAX; if (this->isBounded()) { @@ -238,6 +275,7 @@ double Polynomial::integrate(const double *a, const double *b) const { return sfac * (antidiff.evalf(ub) - antidiff.evalf(lb)); } +/** Compute analytically on interval defined by the calling polynomial. 
*/ double Polynomial::innerProduct(const Polynomial &Q) const { const Polynomial &P = *this; if (not P.isBounded()) MSG_ERROR("Unbounded polynomial"); @@ -246,4 +284,4 @@ double Polynomial::innerProduct(const Polynomial &Q) const { return pq.integrate(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/RepresentableFunction.cpp b/src/functions/RepresentableFunction.cpp index 9d0dc0300..3c55ac92b 100644 --- a/src/functions/RepresentableFunction.cpp +++ b/src/functions/RepresentableFunction.cpp @@ -23,13 +23,22 @@ * */ +/** + * + * + * \date April 30, 2010 + * \author Stig Rune Jensen \n + * CTCC, University of Tromsø + * + * + */ + #include "RepresentableFunction.h" #include "utils/Printer.h" namespace mrcpp { -template -RepresentableFunction::RepresentableFunction(const double *a, const double *b) { +template RepresentableFunction::RepresentableFunction(const double *a, const double *b) { if (a == nullptr or b == nullptr) { this->bounded = false; this->A = nullptr; @@ -46,8 +55,8 @@ RepresentableFunction::RepresentableFunction(const double *a, const double } } -template -RepresentableFunction::RepresentableFunction(const RepresentableFunction &func) { +/** Constructs a new function with same bounds as the input function */ +template RepresentableFunction::RepresentableFunction(const RepresentableFunction &func) { if (func.isBounded()) { this->bounded = true; this->A = new double[D]; @@ -63,14 +72,13 @@ RepresentableFunction::RepresentableFunction(const RepresentableFunction -RepresentableFunction & -RepresentableFunction::operator=(const RepresentableFunction &func) { +/** Copies function, not bounds. Use copy constructor if you want an + * identical function. 
*/ +template RepresentableFunction &RepresentableFunction::operator=(const RepresentableFunction &func) { return *this; } -template -RepresentableFunction::~RepresentableFunction() { +template RepresentableFunction::~RepresentableFunction() { if (this->isBounded()) { delete[] this->A; delete[] this->B; @@ -79,8 +87,7 @@ RepresentableFunction::~RepresentableFunction() { this->B = nullptr; } -template -void RepresentableFunction::setBounds(const double *a, const double *b) { +template void RepresentableFunction::setBounds(const double *a, const double *b) { if (a == nullptr or b == nullptr) { MSG_ERROR("Invalid arguments"); } if (not isBounded()) { this->bounded = true; @@ -94,8 +101,7 @@ void RepresentableFunction::setBounds(const double *a, const double *b) { } } -template -bool RepresentableFunction::outOfBounds(const Coord &r) const { +template bool RepresentableFunction::outOfBounds(const Coord &r) const { if (not isBounded()) { return false; } for (int d = 0; d < D; d++) { if (r[d] < getLowerBound(d)) return true; @@ -111,4 +117,4 @@ template class RepresentableFunction<1, ComplexDouble>; template class RepresentableFunction<2, ComplexDouble>; template class RepresentableFunction<3, ComplexDouble>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp index 0551a3cc4..598c9b12a 100644 --- a/src/functions/function_utils.cpp +++ b/src/functions/function_utils.cpp @@ -28,66 +28,87 @@ namespace mrcpp { namespace function_utils { -double ObaraSaika_ab(int power_a, int power_b, - double pos_a, double pos_b, - double expo_a, double expo_b); +double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b); } // namespace function_utils -template -double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { +template double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { double S = 1.0; - for 
(int d = 0; d < D; d++) { - S *= ObaraSaika_ab( - a.getPower()[d], b.getPower()[d], - a.getPos()[d], b.getPos()[d], - a.getExp()[d], b.getExp()[d] - ); - } + for (int d = 0; d < D; d++) { S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]); } S *= a.getCoef() * b.getCoef(); return S; } -double function_utils::ObaraSaika_ab(int power_a, int power_b, - double pos_a, double pos_b, - double expo_a, double expo_b) { +/** Compute the monodimensional overlap integral between two + gaussian distributions by means of the Obara-Saika recursive + scheme + + \f[ S_{ij} = \int_{-\infty}^{+\infty} \,\mathrm{d} x + (x-x_a)^{p_a} + (x-x_b)^{p_b} + e^{-c_a (x-x_a)^2} + e^{-c_b (x-x_b)^2}\f] + + @param power_a \f$ p_a \f$ + @param power_b \f$ p_b \f$ + @param pos_a \f$ x_a \f$ + @param pos_b \f$ x_b \f$ + @param expo_a \f$ c_a \f$ + @param expo_b \f$ c_b \f$ + + */ +double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) { int i, j; double expo_p, mu, pos_p, x_ab, x_pa, x_pb, s_00; - + /* The highest angular momentum combination is l=20 for a and b + * simultaneously */ double s_coeff[64]; - expo_p = expo_a + expo_b; - mu = expo_a * expo_b / (expo_a + expo_b); - pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; - x_ab = pos_a - pos_b; - x_pa = pos_p - pos_a; - x_pb = pos_p - pos_b; - + // if (out_of_bounds(power_a, 0, MAX_GAUSS_POWER) || + // out_of_bounds(power_b, 0, MAX_GAUSS_POWER) + // ) { + // PRINT_FUNC_NAME; + // INVALID_ARG_EXIT; + // } + + /* initialization of a hell of a lot of coefficients.... 
*/ + expo_p = expo_a + expo_b; /* total exponent */ + mu = expo_a * expo_b / (expo_a + expo_b); /* reduced exponent */ + pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; /* center of charge */ + x_ab = pos_a - pos_b; /* X_{AB} */ + x_pa = pos_p - pos_a; /* X_{PA} */ + x_pb = pos_p - pos_b; /* X_{PB} */ s_00 = pi / expo_p; - s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); + s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); /* overlap of two spherical gaussians */ + // int n_0j_coeff = 1 + power_b; /* n. of 0j coefficients needed */ + // int n_ij_coeff = 2 * power_a; /* n. of ij coefficients needed (i > 0) */ + /* we add 3 coeffs. to avoid a hell of a lot of if statements */ + /* n_tot_coeff = n_0j_coeff + n_ij_coeff + 3; */ + /* s_coeff = (double *) calloc(n_tot_coeff, sizeof(double));*/ + + /* generate first two coefficients */ s_coeff[0] = s_00; s_coeff[1] = x_pb * s_00; - j = 1; + /* generate the rest of the first row */ while (j < power_b) { s_coeff[j + 1] = x_pb * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); j++; } - + /* generate the first two coefficients with i > 0 */ s_coeff[j + 1] = s_coeff[j] - x_ab * s_coeff[j - 1]; s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); - i = 1; + /* generate the remaining coefficients with i > 0 */ while (i < power_a) { int i_l = j + 2 * i + 1; int i_r = j + 2 * i + 2; - s_coeff[i_l] = s_coeff[i_l - 1] - x_ab * s_coeff[i_l - 2]; s_coeff[i_r] = x_pa * s_coeff[i_r - 2] + (j * s_coeff[i_r - 3] + i * s_coeff[i_r - 4]) / (2.0 * expo_p); - i++; } + /* free(s_coeff);*/ return s_coeff[power_b + 2 * power_a]; } @@ -95,4 +116,4 @@ template double function_utils::calc_overlap<1>(const GaussFunc<1> &a, const Gau template double function_utils::calc_overlap<2>(const GaussFunc<2> &a, const GaussFunc<2> &b); template double function_utils::calc_overlap<3>(const GaussFunc<3> &a, const GaussFunc<3> &b); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git 
a/src/functions/special_functions.cpp b/src/functions/special_functions.cpp index 73eff21db..555528a58 100644 --- a/src/functions/special_functions.cpp +++ b/src/functions/special_functions.cpp @@ -25,22 +25,74 @@ #include "special_functions.h" + namespace mrcpp { + +/** @brief Free-particle time evolution on real line. + * + * @param[in] x: space coordinate in \f$ \mathbb R \f$. + * @param[in] x0: \f$ x_0 \f$ center of gaussian function at zero time moment. + * @param[in] t: time moment. + * @param[in] sigma: \f$ \sigma \f$ width of the initial gaussian wave. + * + * @details Analytical solution of a one dimensional free-particle + * movement + * \f[ + * \psi(x, t) + * = + * \sqrt{ + * \frac{ \sigma }{ 4it + \sigma } + * } + * e^{ - \frac { (x - x_0)^2 }{ 4it + \sigma } } + * \f] + * where \f$ t, \sigma > 0 \f$. + * + * @returns The complex-valued wave function + * \f$ \psi(x, t) \f$ + * at the specified space coordinate and time. + * + * + */ std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma) { - std::complex i(0.0, 1.0); - std::complex denom = sigma + 4.0 * t * i; - std::complex exponent = -((x - x0) * (x - x0)) / denom; + std::complex i(0.0, 1.0); // Imaginary unit + auto denominator = 4 * t * i + sigma; + std::complex sqrt_denom = std::sqrt(denominator); + std::complex exponent = -((x - x0) * (x - x0)) / denominator; - return std::sqrt(sigma) / std::sqrt(denom) * std::exp(exponent); + return std::sqrt(sigma) / sqrt_denom * std::exp(exponent); } + + +/** @brief A smooth compactly supported non-negative function. + * + * @param[in] x: space coordinate in \f$ \mathbb R \f$. + * @param[in] a: the left support boundary. + * @param[in] b: the right support boundary. + * + * @details Smooth function on the real line \f$ \mathbb R \f$ + * defined by the formula + * \f[ + * g_{a,b} (x) = \exp \left( - \frac{b - a}{(x - a)(b - x)} \right) + * , \quad + * a < x < b + * \f] + * and \f$ g_{a,b} (x) = 0 \f$ elsewhere. 
+ * + * @returns The non-negative value + * \f$ g_{a,b} (x) \f$ + * at the specified space coordinate \f$ x \in \mathbb R \f$. + * + * + */ double smooth_compact_function(double x, double a, double b) { + double res = 0; if (a < x && x < b) { - return std::exp((a - b) / ((x - a) * (b - x))); + res = exp((a - b) / (x - a) / (b - x)); } - return 0.0; + return res; } } // namespace mrcpp \ No newline at end of file diff --git a/src/operators/ABGVOperator.cpp b/src/operators/ABGVOperator.cpp index e992376e1..05525405e 100644 --- a/src/operators/ABGVOperator.cpp +++ b/src/operators/ABGVOperator.cpp @@ -33,6 +33,16 @@ namespace mrcpp { +/** @returns New ABGVOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] a: Left boundary condition + * @param[in] b: Right boundary condition + * @details Boundary parameters correspond to: + * - `a=0.0` `b=0.0`: Strictly local "center" difference + * - `a=0.5` `b=0.5`: Semi-local central difference + * - `a=1.0` `b=0.0`: Semi-local forward difference + * - `a=0.0` `b=1.0`: Semi-local backward difference + */ template ABGVOperator::ABGVOperator(const MultiResolutionAnalysis &mra, double a, double b) : DerivativeOperator(mra, mra.getRootScale()) { @@ -40,7 +50,7 @@ ABGVOperator::ABGVOperator(const MultiResolutionAnalysis &mra, double a, d } template void ABGVOperator::initialize(double a, double b) { - int bw = 0; + int bw = 0; // Operator bandwidth if (std::abs(a) > MachineZero) bw = 1; if (std::abs(b) > MachineZero) bw = 1; @@ -67,4 +77,4 @@ template class ABGVOperator<1>; template class ABGVOperator<2>; template class ABGVOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/BSOperator.cpp b/src/operators/BSOperator.cpp index da736ef7e..62b8e30c3 100644 --- a/src/operators/BSOperator.cpp +++ b/src/operators/BSOperator.cpp @@ -32,6 +32,10 @@ namespace mrcpp { +/** @returns New BSOperator object + * @param[in] mra: Which MRA the operator is defined 
+ * @param[in] order: Derivative order, defined for 1, 2 and 3 + */ template BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) : DerivativeOperator(mra, mra.getRootScale()) { @@ -40,11 +44,11 @@ BSOperator::BSOperator(const MultiResolutionAnalysis &mra, int order) } template void BSOperator::initialize() { - int bw = 1; + int bw = 1; // Operator bandwidth auto oper_mra = this->getOperatorMRA(); TreeBuilder<2> builder; - BSCalculator calculator(oper_mra.getScalingBasis(), this->order); + BSCalculator calculator(oper_mra.getScalingBasis(), this->order); BandWidthAdaptor adaptor(bw, oper_mra.getMaxScale()); auto o_tree = std::make_unique(oper_mra, MachineZero); @@ -64,4 +68,4 @@ template class BSOperator<1>; template class BSOperator<2>; template class BSOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/CartesianConvolution.cpp b/src/operators/CartesianConvolution.cpp index d33067a53..64ac5491d 100644 --- a/src/operators/CartesianConvolution.cpp +++ b/src/operators/CartesianConvolution.cpp @@ -47,33 +47,23 @@ namespace mrcpp { -CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra, - GaussExp<1> &kernel, - double prec) +CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra, GaussExp<1> &kernel, double prec) : ConvolutionOperator<3>(mra) , sep_rank(kernel.size()) { int oldlevel = Printer::setPrintLevel(0); - // Configure precision: operator vs. 
kernel fit this->setBuildPrec(prec); - auto o_prec = prec; // Operator assembly precision - auto k_prec = prec / 10.0; // Kernel fitting precision + auto o_prec = prec; + auto k_prec = prec / 10.0; - // Batch 0: monomial power {0} for (auto &k : kernel) k->setPow({0}); this->initialize(kernel, k_prec, o_prec); - - // Batch 1: monomial power {1} for (auto &k : kernel) k->setPow({1}); this->initialize(kernel, k_prec, o_prec); - - // Batch 2: monomial power {2} for (auto &k : kernel) k->setPow({2}); this->initialize(kernel, k_prec, o_prec); - // Declare separable rank this->initOperExp(this->sep_rank); - Printer::setPrintLevel(oldlevel); } @@ -87,4 +77,4 @@ void CartesianConvolution::setCartesianComponents(int x, int y, int z) { for (int i = 0; i < this->sep_rank; i++) this->assign(i, 2, this->raw_exp[z_shift + i].get()); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/ConvolutionOperator.cpp b/src/operators/ConvolutionOperator.cpp index 494fbb20a..9d37929aa 100644 --- a/src/operators/ConvolutionOperator.cpp +++ b/src/operators/ConvolutionOperator.cpp @@ -48,9 +48,7 @@ namespace mrcpp { template -ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, - GaussExp<1> &kernel, - double prec) +ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec) : MWOperator(mra, mra.getRootScale(), -10) { int oldlevel = Printer::setPrintLevel(0); @@ -64,11 +62,7 @@ ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mr } template -ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, - GaussExp<1> &kernel, - double prec, - int root, - int reach) +ConvolutionOperator::ConvolutionOperator(const MultiResolutionAnalysis &mra, GaussExp<1> &kernel, double prec, int root, int reach) : MWOperator(mra, root, reach) { int oldlevel = Printer::setPrintLevel(0); @@ -81,8 +75,7 @@ ConvolutionOperator::ConvolutionOperator(const 
MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } -template -void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) { +template void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) { auto k_mra = this->getKernelMRA(); auto o_mra = this->getOperatorMRA(); @@ -90,18 +83,18 @@ void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, doub OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); for (int i = 0; i < kernel.size(); i++) { + // Rescale Gaussian for D-dim application auto *k_func = kernel.getFunc(i).copy(); - k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), - k_func->getCoef())); + k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), k_func->getCoef())); FunctionTree<1> k_tree(k_mra); - mrcpp::build_grid(k_tree, *k_func); - mrcpp::project(k_prec, k_tree, *k_func); + mrcpp::build_grid(k_tree, *k_func); // Generate empty grid to hold narrow Gaussian + mrcpp::project(k_prec, k_tree, *k_func); // Project Gaussian starting from the empty grid delete k_func; CrossCorrelationCalculator calculator(k_tree); auto o_tree = std::make_unique(o_mra, o_prec); - builder.build(*o_tree, calculator, adaptor, -1); + builder.build(*o_tree, calculator, adaptor, -1); // Expand 1D kernel into 2D operator Timer trans_t; o_tree->mwTransform(BottomUp); @@ -114,8 +107,7 @@ void ConvolutionOperator::initialize(GaussExp<1> &kernel, double k_prec, doub } } -template -MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { +template MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); @@ -138,12 +130,12 @@ MultiResolutionAnalysis<1> ConvolutionOperator::getKernelMRA() const { if (box.size(i) > reach) reach = box.size(i); } } - auto start_l = std::array{-reach}; - auto tot_l = std::array{2 * reach}; + auto 
tot_l = std::array{2 * reach}; + // Zero in argument since operators are only implemented + // for uniform scaling factor auto sf = std::array{box.getScalingFactor(0)}; BoundingBox<1> kern_box(root, start_l, tot_l, sf); - MultiResolutionAnalysis<1> kern_mra(kern_box, *kern_basis); delete kern_basis; return kern_mra; @@ -153,4 +145,4 @@ template class ConvolutionOperator<1>; template class ConvolutionOperator<2>; template class ConvolutionOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/DerivativeConvolution.cpp b/src/operators/DerivativeConvolution.cpp index 3f0fa2003..30a655218 100644 --- a/src/operators/DerivativeConvolution.cpp +++ b/src/operators/DerivativeConvolution.cpp @@ -29,6 +29,12 @@ namespace mrcpp { +/** @returns New DerivativeConvolution object + * @param[in] mra: Which MRA the operator is defined + * @param[in] pr: Build precision, closeness to delta function + * @details This will project a kernel of a single differentiated + * gaussian with exponent sqrt(10/build_prec). + */ template DerivativeConvolution::DerivativeConvolution(const MultiResolutionAnalysis &mra, double prec) : ConvolutionOperator(mra) { diff --git a/src/operators/HeatOperator.cpp b/src/operators/HeatOperator.cpp index 25742bb81..cad3d9139 100644 --- a/src/operators/HeatOperator.cpp +++ b/src/operators/HeatOperator.cpp @@ -29,6 +29,15 @@ namespace mrcpp { +/** @brief Constructor of the HeatOperator object + * @returns New HeatOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] t: Time moment + * @param[in] prec: Build precision + * @details This will project a kernel of a single gaussian with + * exponent \f$ 1/(4t) \f$. 
+ * + */ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec) : ConvolutionOperator(mra) { @@ -45,6 +54,22 @@ HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, d Printer::setPrintLevel(oldlevel); } +/** @brief Constructor of the HeatOperator object in case of Periodic Boundary Conditions (PBC) + * @returns New HeatOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] t: Time moment + * @param[in] prec: Build precision + * @param[in] root: root scale of operator. + * @param[in] reach: width at root scale (applies to periodic boundary conditions) + * @details This will project a kernel of a single gaussian with + * exponent \f$ 1/(4t) \f$. + * This version of the constructor + * is used for calculations within periodic boundary conditions (PBC). + * The \a root parameter is the coarsest negative scale at which the operator + * is applied. The \a reach parameter is the bandwidth of the operator at + * the root scale.
For details see \ref MWOperator + * + */ template HeatOperator::HeatOperator(const MultiResolutionAnalysis &mra, double t, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { @@ -65,4 +90,4 @@ template class HeatOperator<1>; template class HeatOperator<2>; template class HeatOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/HelmholtzKernel.cpp b/src/operators/HelmholtzKernel.cpp index d36e97720..7d8ed0467 100644 --- a/src/operators/HelmholtzKernel.cpp +++ b/src/operators/HelmholtzKernel.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 7, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "HelmholtzKernel.h" #include @@ -32,38 +42,41 @@ namespace mrcpp { +/** generate an approximation of the 3d helmholtz kernel expanded in gaussian functions + */ HelmholtzKernel::HelmholtzKernel(double mu, double epsilon, double r_min, double r_max) : GaussExp<1>() { - const double r0 = r_min / r_max; - const double r1 = r_max; - const double mu_tilde = mu * r1; - - const long double t = std::max((-2.5L * std::log(epsilon)), 5.0L); - const double s1 = -std::log(4.0L * t / (mu_tilde * mu_tilde)) / 2.0L; - const double s2 = std::log(t / (r0 * r0)) / 2.0L; + // Constructed on [rMin/rMax, 1.0], and then rescaled to [rMin,rMax] + double r0 = r_min / r_max; + double r1 = r_max; + double mu_tilde = mu * r1; - const double h = 1.0 / (0.20L - 0.47L * std::log10(epsilon)); - const int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1.0); + // Set the truncation limits s1,s2 of the integral (integrate over [s1,s2]) + // for achieving relative error epsilon + double t = std::max((-2.5L * std::log(epsilon)), 5.0L); + double s1 = -std::log(4 * t / (mu_tilde * mu_tilde)) / 2; + double s2 = std::log(t / (r0 * r0)) / 2; + // Now, set the proper step size h for use in the trapezoidal rule for given MU + double h = 1.0 / (0.20L - 0.47L * std::log10(epsilon)); + int 
n_exp = static_cast(std::ceil((s2 - s1) / h) + 1); if (n_exp > MaxSepRank) MSG_ABORT("Maximum separation rank exceeded."); - for (int i = 0; i < n_exp; ++i) { - const double s = s1 + h * i; - - const double temp = -2.0 * s; - const double temp2 = - (mu_tilde * mu_tilde) * std::exp(temp) / 4.0 + s; - - double beta = h * (2.0 / root_pi) * std::exp(temp2); - double alpha = std::exp(2.0L * s); + for (int i = 0; i < n_exp; i++) { + double arg = s1 + h * i; + double temp = -arg * 2.0; + double temp2 = -mu_tilde * mu_tilde * std::exp(temp) / 4.0 + arg; + double beta = (h * (2.0 / root_pi) * std::exp(temp2)); + double temp3 = 2.0L * arg; + double alpha = std::exp(temp3); alpha *= 1.0 / (r1 * r1); - beta *= 1.0 / r1; - - if (i == 0 || i == (n_exp - 1)) beta *= 0.5; + beta *= 1.0 / r1; + if (i == 0 or i == (n_exp - 1)) { beta *= 1.0 / 2.0; } GaussFunc<1> gFunc(alpha, beta); this->append(gFunc); } } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/HelmholtzOperator.cpp b/src/operators/HelmholtzOperator.cpp index d50752d2d..59d6311ff 100644 --- a/src/operators/HelmholtzOperator.cpp +++ b/src/operators/HelmholtzOperator.cpp @@ -26,10 +26,18 @@ #include "HelmholtzOperator.h" #include "HelmholtzKernel.h" #include "utils/Printer.h" -#include namespace mrcpp { +/** @returns New HelmholtzOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] m: Exponential parameter of the operator + * @param[in] pr: Build precision, closeness to exp(-mu*r)/r + * @details This will construct a gaussian expansion to approximate + * exp(-mu*r)/r, and project each term into a one-dimensional MW operator. + * Subsequent application of this operator will apply each of the terms to + * the input function in all Cartesian directions. 
+ */ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, double mu, double prec) : ConvolutionOperator<3>(mra) { int oldlevel = Printer::setPrintLevel(0); @@ -57,6 +65,7 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); + // Adjust r_max for periodic world auto rel_root = this->oper_root - this->MRA.getRootScale(); r_max *= std::pow(2.0, -rel_root); r_max *= (2.0 * this->oper_reach) + 1.0; @@ -68,4 +77,4 @@ HelmholtzOperator::HelmholtzOperator(const MultiResolutionAnalysis<3> &mra, doub Printer::setPrintLevel(oldlevel); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/IdentityConvolution.cpp b/src/operators/IdentityConvolution.cpp index ede580ef9..038d076cc 100644 --- a/src/operators/IdentityConvolution.cpp +++ b/src/operators/IdentityConvolution.cpp @@ -29,6 +29,13 @@ namespace mrcpp { +/** @brief Constructor of the IdentityConvolution object + * @returns New IdentityConvolution object + * @param[in] mra: Which MRA the operator is defined + * @param[in] prec: Build precision, closeness to delta function + * @details This will project a kernel of a single gaussian with + * exponent sqrt(10/build_prec). + */ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec) : ConvolutionOperator(mra) { @@ -45,6 +52,19 @@ IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } +/** @brief Constructor of the IdentityConvolution object in case of Periodic Boundary Conditions (PBC) + * @returns New IdentityConvolution object + * @param[in] mra: Which MRA the operator is defined + * @param[in] prec: Build precision, closeness to delta function + * @param[in] root: root scale of operator. 
+ * @param[in] reach: width at root scale (applies to periodic boundary conditions) + * @details This will project a kernel of a single gaussian with + * exponent sqrt(10/build_prec). This version of the constructor + * is used for calculations within periodic boundary conditions (PBC). + * The \a root parameter is the coarsest negative scale at which the operator + * is applied. The \a reach parameter is the bandwidth of the operator at + * the root scale. For details see \ref MWOperator + */ template IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mra, double prec, int root, int reach) : ConvolutionOperator(mra, root, reach) { @@ -61,9 +81,8 @@ IdentityConvolution::IdentityConvolution(const MultiResolutionAnalysis &mr Printer::setPrintLevel(oldlevel); } -/* Explicit template instantiations */ template class IdentityConvolution<1>; template class IdentityConvolution<2>; template class IdentityConvolution<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/MWOperator.cpp b/src/operators/MWOperator.cpp index 21ce43540..225108f48 100644 --- a/src/operators/MWOperator.cpp +++ b/src/operators/MWOperator.cpp @@ -32,8 +32,7 @@ using namespace Eigen; namespace mrcpp { -template -void MWOperator::initOperExp(int M) { +template void MWOperator::initOperExp(int M) { if (this->raw_exp.size() < M) MSG_ABORT("Incompatible raw expansion"); this->oper_exp.clear(); for (int m = 0; m < M; m++) { @@ -41,28 +40,27 @@ void MWOperator::initOperExp(int M) { otrees.fill(nullptr); this->oper_exp.push_back(otrees); } + + // Sets up an isotropic operator with the first M raw terms in all directions for (int i = 0; i < M; i++) for (int d = 0; d < D; d++) assign(i, d, this->raw_exp[i].get()); } -template -OperatorTree &MWOperator::getComponent(int i, int d) { +template OperatorTree &MWOperator::getComponent(int i, int d) { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D)
MSG_ERROR("Dimension out of bounds"); if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component"); return *this->oper_exp[i][d]; } -template -const OperatorTree &MWOperator::getComponent(int i, int d) const { +template const OperatorTree &MWOperator::getComponent(int i, int d) const { if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds"); if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds"); if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component"); return *this->oper_exp[i][d]; } -template -int MWOperator::getMaxBandWidth(int depth) const { +template int MWOperator::getMaxBandWidth(int depth) const { int maxWidth = -1; if (depth < 0) { maxWidth = *std::max_element(this->band_max.begin(), this->band_max.end()); @@ -72,15 +70,14 @@ int MWOperator::getMaxBandWidth(int depth) const { return maxWidth; } -template -void MWOperator::clearBandWidths() { +template void MWOperator::clearBandWidths() { for (auto &i : this->oper_exp) for (int d = 0; d < D; d++) i[d]->clearBandWidth(); } -template -void MWOperator::calcBandWidths(double prec) { +template void MWOperator::calcBandWidths(double prec) { int maxDepth = 0; + // First compute BandWidths and find depth of the deepest component for (auto &i : this->oper_exp) { for (int d = 0; d < D; d++) { OperatorTree &oTree = *i[d]; @@ -92,12 +89,13 @@ void MWOperator::calcBandWidths(double prec) { } this->band_max = std::vector(maxDepth + 1, -1); + // Find the largest effective bandwidth at each scale for (auto &i : this->oper_exp) { for (int d = 0; d < D; d++) { const OperatorTree &oTree = *i[d]; const BandWidth &bw = oTree.getBandWidth(); - for (int n = 0; n <= bw.getDepth(); n++) { - for (int j = 0; j < 4; j++) { + for (int n = 0; n <= bw.getDepth(); n++) { // scale loop + for (int j = 0; j < 4; j++) { // component loop int w = bw.getWidth(n, j); if (w > this->band_max[n]) this->band_max[n] = w; } @@ -109,8 +107,7 @@ void MWOperator::calcBandWidths(double prec) { println(20, std::endl); 
} -template -MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { +template MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { const BoundingBox &box = this->MRA.getWorldBox(); const ScalingBasis &basis = this->MRA.getScalingBasis(); @@ -122,6 +119,8 @@ MultiResolutionAnalysis<2> MWOperator::getOperatorMRA() const { } auto l = std::array{}; auto nbox = std::array{reach, reach}; + // Zero in argument since operators are only implemented + // for uniform scaling factor auto sf = std::array{box.getScalingFactor(0), box.getScalingFactor(0)}; BoundingBox<2> oper_box(this->oper_root, l, nbox, sf); @@ -133,4 +132,4 @@ template class MWOperator<1>; template class MWOperator<2>; template class MWOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/OperatorStatistics.cpp b/src/operators/OperatorStatistics.cpp index f6261a50b..f58ae2b0d 100644 --- a/src/operators/OperatorStatistics.cpp +++ b/src/operators/OperatorStatistics.cpp @@ -66,6 +66,7 @@ OperatorStatistics::~OperatorStatistics() { delete totCompCount; } +/** Sum all node counters from all threads. */ void OperatorStatistics::flushNodeCounters() { for (int i = 0; i < this->nThreads; i++) { this->totFCount += this->fCount[i]; @@ -79,14 +80,14 @@ void OperatorStatistics::flushNodeCounters() { } } -template -void OperatorStatistics::incrementGNodeCounters(const MWNode &gNode) { +/** Increment g-node usage counter. Needed for load balancing. */ +template void OperatorStatistics::incrementGNodeCounters(const MWNode &gNode) { int thread = mrcpp_get_thread_num(); this->gCount[thread]++; } -template -void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int ft, int gt) { +/** Increment operator application counter. 
*/ +template void OperatorStatistics::incrementFNodeCounters(const MWNode &fNode, int ft, int gt) { int thread = mrcpp_get_thread_num(); this->fCount[thread]++; (*this->compCount[thread])(ft, gt) += 1; @@ -116,4 +117,4 @@ template void OperatorStatistics::incrementGNodeCounters<1, ComplexDouble>(const template void OperatorStatistics::incrementGNodeCounters<2, ComplexDouble>(const MWNode<2, ComplexDouble> &gNode); template void OperatorStatistics::incrementGNodeCounters<3, ComplexDouble>(const MWNode<3, ComplexDouble> &gNode); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/PHOperator.cpp b/src/operators/PHOperator.cpp index 7c17b22ef..11ccee0e9 100644 --- a/src/operators/PHOperator.cpp +++ b/src/operators/PHOperator.cpp @@ -32,6 +32,10 @@ namespace mrcpp { +/** @returns New PHOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] order: Derivative order, defined for 1 and 2 + */ template PHOperator::PHOperator(const MultiResolutionAnalysis &mra, int order) : DerivativeOperator(mra, mra.getRootScale(), -10) { @@ -47,7 +51,7 @@ template void PHOperator::initialize() { auto &basis = this->MRA.getScalingBasis(); PHCalculator calculator(basis, this->order); - int bw = 1; + int bw = 1; // Operator bandwidth int max_scale = this->MRA.getMaxScale(); BandWidthAdaptor adaptor(bw, max_scale); @@ -68,4 +72,4 @@ template class PHOperator<1>; template class PHOperator<2>; template class PHOperator<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/PoissonKernel.cpp b/src/operators/PoissonKernel.cpp index 42a1b5457..75e60eb7c 100644 --- a/src/operators/PoissonKernel.cpp +++ b/src/operators/PoissonKernel.cpp @@ -23,6 +23,16 @@ * */ +/* + * + * + * \date Jul 7, 2009 + * \author Jonas Juselius \n + * CTCC, University of Tromsø + * + * \breif + */ + #include "PoissonKernel.h" #include @@ -32,24 +42,27 @@ namespace mrcpp { +/** generate an approximation 
of the 3d poisson kernel expanded in + * gaussian functions this routine assumes that the expansion be centered + */ PoissonKernel::PoissonKernel(double epsilon, double r_min, double r_max) : GaussExp<1>() { - // Constructed on [rMin/rMax, 1.0], then rescaled to [rMin, rMax] + // Constructed on [rMin/rMax, 1.0], and then rescaled to [rMin,rMax] double r0 = r_min / r_max; double r1 = r_max; - // Choose t1, t2 so that tail contributions are below epsilon double t1 = 1.0L; while ((2.0 * t1 * std::exp(-t1)) > epsilon) t1 *= 1.1L; double t2 = 1.0L; while ((std::sqrt(t2) * std::exp(-t2) / r0) > epsilon) t2 *= 1.1L; - // Truncation window [s1, s2] ensuring relative error ~ epsilon + // Set the truncation limits s1,s2 of the integral (integrate over [s1,s2]) + // for achieving relative error epsilon double s1 = -std::log(2.0 * t1); double s2 = std::log(t2 / (r0 * r0)) / 2.0; - // Trapezoidal step size h determined from epsilon (empirical fit) + // Now, set the step size h for use in the trapezoidal rule for given MU double h = 1.0 / (0.2L - 0.47L * std::log10(epsilon)); int n_exp = static_cast(std::ceil((s2 - s1) / h) + 1); if (n_exp > MaxSepRank) MSG_ABORT("Maximum separation rank exceeded."); @@ -60,16 +73,12 @@ PoissonKernel::PoissonKernel(double epsilon, double r_min, double r_max) double cosharg = std::cosh(arg); double onepexp = 1.0 + std::exp(-sinharg); - // Parameters before rescaling back to [r_min, r_max] double expo = 4.0L * (sinharg + std::log(onepexp)) * (sinharg + std::log(onepexp)); double coef = h * (4.0L / root_pi) * cosharg / onepexp; - // Rescale to physical interval expo *= 1.0 / (r1 * r1); coef *= 1.0 / r1; - - // Trapezoidal rule endpoint correction - if (i == 0 || i == (n_exp - 1)) coef *= 1.0 / 2.0; + if (i == 0 or i == (n_exp - 1)) coef *= 1.0 / 2.0; GaussFunc<1> gFunc(expo, coef); this->append(gFunc); diff --git a/src/operators/PoissonOperator.cpp b/src/operators/PoissonOperator.cpp index ccff16c5b..582d990a8 100644 --- 
a/src/operators/PoissonOperator.cpp +++ b/src/operators/PoissonOperator.cpp @@ -29,6 +29,14 @@ namespace mrcpp { +/** @returns New PoissonOperator object + * @param[in] mra: Which MRA the operator is defined + * @param[in] prec: Build precision, closeness to 1/r + * @details This will construct a gaussian expansion to approximate 1/r, + * and project each term into a one-dimensional MW operator. Subsequent + * application of this operator will apply each of the terms to the input + * function in all Cartesian directions. + */ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double prec) : ConvolutionOperator<3>(mra) { int oldlevel = Printer::setPrintLevel(0); @@ -56,6 +64,7 @@ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double p double r_min = this->MRA.calcMinDistance(k_prec); double r_max = this->MRA.calcMaxDistance(); + // Adjust r_max for periodic world auto rel_root = this->oper_root - this->MRA.getRootScale(); r_max *= std::pow(2.0, -rel_root); r_max *= (2.0 * this->oper_reach) + 1.0; @@ -67,4 +76,4 @@ PoissonOperator::PoissonOperator(const MultiResolutionAnalysis<3> &mra, double p Printer::setPrintLevel(oldlevel); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/operators/TimeEvolutionOperator.cpp b/src/operators/TimeEvolutionOperator.cpp index cda02e812..09913a591 100644 --- a/src/operators/TimeEvolutionOperator.cpp +++ b/src/operators/TimeEvolutionOperator.cpp @@ -24,6 +24,7 @@ */ #include "TimeEvolutionOperator.h" +//#include "MRCPP/MWOperators" #include "core/InterpolatingBasis.h" #include "core/LegendreBasis.h" @@ -55,47 +56,75 @@ namespace mrcpp { +/** @brief A uniform constructor for TimeEvolutionOperator class. + * + * @param[in] mra: MRA. + * @param[in] prec: precision. + * @param[in] time: the time moment (step). + * @param[in] finest_scale: the operator tree is constructed uniformly down to this scale.
+ * @param[in] imaginary: defines the real (false) or imaginary (true) part of the semigroup. + * @param[in] max_Jpower: maximum amount of power integrals used. + * + * @details Constructs either real or imaginary part of the Schrodinger semigroup at a given time moment. + * + */ template -TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, - double prec, - double time, - int finest_scale, - bool imaginary, - int max_Jpower) - : ConvolutionOperator(mra, mra.getRootScale(), -10) { +TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower) + : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well +{ int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType()); this->cross_correlation = &cross_correlation; - initialize(time, finest_scale, imaginary, max_Jpower); + initialize(time, finest_scale, imaginary, max_Jpower); // will go outside of the constructor in future - this->initOperExp(1); + this->initOperExp(1); // this turns out to be important Printer::setPrintLevel(oldlevel); } +/** @brief An adaptive constructor for TimeEvolutionOperator class. + * + * @param[in] mra: MRA. + * @param[in] prec: precision. + * @param[in] time: the time moment (step). + * @param[in] imaginary: defines the real (false) or imaginary (true) part of the semigroup. + * @param[in] max_Jpower: maximum amount of power integrals used. + * + * @details Adaptively constructs either real or imaginary part of the Schrodinger semigroup at a given time moment. + * It is recommended for use in case of high polynomial order in use of the scaling basis. + * + * @note For technical reasons the operator tree is constructed no deeper than to scale \f$ n = 18 \f$. + * This should be weakened in future.
+ * + */ template -TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, - double prec, - double time, - bool imaginary, - int max_Jpower) - : ConvolutionOperator(mra, mra.getRootScale(), -10) { +TimeEvolutionOperator::TimeEvolutionOperator(const MultiResolutionAnalysis &mra, double prec, double time, bool imaginary, int max_Jpower) + : ConvolutionOperator(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well +{ int oldlevel = Printer::setPrintLevel(0); this->setBuildPrec(prec); SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType()); this->cross_correlation = &cross_correlation; - initialize(time, imaginary, max_Jpower); + initialize(time, imaginary, max_Jpower); // will go outside of the constructor in future - this->initOperExp(1); + this->initOperExp(1); // this turns out to be important Printer::setPrintLevel(oldlevel); } -template -void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_Jpower) { +/** @brief Creates Re or Im of operator + * + * @details Adaptive down to scale \f$ N = 18 \f$. + * This scale limit bounds the amount of JpowerIntegrals + * to be calculated. + * @note In future work we plan to optimize calculation of JpowerIntegrals so that we calculate + * only needed ones, while building the tree (in progress). 
+ * + */ +template void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_Jpower) { int N = 18; double o_prec = this->build_prec; @@ -111,9 +140,11 @@ void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_J mrcpp::TreeBuilder<2> builder; builder.build(*o_tree, calculator, adaptor, N); + // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(BottomUp); o_tree->removeRoughScaleNoise(); + // o_tree->clearSquareNorm(); //does not affect printing o_tree->calcSquareNorm(); o_tree->setupOperNodeCache(); @@ -125,11 +156,16 @@ void TimeEvolutionOperator::initialize(double time, bool imaginary, int max_J for (int n = 0; n <= N + 1; n++) delete J[n]; } -template -void TimeEvolutionOperator::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) { +/** @brief Creates Re or Im of operator + * + * @details Uniform down to finest scale. + * + */ +template void TimeEvolutionOperator::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) { double o_prec = this->build_prec; auto o_mra = this->getOperatorMRA(); + // Setup uniform tree builder TreeBuilder<2> builder; SplitAdaptor<2> uniform(o_mra.getMaxScale(), true); @@ -140,8 +176,9 @@ void TimeEvolutionOperator::initialize(double time, int finest_scale, bool im TimeEvolution_CrossCorrelationCalculator calculator(J, this->cross_correlation, imaginary); auto o_tree = std::make_unique(o_mra, o_prec); - builder.build(*o_tree, calculator, uniform, N); + builder.build(*o_tree, calculator, uniform, N); // Expand 1D kernel into 2D operator + // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(BottomUp); o_tree->calcSquareNorm(); @@ -154,8 +191,15 @@ void TimeEvolutionOperator::initialize(double time, int finest_scale, bool im for (int n = 0; n <= N + 1; n++) delete J[n]; } -template -void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) { +/** @brief Creates Re 
or Im of operator (in progress) + * + * @details Tree construction starts uniformly and then continues adaptively down to scale \f$ N = 18 \f$. + * This scale limit bounds the amount of JpowerIntegrals + * to be calculated. + * @note This method is not ready for use and should not be used (in progress). + * + */ +template void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) { MSG_ERROR("Not implemented yet method."); double o_prec = this->build_prec; @@ -178,6 +222,7 @@ void TimeEvolutionOperator::initializeSemiUniformly(double time, bool imagina OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale()); builder.build(*o_tree, calculator, adaptor, 13); + // Postprocess to make the operator functional Timer trans_t; o_tree->mwTransform(mrcpp::BottomUp); o_tree->removeRoughScaleNoise(); diff --git a/src/treebuilders/ABGVCalculator.cpp b/src/treebuilders/ABGVCalculator.cpp index 11c6e6144..10252f46a 100644 --- a/src/treebuilders/ABGVCalculator.cpp +++ b/src/treebuilders/ABGVCalculator.cpp @@ -23,30 +23,6 @@ * */ -/** - * @file ABGVCalculator.cpp - * @brief Local block assembly for the Alpert–Beylkin–Gines–Vozovoi derivative operator. - * - * @details - * This module implements the calculator that fills the per-node matrix blocks for the - * ABGV derivative operator used in multiresolution form. It is consumed by a - * TreeBuilder to populate an OperatorTree with local stencil entries expressed in - * the chosen scaling basis (interpolating or Legendre). - * - * The assembly depends on: - * - the basis type and quadrature order, - * - precomputed endpoint values of basis functions on the reference interval [0,1], - * - a basis-dependent local derivative matrix K, - * - two boundary weights A and B that select central, forward, backward, or - * semi-local differences. - * - * For each operator node the calculator determines the relative logical offset - * between interacting cells. 
Only three cases produce non-zero local couplings: - * left neighbor, same cell, and right neighbor. The four component blocks of the - * 2-by-2 cell coupling are then filled accordingly, rescaled to the current level, - * compressed to multiwavelet form, and cached with per-node norms. - */ - #include "ABGVCalculator.h" #include "core/InterpolatingBasis.h" #include "core/LegendreBasis.h" @@ -59,22 +35,6 @@ using Eigen::VectorXd; namespace mrcpp { -/** - * @brief Construct an ABGVCalculator and precompute basis-dependent tables. - * - * @param basis Scaling basis that defines quadrature order and function family. - * @param a Left boundary weight that controls semi-local coupling. - * @param b Right boundary weight that controls semi-local coupling. - * - * @details - * The constructor allocates and fills: - * - K: a kp1-by-kp1 local derivative matrix assembled on the reference cell, - * - valueZero: endpoint values phi_i(0) for all basis indices, - * - valueOne: endpoint values phi_i(1) for all basis indices. - * - * The exact formulas are basis dependent and computed in calcKMatrix and - * calcValueVectors respectively. - */ ABGVCalculator::ABGVCalculator(const ScalingBasis &basis, double a, double b) : A(a) , B(b) { @@ -86,18 +46,6 @@ ABGVCalculator::ABGVCalculator(const ScalingBasis &basis, double a, double b) calcValueVectors(basis); } -/** - * @brief Precompute endpoint values of scaling functions on [0, 1]. - * - * @param basis Scaling basis. - * - * @details - * - Interpolating basis: values are obtained by direct evaluation at 0 and 1. - * - Legendre basis on [0, 1]: closed-form values are used. - * For index i we set - * valueOne(i) = sqrt(2*i + 1), - * valueZero(i) = (-1)^i * sqrt(2*i + 1). 
- */ void ABGVCalculator::calcValueVectors(const ScalingBasis &basis) { int kp1 = basis.getQuadratureOrder(); double sqrtCoef[kp1]; @@ -124,19 +72,6 @@ void ABGVCalculator::calcValueVectors(const ScalingBasis &basis) { } } -/** - * @brief Assemble the local derivative matrix K on the reference cell. - * - * @param basis Scaling basis. - * - * @details - * The construction of K depends on the basis family: - * - Interpolating basis: K(i,j) = 2 * sqrt(w_j) * d(phi_i)/dx evaluated at x_j, - * where (x_j, w_j) are Gauss–Legendre quadrature nodes and weights provided - * by QuadratureCache. The factor 2 accounts for mapping from [-1,1] to [0,1]. - * - Legendre basis: a closed-form sparse pattern is used where K(j,i) is non-zero - * only if (i - j) is odd, in which case K(j,i) = 2 * sqrt(2i+1) * sqrt(2j+1). - */ void ABGVCalculator::calcKMatrix(const ScalingBasis &basis) { int kp1 = basis.getQuadratureOrder(); double sqrtCoef[kp1]; @@ -166,27 +101,6 @@ void ABGVCalculator::calcKMatrix(const ScalingBasis &basis) { } } -/** - * @brief Fill the local operator block for a given operator node and finalize it. - * - * @param node Operator node to be populated. - * - * @details - * The node couples two 1D intervals at the same scale; its logical index encodes - * which pair is assembled. Let l = idx[1] - idx[0]. Three cases are handled: - * - * - l = 0: intra-cell coupling. All four sub-blocks are filled using endpoint - * values and K, with boundary weights A and B selecting central or semi-local - * behavior. - * - l = +1: right neighbor coupling. Only the block that mixes left and right - * components is filled, proportional to B. - * - l = -1: left neighbor coupling. Only the symmetric block is filled, - * proportional to A. - * - * After filling, all entries are scaled by 2^(n+1) where n = idx.getScale() to - * account for the derivative scaling at that level, then the node is transformed - * with compression, marked as having coefficients, and its norms are computed. 
- */ void ABGVCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); diff --git a/src/treebuilders/BSCalculator.cpp b/src/treebuilders/BSCalculator.cpp index 565cd9c00..be6285206 100644 --- a/src/treebuilders/BSCalculator.cpp +++ b/src/treebuilders/BSCalculator.cpp @@ -23,44 +23,6 @@ * */ -/** - * @file BSCalculator.cpp - * @brief Local stencil builder for smooth multiresolution derivative operators (“BS” family). - * - * @details - * The **BSCalculator** assembles the *local* building blocks used by the smooth - * derivative operator (see BSOperator). For a chosen scaling basis and derivative - * order \( n\in\{1,2,3\} \), it loads three pretabulated coupling matrices - * \f$S_{-1}, S_{0}, S_{+1}\f$ which represent the action of the derivative on a - * 1D scaling block and its immediate neighbors (left, center, right) at a given scale. - * - * Source of the matrices: - * - Files are looked up via `details::find_filters()`. - * - Filenames depend on the scaling basis type and derivative order: - * - Legendre scaling: `L_b-spline-deriv{n}.txt` - * - Interpolating scaling: `I_b-spline-deriv{n}.txt` - * - For each supported polynomial order `kp1 = 2..20`, the file stores a stacked - * 3·kp1 × kp1 array that is split into the three kp1 × kp1 blocks - * \f$S_{+1}\f$, \f$S_{0}\f$, \f$S_{-1}\f$ (in that order). - * - * Application on a node: - * - Given a 2D operator node (with index difference \f$\ell = i_1 - i_0 \in \{-1,0,+1\}\f$), - * BSCalculator writes the appropriate block(s) into the 2×2 corner layout of the node: - * - \f$\ell = -1\f$: left-neighbor coupling uses \f$S_{-1}\f$ - * - \f$\ell = 0 \f$: center block uses \f$S_{0}\f$, off-diagonals use \f$S_{\pm 1}\f$ - * - \f$\ell = +1\f$: right-neighbor coupling uses \f$S_{+1}\f$ - * - * Scale factor: - * - Derivatives scale as \f$2^{n\,(j+1)}\f$ where \f$n\f$ is the derivative order - * and \f$j+1\f$ is the node scale `np1`. The calculator multiplies all filled - * entries by \f$2^{n\,(j+1)}\f$. 
- * - * Limits and errors: - * - Supported derivative orders: 1, 2, 3. - * - Supported scaling orders: 1..20 (i.e., `kp1 = 2..21` in MRCPP terminology). - * - On unsupported cases or missing files, the code aborts with a diagnostic. - */ - #include "BSCalculator.h" #include @@ -74,19 +36,6 @@ using Eigen::MatrixXd; namespace mrcpp { -/** - * @brief Construct a BSCalculator and load derivative coupling blocks. - * - * @param basis Scaling basis (determines file family and polynomial order). - * @param n Derivative order (1, 2, or 3). - * - * @details - * Dispatches to #readSMatrix to load \f$S_{-1}, S_{0}, S_{+1}\f$ for the given basis - * and derivative order. Orders \f$n \ge 4\f$ are not implemented. - * - * @throws Aborts on unsupported derivative order, unsupported scaling order, - * or if the filter file cannot be opened. - */ BSCalculator::BSCalculator(const ScalingBasis &basis, int n) : diff_order(n) { if (this->diff_order <= 0) NOT_IMPLEMENTED_ABORT; @@ -96,28 +45,6 @@ BSCalculator::BSCalculator(const ScalingBasis &basis, int n) if (this->diff_order >= 4) NOT_IMPLEMENTED_ABORT; } -/** - * @brief Load the pretabulated derivative coupling matrices from disk. - * - * @param basis Scaling basis (type and order). - * @param n Character identifying derivative order: '1', '2' or '3'. - * - * @details - * - Chooses filename by basis type and derivative order. - * - Iterates over the entries in the file for polynomial orders `kp1 = 2..20` - * until it matches the current basis order (`basis.getScalingOrder() + 1`). - * - Splits the stacked 3·kp1 × kp1 array into three kp1 × kp1 blocks: - * \f$S_{+1}\f$, \f$S_{0}\f$, \f$S_{-1}\f$. - * - * File format expectations (per `kp1` section): - * - First line: integer `order` (must equal `kp1`). - * - Next 3·kp1 lines: kp1 numbers per line (row-major), forming the stacked matrix. 
- * - * @throws Aborts if: - * - the file cannot be opened, - * - the on-file order header does not match the expected `kp1`, - * - the basis scaling order is unsupported. - */ void BSCalculator::readSMatrix(const ScalingBasis &basis, char n) { std::string file; std::string path = details::find_filters(); @@ -146,32 +73,13 @@ void BSCalculator::readSMatrix(const ScalingBasis &basis, char n) { } if (kp1 == (basis.getScalingOrder() + 1)) { this->S_p1 = data.block(0 * kp1, 0, kp1, kp1); - this->S_0 = data.block(1 * kp1, 0, kp1, kp1); + this->S_0 = data.block(1 * kp1, 0, kp1, kp1); this->S_m1 = data.block(2 * kp1, 0, kp1, kp1); break; } } } -/** - * @brief Populate a 2D operator node with the appropriate local derivative blocks. - * - * @param node Operator node to fill (corner layout, 2×2 logical structure). - * - * @details - * Let \f$\ell = \text{idx}[1] - \text{idx}[0]\f$ denote the neighbor offset in the - * second minus the first index direction. Depending on \f$\ell\f$, write the relevant - * coupling block(s) into the node storage and multiply all entries by the scale factor - * \f$2^{\,\text{diff\_order}\cdot (j+1)}\f$, where \f$j+1 = \text{idx.getScale()}+1\f$. - * - * Block placement (coefficient planes are enumerated in the code as 0,1,2,3): - * - \f$\ell = +1\f$: only the “+1” plane is filled with \f$S_{+1}\f$. - * - \f$\ell = 0 \f$: planes [0,1,2,3] are filled with \f$S_{0}, S_{-1}, S_{+1}, S_{0}\f$. - * - \f$\ell = -1\f$: only the “+2” plane is filled with \f$S_{-1}\f$. - * - * After filling, the node is transformed (Compression), flagged as having coefficients, - * and its norms are computed. 
- */ void BSCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); @@ -220,4 +128,4 @@ void BSCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/ConvolutionCalculator.cpp b/src/treebuilders/ConvolutionCalculator.cpp index ae1d4012f..497fe0dd8 100644 --- a/src/treebuilders/ConvolutionCalculator.cpp +++ b/src/treebuilders/ConvolutionCalculator.cpp @@ -100,6 +100,8 @@ template void ConvolutionCalculator::printTimers() con Printer::setPrecision(oldprec); } +/** Initialize the number of nodes formally within the bandwidth of an + operator. The band size is used for thresholding. */ template void ConvolutionCalculator::initBandSizes() { for (int i = 0; i < this->oper->size(); i++) { // IMPORTANT: only 0-th dimension! @@ -112,6 +114,10 @@ template void ConvolutionCalculator::initBandSizes() { } } +/** Calculate the number of nodes within the bandwidth + * of an operator. Currently this routine ignores the fact that + * there are edges on the world box, and thus over estimates + * the number of nodes. This is different from the previous version. 
*/ template void ConvolutionCalculator::calcBandSizeFactor(MatrixXi &bs, int depth, const BandWidth &bw) { for (int gt = 0; gt < this->nComp; gt++) { for (int ft = 0; ft < this->nComp; ft++) { @@ -132,6 +138,7 @@ template void ConvolutionCalculator::calcBandSizeFacto bs(depth, this->nComp2) = bs.row(depth).maxCoeff(); } +/** Return a vector of nodes in F affected by O, given a node in G */ template MWNodeVector *ConvolutionCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { auto *band = new MWNodeVector; @@ -154,7 +161,7 @@ template MWNodeVector *ConvolutionCalculator::ma for (int i = 0; i < D; i++) { sIdx[i] = gIdx[i] - width; eIdx[i] = gIdx[i] + width; - // Consider world borders / periodic wrapping + // We need to consider the world borders int nboxes = fWorld.size(i) * (1 << o_depth); int c_i = cIdx[i] * (1 << o_depth); if (not periodic) { @@ -172,8 +179,8 @@ template MWNodeVector *ConvolutionCalculator::ma return band; } -template -void ConvolutionCalculator::fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim) { +/** Recursively retrieve all reachable f-nodes within the bandwidth. 
*/ +template void ConvolutionCalculator::fillOperBand(MWNodeVector *band, std::vector> &idx_band, NodeIndex &idx, const int *nbox, int dim) { int l_start = idx[dim]; for (int j = 0; j < nbox[dim]; j++) { // Recurse until dim == 0 @@ -225,13 +232,12 @@ template void ConvolutionCalculator::calcNode(MWNode os(gNode, tmpCoefs); this->operStat.incrementGNodeCounters(gNode); - // Get all nodes in f within the bandwidth of O around g + // Get all nodes in f within the bandwidth of O in g this->band_t[mrcpp_get_thread_num()]->resume(); std::vector> idx_band; MWNodeVector *fBand = makeOperBand(gNode, idx_band); this->band_t[mrcpp_get_thread_num()]->stop(); - // Build target threshold (relative by default; may be scaled by precFunc) MWTree &gTree = gNode.getMWTree(); double gThrs = gTree.getSquareNorm(); if (gThrs > 0.0) { @@ -239,9 +245,9 @@ template void ConvolutionCalculator::calcNode(MWNodeprecFunc(gNode.getNodeIndex()); gThrs = this->prec * precFac * std::sqrt(gThrs / nTerms); } + os.gThreshold = gThrs; - // Scan band and apply screened operator terms this->calc_t[mrcpp_get_thread_num()]->resume(); for (int n = 0; n < fBand->size(); n++) { MWNode &fNode = *(*fBand)[n]; @@ -268,6 +274,7 @@ template void ConvolutionCalculator::calcNode(MWNode void ConvolutionCalculator::applyOperComp(OperatorState &os) { double fNorm = os.fNode->getComponentNorm(os.ft); int o_depth = os.fNode->getScale() - this->oper->getOperatorRoot(); @@ -281,6 +288,13 @@ template void ConvolutionCalculator::applyOperComp(Ope } } +/** @brief Apply a single operator component (term) to a single f-node. + * + * @details Apply a single operator component (term) to a single f-node. + * Whether the operator is actually applied is determined by a screening threshold. + * Here we make use of the sparsity of matrices \f$ A, B, C \f$.
+ * + */ template void ConvolutionCalculator::applyOperator(int i, OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -296,7 +310,8 @@ template void ConvolutionCalculator::applyOperator(int auto &oTree = this->oper->getComponent(i, d); int oTransl = fIdx[d] - gIdx[d]; - // Per-direction bandwidth check + // The following will check the actual band width in each direction. + // Not needed if the thresholding at the end of this routine is active. int a = (os.gt & (1 << d)) >> d; int b = (os.ft & (1 << d)) >> d; int idx = (a << 1) + b; @@ -314,6 +329,8 @@ template void ConvolutionCalculator::applyOperator(int } } +/** Perform the required linear algebra operations in order to apply an +operator component to a f-node in a n-dimensional tensor space. */ template void ConvolutionCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -388,7 +405,6 @@ template MWNodeVector *ConvolutionCalculator::ge return nodeVec; } -// Explicit instantiations template class ConvolutionCalculator<1, double>; template class ConvolutionCalculator<2, double>; template class ConvolutionCalculator<3, double>; diff --git a/src/treebuilders/CopyAdaptor.cpp b/src/treebuilders/CopyAdaptor.cpp index cc4a3a308..8312ebb0f 100644 --- a/src/treebuilders/CopyAdaptor.cpp +++ b/src/treebuilders/CopyAdaptor.cpp @@ -71,7 +71,6 @@ template bool CopyAdaptor::splitNode(const MWNode; template class CopyAdaptor<2, double>; template class CopyAdaptor<3, double>; @@ -80,4 +79,4 @@ template class CopyAdaptor<1, ComplexDouble>; template class CopyAdaptor<2, ComplexDouble>; template class CopyAdaptor<3, ComplexDouble>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/CrossCorrelationCalculator.cpp b/src/treebuilders/CrossCorrelationCalculator.cpp index cb81b7de2..a5eef945d 100644 --- a/src/treebuilders/CrossCorrelationCalculator.cpp +++
b/src/treebuilders/CrossCorrelationCalculator.cpp @@ -89,8 +89,10 @@ template void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros double two_n = std::pow(2.0, -scale / 2.0); for (int i = 0; i < t_dim * kp1_d; i++) { auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0); + // This is only implemented for uniform scaling factors, + // hence the zero. TODO: make it work for non-uniform scaling coefs[i] = std::sqrt(scaling_factor) * two_n * vec_o(i); } } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp index e07ca1a1c..b298d1b6e 100644 --- a/src/treebuilders/DerivativeCalculator.cpp +++ b/src/treebuilders/DerivativeCalculator.cpp @@ -87,6 +87,7 @@ template void DerivativeCalculator::printTimers() cons } template void DerivativeCalculator::calcNode(MWNode &inpNode, MWNode &outNode) { + // if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw"); outNode.zeroCoefs(); int nComp = (1 << D); T tmpCoefs[outNode.getNCoefs()]; @@ -96,18 +97,19 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); if (abs(scaling_factor - 1.0) > MachineZero) { for (int i = 0; i < outNode.getNCoefs(); i++) outNode.getCoefs()[i] *= scaling_factor; } - outNode.calcNorms(); + outNode.calcNorms(); // TODO:required?
norms are not used for now } template void DerivativeCalculator::calcNode(MWNode &gNode) { @@ -118,12 +120,14 @@ template void DerivativeCalculator::calcNode(MWNode os(gNode, tmpCoefs); this->operStat.incrementGNodeCounters(gNode); + // Get all nodes in f within the bandwidth of O in g this->band_t[mrcpp_get_thread_num()].resume(); std::vector> idx_band; MWNodeVector fBand = makeOperBand(gNode, idx_band); this->band_t[mrcpp_get_thread_num()].stop(); this->calc_t[mrcpp_get_thread_num()].resume(); + for (int n = 0; n < fBand.size(); n++) { MWNode &fNode = *fBand[n]; NodeIndex &fIdx = idx_band[n]; @@ -139,6 +143,7 @@ template void DerivativeCalculator::calcNode(MWNodeapplyDir), oper->getOrder()); for (int i = 0; i < gNode.getNCoefs(); i++) gNode.getCoefs()[i] /= scaling_factor; this->calc_t[mrcpp_get_thread_num()].stop(); @@ -148,6 +153,7 @@ template void DerivativeCalculator::calcNode(MWNodenorm_t[mrcpp_get_thread_num()].stop(); } +/** Return a vector of nodes in F affected by O, given a node in G */ template MWNodeVector DerivativeCalculator::makeOperBand(const MWNode &gNode, std::vector> &idx_band) { assert(this->applyDir >= 0); assert(this->applyDir < D); @@ -155,11 +161,13 @@ template MWNodeVector DerivativeCalculator::make MWNodeVector band; const NodeIndex &idx_0 = gNode.getNodeIndex(); + // Assumes given width only in applyDir, otherwise width = 0 int width = this->oper->getMaxBandWidth(); for (int w = -width; w <= width; w++) { NodeIndex idx_w(idx_0); idx_w[this->applyDir] += w; + // returns -1 if out of bounds and 0 for periodic int rIdx_w = this->fTree->getRootIndex(idx_w); if (rIdx_w >= 0) { idx_band.push_back(idx_w); @@ -169,7 +177,9 @@ template MWNodeVector DerivativeCalculator::make return band; } +/** Apply a single operator component (term) to a single f-node assuming zero bandwidth */ template void DerivativeCalculator::applyOperator_bw0(OperatorState &os) { + // cout<<" applyOperator "< &gNode = *os.gNode; MWNode &fNode = *os.fNode; const NodeIndex
&fIdx = *os.fIdx; @@ -187,8 +197,10 @@ template void DerivativeCalculator::applyOperator_bw0( oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oIdx == 0 or oIdx == 3) { + // This will activate the identity operator in direction i oData[d] = nullptr; } else { + // This means that we are in a zero part of the identity operator return; } } @@ -197,6 +209,8 @@ template void DerivativeCalculator::applyOperator_bw0( tensorApplyOperComp(os); } +/** Apply a single operator component (term) to a single f-node. Whether the +operator is actually applied is determined by a screening threshold. */ template void DerivativeCalculator::applyOperator(OperatorState &os) { MWNode &gNode = *os.gNode; MWNode &fNode = *os.fNode; @@ -212,6 +226,8 @@ template void DerivativeCalculator::applyOperator(Oper int oTransl = fIdx[d] - gIdx[d]; + // The following will check the actual band width in each direction. + // Not needed if the thresholding at the end of this routine is active. int a = (os.gt & (1 << d)) >> d; int b = (os.ft & (1 << d)) >> d; int idx = (a << 1) + b; @@ -226,8 +242,10 @@ template void DerivativeCalculator::applyOperator(Oper oData[d] = const_cast(oNode.getCoefs()) + oIdx * os.kp1_2; } else { if (oTransl == 0 and (oIdx == 0 or oIdx == 3)) { + // This will activate the identity operator in direction i oData[d] = nullptr; } else { + // This means that we are in a zero part of the identity operator return; } } @@ -236,6 +254,8 @@ template void DerivativeCalculator::applyOperator(Oper tensorApplyOperComp(os); } +/** Perform the required linear algebra operations in order to apply an +operator component to a f-node in a n-dimensional tensor space.
*/ template void DerivativeCalculator::tensorApplyOperComp(OperatorState &os) { T **aux = os.getAuxData(); double **oData = os.getOperData(); @@ -244,13 +264,14 @@ template void DerivativeCalculator::tensorApplyOperCom Eigen::Map> g(aux[i + 1], os.kp1_dm1, os.kp1); if (oData[i] != nullptr) { Eigen::Map op(oData[i], os.kp1, os.kp1); - if (i == D - 1) { + if (i == D - 1) { // Last dir: Add up into g g.noalias() += f.transpose() * op; } else { g.noalias() = f.transpose() * op; } } else { - if (i == D - 1) { + // Identity operator in direction i + if (i == D - 1) { // Last dir: Add up into g g.noalias() += f.transpose(); } else { g.noalias() = f.transpose(); diff --git a/src/treebuilders/PHCalculator.cpp b/src/treebuilders/PHCalculator.cpp index ea3bf5ced..2da879d7b 100644 --- a/src/treebuilders/PHCalculator.cpp +++ b/src/treebuilders/PHCalculator.cpp @@ -23,26 +23,6 @@ * */ -/** - * @file PHCalculator.cpp - * @brief Populate piecewise-homogeneous (PH) derivative stencil blocks for - * 2D MW nodes and apply them as local operators. - * - * @details - * The PH operator is applied on a 2D tensor-product node and uses three - * nearest-neighbour coupling blocks along the refinement line: - * - S_m1 : block coupling to the left child (l = -1) - * - S_0 : block coupling to the same child (l = 0) - * - S_p1 : block coupling to the right child (l = +1) - * - * For a node at scale j, the coefficients are scaled by 2^{diff_order*(j+1)} - * to account for the dyadic scaling of derivatives in multiresolution analysis. - * - * The block matrices are read from precomputed text files (see @ref readSMatrix) - * that depend on the scaling basis (Legendre or Interpolating) and the - * derivative order (currently n = 1 or 2). 
- */ - #include "PHCalculator.h" #include @@ -92,7 +72,7 @@ void PHCalculator::readSMatrix(const ScalingBasis &basis, char n) { } if (kp1 == (basis.getScalingOrder() + 1)) { this->S_p1 = data.block(0 * kp1, 0, kp1, kp1); - this->S_0 = data.block(1 * kp1, 0, kp1, kp1); + this->S_0 = data.block(1 * kp1, 0, kp1, kp1); this->S_m1 = data.block(2 * kp1, 0, kp1, kp1); break; } @@ -103,15 +83,15 @@ void PHCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); const auto &idx = node.getNodeIndex(); - int l = idx[1] - idx[0]; // neighbour offset along refinement line - int np1 = idx.getScale() + 1; // j+1, used in dyadic derivative scaling - int kp1 = node.getKp1(); // k+1 (polynomial order + 1) - int kp1_d = node.getKp1_d(); // (k+1)^2, tile size per child + int l = idx[1] - idx[0]; + int np1 = idx.getScale() + 1; + int kp1 = node.getKp1(); + int kp1_d = node.getKp1_d(); double two_np1 = std::pow(2.0, this->diff_order * np1); double *coefs = node.getCoefs(); switch (l) { - case 1: // right neighbour: only S_{+1} contributes + case 1: for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; @@ -119,18 +99,18 @@ void PHCalculator::calcNode(MWNode<2> &node) { } } break; - case 0: // interior: stencil spans S_0 (diagonal) and S_{-1}, S_{+1} + case 0: for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; - coefs[0 * kp1_d + idx] = two_np1 * this->S_0 (i, j); + coefs[0 * kp1_d + idx] = two_np1 * this->S_0(i, j); coefs[1 * kp1_d + idx] = two_np1 * this->S_m1(i, j); coefs[2 * kp1_d + idx] = two_np1 * this->S_p1(i, j); - coefs[3 * kp1_d + idx] = two_np1 * this->S_0 (i, j); + coefs[3 * kp1_d + idx] = two_np1 * this->S_0(i, j); } } break; - case -1: // left neighbour: only S_{-1} contributes + case -1: for (int i = 0; i < kp1; i++) { for (int j = 0; j < kp1; j++) { int idx = i * kp1 + j; @@ -142,9 +122,9 @@ void PHCalculator::calcNode(MWNode<2> &node) { // When periodic do nothing, else it should never end up here. 
break; } - node.mwTransform(Compression); // convert to MW (wavelet) coefficients - node.setHasCoefs(); // mark coefficients present - node.calcNorms(); // update node/component norms + node.mwTransform(Compression); + node.setHasCoefs(); + node.calcNorms(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/ProjectionCalculator.cpp b/src/treebuilders/ProjectionCalculator.cpp index d6ca9fc77..15ba014da 100644 --- a/src/treebuilders/ProjectionCalculator.cpp +++ b/src/treebuilders/ProjectionCalculator.cpp @@ -31,8 +31,7 @@ using Eigen::MatrixXd; namespace mrcpp { -template -void ProjectionCalculator::calcNode(MWNode &node) { +template void ProjectionCalculator::calcNode(MWNode &node) { MatrixXd exp_pts; node.getExpandedChildPts(exp_pts); @@ -51,6 +50,62 @@ void ProjectionCalculator::calcNode(MWNode &node) { node.calcNorms(); } +/* Old interpolating version, somewhat faster +template +void ProjectionCalculator::calcNode(MWNode &node) { + const ScalingBasis &sf = node.getMWTree().getMRA().getScalingBasis(); + if (sf.getScalingType() != Interpol) { + NOT_IMPLEMENTED_ABORT; + } + int quadratureOrder = sf.getQuadratureOrder(); + getQuadratureCache(qc); + const VectorXd &pts = qc.getRoots(quadratureOrder); + const VectorXd &wgts = qc.getWeights(quadratureOrder); + + double tmp_coefs[node.getNCoefs()]; + + int scale = node.getScale(); + int kp1_d = node.getKp1_d(); + + double scaleFactor = 1.0 / std::pow(2.0, scale + 1.0); + double sqrtScaleFactor = std::sqrt(scaleFactor); + double point[D]; + + static int tDim = 1 << D; + for (int cIdx = 0; cIdx < tDim; cIdx++) { + NodeIndex nIdx(node.getNodeIndex(), cIdx); + const int *l = nIdx.getTranslation(); + + int indexCounter[D]; + for (int i = 0; i < D; i++) { + indexCounter[i] = 0; + } + + for (int i = 0; i < kp1_d; i++) { + double coef = 1.0; + for (int j = 0; j < D; j++) { + point[j] = scaleFactor * (pts(indexCounter[j]) + l[j]); + coef *= std::sqrt(wgts(indexCounter[j])) * 
sqrtScaleFactor; + } + + tmp_coefs[i] = coef * this->func->evalf(point); + + indexCounter[0]++; + for (int j = 0; j < D - 1; j++) { + if (indexCounter[j] == quadratureOrder) { + indexCounter[j] = 0; + indexCounter[j + 1]++; + } + } + } + node.setCoefBlock(cIdx, kp1_d, tmp_coefs); + } + node.mwTransform(Compression); + node.setHasCoefs(); + node.calcNorms(); +} +*/ + template class ProjectionCalculator<1, double>; template class ProjectionCalculator<2, double>; template class ProjectionCalculator<3, double>; diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp index 962963d6f..844f952d9 100644 --- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp +++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp @@ -33,6 +33,13 @@ using Eigen::VectorXd; namespace mrcpp { +/** @param[in] node: ... + * @details This will ... (work in progress) + * + * + * + * + */ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.zeroCoefs(); int type = node.getMWTree().getMRA().getScalingBasis().getScalingType(); @@ -54,9 +61,21 @@ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) { node.calcNorms(); } +/** @param[in] node: ... + * @details This will ... 
(work in progress) + * + * + * + * + */ +// template void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { - int t_dim = node.getTDim(); - int kp1_d = node.getKp1_d(); + // std::cout << node; + // The scale of J power integrals: + // int scale = node.getScale() + 1; //scale = n = (n - 1) + 1 + + int t_dim = node.getTDim(); // t_dim = 4 + int kp1_d = node.getKp1_d(); // kp1_d = (k + 1)^2 VectorXd vec_o = VectorXd::Zero(t_dim * kp1_d); const NodeIndex<2> &idx = node.getNodeIndex(); @@ -70,15 +89,15 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { int vec_o_segment_index = 0; for (int p = 0; p <= node.getOrder(); p++) for (int j = 0; j <= node.getOrder(); j++) { + // std::min(M, N) could be used for breaking the following loop + // this->cross_correlation->Matrix.size() should be big enough a priori for (int k = 0; 2 * k + p + j < J_power_inetgarls[l_b].size(); k++) { double J; if (this->imaginary) J = J_power_inetgarls[l_b][2 * k + p + j].imag(); else J = J_power_inetgarls[l_b][2 * k + p + j].real(); - - vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += - J * cross_correlation->Matrix[k](p, j); + vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += J * cross_correlation->Matrix[k](p, j); // by default eigen library reads a transpose matrix from a file } vec_o_segment_index++; } @@ -86,7 +105,9 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) { double *coefs = node.getCoefs(); for (int i = 0; i < t_dim * kp1_d; i++) { + // auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0); coefs[i] = vec_o(i); + // std::cout<< "coefs[i] = " << coefs[i] << std::endl; } } diff --git a/src/treebuilders/TreeBuilder.cpp b/src/treebuilders/TreeBuilder.cpp index cb64ad044..ba0e5d973 100644 --- a/src/treebuilders/TreeBuilder.cpp +++ b/src/treebuilders/TreeBuilder.cpp @@ -35,11 +35,7 @@ namespace mrcpp { -template -void TreeBuilder::build(MWTree &tree, - TreeCalculator 
&calculator, - TreeAdaptor &adaptor, - int maxIter) const { +template void TreeBuilder::build(MWTree &tree, TreeCalculator &calculator, TreeAdaptor &adaptor, int maxIter) const { Timer calc_t(false), split_t(false), norm_t(false); println(10, " == Building tree"); @@ -53,7 +49,6 @@ void TreeBuilder::build(MWTree &tree, while (workVec->size() > 0) { printout(10, " -- #" << std::setw(3) << iter << ": Calculated "); printout(10, std::setw(6) << workVec->size() << " nodes "); - calc_t.resume(); calculator.calcNodeVector(*workVec); calc_t.stop(); @@ -62,9 +57,11 @@ void TreeBuilder::build(MWTree &tree, if (iter == 0) sNorm = calcScalingNorm(*workVec); wNorm += calcWaveletNorm(*workVec); - if (sNorm < 0.0 || wNorm < 0.0) { + if (sNorm < 0.0 or wNorm < 0.0) { tree.squareNorm = -1.0; } else { + // approximate norm for thresholding only + // exact norm is recomputed after mwTransform tree.squareNorm = sNorm + wNorm; } println(10, std::setw(24) << tree.squareNorm); @@ -72,9 +69,7 @@ void TreeBuilder::build(MWTree &tree, split_t.resume(); newVec = new MWNodeVector; - if (iter >= maxIter && maxIter >= 0) { - workVec->clear(); - } + if (iter >= maxIter and maxIter >= 0) workVec->clear(); adaptor.splitNodeVector(*newVec, *workVec); split_t.stop(); @@ -82,24 +77,22 @@ void TreeBuilder::build(MWTree &tree, workVec = newVec; iter++; } - tree.resetEndNodeTable(); delete workVec; print::separator(10, ' '); - print::time(10, "Time calc", calc_t); - print::time(10, "Time norm", norm_t); + print::time(10, "Time calc", calc_t); + print::time(10, "Time norm", norm_t); print::time(10, "Time split", split_t); } -template -void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculator) const { +template void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Clearing tree"); Timer clean_t; MWNodeVector nodeVec; tree_utils::make_node_table(tree, nodeVec); - calculator.calcNodeVector(nodeVec); + calculator.calcNodeVector(nodeVec); // clear all coefficients 
clean_t.stop(); tree.clearSquareNorm(); @@ -110,25 +103,19 @@ void TreeBuilder::clear(MWTree &tree, TreeCalculator &calculat print::separator(10, ' '); } -template -int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const { +template int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, bool passCoefs) const { println(10, " == Refining tree"); Timer split_t; MWNodeVector newVec; MWNodeVector *workVec = tree.copyEndNodeTable(); - adaptor.splitNodeVector(newVec, *workVec); - if (passCoefs) { for (int i = 0; i < workVec->size(); i++) { MWNode &node = *(*workVec)[i]; - if (node.isBranchNode()) { - node.giveChildrenCoefs(true); - } + if (node.isBranchNode()) { node.giveChildrenCoefs(true); } } } - delete workVec; tree.resetEndNodeTable(); split_t.stop(); @@ -143,8 +130,7 @@ int TreeBuilder::split(MWTree &tree, TreeAdaptor &adaptor, boo return newVec.size(); } -template -void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculator) const { +template void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculator) const { println(10, " == Calculating tree"); Timer calc_t; @@ -161,8 +147,7 @@ void TreeBuilder::calc(MWTree &tree, TreeCalculator &calculato print::time(10, "Time calc", calc_t); } -template -double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { +template double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { double sNorm = 0.0; for (int i = 0; i < vec.size(); i++) { const MWNode &node = *vec[i]; @@ -171,8 +156,7 @@ double TreeBuilder::calcScalingNorm(const MWNodeVector &vec) const { return sNorm; } -template -double TreeBuilder::calcWaveletNorm(const MWNodeVector &vec) const { +template double TreeBuilder::calcWaveletNorm(const MWNodeVector &vec) const { double wNorm = 0.0; for (int i = 0; i < vec.size(); i++) { const MWNode &node = *vec[i]; diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp index bb8feae7a..4ee28cff6 100644 --- a/src/treebuilders/add.cpp +++ b/src/treebuilders/add.cpp 
@@ -23,46 +23,6 @@ * */ -/** - * @file add.cpp - * @brief Adaptive summation of multiwavelet (MW) function trees. - * - * @details - * This module provides a family of `add` routines that assemble the linear - * combination of one or more MW functions into an output MW function on an - * adaptively refined grid. - * - * The summation is performed by the generic @ref TreeBuilder orchestrating: - * - an @ref AdditionCalculator that evaluates the local sum of input trees - * with their numerical coefficients (and optional complex conjugation), - * - a @ref WaveletAdaptor that refines the output grid where needed to meet - * the requested precision. - * - * The core algorithm (all overloads): - * - Compute MW coefficients of the sum on the **current** output grid. - * - Refine the grid according to the precision target. - * - Repeat until convergence or until a maximum number of refinement - * iterations is reached. - * - Finally transform the output to the MW domain and compute its squared norm. - * - * Precision and iteration controls: - * - `prec < 0` or `maxIter = 0` disables refinement (single pass on - * the existing output grid). - * - `maxIter < 0` removes the iteration limit and refines until the - * precision criterion is satisfied. - * - `absPrec = true` interprets `prec` as an absolute tolerance, otherwise - * it is treated as a relative criterion. - * - * Requirements: - * - All input trees must share the same @ref MultiResolutionAnalysis as the - * output tree, otherwise the routine aborts. - * - * Notes: - * - The routine starts from whatever grid is already present in `out`. This - * grid is expected to be empty in terms of coefficients. - * - Generated nodes present in input trees are removed at the end (cleanup). - */ - #include #include @@ -77,78 +37,60 @@ namespace mrcpp { -/** - * @brief Sum two MW functions (with scalar weights) into an output tree using adaptive refinement. 
+/** @brief Addition of two MW function representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] a: Numerical coefficient of function a + * @param[in] inp_a: Input function a + * @param[in] b: Numerical coefficient of function b + * @param[in] inp_b: Input function b + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed as the sum of the two input + * functions (including the numerical coefficient), using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (e.g., double or ComplexDouble). - * - * @param[in] prec Target build precision for the output function. - * @param[out] out Output function tree to build (its current grid is used as a starting point). - * @param[in] a Numerical coefficient multiplying `inp_a`. - * @param[in] inp_a First input function tree. - * @param[in] b Numerical coefficient multiplying `inp_b`. - * @param[in] inp_b Second input function tree. - * @param[in] maxIter Maximum number of refinement iterations. - * Use a negative value to allow unbounded refinement. - * Use zero to disable refinement (single-pass build). - * @param[in] absPrec If true, interpret `prec` as an absolute tolerance; - * otherwise interpret it as relative. - * @param[in] conjugate When `T` is complex, conjugate all input trees before summation. 
- * - * @details - * Builds `out ≈ a * inp_a (+) b * inp_b` to the requested precision on an adaptively - * refined grid. After the build, `out` is transformed to the MW domain and its squared - * norm is computed. The input trees are not modified except that any generated nodes - * created temporarily during the build are cleaned up. */ -template -void add(double prec, - FunctionTree &out, - T a, FunctionTree &inp_a, - T b, FunctionTree &inp_b, - int maxIter, - bool absPrec, - bool conjugate) { +template void add(double prec, FunctionTree &out, T a, FunctionTree &inp_a, T b, FunctionTree &inp_b, int maxIter, bool absPrec, bool conjugate) { FunctionTreeVector tmp_vec; tmp_vec.push_back(std::make_tuple(a, &inp_a)); tmp_vec.push_back(std::make_tuple(b, &inp_b)); add(prec, out, tmp_vec, maxIter, absPrec, conjugate); } -/** - * @brief Sum a vector of MW functions (with scalar weights) into an output tree using adaptive refinement. +/** @brief Addition of several MW function representations, adaptive grid * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (e.g., double or ComplexDouble). + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Vector of input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision * - * @param[in] prec Target build precision for the output function. - * @param[out] out Output function tree to build (its current grid is used as a starting point). - * @param[in] inp Vector of pairs (weight, pointer-to-tree) to be summed. - * @param[in] maxIter Maximum number of refinement iterations. - * Use a negative value to allow unbounded refinement. - * Use zero to disable refinement (single-pass build). - * @param[in] absPrec If true, interpret `prec` as an absolute tolerance; - * otherwise interpret it as relative. 
- * @param[in] conjugate When `T` is complex, conjugate all input trees before summation. + * @details The output function will be computed as the sum of all input + * functions in the vector (including their numerical coefficients), using + * the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @details - * Builds `out ≈ Σ_i w_i * f_i` to the requested precision on an adaptively refined grid. - * The routine: - * - verifies that all inputs share the same MRA as `out`, - * - constructs a @ref WaveletAdaptor with the precision policy, - * - uses an @ref AdditionCalculator to evaluate the local sums, - * - runs @ref TreeBuilder to refine and assemble, - * - finishes with MW transform and squared norm computation, - * - and finally deletes any generated nodes from inputs. */ -template -void add(double prec, - FunctionTree &out, - FunctionTreeVector &inp, - int maxIter, - bool absPrec, - bool conjugate) { +template void add(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool conjugate) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -177,36 +119,12 @@ void add(double prec, print::separator(10, ' '); } -/** - * @brief Convenience overload: sum a list of unweighted trees (weights set to 1). - * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (e.g., double or ComplexDouble). - * - * @param[in] prec Target build precision for the output function. - * @param[out] out Output function tree. 
- * @param[in] inp Vector of pointers to input trees (all weights taken as 1). - * @param[in] maxIter Maximum number of refinement iterations (see other overload). - * @param[in] absPrec Absolute-vs-relative precision flag. - * @param[in] conjugate Conjugate complex inputs before summation. - * - * @details - * Internally wraps the list into a @ref FunctionTreeVector with unit weights - * and forwards to the vector-based overload. - */ -template -void add(double prec, - FunctionTree &out, - std::vector *> &inp, - int maxIter, - bool absPrec, - bool conjugate) { +template void add(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); add(prec, out, inp_vec, maxIter, absPrec, conjugate); } -/* ------- Explicit template instantiations (double) ------- */ template void add<1, double>(double prec, FunctionTree<1, double> &out, double a, FunctionTree<1, double> &tree_a, double b, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool conjugate); template void @@ -222,7 +140,6 @@ template void add<1, double>(double prec, FunctionTree<1, double> &out, std::vec template void add<2, double>(double prec, FunctionTree<2, double> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); template void add<3, double>(double prec, FunctionTree<3, double> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); -/* ------- Explicit template instantiations (ComplexDouble) ------- */ template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ComplexDouble a, @@ -259,4 +176,4 @@ template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> template void add<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, std::vector *> &inp, int maxIter, bool absPrec, bool conjugate); template void add<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, std::vector 
*> &inp, int maxIter, bool absPrec, bool conjugate); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp index 9e6e91777..bfcbf164e 100644 --- a/src/treebuilders/apply.cpp +++ b/src/treebuilders/apply.cpp @@ -23,38 +23,6 @@ * */ -/** - * @file apply.cpp - * @brief Application pipelines for MW operators (convolution and derivative) to MW function trees. - * - * @details - * This module provides high-level procedures to **apply multiresolution operators** - * to MW representations of functions. Two broad operator families are supported: - * - * - **Convolution-like integral operators** (e.g., Poisson, Helmholtz, Heat, identity), - * implemented as separable kernels in the scaling basis via @ref mrcpp::ConvolutionOperator. - * Application is performed on an **adaptively refined** output grid to meet a target precision. - * - * - **Local or band-limited derivative operators** (e.g., ABGV, PH, BS) implemented via - * @ref mrcpp::DerivativeOperator. Application occurs on a **fixed grid** derived from the - * input and widened according to the operator bandwidth in the selected direction. - * - * The typical adaptive application pipeline for convolution operators is: - * - Pre-step: estimate operator bandwidths at each scale and set up an adaptive refinement policy. - * - Build-step: evaluate local operator actions on the current grid, refine where needed until - * the precision target is reached (or a maximum number of iterations is met). - * - Post-step: assemble and transform the output to the MW domain, compute norms, and clean any - * transient data generated on the inputs. - * - * Additional features: - * - **Near-/Far-field splits** on periodic domains by including/excluding contributions - * from the unit cell. - * - **Precision scaling** using auxiliary trees that modulate local tolerances based on - * maximum norms. 
- * - **Multi-component support** through a 4×4 metric that mixes input/output components - * for relativistic-like workflows. - */ - #include "apply.h" #include "ConvolutionCalculator.h" #include "CopyAdaptor.h" @@ -74,65 +42,30 @@ namespace mrcpp { -/** - * @brief Internal helper to apply a convolution operator while restricting contributions - * to inside or outside of the unit cell on periodic domains. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type (e.g., double or ComplexDouble). - * - * @param[in] inside If true, include only contributions from inside the unit cell; - * if false, include only contributions from outside the unit cell. - * @param[in] prec Target precision that drives adaptive refinement. - * @param[out] out Output function to be built. Should contain empty root nodes on entry. - * @param[in] oper Convolution operator to apply. - * @param[in] inp Input function. - * @param[in] maxIter Maximum number of refinement iterations. Negative means unbounded. - * @param[in] absPrec If true, treat `prec` as an absolute tolerance; otherwise relative. - * - * @details - * Follows the standard adaptive pipeline for convolution operators, with the difference that - * the calculator is instructed to selectively include unit-cell contributions according to - * the `inside` flag. - */ -template -void apply_on_unit_cell(bool inside, - double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - int maxIter, - bool absPrec); - -/** - * @brief Apply a convolution-like integral operator on a single-component function (adaptive). - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * - * @param[in] prec Target precision driving adaptive refinement. - * @param[out] out Output function tree. Must belong to the same MRA as `inp`. - * @param[in] oper Convolution operator to apply. - * @param[in] inp Input function tree. 
- * @param[in] maxIter Maximum refinement iterations (negative for unbounded, zero disables refinement). - * @param[in] absPrec If true, treat `prec` as absolute; otherwise relative. - * - * @details - * Pipeline: - * - Pre: compute operator bandwidths and create a @ref WaveletAdaptor with the given precision policy. - * - Build: @ref TreeBuilder iteratively refines and evaluates the operator action. - * - Post: transform to MW domain, compute squared norms, and clean generated structures. - * - * @note The output tree should initially contain only empty root nodes. - * @throws Aborts if `out` and `inp` belong to different MRAs. +template void apply_on_unit_cell(bool inside, double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec); + +/** @brief Application of MW integral convolution operator + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). 
+ * */ -template -void apply(double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - int maxIter, - bool absPrec) { +template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); Timer pre_t; @@ -141,13 +74,12 @@ void apply(double prec, WaveletAdaptor adaptor(prec, maxScale, absPrec); ConvolutionCalculator calculator(prec, oper, inp); pre_t.stop(); - TreeBuilder builder; builder.build(out, calculator, adaptor, maxIter); Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); + out.mwTransform(TopDown, false); // add coarse scale contributions out.mwTransform(BottomUp); out.calcSquareNorm(); out.deleteGeneratedParents(); @@ -160,32 +92,31 @@ void apply(double prec, print::separator(10, ' '); } -/** - * @brief Apply a convolution operator to a 4-component function using a mixing metric. - * - * @tparam D Spatial dimension. - * - * @param[in] prec Target precision. - * @param[out] out Output multi-component function (structure copied from `inp`). - * @param[in] oper Convolution operator to apply. - * @param[in] inp Input multi-component function. - * @param[in] metric 4×4 coefficient array mapping input to output components. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. 
+/** @brief Application of MW integral convolution operator on Four component + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] metric: 4x4 array with coefficients that relates the in and out components + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false + * + * @details The output function will be computed using the general algorithm: + * - For each input component apply the operator + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * - After application multiply by metric coefficient, and put in relevant output component + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @details - * For each input component `icomp`, the operator is applied and accumulated into each output - * component `ocomp` with weight `metric[icomp][ocomp]`. Real and complex specializations are - * handled, including rescaling of the result by the metric entries. 
*/ -template -void apply(double prec, - CompFunction &out, - ConvolutionOperator &oper, - const CompFunction &inp, - const ComplexDouble (*metric)[4], - int maxIter, - bool absPrec) { +template void apply(double prec, CompFunction &out, ConvolutionOperator &oper, const CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { @@ -194,43 +125,40 @@ void apply(double prec, if (inp.isreal()) { if (out.CompD[ocomp] == nullptr) out.alloc_comp(ocomp); apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); } } else { if (out.CompC[ocomp] == nullptr) out.alloc_comp(ocomp); apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } } } -/** - * @brief Apply a convolution operator while selectively including or excluding unit-cell contributions. +/** @brief Application of MW integral convolution operator * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. 
+ * @param[in] inside: Use points inside (true) or outside (false) the unitcell + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @param[in] inside Select inside (true) or outside (false) contributions in the unit cell. - * @param[in] prec Target precision. - * @param[out] out Output tree. - * @param[in] oper Convolution operator. - * @param[in] inp Input tree. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. 
*/ -template -void apply_on_unit_cell(bool inside, - double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - int maxIter, - bool absPrec) { +template void apply_on_unit_cell(bool inside, double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); Timer pre_t; @@ -246,7 +174,7 @@ void apply_on_unit_cell(bool inside, Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); + out.mwTransform(TopDown, false); // add coarse scale contributions out.mwTransform(BottomUp); out.calcSquareNorm(); out.deleteGeneratedParents(); @@ -259,37 +187,37 @@ void apply_on_unit_cell(bool inside, print::separator(10, ' '); } -/** - * @brief Apply a convolution operator with **locally scaled precision** from auxiliary trees. +/** @brief Application of MW integral convolution operator + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] precTrees: Precision trees + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on _scaled_ `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound * - * @param[in] prec Base precision target. - * @param[out] out Output function tree. - * @param[in] oper Convolution operator. - * @param[in] inp Input function tree. - * @param[in] precTrees Vector of trees whose max norms modulate the local precision. 
- * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. + * The precision will be scaled locally by the maxNorms of the precTrees input vector. + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @details - * The local precision at node index `idx` is scaled by `1 / max_norm(idx)`, where `max_norm` - * is taken across the supplied `precTrees`. This provides an error budget that adapts to - * local magnitudes of reference fields. */ -template -void apply(double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - FunctionTreeVector &precTrees, - int maxIter, - bool absPrec) { +template void apply(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, FunctionTreeVector &precTrees, int maxIter, bool absPrec) { Timer pre_t; oper.calcBandWidths(prec); int maxScale = out.getMRA().getMaxScale(); + // The local precision will be scaled by the maxNorm of the + // corresponding node(s) in the precTrees vector. for (int i = 0; i < precTrees.size(); i++) get_func(precTrees, i).makeMaxSquareNorms(); auto precFunc = [&precTrees](const NodeIndex &idx) -> double { auto maxNorm = (precTrees.size()) ? 0.0 : 1.0; @@ -311,7 +239,7 @@ void apply(double prec, Timer post_t; oper.clearBandWidths(); - out.mwTransform(TopDown, false); + out.mwTransform(TopDown, false); // add coarse scale contributions out.mwTransform(BottomUp); out.calcSquareNorm(); inp.deleteGenerated(); @@ -322,30 +250,8 @@ void apply(double prec, print::separator(10, ' '); } -/** - * @brief Multi-component variant of the precision-scaled convolution application. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - * - * @param[in] prec Base precision target. - * @param[out] out Output multi-component function (structure copied from `inp`). 
- * @param[in] oper Convolution operator. - * @param[in] inp Input multi-component function. - * @param[in] precTrees Array (per input component) of precision trees used for scaling. - * @param[in] metric 4×4 mixing matrix. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. - */ template -void apply(double prec, - CompFunction &out, - ConvolutionOperator &oper, - CompFunction &inp, - FunctionTreeVector *precTrees, - const ComplexDouble (*metric)[4], - int maxIter, - bool absPrec) { +void apply(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, FunctionTreeVector *precTrees, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { @@ -353,64 +259,43 @@ void apply(double prec, if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], precTrees[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompD[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } } else { apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], precTrees[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } } } -/** - * @brief Apply a convolution operator while excluding inside-cell contributions (far-field). +/** @brief Application of MW integral convolution operator on a periodic cell, + excluding contributions inside the unit cell. 
+ * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @param[in] prec Target precision. - * @param[out] out Output function. - * @param[in] oper Convolution operator. - * @param[in] inp Input function. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. */ -template -void apply_far_field(double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - int maxIter, - bool absPrec) { +template void apply_far_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { apply_on_unit_cell(false, prec, out, oper, inp, maxIter, absPrec); } -/** - * @brief Multi-component far-field application with mixing metric. - * - * @tparam D Spatial dimension. - * - * @param[in] prec Target precision. - * @param[out] out Output multi-component function. - * @param[in] oper Convolution operator. - * @param[in] inp Input multi-component function. - * @param[in] metric 4×4 mixing matrix. - * @param[in] maxIter Maximum refinement iterations. 
- * @param[in] absPrec Absolute-vs-relative precision flag. - */ -template -void apply_far_field(double prec, - CompFunction &out, - ConvolutionOperator &oper, - CompFunction &inp, - const ComplexDouble (*metric)[4], - int maxIter, - bool absPrec) { +template void apply_far_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { out = inp.paramCopy(true); for (int icomp = 0; icomp < 4; icomp++) { @@ -419,14 +304,10 @@ void apply_far_field(double prec, if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply_on_unit_cell(false, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompD[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } } else { apply_on_unit_cell(false, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } @@ -434,50 +315,33 @@ void apply_far_field(double prec, } } -/** - * @brief Apply a convolution operator while excluding outside-cell contributions (near-field). +/** @brief Application of MW integral convolution operator on a periodic cell, + excluding contributions outside the unit cell. + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. 
+ * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). * - * @param[in] prec Target precision. - * @param[out] out Output function. - * @param[in] oper Convolution operator. - * @param[in] inp Input function. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. */ -template -void apply_near_field(double prec, - FunctionTree &out, - ConvolutionOperator &oper, - FunctionTree &inp, - int maxIter, - bool absPrec) { +template void apply_near_field(double prec, FunctionTree &out, ConvolutionOperator &oper, FunctionTree &inp, int maxIter, bool absPrec) { apply_on_unit_cell(true, prec, out, oper, inp, maxIter, absPrec); } -/** - * @brief Multi-component near-field application with mixing metric. - * - * @tparam D Spatial dimension. - * - * @param[in] prec Target precision. - * @param[out] out Output multi-component function. - * @param[in] oper Convolution operator. - * @param[in] inp Input multi-component function. - * @param[in] metric 4×4 mixing matrix. - * @param[in] maxIter Maximum refinement iterations. - * @param[in] absPrec Absolute-vs-relative precision flag. 
- */ -template -void apply_near_field(double prec, - CompFunction &out, - ConvolutionOperator &oper, - CompFunction &inp, - const ComplexDouble (*metric)[4], - int maxIter, - bool absPrec) { +template void apply_near_field(double prec, CompFunction &out, ConvolutionOperator &oper, CompFunction &inp, const ComplexDouble (*metric)[4], int maxIter, bool absPrec) { for (int icomp = 0; icomp < 4; icomp++) { if (inp.Comp[icomp] != nullptr) { @@ -485,14 +349,10 @@ void apply_near_field(double prec, if (std::norm(metric[icomp][ocomp]) > MachinePrec) { if (inp.isreal()) { apply_on_unit_cell(true, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompD[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp]); } } else { apply_on_unit_cell(true, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } @@ -500,43 +360,41 @@ void apply_near_field(double prec, } } -/** - * @brief Apply a **derivative operator** on a fixed grid in the given direction. +/** @brief Application of MW derivative operator + * + * @param[out] out: Output function to be built + * @param[in] oper: Derivative operator to apply + * @param[in] inp: Input function + * @param[in] dir: Direction of derivative * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. + * @details The output function will be computed on a FIXED grid that is + * predetermined by the type of derivative operator. For a strictly local + * operator (ABGV_00), the grid is an exact copy of the input function. 
For + * operators that involve also neighboring nodes (ABGV_55, PH, BS) the base grid + * will be WIDENED by one node in the direction of application (on each side). * - * @param[out] out Output function. Should contain only empty root nodes on entry. - * @param[in] oper Derivative operator (defines bandwidth and assembly policy). - * @param[in] inp Input function. - * @param[in] dir Direction of application (0 for x, 1 for y, 2 for z). + * @note The output function should contain only empty root nodes at entry. * - * @details - * The output grid is constructed by copying the input grid and **widening** it by the - * operator bandwidth along the selected direction, if needed. Application then proceeds - * on this fixed grid without additional refinement. */ -template -void apply(FunctionTree &out, - DerivativeOperator &oper, - FunctionTree &inp, - int dir) { +template void apply(FunctionTree &out, DerivativeOperator &oper, FunctionTree &inp, int dir) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); TreeBuilder builder; int maxScale = out.getMRA().getMaxScale(); - int bw[D]; + int bw[D]; // Operator bandwidth in [x,y,z] for (int d = 0; d < D; d++) bw[d] = 0; + // Copy input tree plus bandwidth in operator direction Timer pre_t; - oper.calcBandWidths(1.0); + oper.calcBandWidths(1.0); // Fixed 0 or 1 for derivatives bw[dir] = oper.getMaxBandWidth(); CopyAdaptor pre_adaptor(inp, maxScale, bw); DefaultCalculator pre_calculator; builder.build(out, pre_calculator, pre_adaptor, -1); pre_t.stop(); - SplitAdaptor apply_adaptor(maxScale, false); + // Apply operator on fixed expanded grid + SplitAdaptor apply_adaptor(maxScale, false); // Splits no nodes DerivativeCalculator apply_calculator(dir, oper, inp); builder.build(out, apply_calculator, apply_adaptor, 0); if (out.isPeriodic()) out.rescale(std::pow(2.0, -oper.getOperatorRoot())); @@ -553,38 +411,20 @@ void apply(FunctionTree &out, print::separator(10, ' '); } -/** - * @brief Multi-component derivative 
application with mixing metric. - * - * @tparam D Spatial dimension. - * - * @param[out] out Output multi-component function. - * @param[in] oper Derivative operator. - * @param[in] inp Input multi-component function. - * @param[in] dir Direction of derivative. - * @param[in] metric 4×4 mixing matrix. - * - * @details - * Applies the derivative in `dir` to each input component and accumulates the result into - * output components according to `metric`. Handles real-to-complex promotion if necessary. - */ -template -void apply(CompFunction &out, - DerivativeOperator &oper, - CompFunction &inp, - int dir, - const ComplexDouble (*metric)[4]) { - out = inp.paramCopy(true); +template void apply(CompFunction &out, DerivativeOperator &oper, CompFunction &inp, int dir, const ComplexDouble (*metric)[4]) { + // TODO: sums and not only each components independently, when concrete examples with non diagonal metric are tested + + out = inp.paramCopy(true); // note that this will copy the factor of inp (inp.func_ptr->data.c1) for (int icomp = 0; icomp < inp.Ncomp(); icomp++) { for (int ocomp = 0; ocomp < 4; ocomp++) { if (std::norm(metric[icomp][ocomp]) > MachinePrec) { - if (inp.isreal() && (std::imag(metric[icomp][ocomp]) < MachinePrec || inp.Ncomp() == 1)) { + if (inp.isreal() and (std::imag(metric[icomp][ocomp]) < MachinePrec or inp.Ncomp() == 1)) { apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir); if (std::norm(metric[icomp][ocomp] - 1.0) > MachinePrec) { if (std::imag(metric[icomp][ocomp]) < MachinePrec) out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp])); else - out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; + out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; // To consider: multiply c1 in rescale? 
} out.func_ptr->isreal = 1; } else { @@ -596,32 +436,26 @@ void apply(CompFunction &out, } else { apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir); } - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - out.CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } } } -/** - * @brief Compute the gradient vector of a scalar function using a derivative operator. +/** @brief Calculation of gradient vector of a function * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. + * @param[in] oper: Derivative operator to apply + * @param[in] inp: Input function + * @returns FunctionTreeVector containing the gradient * - * @param[in] oper Derivative operator to apply in each Cartesian direction. - * @param[in] inp Input scalar function. - * @return FunctionTreeVector containing D components of the gradient. + * @details The derivative operator is applied in each Cartesian direction to + * the input function and appended to the output vector. + * + * @note The length of the output vector will be the template dimension D. * - * @details - * Applies the operator in each direction `d = 0..D-1` and returns the resulting - * component trees with unit weights. */ -template -FunctionTreeVector gradient(DerivativeOperator &oper, - FunctionTree &inp) { +template FunctionTreeVector gradient(DerivativeOperator &oper, FunctionTree &inp) { FunctionTreeVector out; for (int d = 0; d < D; d++) { auto *grad_d = new FunctionTree(inp.getMRA()); @@ -631,21 +465,7 @@ FunctionTreeVector gradient(DerivativeOperator &oper, return out; } -/** - * @brief Compute the gradient for 3D multi-component inputs with mixing metric. - * - * @param[in] oper Derivative operator. - * @param[in] inp Input multi-component function. - * @param[in] metric 4×4 mixing matrix. - * @return Vector of component functions for each spatial direction. 
- * - * @details - * For each spatial direction, applies the derivative operator to each component and - * mixes according to `metric`. Handles both real and complex cases. - */ -std::vector *> gradient(DerivativeOperator<3> &oper, - CompFunction<3> &inp, - const ComplexDouble (*metric)[4]) { +std::vector *> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, const ComplexDouble (*metric)[4]) { std::vector *> out; for (int d = 0; d < 3; d++) { @@ -659,17 +479,13 @@ std::vector *> gradient(DerivativeOperator<3> &oper, grad_d->func_ptr->iscomplex = 0; grad_d->CompD[ocomp] = new FunctionTree<3, double>(inp.CompD[0]->getMRA()); apply(*(grad_d->CompD[ocomp]), oper, *inp.CompD[icomp], d); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real()); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real()); } } else { grad_d->func_ptr->isreal = 0; grad_d->func_ptr->iscomplex = 1; grad_d->CompC[ocomp] = new FunctionTree<3, ComplexDouble>(inp.CompC[0]->getMRA()); apply(*(grad_d->CompC[ocomp]), oper, *inp.CompC[icomp], d); - if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { - grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]); - } + if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]); } } } } @@ -679,27 +495,23 @@ std::vector *> gradient(DerivativeOperator<3> &oper, return out; } -/** - * @brief Compute the divergence of a vector field using a derivative operator. +/** @brief Calculation of divergence of a function vector * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. + * @param[out] out: Output function + * @param[in] oper: Derivative operator to apply + * @param[in] inp: Input function vector * - * @param[out] out Output scalar function. - * @param[in] oper Derivative operator applied to each component. - * @param[in] inp Vector of D function components with coefficients. 
+ * @details The derivative operator is applied in each Cartesian direction to + * the corresponding components of the input vector and added up to the final + * output. The grid of the output is fixed as the union of the component + * grids (including any derivative widening, see derivative apply). * - * @details - * Applies the derivative to each component along its matching direction and - * sums the results on the **union grid** of the widened component grids. + * @note + * - The length of the input vector must be the same as the template dimension D. + * - The output function should contain only empty root nodes at entry. * - * @note The length of `inp` must equal `D`. The output should contain only - * empty root nodes on entry. */ -template -void divergence(FunctionTree &out, - DerivativeOperator &oper, - FunctionTreeVector &inp) { +template void divergence(FunctionTree &out, DerivativeOperator &oper, FunctionTreeVector &inp) { if (inp.size() != D) MSG_ABORT("Dimension mismatch"); for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -713,40 +525,22 @@ void divergence(FunctionTree &out, tmp_vec.push_back(std::make_tuple(coef_d, out_d)); } build_grid(out, tmp_vec); - add(-1.0, out, tmp_vec, 0); + add(-1.0, out, tmp_vec, 0); // Addition on union grid clear(tmp_vec, true); } -/** - * @brief Divergence for multi-component inputs with metric (not implemented). - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. - */ -template -void divergence(CompFunction &out, - DerivativeOperator &oper, - FunctionTreeVector *inp, - const ComplexDouble (*metric)[4]) { +template void divergence(CompFunction &out, DerivativeOperator &oper, FunctionTreeVector *inp, const ComplexDouble (*metric)[4]) { MSG_ABORT("not implemented"); } -/** - * @brief Convenience overload: divergence from a list of unweighted component trees. - * - * @tparam D Spatial dimension. - * @tparam T Coefficient type. 
- */ -template -void divergence(FunctionTree &out, - DerivativeOperator &oper, - std::vector *> &inp) { +template void divergence(FunctionTree &out, DerivativeOperator &oper, std::vector *> &inp) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); divergence(out, oper, inp_vec); } - -/* ---------- Explicit template instantiations ---------- */ +template void divergence(CompFunction &out, DerivativeOperator &oper, std::vector *> *inp, const ComplexDouble (*metric)[4]) { + MSG_ABORT("not implemented"); +} template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); @@ -754,31 +548,27 @@ template void apply<3, double>(double prec, FunctionTree<3, double> &out, Convol template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, const CompFunction<1> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, const CompFunction<2> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, const CompFunction<3> &inp, const ComplexDouble (*metric)[4], int maxIter = -1, bool absPrec = false); - -template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec); -template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec); -template void apply<3, double>(double prec, 
FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec); - +template void +apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec); +template void +apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec); +template void +apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec); template void apply_far_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply_far_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); template void apply_far_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec); - template void apply_near_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec); template void apply_near_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec); template void apply_near_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec); - template void apply<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTree<1, double> &inp, int dir); template void apply<2, double>(FunctionTree<2, 
double> &out, DerivativeOperator<2> &oper, FunctionTree<2, double> &inp, int dir); template void apply<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTree<3, double> &inp, int dir); - template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, double> &inp); template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, double> &inp); template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, double> &inp); - template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, std::vector *> &inp); template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, std::vector *> &inp); template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, std::vector *> &inp); - template FunctionTreeVector<1, double> gradient<1>(DerivativeOperator<1> &oper, FunctionTree<1, double> &inp); template FunctionTreeVector<2, double> gradient<2>(DerivativeOperator<2> &oper, FunctionTree<2, double> &inp); template FunctionTreeVector<3, double> gradient<3>(DerivativeOperator<3> &oper, FunctionTree<3, double> &inp); @@ -808,27 +598,22 @@ template void apply<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec); - template void apply_far_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_far_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_far_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> 
&inp, int maxIter, bool absPrec); - template void apply_near_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_near_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec); template void apply_near_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec); - template void apply<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int dir); template void apply<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int dir); template void apply<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int dir); - template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, ComplexDouble> &inp); template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, ComplexDouble> &inp); template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, ComplexDouble> &inp); - template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, std::vector *> &inp); template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, std::vector *> &inp); template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, std::vector *> &inp); template void apply(CompFunction<3> &out, DerivativeOperator<3> &oper, 
CompFunction<3> &inp, int dir = -1, const ComplexDouble (*metric)[4]); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/complex_apply.cpp b/src/treebuilders/complex_apply.cpp index 406d8baf0..5cf0e3b08 100644 --- a/src/treebuilders/complex_apply.cpp +++ b/src/treebuilders/complex_apply.cpp @@ -42,32 +42,56 @@ namespace mrcpp { -template -void apply(double prec, - ComplexObject> &out, - ComplexObject> &oper, - ComplexObject> &inp, - int maxIter, - bool absPrec) { +/** @brief Application of MW integral convolution operator (complex version) + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] oper: Convolution operator to apply + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1 + * @param[in] absPrec: Build output tree based on absolute precision, default false + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * The default is to work with relative precision + * (stop when the wavelet coefficients are below a given (small) fraction of + * function norm. + * Sometimes it is better to use absolute precision (e.g. a contribution in a sum) + * which means stop once wavelet coefficients are below a certain (absoute) value + * Rel prec ∣d∣<ϵ/∣f∣ + * Abs prec ∣d∣<ϵ + * The two ϵ are not necessarily the same. + * The first one is (in general) the overall precision of the calculation (not always...) + * The second one depends on the particular operation which you are performing. 
+ * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). + * \todo !!! Here should be given a method for greed cleaning !!! + * + * + */ +template void apply(double prec, ComplexObject> &out, ComplexObject> &oper, ComplexObject> &inp, int maxIter, bool absPrec) { FunctionTree temp1(inp.real->getMRA()); FunctionTree temp2(inp.real->getMRA()); - // Real part: OR*FR - OI*FI - apply(prec, temp1, *oper.real, *inp.real, maxIter, absPrec); + apply(prec, temp1, *oper.real, *inp.real, maxIter, absPrec); apply(prec, temp2, *oper.imaginary, *inp.imaginary, maxIter, absPrec); add(prec, *out.real, 1.0, temp1, -1.0, temp2); - // Imag part: OI*FR + OR*FI - apply(prec, temp1, *oper.imaginary, *inp.real, maxIter, absPrec); - apply(prec, temp2, *oper.real, *inp.imaginary, maxIter, absPrec); + // temp1.setZero(); + // temp2.setZero(); + + apply(prec, temp1, *oper.imaginary, *inp.real, maxIter, absPrec); + apply(prec, temp2, *oper.real, *inp.imaginary, maxIter, absPrec); add(prec, *out.imaginary, 1.0, temp1, 1.0, temp2); } -template void apply<1>(double prec, - ComplexObject> &out, - ComplexObject> &oper, - ComplexObject> &inp, - int maxIter, - bool absPrec); +template void apply<1>(double prec, ComplexObject> &out, ComplexObject> &oper, ComplexObject> &inp, int maxIter, bool absPrec); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp index ca4c95a86..0e7fb968b 100644 --- a/src/treebuilders/grid.cpp +++ b/src/treebuilders/grid.cpp @@ -38,14 +38,43 @@ namespace mrcpp { +/** @brief Build empty grid by uniform refinement + * + * @param[in,out] out: Output tree to be built + * @param[in] scales: Number of refinement levels + * + * @details This will split ALL leaf nodes in the tree the given number of times. 
+ * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called. + */ template void build_grid(FunctionTree &out, int scales) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; DefaultCalculator calculator; - SplitAdaptor adaptor(maxScale, true); + SplitAdaptor adaptor(maxScale, true); // Splits all nodes for (auto n = 0; n < scales; n++) builder.build(out, calculator, adaptor, 1); } +/** @brief Build empty grid based on info from analytic function + * + * @param[out] out: Output tree to be built + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * + * @details The grid of the output function will be EXTENDED using the general + * algorithm: + * - Loop through current leaf nodes of the output tree + * - Refine node based on custom split check from the function + * - Repeat until convergence or `maxIter` is reached + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called. It requires that the functions + * `isVisibleAtScale()` and `isZeroOnInterval()` is implemented in the + * particular `RepresentableFunction`. 
+ * + */ template void build_grid(FunctionTree &out, const RepresentableFunction &inp, int maxIter) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -55,6 +84,25 @@ template void build_grid(FunctionTree &out, const Repr print::separator(10, ' '); } +/** @brief Build empty grid based on info from Gaussian expansion + * + * @param[out] out: Output tree to be built + * @param[in] inp: Input Gaussian expansion + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * + * @details The grid of the output function will be EXTENDED using the general + * algorithm: + * - Loop through current leaf nodes of the output tree + * - Refine node based on custom split check from the function + * - Repeat until convergence or `maxIter` is reached + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called. It will loop through the Gaussians in the + * expansion and extend the grid based on the position and exponent of each + * term. Higher exponent means finer resolution. 
+ * + */ template void build_grid(FunctionTree &out, const GaussExp &inp, int maxIter) { if (!out.getMRA().getWorldBox().isPeriodic()) { auto maxScale = out.getMRA().getMaxScale(); @@ -66,7 +114,6 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i } } else { auto period = out.getMRA().getWorldBox().getScalingFactors(); - (void)period; for (auto i = 0; i < inp.size(); i++) { auto *gauss = inp.getFunc(i).copy(); build_grid(out, *gauss, maxIter); @@ -76,6 +123,25 @@ template void build_grid(FunctionTree &out, const GaussExp &inp, i print::separator(10, ' '); } +/** @brief Build empty grid based on another MW function representation + * + * @param[out] out: Output tree to be built + * @param[in] inp: Input tree + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * + * @details The grid of the output function will be EXTENDED with all existing + * nodes in corresponding input function, using the general algorithm: + * - Loop through current leaf nodes of the output tree + * - Refine node if the corresponding node in the input has children + * - Repeat until all input nodes are covered or `maxIter` is reached + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called. This means that all nodes on the input + * tree will also be in the final output tree (unless `maxIter` is reached, + * but NOT vice versa. 
+ * + */ template void build_grid(FunctionTree &out, FunctionTree &inp, int maxIter) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); auto maxScale = out.getMRA().getMaxScale(); @@ -86,6 +152,25 @@ template void build_grid(FunctionTree &out, FunctionTr print::separator(10, ' '); } +/** @brief Build empty grid based on several MW function representation + * + * @param[out] out: Output tree to be built + * @param[in] inp: Input tree vector + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * + * @details The grid of the output function will be EXTENDED with all existing + * nodes in all corresponding input functions, using the general algorithm: + * - Loop through current leaf nodes of the output tree + * - Refine node if the corresponding node in one of the inputs has children + * - Repeat until all input nodes are covered or `maxIter` is reached + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called. This means that the final output grid + * will contain (at least) the union of the nodes of all input trees (unless + * `maxIter` is reached). 
+ * + */ template void build_grid(FunctionTree &out, FunctionTreeVector &inp, int maxIter) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -104,18 +189,51 @@ template void build_grid(FunctionTree &out, std::vecto build_grid(out, inp_vec, maxIter); } +/** @brief Copy function from one tree onto the grid of another tree, fixed grid + * + * @param[out] out: Output function + * @param[in] inp: Input function + * + * @details The output function will be computed using the general algorithm: + * - Loop through current leaf nodes of the output tree + * - Copy MW coefs from the corresponding input node + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called and will overwrite any existing coefs. + * + */ template void copy_func(FunctionTree &out, FunctionTree &inp) { FunctionTreeVector tmp_vec; tmp_vec.push_back(std::make_tuple(1.0, &inp)); add(-1.0, out, tmp_vec); } +/** @brief Build empty grid that is identical to another MW grid + * + * @param[out] out: Output tree to be built + * @param[in] inp: Input tree + * + * @note The difference from the corresponding `build_grid` function is that + * this will first clear the grid of the `out` function, while `build_grid` + * will _extend_ the existing grid. + * + */ template void copy_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") out.clear(); build_grid(out, inp); } +/** @brief Build empty grid that is identical to another MW grid for every component + * + * @param[out] out: Output to be built + * @param[in] inp: Input + * + * @note The difference from the corresponding `build_grid` function is that + * this will first clear the grid of the `out` function, while `build_grid` + * will _extend_ the existing grid. 
+ * + */ template void copy_grid(CompFunction &out, CompFunction &inp) { out.free(); out.func_ptr->data = inp.func_ptr->data; @@ -126,23 +244,56 @@ template void copy_grid(CompFunction &out, CompFunction &inp) { } } +/** @brief Clear the MW coefficients of a function representation + * + * @param[in,out] out: Output function to be cleared + * + * @note This will only clear the MW coefs in the existing nodes, it will not + * change the grid of the function. Use `FunctionTree::clear()` to remove all + * grid refinement as well. + * + */ template void clear_grid(FunctionTree &out) { TreeBuilder builder; DefaultCalculator calculator; builder.clear(out, calculator); } +/** @brief Refine the grid of a MW function representation + * + * @param[in,out] out: Output tree to be refined + * @param[in] scales: Number of refinement levels + * @returns The number of nodes that were split + * + * @details This will split ALL leaf nodes in the tree the given number of + * times, then it will compute scaling coefs of the new nodes, thus leaving + * the function representation unchanged, but on a larger grid. 
+ * + */ template int refine_grid(FunctionTree &out, int scales) { auto nSplit = 0; auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; - SplitAdaptor adaptor(maxScale, true); + SplitAdaptor adaptor(maxScale, true); // Splits all nodes for (auto n = 0; n < scales; n++) { - nSplit += builder.split(out, adaptor, true); + nSplit += builder.split(out, adaptor, true); // Transfers coefs to children } return nSplit; } +/** @brief Refine the grid of a MW function representation + * + * @param[in,out] out: Output tree to be refined + * @param[in] prec: Precision for initial split check + * @param[in] absPrec: Build output tree based on absolute precision + * @returns The number of nodes that were split + * + * @details This will first perform a split check on the existing leaf nodes in + * the tree based on the provided precision parameter, then it will compute + * scaling coefs of the new nodes, thus leaving the function representation + * unchanged, but (possibly) on a larger grid. + * + */ template int refine_grid(FunctionTree &out, double prec, bool absPrec) { int maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -151,6 +302,18 @@ template int refine_grid(FunctionTree &out, double pre return nSplit; } +/** @brief Refine the grid of a MW function representation + * + * @param[in,out] out: Output tree to be refined + * @param[in] inp: Input tree that defines the new grid + * @returns The number of nodes that were split + * + * @details This will first perform a split check on the existing leaf nodes + * in the output tree based on the structure of the input tree (same as + * build_grid), then it will compute scaling coefs of the new nodes, thus + * leaving the function representation unchanged, but on a larger grid. 
+ * + */ template int refine_grid(FunctionTree &out, FunctionTree &inp) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA") auto maxScale = out.getMRA().getMaxScale(); @@ -160,6 +323,19 @@ template int refine_grid(FunctionTree &out, FunctionTr return nSplit; } +/** @brief Refine the grid of a MW function representation + * + * @param[in,out] out: Output tree to be refined + * @param[in] inp: Input function + * + * @details This will first perform a split check on the existing leaf nodes + * in the output tree based on the structure of the input function (same as + * build_grid), then it will compute scaling coefs of the new nodes, thus + * leaving the function representation unchanged, but on a larger grid. + * It requires that the functions `isVisibleAtScale()` and `isZeroOnInterval()` + * is implemented in the particular `RepresentableFunction`. + * + */ template int refine_grid(FunctionTree &out, const RepresentableFunction &inp) { auto maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; @@ -249,4 +425,4 @@ template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp); template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp index b1d4af10c..b363bf806 100644 --- a/src/treebuilders/map.cpp +++ b/src/treebuilders/map.cpp @@ -38,13 +38,34 @@ namespace mrcpp { -template -void map(double prec, - FunctionTree &out, - FunctionTree &inp, - FMap fmap, - int maxIter, - bool absPrec) { +/** @brief map a MW function onto another representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Input 
function + * @param[in] fmap: mapping function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function tree will be computed by mapping the input tree values through the fmap function, + * using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * No assumption is made for how the mapping function looks. It is + * left to the end-user to guarantee that the mapping function does + * not lead to numerically unstable/inaccurate situations (e.g. divide + * by zero, overflow, etc...) + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). + * + */ +template void map(double prec, FunctionTree &out, FunctionTree &inp, FMap fmap, int maxIter, bool absPrec) { int maxScale = out.getMRA().getMaxScale(); TreeBuilder builder; diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp index d6fd1b1c1..4e046126e 100644 --- a/src/treebuilders/multiply.cpp +++ b/src/treebuilders/multiply.cpp @@ -44,6 +44,31 @@ namespace mrcpp { +/** @brief Multiplication of two MW function representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] c: Numerical coefficient + * @param[in] inp_a: Input function a + * @param[in] inp_b: Input function b + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * @param[in] useMaxNorms: Build output tree based on norm estimates from input + * + * @details The output function will be computed as the 
product of the two input + * functions (including the numerical coefficient), using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * - conjugate is applied on inp_b + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). + * + */ template void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp_a, FunctionTree &inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { FunctionTreeVector tmp_vec; @@ -52,8 +77,31 @@ void multiply(double prec, FunctionTree &out, T c, FunctionTree &inp multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate); } -template -void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { +/** @brief Multiplication of several MW function representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Vector of input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * @param[in] useMaxNorms: Build output tree based on norm estimates from input + * + * @details The output function will be computed as the product of all input + * functions in the vector (including their numerical coefficients), using + * the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * - conjugate is applied on all the trees in inp, 
except the first + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). + * + */ +template void multiply(double prec, FunctionTree &out, FunctionTreeVector &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { for (auto i = 0; i < inp.size(); i++) if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA"); @@ -87,15 +135,34 @@ void multiply(double prec, FunctionTree &out, FunctionTreeVector &in print::separator(10, ' '); } -template -void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { +template void multiply(double prec, FunctionTree &out, std::vector *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { FunctionTreeVector inp_vec; for (auto &t : inp) inp_vec.push_back({1.0, t}); multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms, conjugate); } -template -void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter, bool absPrec, bool conjugate) { +/** @brief Out-of-place square of MW function representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Input function to square + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed as the square of the input + * function, using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid 
should however be EMPTY, e.i. + * no coefs). + * + */ +template void square(double prec, FunctionTree &out, FunctionTree &inp, int maxIter, bool absPrec, bool conjugate) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); int maxScale = out.getMRA().getMaxScale(); @@ -119,8 +186,29 @@ void square(double prec, FunctionTree &out, FunctionTree &inp, int m print::separator(10, ' '); } -template -void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter, bool absPrec) { +/** @brief Out-of-place power of MW function representations, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Input function to square + * @param[in] p: Numerical power + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed as the input function raised + * to the given power, using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). 
+ * + */ +template void power(double prec, FunctionTree &out, FunctionTree &inp, double p, int maxIter, bool absPrec) { if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (inp.conjugate()) MSG_ABORT("Not implemented"); @@ -145,8 +233,24 @@ void power(double prec, FunctionTree &out, FunctionTree &inp, double print::separator(10, ' '); } -template -void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter, bool absPrec) { +/** @brief Dot product of two MW function vectors, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp_a: Input function vector + * @param[in] inp_b: Input function vector + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed as the dot product of the two + * input vectors (including their numerical coefficients). The precision + * parameter is used only in the multiplication part, the final addition will + * be on the fixed union grid of the components. + * + * @note The length of the input vectors must be the same. + * + */ +template void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, FunctionTreeVector &inp_b, int maxIter, bool absPrec) { if (inp_a.size() != inp_b.size()) MSG_ABORT("Input length mismatch"); FunctionTreeVector tmp_vec; @@ -166,8 +270,20 @@ void dot(double prec, FunctionTree &out, FunctionTreeVector &inp_a, clear(tmp_vec, true); } -template -V dot(FunctionTree &bra, FunctionTree &ket) { +/** @returns Dot product of two MW function representations + * + * @param[in] bra: Bra side input function + * @param[in] ket: Ket side input function + * + * @details The dot product is computed with the trees in compressed form, i.e. + * scaling coefs only on root nodes, wavelet coefs on all nodes. 
Since wavelet + * functions are orthonormal through ALL scales and the root scaling functions + * are orthonormal to all finer level wavelet functions, this becomes a rather + * efficient procedure as you only need to compute the dot product where the + * grids overlap. + * + */ +template V dot(FunctionTree &bra, FunctionTree &ket) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible"); MWNodeVector nodeTable; TreeIterator it(bra); @@ -179,7 +295,13 @@ V dot(FunctionTree &bra, FunctionTree &ket) { int nNodes = nodeTable.size(); V result = 0.0; V locResult = 0.0; - + // OMP is disabled in order to get EXACT results (to the very last digit), the + // order of summation makes the result different beyond the 14th digit or so. + // OMP does improve the performance, but it's not worth it for the time being. + //#pragma omp parallel firstprivate(n_nodes, locResult) num_threads(mrcpp_get_num_threads()) + // shared(nodeTable,rhs,result) + // { + //#pragma omp for schedule(guided) for (int n = 0; n < nNodes; n++) { const auto &braNode = static_cast &>(*nodeTable[n]); const MWNode *mwNode = ket.findNode(braNode.getNodeIndex()); @@ -189,12 +311,25 @@ V dot(FunctionTree &bra, FunctionTree &ket) { if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode); locResult += dot_wavelet(braNode, ketNode); } + //#pragma omp critical result += locResult; + return result; } -template -double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact) { +/** @brief abs-dot product of two MW function representations + * + * @param[in] bra: Bra side input function + * @param[in] ket: Ket side input function + * + * If exact=true: the grid of ket MUST include the grid of bra. + * If exact=false: does not at any time read the coefficients individually. + * The product is done for the end nodes of the bra multiplied by the nodes from the + * ket with either the same idx, or using a lower scale and assuming uniform + * distribution within the node. 
+ * If the product is zero, the functions are disjoints. + */ +template double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exact) { if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Incompatible MRA"); double result = 0.0; @@ -207,12 +342,14 @@ double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exac FunctionNode &node = bra.getEndFuncNode(n); const NodeIndex idx = node.getNodeIndex(); if (exact) { + // convert to interpolating coef, take abs, convert back FunctionNode *mwNode = static_cast *>(ket.findNode(idx)); if (mwNode == nullptr) MSG_ABORT("Trees must have same grid"); node.getAbsCoefs(valA); mwNode->getAbsCoefs(valB); for (int i = 0; i < ncoef; i++) result += std::norm(valA[i] * valB[i]); } else { + // approximate by product of node norms int rIdx = ket.getRootBox().getBoxIndex(idx); assert(rIdx >= 0); const MWNode &root = ket.getRootBox().getNode(rIdx); @@ -223,8 +360,6 @@ double node_norm_dot(FunctionTree &bra, FunctionTree &ket, bool exac return result; } -// ---- Explicit instantiations ------------------------------------------------ - template void multiply<1, double>(double prec, FunctionTree<1, double> &out, double c, FunctionTree<1, double> &tree_a, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate); template void diff --git a/src/treebuilders/project.cpp b/src/treebuilders/project.cpp index 099cbf389..7eea89416 100644 --- a/src/treebuilders/project.cpp +++ b/src/treebuilders/project.cpp @@ -36,27 +36,58 @@ namespace mrcpp { -template -void project(double prec, - FunctionTree &out, - std::function &r)> func, - int maxIter, - bool absPrec) { +/** @brief Project an analytic function onto the MW basis, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on 
absolute precision + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). + * + */ +template void project(double prec, FunctionTree &out, std::function &r)> func, int maxIter, bool absPrec) { AnalyticFunction inp(func); + mrcpp::project(prec, out, inp, maxIter, absPrec); } -template -void project(double prec, - FunctionTree &out, - RepresentableFunction &inp, - int maxIter, - bool absPrec) { +/** @brief Project an analytic function onto the MW basis, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function to be built + * @param[in] inp: Input function + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). 
+ * + */ +template void project(double prec, FunctionTree &out, RepresentableFunction &inp, int maxIter, bool absPrec) { int maxScale = out.getMRA().getMaxScale(); const auto scaling_factor = out.getMRA().getWorldBox().getScalingFactors(); - TreeBuilder builder; WaveletAdaptor adaptor(prec, maxScale, absPrec); + ProjectionCalculator calculator(inp, scaling_factor); builder.build(out, calculator, adaptor, maxIter); @@ -70,16 +101,29 @@ void project(double prec, print::separator(10, ' '); } -template -void project(double prec, - FunctionTreeVector &out, - std::vector &r)>> func, - int maxIter, - bool absPrec) { +/** @brief Project an analytic vector function onto the MW basis, adaptive grid + * + * @param[in] prec: Build precision of output function + * @param[out] out: Output function vector to be built + * @param[in] inp: Input function vector + * @param[in] maxIter: Maximum number of refinement iterations in output tree + * @param[in] absPrec: Build output tree based on absolute precision + * + * @details The output function will be computed using the general algorithm: + * - Compute MW coefs on current grid + * - Refine grid where necessary based on `prec` + * - Repeat until convergence or `maxIter` is reached + * - `prec < 0` or `maxIter = 0` means NO refinement + * - `maxIter < 0` means no bound + * + * @note This algorithm will start at whatever grid is present in the `out` + * tree when the function is called (this grid should however be EMPTY, e.i. + * no coefs). 
+ * + */ +template void project(double prec, FunctionTreeVector &out, std::vector &r)>> func, int maxIter, bool absPrec) { if (out.size() != func.size()) MSG_ABORT("Size mismatch"); - for (auto j = 0; j < D; j++) { - mrcpp::project(prec, get_func(out, j), func[j], maxIter, absPrec); - } + for (auto j = 0; j < D; j++) mrcpp::project(prec, get_func(out, j), func[j], maxIter, absPrec); } template void project<1, double>(double prec, FunctionTree<1, double> &out, RepresentableFunction<1, double> &inp, int maxIter, bool absPrec); @@ -104,4 +148,4 @@ template void project<1, ComplexDouble>(double prec, FunctionTreeVector<1, Compl template void project<2, ComplexDouble>(double prec, FunctionTreeVector<2, ComplexDouble> &out, std::vector &r)>> inp, int maxIter, bool absPrec); template void project<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &out, std::vector &r)>> inp, int maxIter, bool absPrec); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/BandWidth.cpp b/src/trees/BandWidth.cpp index e536ab752..a79814d2f 100644 --- a/src/trees/BandWidth.cpp +++ b/src/trees/BandWidth.cpp @@ -26,16 +26,13 @@ #include "BandWidth.h" #include "utils/Printer.h" -#include -#include - namespace mrcpp { BandWidth &BandWidth::operator=(const BandWidth &bw) = default; bool BandWidth::isEmpty(int depth) const { - if (depth > getDepth()) return true; - if (this->widths(depth, 4) < 0) return true; + if (depth > getDepth()) { return true; } + if (this->widths(depth, 4) < 0) { return true; } return false; } @@ -44,7 +41,7 @@ void BandWidth::setWidth(int depth, int index, int wd) { assert(index >= 0 and index < 4); assert(wd >= 0); this->widths(depth, index) = wd; - if (wd > this->widths(depth, 4)) this->widths(depth, 4) = wd; + if (wd > this->widths(depth, 4)) { this->widths(depth, 4) = wd; } } std::ostream &BandWidth::print(std::ostream &o) const { @@ -63,4 +60,4 @@ std::ostream &BandWidth::print(std::ostream &o) const { return 
o; } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/BoundingBox.cpp b/src/trees/BoundingBox.cpp index 8309e733d..ff86abf2e 100644 --- a/src/trees/BoundingBox.cpp +++ b/src/trees/BoundingBox.cpp @@ -32,8 +32,19 @@ namespace mrcpp { -template -BoundingBox::BoundingBox(std::array box) { +/** @brief Constructor for BoundingBox object. + * + * @param[in] box: [lower, upper] bound in all dimensions. + * @returns New BoundingBox object. + * + * @details Creates a box with appropriate root scale and scaling + * factor to fit the given bounds, which applies to _all_ dimensions. + * Root scale is chosen such that the scaling factor becomes 1 < sfac < 2. + * + * Limitations: Box must be _either_ [0,L] _or_ [-L,L], with L a positive integer. + * This is the most general constructor, which will create a world with no periodic boundary conditions. + */ +template BoundingBox::BoundingBox(std::array box) { if (box[1] < 1) { MSG_ERROR("Invalid upper bound: " << box[1]); box[1] = 1; @@ -68,12 +79,25 @@ BoundingBox::BoundingBox(std::array box) { setDerivedParameters(); } +/** @brief Constructor for BoundingBox object. + * + * @param[in] n: Length scale, default 0. + * @param[in] l: Corner translation, default [0, 0, ...]. + * @param[in] nb: Number of boxes, default [1, 1, ...]. + * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. + * @param[in] pbc: Periodic boundary conditions, default false. + * @returns New BoundingBox object. + * + * @details Creates a box with given parameters. The parameter n defines the length scale, which, together with sf, determines the unit length of each side of the boxes by \f$ [2^{-n}]^D \f$. + * The parameter l defines the corner translation of the lower corner of the box relative to the world origin. + * The parameter nb defines the number of boxes in each dimension. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. 
the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. + * This constructor is used for work in periodic systems. + * + */ template -BoundingBox::BoundingBox(int n, - const std::array &l, - const std::array &nb, - const std::array &sf, - bool pbc) +BoundingBox::BoundingBox(int n, const std::array &l, const std::array &nb, const std::array &sf, bool pbc) : cornerIndex(n, l) { setPeriodic(pbc); setNBoxes(nb); @@ -81,10 +105,21 @@ BoundingBox::BoundingBox(int n, setDerivedParameters(); } +/** @brief Constructor for BoundingBox object. + * + * @param[in] idx: index of the lower corner of the box. + * @param[in] nb: Number of boxes, default [1, 1, ...]. + * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. + * @returns New BoundingBox object. + * + * @details Creates a box with given parameters. + * The parameter idx defines the index of the lower corner of the box relative to the world origin. + * The parameter nb defines the number of boxes in each dimension. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. + * This constructor creates a world with no periodic boundary conditions. + */ template -BoundingBox::BoundingBox(const NodeIndex &idx, - const std::array &nb, - const std::array &sf) +BoundingBox::BoundingBox(const NodeIndex &idx, const std::array &nb, const std::array &sf) : cornerIndex(idx) { setPeriodic(false); setNBoxes(nb); @@ -92,6 +127,16 @@ BoundingBox::BoundingBox(const NodeIndex &idx, setDerivedParameters(); } +/** @brief Constructor for BoundingBox object. + * + * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. + * @param[in] pbc: Periodic boundary conditions, default true. + * + * @details Creates a box with given parameters. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. 
the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. + * This constructor is used for work in periodic systems. + */ template BoundingBox::BoundingBox(const std::array &sf, bool pbc) : cornerIndex() { @@ -101,6 +146,17 @@ BoundingBox::BoundingBox(const std::array &sf, bool pbc) setDerivedParameters(); } +/** @brief Constructor for BoundingBox object. + * + * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. + * @param[in] pbc: Periodic boundary conditions, default true. + * @returns New BoundingBox object. + * + * @details Creates a box with given parameters. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes specific dimensions periodic. + * This is used for work in periodic systems. + */ template BoundingBox::BoundingBox(const std::array &sf, std::array pbc) : cornerIndex() { @@ -110,6 +166,14 @@ BoundingBox::BoundingBox(const std::array &sf, std::array setDerivedParameters(); } +/** @brief Constructor for BoundingBox object. + * + * @param[in] box: Other BoundingBox object. + * @returns New BoundingBox object. + * + * @details Creates a box identical to the input box parameter. + * This constructor uses all parameters from the other BoundingBox to create a new one. + */ template BoundingBox::BoundingBox(const BoundingBox &box) : cornerIndex(box.cornerIndex) { @@ -119,8 +183,14 @@ BoundingBox::BoundingBox(const BoundingBox &box) setDerivedParameters(); } -template -BoundingBox &BoundingBox::operator=(const BoundingBox &box) { +/** @brief Assignment operator overload for BoundingBox object. + * + * @returns Reference to this BoundingBox object. + * @param[in] box: Other BoundingBox object. 
+ * + * @details Assigns all parameters in this BoundingBox to be those of the other BoundingBox. + */ +template BoundingBox &BoundingBox::operator=(const BoundingBox &box) { if (&box != this) { this->cornerIndex = box.cornerIndex; this->periodic = box.periodic; @@ -131,8 +201,14 @@ BoundingBox &BoundingBox::operator=(const BoundingBox &box) { return *this; } -template -void BoundingBox::setNBoxes(const std::array &nb) { +/** @brief Sets the number of boxes in each dimension. + * + * @param[in] nb: Number of boxes, default [1, 1, ...]. + * + * @details For each of the D dimensions it sets the number of boxes in that dimension in the nBoxes array and the total amount of boxes in the world in the totBoxes variable. + * This just sets counters for the number of boxes in each dimension. + */ +template void BoundingBox::setNBoxes(const std::array &nb) { this->totBoxes = 1; for (int d = 0; d < D; d++) { this->nBoxes[d] = (nb[d] > 0) ? nb[d] : 1; @@ -140,8 +216,17 @@ void BoundingBox::setNBoxes(const std::array &nb) { } } -template -void BoundingBox::setDerivedParameters() { +/** @brief Computes and sets all derived parameters. + * + * @details For all parameters that have been initialized in the constructor, + * this function will compute the necessary derived parameters in each dimension. + * The unit length is set to \a sfac \f$ \cdot 2^{-n} \f$ where \a sfac is the scaling factor (default 1.0) and n is the length scale. + * The unit length is the base unit which is used for the size and positioning of the boxes around origin. + * The boxLength is the total length of the box in each dimension, which is the unit length times the number of boxes in that dimension. + * The lowerBound is computed from the index of the lower corner of the box and the unit length. + * The upperBound is computed to be the lower corner plus the total length in that dimension. 
+ */ +template void BoundingBox::setDerivedParameters() { assert(this->totBoxes > 0); const NodeIndex &cIdx = this->cornerIndex; for (int d = 0; d < D; d++) { @@ -153,8 +238,13 @@ void BoundingBox::setDerivedParameters() { } } -template -void BoundingBox::setScalingFactors(const std::array &sf) { +/** @brief Sets the scaling factor in each dimension. + * + * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. + * + * @details This checks that the sf variable has sane values before assigning it to the member variable scalingFactor. + */ +template void BoundingBox::setScalingFactors(const std::array &sf) { assert(this->totBoxes > 0); for (auto &x : sf) if (x <= 0.0 and sf != std::array{}) MSG_ABORT("Non-positive scaling factor: " << x); @@ -162,18 +252,37 @@ void BoundingBox::setScalingFactors(const std::array &sf) { if (scalingFactor == std::array{}) scalingFactor.fill(1.0); } -template -void BoundingBox::setPeriodic(bool pbc) { +/** @brief Sets which dimensions are periodic. + * + * @param[in] pbc: Boolean which is used to set all dimensions to either periodic or not + * + * @details This fills in the periodic array with the value from the input. + */ +template void BoundingBox::setPeriodic(bool pbc) { this->periodic.fill(pbc); } -template -void BoundingBox::setPeriodic(std::array pbc) { +/** @brief Sets which dimensions are periodic. + * + * @param[in] pbc: D-dimensional array holding boolean values for each dimension. + * + * @details This fills in the periodic array with the values from the input array. + */ +template void BoundingBox::setPeriodic(std::array pbc) { this->periodic = pbc; } -template -NodeIndex BoundingBox::getNodeIndex(int bIdx) const { +/** @brief Fetches a NodeIndex object from a given box index. + * + * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. + * @returns The NodeIndex object of the index given as it is in the multiresolution analysis. 
+ * + * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, + * each of which will be part of a specific node in the tree. These cells are divided adaptively. This function returns the NodeIndex + * object of the cell at the lower back corner of the box object indexed by bIdx. + * Specialized for D=1 below + */ +template NodeIndex BoundingBox::getNodeIndex(int bIdx) const { assert(bIdx >= 0 and bIdx <= this->totBoxes); std::array l; for (int d = D - 1; d >= 0; d--) { @@ -191,8 +300,14 @@ NodeIndex BoundingBox::getNodeIndex(int bIdx) const { return NodeIndex(getScale(), l); } -template -int BoundingBox::getBoxIndex(Coord r) const { +/** @brief Fetches the index of a box from a given coordinate. + * + * @param[in] r: D-dimensional array representing a coordinate in the simulation box + * @returns The index of the box at the given position, as it is in the generated world. + * + * @details Specialized for D=1 below + */ +template int BoundingBox::getBoxIndex(Coord r) const { if (this->isPeriodic()) { periodic::coord_manipulation(r, this->getPeriodic()); } @@ -219,8 +334,16 @@ int BoundingBox::getBoxIndex(Coord r) const { return bIdx; } -template -int BoundingBox::getBoxIndex(NodeIndex nIdx) const { +/** @brief Fetches the index of a box from a given NodeIndex. + * + * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. + * @returns The index of the box to which the NodeIndex object maps. + * + * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. + * Each node will get its own index value, but will still be part of one of the original boxes of the world. 
+ * Specialized for D=1 below + */ +template int BoundingBox::getBoxIndex(NodeIndex nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation(nIdx, this->getPeriodic()); }; int n = nIdx.getScale(); @@ -243,8 +366,14 @@ int BoundingBox::getBoxIndex(NodeIndex nIdx) const { return bIdx; } -template -std::ostream &BoundingBox::print(std::ostream &o) const { +/** @brief Prints information about the BoundingBox object. + * + * @param[in] o: Output stream variable which will be used to print the information + * @returns The output stream variable. + * + * @details A function which prints information about the BoundingBox object. + */ +template std::ostream &BoundingBox::print(std::ostream &o) const { int oldprec = Printer::setPrecision(5); o << std::fixed; if (isPeriodic()) { o << " The World is Periodic" << std::endl; } @@ -272,16 +401,28 @@ std::ostream &BoundingBox::print(std::ostream &o) const { return o; } -template <> -NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { +/** @brief Fetches a NodeIndex object from a given box index, specialized for 1-D. + * + * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. + * @returns The NodeIndex object of the index given as it is in the multiresolution analysis. + * + * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, + * each of which will be part of a specific node in the tree. These cells are divided adaptively. This function returns the NodeIndex + * object of the cell at the lower back corner of the box object indexed by bIdx. + */ +template <> NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { const NodeIndex<1> &cIdx = this->cornerIndex; int n = cIdx.getScale(); int l = bIdx + cIdx[0]; return NodeIndex<1>(n, {l}); } -template <> -int BoundingBox<1>::getBoxIndex(Coord<1> r) const { +/** @brief Fetches the index of a box from a given coordinate, specialized for 1D. 
+ * + * @param[in] r: 1-dimensional array representing a coordinate in the simulation box + * @returns The index value of the boxes in the position given as it is in the generated world. + */ +template <> int BoundingBox<1>::getBoxIndex(Coord<1> r) const { if (this->isPeriodic()) { periodic::coord_manipulation<1>(r, this->getPeriodic()); } @@ -294,8 +435,15 @@ int BoundingBox<1>::getBoxIndex(Coord<1> r) const { return static_cast(iint); } -template <> -int BoundingBox<1>::getBoxIndex(NodeIndex<1> nIdx) const { +/** @brief Fetches the index of a box from a given NodeIndex specialized for 1-D. + * + * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. + * @returns The index value of the boxes in which the NodeIndex object is mapping to. + * + * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. + * Each node will get its own index value, but will still be part of one of the original boxes of the world. + */ +template <> int BoundingBox<1>::getBoxIndex(NodeIndex<1> nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation<1>(nIdx, this->getPeriodic()); }; int n = nIdx.getScale(); @@ -318,4 +466,4 @@ template class BoundingBox<1>; template class BoundingBox<2>; template class BoundingBox<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/CornerOperatorTree.cpp b/src/trees/CornerOperatorTree.cpp index 245666823..6de235dd3 100644 --- a/src/trees/CornerOperatorTree.cpp +++ b/src/trees/CornerOperatorTree.cpp @@ -32,6 +32,15 @@ using namespace Eigen; namespace mrcpp { +/** @brief Calculates band widths of the non-standard form matrices. 
+ * + * @param[in] prec: Precision used for thresholding + * + * @details It is starting from \f$ l = 2^n \f$ and updating the band width value each time we encounter + * considerable value while keeping decreasing down to \f$ l = 0 \f$, that stands for the distance to the diagonal. + * This procedure is repeated for each matrix \f$ A, B \f$ and \f$ C \f$. + * + */ void CornerOperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -40,7 +49,7 @@ void CornerOperatorTree::calcBandWidth(double prec) { getMaxTranslations(max_transl); if (prec < 0.0) prec = this->normPrec; - double thrs = std::max(MachinePrec, prec / 10.0); + double thrs = std::max(MachinePrec, prec / 10.0); // should be enough due to oscillating behaviour of corner matrix elements (it's affected by polynomial order) for (int depth = 0; depth < this->getDepth(); depth++) { int l = (1 << depth) - 1; @@ -62,8 +71,17 @@ void CornerOperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } +/** @brief Checks if the distance to diagonal is lesser than the operator band width. + * + * @param[in] oTransl: distance to diagonal + * @param[in] o_depth: scaling order + * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. + * + * @returns True if \b oTransl is outside of the corner band (close to diagonal) and False otherwise. 
+ * + */ bool CornerOperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { - return std::abs(oTransl) < this->bandWidth->getWidth(o_depth, idx); + return abs(oTransl) < this->bandWidth->getWidth(o_depth, idx); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp index 231c95c19..ff23fb394 100644 --- a/src/trees/FunctionNode.cpp +++ b/src/trees/FunctionNode.cpp @@ -42,14 +42,14 @@ using namespace Eigen; namespace mrcpp { +/** Function evaluation. + * Evaluate all polynomials defined on the node. */ template T FunctionNode::evalf(Coord r) { if (not this->hasCoefs()) MSG_ERROR("Evaluating node without coefs"); // The 1.0 appearing in the if tests comes from the period is always 1.0 // from the point of view of this function. - if (this->getMWTree().getRootBox().isPeriodic()) { - periodic::coord_manipulation(r, this->getMWTree().getRootBox().getPeriodic()); - } + if (this->getMWTree().getRootBox().isPeriodic()) { periodic::coord_manipulation(r, this->getMWTree().getRootBox().getPeriodic()); } this->threadSafeGenChildren(); int cIdx = this->getChildIndex(r); @@ -87,6 +87,11 @@ template T FunctionNode::evalScaling(const Coord &r return two_n * result; } +/** Function integration. + * + * Wrapper for function integration, that requires different methods depending + * on scaling type. Integrates the function represented on the node on the + * full support of the node. */ template T FunctionNode::integrate() const { if (not this->hasCoefs()) { return 0.0; } switch (this->getScalingType()) { @@ -101,12 +106,26 @@ template T FunctionNode::integrate() const { } } +/** Function integration, Legendre basis. + * + * Integrates the function represented on the node on the full support of the + * node. The Legendre basis is particularly easy to integrate, as the work is + * already done when calculating its coefficients. 
The coefficients of the + * node are defined as the projection integral + * s_i = int f(x)phi_i(x)dx + * and since the first Legendre function is the constant 1, the first + * coefficient is simply the integral of f(x). */ template T FunctionNode::integrateLegendre() const { double n = (D * this->getScale()) / 2.0; double two_n = std::pow(2.0, -n); return two_n * this->getCoefs()[0]; } +/** Function integration, Interpolating basis. + * + * Integrates the function represented on the node on the full support of the + * node. A bit more involved than in the Legendre basis, as it requires some + * coupling of quadrature weights. */ template T FunctionNode::integrateInterpolating() const { int qOrder = this->getKp1(); getQuadratureCache(qc); @@ -120,6 +139,7 @@ template T FunctionNode::integrateInterpolating() cons Eigen::Matrix coefs; this->getCoefs(coefs); for (int p = 0; p < D; p++) { + int n = 0; for (int i = 0; i < kp1_p[D - p - 1]; i++) { for (int j = 0; j < qOrder; j++) { @@ -137,6 +157,11 @@ template T FunctionNode::integrateInterpolating() cons return two_n * sum; } +/** Function integration, Interpolating basis. + * + * Integrates the function represented on the node on the full support of the + * node. A bit more involved than in the Legendre basis, as it requires some + * coupling of quadrature weights. */ template T FunctionNode::integrateValues() const { int qOrder = this->getKp1(); getQuadratureCache(qc); @@ -209,6 +234,12 @@ template void FunctionNode::getValues(Matrix void FunctionNode::getAbsCoefs(T *absCoefs) { T *coefsTmp = this->coefs; for (int i = 0; i < this->n_coefs; i++) absCoefs[i] = coefsTmp[i]; // copy @@ -350,6 +381,9 @@ template void FunctionNode::dealloc() { } } +/** Update the coefficients of the node by a mw transform of the scaling + * coefficients of the children. Option to overwrite or add up existing + * coefficients. Specialized for D=3 below. 
*/ template void FunctionNode::reCompress() { MWNode::reCompress(); } @@ -374,6 +408,14 @@ template <> void FunctionNode<3>::reCompress() { } } +/** Inner product of the functions represented by the scaling basis of the nodes. + * + * Integrates the product of the functions represented by the scaling basis on + * the node on the full support of the nodes. The scaling basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. + */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -391,6 +433,14 @@ template double dot_scaling(const FunctionNode &bra, const Fu #endif } +/** Inner product of the functions represented by the scaling basis of the nodes. + * + * Integrates the product of the functions represented by the scaling basis on + * the node on the full support of the nodes. The scaling basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. + */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -417,6 +467,14 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } +/** Inner product of the functions represented by the scaling basis of the nodes. + * + * Integrates the product of the functions represented by the scaling basis on + * the node on the full support of the nodes. The scaling basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. 
+ */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -435,6 +493,14 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } +/** Inner product of the functions represented by the scaling basis of the nodes. + * + * Integrates the product of the functions represented by the scaling basis on + * the node on the full support of the nodes. The scaling basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. + */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -453,6 +519,14 @@ template ComplexDouble dot_scaling(const FunctionNode &bra, c return result; } +/** Inner product of the functions represented by the wavelet basis of the nodes. + * + * Integrates the product of the functions represented by the wavelet basis on + * the node on the full support of the nodes. The wavelet basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. + */ template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -473,6 +547,14 @@ template double dot_wavelet(const FunctionNode &bra, const Fu #endif } +/** Inner product of the functions represented by the wavelet basis of the nodes. + * + * Integrates the product of the functions represented by the wavelet basis on + * the node on the full support of the nodes. The wavelet basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. 
+ * NB: will take conjugate of bra in case of complex values. + */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -501,6 +583,14 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } +/** Inner product of the functions represented by the wavelet basis of the nodes. + * + * Integrates the product of the functions represented by the wavelet basis on + * the node on the full support of the nodes. The wavelet basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. + */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -521,6 +611,14 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } +/** Inner product of the functions represented by the wavelet basis of the nodes. + * + * Integrates the product of the functions represented by the wavelet basis on + * the node on the full support of the nodes. The wavelet basis is fully + * orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * NB: will take conjugate of bra in case of complex values. 
+ */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -577,4 +675,4 @@ template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, co template ComplexDouble dot_scaling(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket); template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp index 8b8553fd7..72ca68e39 100644 --- a/src/trees/FunctionTree.cpp +++ b/src/trees/FunctionTree.cpp @@ -42,6 +42,15 @@ using namespace Eigen; namespace mrcpp { +/** @returns New FunctionTree object + * + * @param[in] mra: Which MRA the function is defined + * @param[in] sh_mem: Pointer to MPI shared memory block + * + * @details Constructs an uninitialized tree, containing only empty root nodes. + * If a shared memory pointer is provided the tree will be allocated in this + * shared memory window, otherwise it will be local to each MPI process. + */ template FunctionTree::FunctionTree(const MultiResolutionAnalysis &mra, SharedMemory *sh_mem, const std::string &name) : MWTree(mra, name) @@ -98,6 +107,11 @@ template FunctionTree::~FunctionTree() { if (this->getNNodes() > 0) this->deleteRootNodes(); } +/** @brief Read a previously stored tree assuming text/ASCII format, + * in a representation using MADNESS conventions for n, l and index order. + * @param[in] file: File name + * @note This tree must have the exact same MRA the one that was saved(?) 
+ */ template void FunctionTree::loadTreeTXT(const std::string &file) { std::ifstream in(file); int NDIM, k; @@ -271,6 +285,10 @@ template void FunctionTree::loadTreeTXT(const std::str this->calcSquareNorm(); } +/** @brief Write the tree to disk in text/ASCII format in a representation + * using MADNESS conventions for n, l and index order. + * @param[in] file: File name + */ template void FunctionTree::saveTreeTXT(const std::string &fname) { int nRoots = this->getRootBox().size(); MWNode **roots = this->getRootBox().getNodes(); @@ -314,9 +332,9 @@ template void FunctionTree::saveTreeTXT(const std::str std::array l; NodeIndex idx = this->endNodeTable[count]->getNodeIndex(); MWNode *node = &(this->getNode(idx, false)); - T *coefs = node->getCoefs(); - for (int i = 0; i < ncoefs * Tdim; i++) values[i] = coefs[i]; - node->attachCoefs(values); + T *coefs = node->getCoefs(); + for (int i = 0; i < ncoefs * Tdim; i++) values[i] = coefs[i]; + node->attachCoefs(values); int n = idx.getScale(); node->mwTransform(Reconstruction); node->cvTransform(Forward); @@ -335,11 +353,13 @@ template void FunctionTree::saveTreeTXT(const std::str for (int i = 0; i < ncoefs; i++) out << values[cix * ncoefs + mapMRC[i]] << " "; out << std::endl; } - node->attachCoefs(coefs); // put back original coeff - } + node->attachCoefs(coefs); // put back original coeff + } out.close(); } - +/** @brief Write the tree structure to disk, for later use + * @param[in] file: File name, will get ".tree" extension + */ template void FunctionTree::saveTree(const std::string &file) { Timer t1; @@ -356,13 +376,17 @@ template void FunctionTree::saveTree(const std::string f.write((char *)&nChunks, sizeof(int)); // Write tree data, chunk by chunk for (int iChunk = 0; iChunk < nChunks; iChunk++) { - f.write((char *)allocator.getNodeChunk(iChunk), allocator.getNodeChunkSize()); - f.write((char *)allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize()); + f.write((char *)allocator.getNodeChunk(iChunk), 
allocator.getNodeChunkSize()); + f.write((char *)allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize()); } f.close(); print::time(10, "Time write", t1); } +/** @brief Read a previously stored tree structure from disk + * @param[in] file: File name, will get ".tree" extension + * @note This tree must have the exact same MRA the one that was saved + */ template void FunctionTree::loadTree(const std::string &file) { Timer t1; @@ -395,6 +419,7 @@ template void FunctionTree::loadTree(const std::string print::time(10, "Time rewrite pointers", t2); } +/** @returns Integral of the function over the entire computational domain */ template T FunctionTree::integrate() const { T result = 0.0; @@ -413,6 +438,7 @@ template T FunctionTree::integrate() const { return jacobian * result; } +/** @returns Integral of a representable function over the grid given by the tree */ template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M &f) { // traverse tree, and treat end nodes only std::vector *> stack; // node from this @@ -447,6 +473,20 @@ template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunct return jacobian * result; } +/** @returns Function value in a point, out of bounds returns zero + * + * @param[in] r: Cartesian coordinate + * + * @note This will only evaluate the _scaling_ part of the + * leaf nodes in the tree, which means that the function + * values will not be fully accurate. + * This is done to allow a fast and const function evaluation + * that can be done in OMP parallel. 
If you want to include + * also the _final_ wavelet part you can call the corresponding + * evalf_precise function, _or_ you can manually extend + * the MW grid by one level before evaluating, using + * `mrcpp::refine_grid(tree, 1)` + */ template T FunctionTree::evalf(const Coord &r) const { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -471,6 +511,16 @@ template T FunctionTree::evalf(const Coord &r) cons return coef * result; } +/** @returns Function value in a point, out of bounds returns zero + * + * @param[in] r: Cartesian coordinate + * + * @note This will evaluate the _true_ value (scaling + wavelet) of the + * leaf nodes in the tree. This requires an on-the-fly MW transform + * on the node which makes this function slow and non-const. If you + * need fast evaluation, use refine_grid(tree, 1) first, and then + * evalf. + */ template T FunctionTree::evalf_precise(const Coord &r) { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -496,6 +546,12 @@ template T FunctionTree::evalf_precise(const Coord return coef * result; } +/** @brief In-place square of MW function representations, fixed grid + * + * @details The leaf node point values of the function will be in-place + * squared, no grid refinement. + * + */ template void FunctionTree::square() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -519,6 +575,14 @@ template void FunctionTree::square() { this->calcSquareNorm(); } +/** @brief In-place power of MW function representations, fixed grid + * + * @param[in] p: Numerical power + * + * @details The leaf node point values of the function will be in-place raised + * to the given power, no grid refinement. 
+ * + */ template void FunctionTree::power(double p) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -542,6 +606,14 @@ template void FunctionTree::power(double p) { this->calcSquareNorm(); } +/** @brief In-place multiplication by a scalar, fixed grid + * + * @param[in] c: Scalar coefficient + * + * @details The leaf node point values of the function will be + * in-place multiplied by the given coefficient, no grid refinement. + * + */ template void FunctionTree::rescale(T c) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads()) @@ -561,6 +633,7 @@ template void FunctionTree::rescale(T c) { this->calcSquareNorm(); } +/** @brief In-place rescaling by a function norm \f$ ||f||^{-1} \f$, fixed grid */ template void FunctionTree::normalize() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); double sq_norm = this->getSquareNorm(); @@ -568,6 +641,15 @@ template void FunctionTree::normalize() { this->rescale(1.0 / std::sqrt(sq_norm)); } +/** @brief In-place addition with MW function representations, fixed grid + * + * @param[in] c: Numerical coefficient of input function + * @param[in] inp: Input function to add + * + * @details The input function will be added in-place on the current grid of + * the function, i.e. no further grid refinement. + * + */ template void FunctionTree::add(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -588,7 +670,15 @@ template void FunctionTree::add(T c, FunctionTreecalcSquareNorm(); inp.deleteGenerated(); } - +/** @brief In-place addition with MW function representations, fixed grid + * + * @param[in] c: Numerical coefficient of input function + * @param[in] inp: Input function to add + * + * @details The input function will be added to the union of the current grid + * and the input function grid. 
+ * + */ template void FunctionTree::add_inplace(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -611,6 +701,15 @@ template void FunctionTree::add_inplace(T c, FunctionT inp.deleteGenerated(); } +/** @brief In-place addition of absolute values of MW function representations + * + * @param[in] c Numerical coefficient of input function + * @param[in] inp Input function to add + * + * The absolute value of input function will be added in-place on the current grid of the output + * function, i.e. no further grid refinement. + * + */ template void FunctionTree::absadd(T c, FunctionTree &inp) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads()) @@ -637,6 +736,15 @@ template void FunctionTree::absadd(T c, FunctionTree void FunctionTree::multiply(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -664,6 +772,14 @@ template void FunctionTree::multiply(T c, FunctionTree inp.deleteGenerated(); } +/** @brief In-place mapping with a predefined function f(x), fixed grid + * + * @param[in] fmap: mapping function + * + * @details The input function will be mapped in-place on the current grid + * of the function, i.e. no further grid refinement. 
+ * + */ template void FunctionTree::map(FMap fmap) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); { @@ -723,6 +839,20 @@ template std::ostream &FunctionTree::print(std::ostrea return MWTree::print(o); } +/** @brief Reduce the precision of the tree by deleting nodes + * + * @param prec: New precision criterion + * @param splitFac: Splitting factor: 1, 2 or 3 + * @param absPrec: Use absolute precision + * + * @details This will run the tree building algorithm in "reverse", starting + * from the leaf nodes, and perform split checks on each node based on the given + * precision and the local wavelet norm. + * + * @note The splitting factor appears in the threshold for the wavelet norm as + * \f$ ||w|| < 2^{-sn/2} ||f|| \epsilon \f$. In principle, `s` should be equal + * to the dimension; in practice, it is set to `s=1`. + */ template int FunctionTree::crop(double prec, double splitFac, bool absPrec) { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -734,6 +864,10 @@ template int FunctionTree::crop(double prec, double sp return nChunks; } +/** Traverse tree using BFS and returns an array with the address of the coefs. + * Also returns an array with the corresponding indices defined as the + * values of serialIx in refTree, and an array with the indices of the parent. + * Set index -1 for nodes that are not present in refTree */ template void FunctionTree::makeCoeffVector(std::vector &coefs, std::vector &indices, @@ -784,6 +918,10 @@ void FunctionTree::makeCoeffVector(std::vector &coefs, } } +/** Traverse tree using DFS and reconstruct it using node info from the + * reference tree and a list of coefficients. + * It is the reference tree (refTree) which is traversed, but one does not descend + * into children if the norm of the tree is smaller than absPrec. 
*/ template void FunctionTree::makeTreefromCoeff(MWTree &refTree, std::vector coefpVec, std::map &ix2coef, double absPrec, const std::string &mode) { std::vector *> stack; std::map *> ix2node; // gives the nodes in this tree for a given ix @@ -860,6 +998,9 @@ template void FunctionTree::makeTreefromCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -898,6 +1039,7 @@ template void FunctionTree::appendTreeNoCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -989,11 +1131,17 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() { return this->NodeIndex2serialIx.size(); } +/** @brief Deep copy of tree + * + * @details Exact copy without any binding between old and new tree + */ template void FunctionTree::deep_copy(FunctionTree *out) { copy_grid(*out, *this); copy_func(*out, *this); } +/** @brief New tree with only real part + */ template FunctionTree *FunctionTree::Real() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); @@ -1001,7 +1149,7 @@ template FunctionTree *FunctionTree::Real() //#pragma omp parallel num_threads(mrcpp_get_num_threads()) { int nNodes = this->getNEndNodes(); - //#pragma omp for schedule(guided) + //#pragma omp for schedule(guided) for (int n = 0; n < nNodes; n++) { MWNode &inp_node = *this->endNodeTable[n]; MWNode &out_node = out->getNode(inp_node.getNodeIndex(), true); @@ -1017,13 +1165,15 @@ template FunctionTree *FunctionTree::Real() return out; } +/** @brief New tree with only imaginary part + */ template FunctionTree *FunctionTree::Imag() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); //#pragma omp parallel num_threads(mrcpp_get_num_threads()) { int nNodes = this->getNEndNodes(); - //#pragma omp for schedule(guided) + 
//#pragma omp for schedule(guided) for (int n = 0; n < nNodes; n++) { MWNode &inp_node = *this->endNodeTable[n]; MWNode &out_node = out->getNode(inp_node.getNodeIndex(), true); @@ -1038,6 +1188,11 @@ template FunctionTree *FunctionTree::Imag() return out; } +/* + * From real to complex tree. Copy everything, and convert double to ComplexDouble for the coefficents. + * Should use a deep_copy if generalized in the future. + */ + template <> void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&outTree) { delete outTree; double ref = 0.0; @@ -1207,4 +1362,4 @@ template class FunctionTree<1, ComplexDouble>; template class FunctionTree<2, ComplexDouble>; template class FunctionTree<3, ComplexDouble>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp index bc3a48330..2d521b468 100644 --- a/src/trees/MWNode.cpp +++ b/src/trees/MWNode.cpp @@ -23,6 +23,10 @@ * */ +/** + * Simple n-dimensional node + */ + #include "MWNode.h" #include "MWTree.h" #include "NodeAllocator.h" @@ -37,6 +41,10 @@ using namespace Eigen; namespace mrcpp { +/** @brief MWNode default constructor. + * + * @details Should be used only by NodeAllocator to obtain + * virtual table pointers for the derived classes. */ template MWNode::MWNode() : tree(nullptr) @@ -51,6 +59,13 @@ MWNode::MWNode() MRCPP_INIT_OMP_LOCK(); } +/** @brief MWNode constructor. + * + * @param[in] tree: the MWTree the root node belongs to + * @param[in] idx: the NodeIndex defining scale and translation of the node + * + * @details Constructor for an empty node, given the corresponding MWTree and NodeIndex + */ template MWNode::MWNode(MWTree *tree, const NodeIndex &idx) : tree(tree) @@ -64,6 +79,14 @@ MWNode::MWNode(MWTree *tree, const NodeIndex &idx) MRCPP_INIT_OMP_LOCK(); } +/** @brief MWNode constructor. 
+ * + * @param[in] tree: the MWTree the root node belongs to + * @param[in] rIdx: the integer specifying the corresponding root node + * + * @details Constructor for root nodes. It requires the corresponding + * MWTree and an integer to fetch the right NodeIndex + */ template MWNode::MWNode(MWTree *tree, int rIdx) : tree(tree) @@ -77,6 +100,14 @@ MWNode::MWNode(MWTree *tree, int rIdx) MRCPP_INIT_OMP_LOCK(); } +/** @brief MWNode constructor. + * + * @param[in] parent: parent node + * @param[in] cIdx: child index of the current node + * + * @details Constructor for leaf nodes. It requires the corresponding + * parent and an integer to identify the correct child. + */ template MWNode::MWNode(MWNode *parent, int cIdx) : tree(parent->tree) @@ -90,6 +121,15 @@ MWNode::MWNode(MWNode *parent, int cIdx) MRCPP_INIT_OMP_LOCK(); } +/** @brief MWNode copy constructor. + * + * @param[in] node: the original node + * @param[in] allocCoef: if true MW coefficients are allocated and copied from the original node + * + * @details Creates loose nodes and optionally copy coefs. The node + * does not "belong" to the tree: it cannot be accessed by traversing + * the tree. + */ template MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) : tree(node.tree) @@ -119,15 +159,31 @@ MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) MRCPP_INIT_OMP_LOCK(); } +/** @brief MWNode destructor. + * + * @details Recursive deallocation of a node and all its decendants + */ template MWNode::~MWNode() { if (this->isLooseNode()) this->freeCoefs(); MRCPP_DESTROY_OMP_LOCK(); } +/** @brief Dummy deallocation of MWNode coefficients. + * + * @details This is just to make sure this method never really gets + * called (derived classes must implement their own version). This was + * to avoid having pure virtual methods in the base class. + */ template void MWNode::dealloc() { NOT_REACHED_ABORT; } +/** @brief Allocate the coefs vector. 
+ * + * @details This is only used by loose nodes, because the loose nodes + * are not treated by the NodeAllocator class. + * + */ template void MWNode::allocCoefs(int n_blocks, int block_size) { if (this->n_coefs != 0) MSG_ABORT("n_coefs should be zero"); if (this->isAllocated()) MSG_ABORT("Coefs already allocated"); @@ -140,6 +196,12 @@ template void MWNode::allocCoefs(int n_blocks, int blo this->setIsAllocated(); } +/** @brief Deallocate the coefs vector. + * + * @details This is only used by loose nodes, because the loose nodes + * are not treated by the NodeAllocator class. + * + */ template void MWNode::freeCoefs() { if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!"); @@ -152,6 +214,8 @@ template void MWNode::freeCoefs() { this->clearIsAllocated(); } +/** @brief Printout of node coefficients + */ template void MWNode::printCoefs() const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); println(0, "\nMW coefs"); @@ -162,6 +226,8 @@ template void MWNode::printCoefs() const { } } +/** @brief wraps the MW coefficients into an eigen vector object + */ template void MWNode::getCoefs(Eigen::Matrix &c) const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); if (not this->hasCoefs()) MSG_ABORT("Node has no coefs"); @@ -170,6 +236,9 @@ template void MWNode::getCoefs(Eigen::Matrix::Map(this->coefs, this->n_coefs); } +/** @brief sets all MW coefficients and the norms to zero + * + */ template void MWNode::zeroCoefs() { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated " << *this); @@ -178,26 +247,68 @@ template void MWNode::zeroCoefs() { this->setHasCoefs(); } +/** @brief Attach a set of coefs to this node. Only used locally (the tree is not aware of this). 
+ */ template void MWNode::attachCoefs(T *coefs) { this->coefs = coefs; this->setHasCoefs(); } +/** @brief assigns values to a block of coefficients + * + * @param[in] c: the input coefficients + * @param[in] block: the block index + * @param[in] block_size: size of the block + * + * @details a block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. + */ template void MWNode::setCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = c[i]; } } +/** @brief adds values to a block of coefficients + * + * @param[in] c: the input coefficients + * @param[in] block: the block index + * @param[in] block_size: size of the block + * + * @details a block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. + */ template void MWNode::addCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] += c[i]; } } +/** @brief sets values of a block of coefficients to zero + * + * @param[in] block: the block index + * @param[in] block_size: size of the block + * + * @details a block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. 
+ */ template void MWNode::zeroCoefBlock(int block, int block_size) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = 0.0; } } +/** @brief forward MW transform from this node to its children + * + * @param[in] overwrite: if true the coefficients of the children are + * overwritten. If false the values are summed to the already present + * ones. + * + * @details it performs forward MW transform inserting the result + * directly in the right place for each child node. The children must + * already be present and its memory allocated for this to work + * properly. + */ template void MWNode::giveChildrenCoefs(bool overwrite) { assert(this->isBranchNode()); if (not this->isAllocated()) MSG_ABORT("Not allocated!"); @@ -207,6 +318,7 @@ template void MWNode::giveChildrenCoefs(bool overwrite for (int i = 0; i < getTDim(); i++) getMWChild(i).zeroCoefs(); } + // coeff of child should be have been allocated already here int stride = getMWChild(0).getNCoefs(); T *inp = getCoefs(); T *out = getMWChild(0).getCoefs(); @@ -218,10 +330,21 @@ template void MWNode::giveChildrenCoefs(bool overwrite for (int i = 0; i < getTDim(); i++) { getMWChild(i).setHasCoefs(); - getMWChild(i).calcNorms(); + getMWChild(i).calcNorms(); // should need to compute only scaling norms } } +/** @brief forward MW transform to compute scaling coefficients of a single child + * + * @param[in] cIdx: child index + * @param[in] overwrite: if true the coefficients of the children are + * overwritten. If false the values are summed to the already present + * ones. + * + * @details it performs forward MW transform in place on a loose + * node. The scaling coefficients of the selected child are then + * copied/summed in the correct child node. 
+ */ template void MWNode::giveChildCoefs(int cIdx, bool overwrite) { MWNode node_i = *this; @@ -242,6 +365,12 @@ template void MWNode::giveChildCoefs(int cIdx, bool ov child.calcNorms(); } +/** Takes a MWParent and generates coefficients, reverse operation from + * giveChildrenCoefs */ +/** @brief backward MW transform to compute scaling/wavelet coefficients of a parent + * + * \warning This routine is only used in connection with Periodic Boundary Conditions + */ template void MWNode::giveParentCoefs(bool overwrite) { MWNode node = *this; MWNode &parent = getMWParent(); @@ -258,6 +387,12 @@ template void MWNode::giveParentCoefs(bool overwrite) parent.calcNorms(); } +/** @brief Copy scaling coefficients from children to parent + * + * @details Takes the scaling coefficients of the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. + */ template void MWNode::copyCoefsFromChildren() { int kp1_d = this->getKp1_d(); int nChildren = this->getTDim(); @@ -268,6 +403,14 @@ template void MWNode::copyCoefsFromChildren() { } } +/** @brief Generates scaling coefficients of children + * + * @details If the node is a leafNode, it takes the scaling&wavelet + * coefficients of the parent and it generates the scaling + * coefficients for the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. 
+ */ template void MWNode::threadSafeGenChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -278,6 +421,14 @@ template void MWNode::threadSafeGenChildren() { MRCPP_UNSET_OMP_LOCK(); } +/** @brief Creates scaling coefficients of children + * + * @details If the node is a leafNode, it takes the scaling&wavelet + * coefficients of the parent and it generates the scaling + * coefficients for the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. The new node is permanently added to the tree. + */ template void MWNode::threadSafeCreateChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -288,6 +439,16 @@ template void MWNode::threadSafeCreateChildren() { MRCPP_UNSET_OMP_LOCK(); } +/** @brief Coefficient-Value transform + * + * @details This routine transforms the scaling coefficients of the node to the + * function values in the corresponding quadrature roots (of its children). + * + * @param[in] operation: forward (coef->value) or backward (value->coef). + * + * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1) + * representation, instead of s/d (scaling and wavelet). 
+ */ template void MWNode::cvTransform(int operation, bool firstchild) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -316,9 +477,9 @@ template void MWNode::cvTransform(int operation, bool const auto scaling_factor = this->getMWTree().getMRA().getWorldBox().getScalingFactors(); double sf_prod = 1.0; for (const auto &s : scaling_factor) sf_prod *= s; - if (sf_prod <= MachineZero) sf_prod = 1.0; + if (sf_prod <= MachineZero) sf_prod = 1.0; // When there is no scaling factor - int np1 = getScale() + 1; + int np1 = getScale() + 1; // we're working on scaling coefs on next scale double two_fac = std::pow(2.0, D * np1) / sf_prod; if (operation == Backward) { two_fac = std::sqrt(1.0 / two_fac); @@ -331,7 +492,72 @@ template void MWNode::cvTransform(int operation, bool for (int i = 0; i < nCoefs; i++) { this->coefs[i] *= two_fac; } } } +/* Old interpolating version, somewhat faster +template +void MWNode::cvTransform(int operation) { + const ScalingBasis &sf = this->getMWTree().getMRA().getScalingBasis(); + if (sf.getScalingType() != Interpol) { + NOT_IMPLEMENTED_ABORT; + } + + int quadratureOrder = sf.getQuadratureOrder(); + getQuadratureCache(qc); + + double two_scale = std::pow(2.0, this->getScale() + 1); + VectorXd modWeights = qc.getWeights(quadratureOrder); + if (operation == Forward) { + modWeights = modWeights.array().inverse(); + modWeights *= two_scale; + modWeights = modWeights.array().sqrt(); + } else if (operation == Backward) { + modWeights *= 1.0/two_scale; + modWeights = modWeights.array().sqrt(); + } else { + MSG_ABORT("Invalid operation"); + } + + int kp1 = this->getKp1(); + int kp1_d = this->getKp1_d(); + int kp1_p[D]; + for (int d = 0; d < D; d++) { + kp1_p[d] = math_utils::ipow(kp1, d); + } + + for (int m = 0; m < this->getTDim(); m++) { + for (int p = 0; p < D; p++) { + int n = 0; + for (int i = 0; i < kp1_p[D - p - 1]; i++) { + for (int j = 0; j < kp1; j++) { + for (int k = 0; k < kp1_p[p]; k++) { + this->coefs[m * kp1_d + 
n] *= modWeights[j]; + n++; + } + } + } + } + } +} +*/ +/** @brief Multiwavelet transform + * + * @details Application of the filters on one node to pass from a 0/1 (scaling + * on child 0 and 1) representation to an s/d (scaling and + * wavelet) representation. Bit manipulation is used in order to + * determine the correct filters and whether to apply them or just + * pass to the next couple of indexes. The starting coefficients are + * preserved until the application is terminated, then they are + * overwritten. With minor modifications this code can also be used + * for the inverse mw transform (just use the transpose filters) or + * for the application of an operator (using A, B, C and T parts of an + * operator instead of G1, G0, H1, H0). This is the version where the + * three directions are operated one after the other. Although this + * is formally faster than the other algorithm, the separation of the + * three dimensions prevent the possibility to use the norm of the + * operator in order to discard a priori negligible contributions. + * + * * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1). + */ template void MWNode::mwTransform(int operation) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -349,6 +575,9 @@ template void MWNode::mwTransform(int operation) { for (int gt = 0; gt < this->getTDim(); gt++) { T *out = out_vec + gt * kp1_d; for (int ft = 0; ft < this->getTDim(); ft++) { + /* Operate in direction i only if the bits along other + * directions are identical. The bit of the direction we + * operate on determines the appropriate filter/operator */ if ((gt | mask) == (ft | mask)) { T *in = in_vec + ft * kp1_d; int fIdx = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -368,16 +597,19 @@ template void MWNode::mwTransform(int operation) { } } +/** @brief Set all norms to Undefined. 
*/ template void MWNode::clearNorms() { this->squareNorm = -1.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = -1.0; } } +/** @brief Set all norms to zero. */ template void MWNode::zeroNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = 0.0; } } +/** @brief Calculate and store square norm and component norms, if allocated. */ template void MWNode::calcNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { @@ -387,6 +619,7 @@ template void MWNode::calcNorms() { } } +/** @brief Calculate and return the squared scaling norm. */ template double MWNode::getScalingNorm() const { double sNorm = this->getComponentNorm(0); if (sNorm >= 0.0) { @@ -396,6 +629,7 @@ template double MWNode::getScalingNorm() const { } } +/** @brief Calculate and return the squared wavelet norm. */ template double MWNode::getWaveletNorm() const { double wNorm = 0.0; for (int i = 1; i < this->getTDim(); i++) { @@ -409,6 +643,7 @@ template double MWNode::getWaveletNorm() const { return wNorm; } +/** @brief Calculate the norm of one component (NOT the squared norm!). */ template double MWNode::calcComponentNorm(int i) const { if (this->isGenNode() and i != 0) return 0.0; assert(this->isAllocated()); @@ -419,10 +654,13 @@ template double MWNode::calcComponentNorm(int i) const int start = i * size; double sq_norm = 0.0; - for (int i2 = start; i2 < start + size; i2++) { sq_norm += std::norm(c[i2]); } + for (int i = start; i < start + size; i++) { sq_norm += std::norm(c[i]); } return std::sqrt(sq_norm); } +/** @brief Update the coefficients of the node by a mw transform of the scaling + * coefficients of the children. 
+ */ template void MWNode::reCompress() { if (this->isGenNode()) NOT_IMPLEMENTED_ABORT; if (this->isBranchNode()) { @@ -434,6 +672,12 @@ template void MWNode::reCompress() { } } +/** @brief Recurse down until an EndNode is found, and then crop children below the given precision threshold + * + * @param[in] prec: precision required + * @param[in] splitFac: factor used in the split check (larger factor means tighter threshold for finer nodes) + * @param[in] absPrec: flag to switch from relative (false) to absolute (true) precision. + */ template bool MWNode::crop(double prec, double splitFac, bool absPrec) { if (this->isEndNode()) { return true; @@ -463,6 +707,11 @@ template void MWNode::genParent() { NOT_REACHED_ABORT; } +/** @brief Recursive deallocation of children and all their descendants. + * + * @details + * Leaves node as LeafNode and children[] as null pointer. + */ template void MWNode::deleteChildren() { if (this->isLeafNode()) return; for (int cIdx = 0; cIdx < getTDim(); cIdx++) { @@ -477,6 +726,7 @@ template void MWNode::deleteChildren() { this->setIsLeafNode(); } +/** @brief Recursive deallocation of parent and all their forefathers. */ template void MWNode::deleteParent() { if (this->parent == nullptr) return; MWNode &parent = getMWParent(); @@ -486,6 +736,7 @@ template void MWNode::deleteParent() { this->parent = nullptr; } +/** @brief Deallocation of all generated nodes . 
*/ template void MWNode::deleteGenerated() { if (this->isBranchNode()) { if (this->isEndNode()) { @@ -496,6 +747,7 @@ template void MWNode::deleteGenerated() { } } +/** @brief returns the coordinates of the centre of the node */ template Coord MWNode::getCenter() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -505,6 +757,7 @@ template Coord MWNode::getCenter() const { return r; } +/** @brief returns the upper bounds of the D-interval defining the node */ template Coord MWNode::getUpperBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -514,6 +767,7 @@ template Coord MWNode::getUpperBounds() const { return ub; } +/** @brief returns the lower bounds of the D-interval defining the node */ template Coord MWNode::getLowerBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -523,6 +777,14 @@ template Coord MWNode::getLowerBounds() const { return lb; } +/** @brief Routine to find the path along the tree. + * + * @param[in] nIdx: the sought after node through its NodeIndex + * + * @details Given the translation indices at the final scale, computes the child m + * to be followed at the current scale in order to get to the requested + * node at the final scale. The result is the index of the child needed. + * The index is obtained by bit manipulation of the translation indices. */ template int MWNode::getChildIndex(const NodeIndex &nIdx) const { assert(isAncestor(nIdx)); int cIdx = 0; @@ -537,6 +799,12 @@ template int MWNode::getChildIndex(const NodeIndex return cIdx; } +/** @brief Routine to find the path along the tree. 
+ * + * @param[in] r: the sought after node through the coordinates of a point in space + * + * @details Given a point in space, determines which child should be followed + * to get to the corresponding terminal node. */ template int MWNode::getChildIndex(const Coord &r) const { assert(hasCoord(r)); int cIdx = 0; @@ -550,6 +818,18 @@ template int MWNode::getChildIndex(const Coord &r) return cIdx; } +/** @brief Returns the quadrature points in a given node + * + * @param[in,out] pts: quadrature points in a \f$ d \times (k+1) \f$ matrix form. + * + * @details The original quadrature points are fetched and then + * dilated and translated. For each cartesian direction \f$ \alpha = + * x,y,z... \f$ the set of quadrature points becomes \f$ x^\alpha_i = + * 2^{-n} (x_i + l^\alpha) \f$. By taking all possible + * \f$(k+1)^d\f$ combinations, they will then define a d-dimensional + * grid of quadrature points. + * + */ template void MWNode::getPrimitiveQuadPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, kp1); @@ -562,6 +842,19 @@ template void MWNode::getPrimitiveQuadPts(MatrixXd &pt for (int d = 0; d < D; d++) pts.row(d) = sFac * (roots.array() + static_cast(l[d])); } +/** @brief Returns the quadrature points in a given node + * + * @param[in,out] pts: quadrature points in a \f$ d \times 2(k+1) \f$ matrix form. + * + * @details The original quadrature points are fetched and then + * dilated and translated to match the quadrature points in the + * children of the given node. For each cartesian direction \f$ \alpha = x,y,z... \f$ + * the set of quadrature points becomes \f$ x^\alpha_i = 2^{-n-1} (x_i + 2 l^\alpha + t^\alpha) \f$, where \f$ t^\alpha = + * 0,1 \f$. By taking all possible \f$ (k+1)^d \f$ combinations, they will + * then define a d-dimensional grid of quadrature points for the child + * nodes. 
+ * + */ template void MWNode::getPrimitiveChildPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, 2 * kp1); @@ -577,6 +870,16 @@ template void MWNode::getPrimitiveChildPts(MatrixXd &p } } +/** @brief Returns the quadrature points in a given node + * + * @param[in,out] pts: expanded quadrature points in a \f$ d \times + * (k+1)^d \f$ matrix form. + * + * @details The primitive quadrature points are used to obtain a + * tensor-product representation collecting all \f$ (k+1)^d \f$ + * vectors of quadrature points. + * + */ template void MWNode::getExpandedQuadPts(Eigen::MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveQuadPts(prim_pts); @@ -591,6 +894,16 @@ template void MWNode::getExpandedQuadPts(Eigen::Matrix if (D >= 4) NOT_IMPLEMENTED_ABORT; } +/** @brief Returns the quadrature points in a given node + * + * @param[in,out] pts: expanded quadrature points in a \f$ d \times + * 2^d(k+1)^d \f$ matrix form. + * + * @details The primitive quadrature points of the children are used to obtain a + * tensor-product representation collecting all \f$ 2^d (k+1)^d \f$ + * vectors of quadrature points. + * + */ template void MWNode::getExpandedChildPts(MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveChildPts(prim_pts); @@ -615,13 +928,23 @@ template void MWNode::getExpandedChildPts(MatrixXd &pt } } +/** @brief Const version of node retriever that NEVER generates. + * + * @param[in] idx: the requested NodeIndex + * + * @details + * Recursive routine to find and return the node with a given NodeIndex. + * This routine returns the appropriate Node, or a NULL pointer if + * the node does not exist, or if it is a GenNode. Recursion starts at at this + * node and ASSUMES the requested node is in fact decending from this node. 
+ */ template const MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) const { - if (getScale() == idx.getScale()) { + if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); return this; } assert(this->isAncestor(idx)); - if (this->isEndNode()) { + if (this->isEndNode()) { // don't return GenNodes return nullptr; } int cIdx = getChildIndex(idx); @@ -629,13 +952,23 @@ template const MWNode *MWNode::retrieveNodeNoGen return this->children[cIdx]->retrieveNodeNoGen(idx); } +/** @brief Node retriever that NEVER generates. + * + * @param[in] idx: the requested NodeIndex + * + * @details + * Recursive routine to find and return the node with a given NodeIndex. + * This routine returns the appropriate Node, or a NULL pointer if + * the node does not exist, or if it is a GenNode. Recursion starts at at this + * node and ASSUMES the requested node is in fact decending from this node. + */ template MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) { - if (getScale() == idx.getScale()) { + if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); return this; } assert(this->isAncestor(idx)); - if (this->isEndNode()) { + if (this->isEndNode()) { // don't return GenNodes return nullptr; } int cIdx = getChildIndex(idx); @@ -643,6 +976,18 @@ template MWNode *MWNode::retrieveNodeNoGen(const return this->children[cIdx]->retrieveNodeNoGen(idx); } +/** @brief Node retriever that returns requested Node or EndNode (const version). + * + * @param[in] r: the coordinates of a point in the node + * @param[in] depth: the depth which one needs to descend + * + * @details Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at at + * this node and ASSUMES the requested node is in fact decending from + * this node. 
+ */ template const MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) const { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -650,6 +995,18 @@ template const MWNode *MWNode::retrieveNodeOrEnd return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } +/** @brief Node retriever that returns requested Node or EndNode. + * + * @param[in] r: the coordinates of a point in the node + * @param[in] depth: the depth which one needs to descend + * + * @details Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at at + * this node and ASSUMES the requested node is in fact decending from + * this node. + */ template MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -657,30 +1014,68 @@ template MWNode *MWNode::retrieveNodeOrEndNode(c return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } +/** @brief Node retriever that returns requested Node or EndNode (const version). + * + * @param[in] idx: the NodeIndex of the requested node + * + * @details Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at at + * this node and ASSUMES the requested node is in fact decending from + * this node. 
+ */ template const MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) const { - if (getScale() == idx.getScale()) { + if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); return this; } assert(isAncestor(idx)); + // We should in principle lock before read, but it makes things slower, + // and the EndNode status does not change (normally ;) if (isEndNode()) { return this; } int cIdx = getChildIndex(idx); assert(children[cIdx] != nullptr); return this->children[cIdx]->retrieveNodeOrEndNode(idx); } +/** @brief Node retriever that returns requested Node or EndNode. + * + * @param[in] idx: the NodeIndex of the requested node + * + * @details + * Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at at + * this node and ASSUMES the requested node is in fact decending from + * this node. + */ template MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) { - if (getScale() == idx.getScale()) { + if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); return this; } assert(isAncestor(idx)); + // We should in principle lock before read, but it makes things slower, + // and the EndNode status does not change (normally ;) if (isEndNode()) { return this; } int cIdx = getChildIndex(idx); assert(children[cIdx] != nullptr); return this->children[cIdx]->retrieveNodeOrEndNode(idx); } +/** @brief Node retriever that ALWAYS returns the requested node. + * + * @param[in] r: the coordinates of a point in the node + * @param[in] depth: the depth which one needs to descend + * + * @details + * Recursive routine to find and return the node with a given NodeIndex. + * This routine always returns the appropriate node, and will generate nodes + * that does not exist. 
Recursion starts at this node and ASSUMES the + * requested node is in fact decending from this node. + */ template MWNode *MWNode::retrieveNode(const Coord &r, int depth) { if (depth < 0) MSG_ABORT("Invalid argument"); @@ -692,10 +1087,26 @@ template MWNode *MWNode::retrieveNode(const Coor return this->children[cIdx]->retrieveNode(r, depth); } +/** @brief Node retriever that ALWAYS returns the requested node, possibly without coefs. + * + * @param[in] idx: the NodeIndex of the requested node + * + * @details + * Recursive routine to find and return the node with a given NodeIndex. This + * routine always returns the appropriate node, and will generate nodes that + * does not exist. Recursion starts at this node and ASSUMES the requested + * node is in fact descending from this node. + * If create = true, the nodes are permanently added to the tree. + */ template MWNode *MWNode::retrieveNode(const NodeIndex &idx, bool create) { - if (getScale() == idx.getScale()) { + if (getScale() == idx.getScale()) { // we're done if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; + // has to fetch coeff in Bank. NOT USED YET + // int ncoefs = (1 << D) * this->getKp1_d(); + // coefs = new double[ncoefs]; // TODO must be cleaned at some stage + // coefs = new double[ncoefs]; // TODO must be cleaned at some stage + // tree->getNodeCoeff(idx, coefs); } assert(getNodeIndex() == idx); return this; @@ -712,6 +1123,18 @@ template MWNode *MWNode::retrieveNode(const Node return this->children[cIdx]->retrieveNode(idx, create); } +/** Node retriever that ALWAYS returns the requested node. + * + * WARNING: This routine is NOT thread safe! Must be used within omp critical. + * + * @param[in] idx: the NodeIndex of the requested node + * + * @details + * Recursive routine to find and return the node with a given NodeIndex. This + * routine always returns the appropriate node, and will generate nodes that + * does not exist. 
Recursion starts at this node and ASSUMES the requested + * node is in fact related to this node. + */ template MWNode *MWNode::retrieveParent(const NodeIndex &idx) { if (getScale() < idx.getScale()) MSG_ABORT("Scale error") if (getScale() == idx.getScale()) return this; @@ -722,13 +1145,22 @@ template MWNode *MWNode::retrieveParent(const No return this->parent->retrieveParent(idx); } +/** @brief Gives the norm (absolute value) of the node at the given NodeIndex. + * + * @param[in] idx: the NodeIndex of the requested node + * + * @details + * Recursive routine to find the node with a given NodeIndex. When an EndNode is + * found, do not generate any new node, but rather give the value of the norm + * assuming the function is uniformly distributed within the node. + */ template double MWNode::getNodeNorm(const NodeIndex &idx) const { - if (this->getScale() == idx.getScale()) { + if (this->getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); return std::sqrt(this->squareNorm); } assert(isAncestor(idx)); - if (this->isEndNode()) { + if (this->isEndNode()) { // we infer norm at lower scales return std::sqrt(this->squareNorm * std::pow(2.0, -D * (idx.getScale() - getScale()))); } int cIdx = getChildIndex(idx); @@ -736,21 +1168,46 @@ template double MWNode::getNodeNorm(const NodeIndex return this->children[cIdx]->getNodeNorm(idx); } +/** @brief Test if a given coordinate is within the boundaries of the node. 
+ * + * @param[in] r: point coordinates + */ template bool MWNode::hasCoord(const Coord &r) const { double sFac = std::pow(2.0, -getScale()); const NodeIndex &l = getNodeIndex(); + // println(1, "[" << r[0] << "," << r[1] << "," << r[2] << "]"); + // println(1, "[" << l[0] << "," << l[1] << "," << l[2] << "]"); + // println(1, *this); for (int d = 0; d < D; d++) { if (r[d] < sFac * l[d] or r[d] > sFac * (l[d] + 1)) { + // println(1, "false"); return false; } } + // println(1, "true"); return true; } +/** Testing if nodes are compatible wrt NodeIndex and Tree (order, rootScale, + * relPrec, etc). */ template bool MWNode::isCompatible(const MWNode &node) { NOT_IMPLEMENTED_ABORT; -} - + // if (nodeIndex != node.nodeIndex) { + // println(0, "nodeIndex mismatch" << std::endl); + // return false; + // } + // if (not this->tree->checkCompatible(*node.tree)) { + // println(0, "tree type mismatch" << std::endl); + // return false; + // } + // return true; +} + +/** @brief Test if the node is an ancestor of the node with the given NodeIndex, that is, if they have + * overlapping support. + * + * @param[in] idx: the NodeIndex of the requested node + */ template bool MWNode::isAncestor(const NodeIndex &idx) const { int relScale = idx.getScale() - getScale(); if (relScale < 0) return false; @@ -766,6 +1223,10 @@ template bool MWNode::isDecendant(const NodeIndex & NOT_IMPLEMENTED_ABORT; } +/** @brief printout of the node content. + * + * @param[in] o: the output stream + */ template std::ostream &MWNode::print(std::ostream &o) const { std::string flags = " "; o << getNodeIndex(); @@ -791,6 +1252,12 @@ template std::ostream &MWNode::print(std::ostream &o) return o; } +/** @brief recursively set maxSquareNorm and maxWSquareNorm of parent and descendants + * + * @details + * normalization is such that a constant function gives constant value, + * i.e. 
*not* same normalization as a squareNorm + */ template void MWNode::setMaxSquareNorm() { auto n = this->getScale(); this->maxWSquareNorm = calcScaledWSquareNorm(); @@ -805,7 +1272,8 @@ template void MWNode::setMaxSquareNorm() { } } } - +/** @brief recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1 + */ template void MWNode::resetMaxSquareNorm() { auto n = this->getScale(); this->maxSquareNorm = -1.0; @@ -825,4 +1293,4 @@ template class MWNode<1, ComplexDouble>; template class MWNode<2, ComplexDouble>; template class MWNode<3, ComplexDouble>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp index 67e761bca..6a646d33f 100644 --- a/src/trees/MWTree.cpp +++ b/src/trees/MWTree.cpp @@ -40,23 +40,39 @@ using namespace Eigen; namespace mrcpp { +/** @brief MWTree constructor. + * + * @param[in] mra: the multiresolution analysis object + * @param[in] n: the name of the tree (only for printing purposes) + * + * @details Creates an empty tree object, containing only the set of + * root nodes. The information for the root node configuration to use + * is in the mra object which is passed to the constructor. + */ template MWTree::MWTree(const MultiResolutionAnalysis &mra, const std::string &n) : MRA(mra) - , order(mra.getOrder()) - , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) + , order(mra.getOrder()) /// polynomial order + , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) /// nr of scaling coefficients \f$ (k+1)^D \f$ , name(n) , squareNorm(-1.0) , rootBox(mra.getWorldBox()) { this->nodesAtDepth.push_back(0); } +/** @brief MWTree destructor. 
*/ template MWTree::~MWTree() { this->endNodeTable.clear(); if (this->nodesAtDepth.size() != 1) MSG_ERROR("Nodes at depth != 1 -> " << this->nodesAtDepth.size()); if (this->nodesAtDepth[0] != 0) MSG_ERROR("Nodes at depth 0 != 0 -> " << this->nodesAtDepth[0]); } +/** @brief Deletes all the nodes in the tree + * + * @details This method will recursively delete all the nodes, + * including the root nodes. Derived classes will call this method + * when the object is deleted. + */ template void MWTree::deleteRootNodes() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -66,6 +82,14 @@ template void MWTree::deleteRootNodes() { } } +/** @brief Remove all nodes in the tree + * + * @details Leaves the tree in the same state as after construction, + * i.e. undefined tree structure containing only root nodes without + * coefficients. The assigned memory, including branch and leaf + * nodes, (nodeChunks in NodeAllocator) is NOT released, but is + * immediately available to the new function. + */ template void MWTree::clear() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -77,6 +101,11 @@ template void MWTree::clear() { this->clearSquareNorm(); } +/** @brief Calculate the squared norm \f$ ||f||^2_{\ldots} \f$ of a function represented as a tree. + * + * @details The norm is calculated using endNodes only. 
The specific + * type of norm which is computed will depend on the derived class + */ template void MWTree::calcSquareNorm(bool deep) { double treeNorm = 0.0; for (int n = 0; n < this->getNEndNodes(); n++) { @@ -88,6 +117,29 @@ template void MWTree::calcSquareNorm(bool deep) { this->squareNorm = treeNorm; } +/** @brief Full Multiwavelet transform of the tree in either directions + * + * @param[in] type: TopDown (from roots to leaves) or BottomUp (from + * leaves to roots) which specifies the direction of the MW transform + * @param[in] overwrite: if true, the result will overwrite + * preexisting coefficients. + * + * @details It performs a Multiwavlet transform of the whole tree. The + * input parameters will specify the direction (upwards or downwards) + * and whether the result is added to the coefficients or it + * overwrites them. See the documentation for the #mwTransformUp + * and #mwTransformDown for details. + * \f[ + * \pmatrix{ + * s_{nl}\\ + * d_{nl} + * } + * \rightleftarrows \pmatrix{ + * s_{n+1,2l}\\ + * s_{n+1,2l+1} + * } + * \f] + */ template void MWTree::mwTransform(int type, bool overwrite) { switch (type) { case TopDown: @@ -102,6 +154,15 @@ template void MWTree::mwTransform(int type, bool overw } } +/** @brief Regenerates all s/d-coeffs by backtransformation + * + * @details It starts at the bottom of the tree (scaling coefficients + * of the leaf nodes) and it generates the scaling and wavelet + * coefficients of the parent node. It then proceeds recursively all the + * way up to the root nodes. This is generally used after a function + * projection to purify the coefficients obtained by quadrature at + * coarser scales which are therefore not precise enough. 
+ */ template void MWTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -119,6 +180,17 @@ template void MWTree::mwTransformUp() { } } +/** @brief Regenerates all scaling coeffs by MW transformation of existing s/w-coeffs + * on coarser scales + * + * @param[in] overwrite: if true the preexisting coefficients are overwritten + * + * @details The transformation starts at the rootNodes and proceeds + * recursively all the way to the leaf nodes. The existing scaling + * coefficeints will either be overwritten or added to. The latter + * operation is generally used after the operator application. + * + */ template void MWTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -143,6 +215,12 @@ template void MWTree::mwTransformDown(bool overwrite) } } +/** @brief Set the MW coefficients to zero, keeping the same tree structure + * + * @details Keeps the node structure of the tree, even though the zero + * function is representable at depth zero. One should then use \ref cropTree to remove + * unnecessary nodes. + */ template void MWTree::setZero() { TreeIterator it(*this); while (it.next()) { @@ -152,6 +230,13 @@ template void MWTree::setZero() { this->squareNorm = 0.0; } +/** @brief Increments node counter by one for non-GenNodes. + * + * @details TO BE DOCUMENTED + * \warning: This routine is not thread + * safe, and must NEVER be called outside a critical region in parallel. + * It's way. way too expensive to lock the tree, so don't even think + * about it. */ template void MWTree::incrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -169,6 +254,14 @@ template void MWTree::incrementNodeCount(int scale) { } } +/** @brief Decrements node counter by one for non-GenNodes. + * + * @details TO BE DOCUMENTED + * \warning: This routine is not thread + * safe, and must NEVER be called outside a critical region in parallel. + * It's way. 
way too expensive to lock the tree, so don't even think + * about it. + */ template void MWTree::decrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -184,6 +277,10 @@ template void MWTree::decrementNodeCount(int scale) { } } +/** @returns Total number of nodes in the tree, at given depth (not in use) + * + * @param[in] depth: Tree depth (0 depth is the coarsest scale) to count. + */ template int MWTree::getNNodesAtDepth(int depth) const { int N = 0; if (depth < 0) { @@ -194,11 +291,19 @@ template int MWTree::getNNodesAtDepth(int depth) const return N; } +/** @returns Size of all MW coefs in the tree, in kB */ template int MWTree::getSizeNodes() const { auto nCoefs = 1ll * getNNodes() * getTDim() * getKp1_d(); return sizeof(T) * nCoefs / 1024; } +/** @brief Finds and returns the node pointer with the given \ref NodeIndex, const version. + * + * @details Recursive routine to find and return the node with a given + * NodeIndex. This routine returns the appropriate Node, or a NULL + * pointer if the node does not exist, or if it is a + * GenNode. Recursion starts at the appropriate rootNode. + */ template const MWNode *MWTree::findNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -208,6 +313,13 @@ template const MWNode *MWTree::findNode(NodeInde return root.retrieveNodeNoGen(idx); } +/** @brief Finds and returns the node pointer with the given \ref NodeIndex. + * + * @details Recursive routine to find and return the node with a given + * NodeIndex. This routine returns the appropriate Node, or a NULL + * pointer if the node does not exist, or if it is a + * GenNode. Recursion starts at the appropriate rootNode. 
+ */ template MWNode *MWTree::findNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -217,6 +329,14 @@ template MWNode *MWTree::findNode(NodeIndex i return root.retrieveNodeNoGen(idx); } +/** @brief Finds and returns the node reference with the given NodeIndex. + * + * @details This routine ALWAYS returns the node you ask for. If the + * node does not exist, it will be generated by MW + * transform. Recursion starts at the appropriate rootNode and descends + * from this. + * The nodes are permanently added to the tree if create = true + */ template MWNode &MWTree::getNode(NodeIndex idx, bool create) { if (getRootBox().isPeriodic()) periodic::index_manipulation(idx, getRootBox().getPeriodic()); @@ -231,6 +351,14 @@ template MWNode &MWTree::getNode(NodeIndex id return *out; } +/** @brief Finds and returns the node with the given NodeIndex. + * + * @details This routine returns the Node you ask for, or the EndNode + * on the path to the requested node, if the requested one is deeper + * than the leaf node ancestor. It will never create or return + * GenNodes. Recursion starts at the appropriate rootNode and descends + * from this. + */ template MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } MWNode &root = getRootBox().getNode(idx); @@ -238,6 +366,13 @@ template MWNode &MWTree::getNodeOrEndNode(NodeIn return *root.retrieveNodeOrEndNode(idx); } +/** @brief Finds and returns the node with the given NodeIndex. Const version. + * + * @details This routine returns the Node you ask for, or the EndNode + * on the path to the requested node. It will never create or return + * GenNodes. Recursion starts at the appropriate rootNode and descends + * from this. 
+ */ template const MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } const MWNode &root = getRootBox().getNode(idx); @@ -245,6 +380,15 @@ template const MWNode &MWTree::getNodeOrEndNode( return *root.retrieveNodeOrEndNode(idx); } +/** @brief Finds and returns the node at a given depth that contains a given coordinate. + * + * @param[in] depth: requested node depth from root scale. + * @param[in] r: coordinates of an arbitrary point in space + * + * @details This routine ALWAYS returns the node you ask for, and will + * generate nodes that do not exist. Recursion starts at the + * appropriate rootNode and decends from this. + */ template MWNode &MWTree::getNode(Coord r, int depth) { MWNode &root = getRootBox().getNode(r); if (depth >= 0) { @@ -254,18 +398,44 @@ template MWNode &MWTree::getNode(Coord r, int } } +/** @brief Finds and returns the node at a given depth that contains a given coordinate. + * + * @param[in] depth: requested node depth from root scale. + * @param[in] r: coordinates of an arbitrary point in space + * + * @details This routine returns the Node you ask for, or the EndNode on + * the path to the requested node, and will never create or return GenNodes. + * Recursion starts at the appropriate rootNode and decends from this. + */ template MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) { + if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } + MWNode &root = getRootBox().getNode(r); return *root.retrieveNodeOrEndNode(r, depth); } +/** @brief Finds and returns the node at a given depth that contains a given coordinate. Const version + * + * @param[in] depth: requested node depth from root scale. 
+ * @param[in] r: coordinates of an arbitrary point in space + * + * @details This routine returns the Node you ask for, or the EndNode on + * the path to the requested node, and will never create or return GenNodes. + * Recursion starts at the appropriate rootNode and decends from this. + */ template const MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) const { + if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } const MWNode &root = getRootBox().getNode(r); return *root.retrieveNodeOrEndNode(r, depth); } +/** @brief Returns the list of all EndNodes + * + * @details copies the list of all EndNode pointers into a new vector + * and returns it. + */ template MWNodeVector *MWTree::copyEndNodeTable() { auto *nVec = new MWNodeVector; for (int n = 0; n < getNEndNodes(); n++) { @@ -275,6 +445,12 @@ template MWNodeVector *MWTree::copyEndNodeTable( return nVec; } +/** @brief Recreate the endNodeTable + * + * @details the endNodeTable is first deleted and then rebuilt from + * scratch. It makes use of the TreeIterator to traverse the tree. + * + */ template void MWTree::resetEndNodeTable() { clearEndNodeTable(); TreeIterator it(*this, TopDown, Hilbert); @@ -291,16 +467,55 @@ template int MWTree::countBranchNodes(int depth) { template int MWTree::countLeafNodes(int depth) { NOT_IMPLEMENTED_ABORT; + // int nNodes = 0; + // TreeIterator it(*this); + // while (it.next()) { + // MWNode &node = it.getNode(); + // if (node.getDepth() == depth or depth < 0) { + // if (node.isLeafNode()) { + // nNodes++; + // } + // } + // } + // return nNodes; } +/* Traverse tree and count nodes belonging to this rank. 
*/ template int MWTree::countNodes(int depth) { NOT_IMPLEMENTED_ABORT; + // TreeIterator it(*this); + // int count = 0; + // while (it.next()) { + // MWNode &node = it.getNode(); + // if (node.isGenNode()) { + // continue; + // } + // if (not node.isForeign()) { + // count++; + // } + // } + // return count; } +/* Traverse tree and count nodes with allocated coefficients. */ template int MWTree::countAllocNodes(int depth) { NOT_IMPLEMENTED_ABORT; + // TreeIterator it(*this); + // int count = 0; + // while (it.next()) { + // MWNode &node = it.getNode(); + // if (node.isGenNode()) { + // continue; + // } + // if (node.hasCoefs()) { + // count++; + // } + // } + // return count; } +/** @brief Prints a summary of the tree structure on the output file + */ template std::ostream &MWTree::print(std::ostream &o) const { o << " square norm: " << this->squareNorm << std::endl; o << " root scale: " << this->getRootScale() << std::endl; @@ -313,14 +528,25 @@ template std::ostream &MWTree::print(std::ostream &o) return o; } +/** @brief sets values for maxSquareNorm in all nodes + * + * @details it defines the upper bound of the squared norm \f$ + * ||f||^2_{\ldots} \f$ in this node or its descendents + */ template void MWTree::makeMaxSquareNorms() { NodeBox &rBox = this->getRootBox(); MWNode **roots = rBox.getNodes(); for (int rIdx = 0; rIdx < rBox.size(); rIdx++) { + // recursively set value of children and descendants roots[rIdx]->setMaxSquareNorm(); } } +/** @brief gives serialIx of a node from its NodeIndex + * + * @details gives a unique integer for each nodes corresponding to the position + * of the node in the serialized representation + */ template int MWTree::getIx(NodeIndex nIdx) { if (this->isLocal == false) MSG_ERROR("getIx only implemented in local representation"); if (NodeIndex2serialIx.count(nIdx) == 0) @@ -345,4 +571,4 @@ template class MWTree<1, ComplexDouble>; template class MWTree<2, ComplexDouble>; template class MWTree<3, ComplexDouble>; -} // namespace 
mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp index e68dd8a36..43b39c32d 100644 --- a/src/trees/MultiResolutionAnalysis.cpp +++ b/src/trees/MultiResolutionAnalysis.cpp @@ -32,6 +32,22 @@ namespace mrcpp { +/** @returns New MultiResolutionAnalysis (MRA) object + * + * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) + * + * @param[in] bb: 2-element integer array [Lower, Upper] defining the bounds for a BoundingBox object representing the computational domain + * @param[in] order: Maximum polynomial order of the multiwavelet basis, + * immediately used in the constructor of an InterpolatingBasis object which becomes an attribute of the MRA + * @param[in] depth: Exponent of the node refinement in base 2, relative to root scale. + * In other words, it is the maximum amount of refinement that we allow in a node, in order to avoid overflow of values. + * + * @details Constructor of the MultiResolutionAnalysis class from scratch, without requiring any pre-existing complex structure. + * The constructor calls the InterpolatingBasis basis constructor to generate the MultiWavelets basis of functions, + * then the BoundingBox constructor to create the computational domain. The constructor then checks if the generated node depth, or + * node refinement is beyond the root scale or the maximum depth allowed, in which case it will abort the process. + * Otherwise, the process goes on to setup the filters with the class' setupFilter method. 
+ */ template MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int order, int depth) : maxDepth(depth) @@ -42,6 +58,18 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int o setupFilter(); } +/** @returns New MultiResolutionAnalysis (MRA) object + * + * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) from a pre-existing BoundingBox object + * + * @param[in] bb: BoundingBox object representing the computational domain + * @param[in] order: (integer) Maximum polynomial order of the multiwavelet basis, + * immediately used in the constructor of an InterpolatingBasis object which becomes an attribute of the MRA + * @param[in] depth: (integer) Exponent of the node refinement in base 2, relative to root scale. + * In other words, it is the maximum amount of refinement that we allow in a node, in order to avoid overflow of values. + * + * @details Constructor of the MultiResolutionAnalysis class from a BoundingBox object. For more details see the first constructor. + */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, int order, int depth) : maxDepth(depth) @@ -52,6 +80,14 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, in setupFilter(); } +/** @returns New MultiResolutionAnalysis (MRA) object + * + * @brief Copy constructor for a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) + * + * @param[in] mra: Pre-existing MRA object + * + * @details Copy a MultiResolutionAnalysis object without modifying the original. For more details see the first constructor. 
+ */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysis &mra) : maxDepth(mra.maxDepth) @@ -62,6 +98,17 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysi setupFilter(); } +/** @returns New MultiResolutionAnalysis object + * + * @brief Constructor for a MultiResolutionAnalysis object from a pre-existing BoundingBox (computational domain) and a ScalingBasis (Multiwavelet basis) objects + * + * @param[in] bb: Computational domain as a BoundingBox object, taken by constant reference + * @param[in] sb: Polynomial basis (MW) as a ScalingBasis object + * @param[in] depth: Maximum allowed resolution depth, relative to root scale + * + * @details Creates a MRA object from pre-existing BoundingBox and ScalingBasis objects. These objects are taken as reference. For more details about the constructor itself, see the first + * constructor. + */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, const ScalingBasis &sb, int depth) : maxDepth(depth) @@ -72,6 +119,16 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, co setupFilter(); } +/** @returns Whether the two MRA objects are equal. + * + * @brief Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis, computational domain and maximum depth, and false otherwise + * + * @param[in] mra: MRA object, taken by constant reference + * + * @details Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis (ScalingBasis object), computational domain (BoundingBox + * object) and maximum depth (integer), and false otherwise. Computations on different MRAs cannot be combined; this operator can be used to make sure that multiple MRAs are compatible. For more + * information about the meaning of equality for BoundingBox and ScalingBasis objects, see their respective classes. 
+ */ template bool MultiResolutionAnalysis::operator==(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return false; if (this->world != mra.world) return false; @@ -79,6 +136,16 @@ template bool MultiResolutionAnalysis::operator==(const MultiResoluti return true; } +/** @returns Whether the two MRA objects are not equal. + * + * @brief Inequality operator for the MultiResolutionAnalysis class, returns false if both MRAs have the same polynomial basis, computational domain and maximum depth, and true otherwise + * + * @param[in] mra: MRA object, taken by constant reference + * + * @details Inequality operator for the MultiResolutionAnalysis class, returns false if both MRAs have the same polynomial basis (ScalingBasis object), computational domain (BoundingBox + * object) and maximum depth (integer), and true otherwise. Opposite of the == operator. For more information about the meaning of equality for BoundingBox and ScalingBasis objects, see their + * respective classes. + */ template bool MultiResolutionAnalysis::operator!=(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return true; if (this->world != mra.world) @@ -90,6 +157,14 @@ template bool MultiResolutionAnalysis::operator!=(const MultiResoluti return false; } +/** + * + * @brief Displays the MRA's attributes in the outstream defined in the Printer class + * + * @details This function displays the attributes of the MRA using the Printer class. + * By default, the Printer class writes all information in the output file, not the terminal. + * + */ template void MultiResolutionAnalysis::print() const { print::separator(0, ' '); print::header(0, "MultiResolution Analysis"); @@ -99,6 +174,15 @@ template void MultiResolutionAnalysis::print() const { print::separator(0, '=', 2); } +/** + * + * @brief Initializes the MW filters for the given MW basis. 
+ * + * @details By calling the get() function for the appropriate MW basis, the global + * FilterCache Singleton object is initialized. Any subsequent reference to this + * particular filter will point to the same unique global object. + * + */ template void MultiResolutionAnalysis::setupFilter() { getLegendreFilterCache(lfilters); getInterpolatingFilterCache(ifilters); @@ -116,6 +200,11 @@ template void MultiResolutionAnalysis::setupFilter() { } } +/** @returns Maximum possible distance between two points in the MRA domain + * + * @brief Computes the difference between the lower and upper bounds of the computational domain + * + */ template double MultiResolutionAnalysis::calcMaxDistance() const { const Coord &lb = getWorldBox().getLowerBounds(); const Coord &ub = getWorldBox().getUpperBounds(); @@ -126,4 +215,4 @@ template class MultiResolutionAnalysis<1>; template class MultiResolutionAnalysis<2>; template class MultiResolutionAnalysis<3>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp index 8c01ed388..f4f72061b 100644 --- a/src/trees/NodeAllocator.cpp +++ b/src/trees/NodeAllocator.cpp @@ -44,6 +44,7 @@ NodeAllocator::NodeAllocator(FunctionTree *tree, SharedMemory *me , maxNodesPerChunk(nodesPerChunk) , tree_p(tree) , shmem_p(mem) { + // reserve space for chunk pointers to avoid excessive reallocation this->nodeChunks.reserve(100); this->coefChunks.reserve(100); @@ -60,6 +61,7 @@ NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory *mem, i , maxNodesPerChunk(nodesPerChunk) , tree_p(tree) , shmem_p(mem) { + // reserve space for chunk pointers to avoid excessive reallocation this->nodeChunks.reserve(100); this->coefChunks.reserve(100); @@ -76,7 +78,7 @@ template NodeAllocator::NodeAllocator(OperatorTree *tr template NodeAllocator::~NodeAllocator() { for (auto &chunk : this->nodeChunks) delete[](char *) chunk; - if (not isShared()) + if (not isShared()) // 
if the data is shared, it must be freed by MPI_Win_free for (auto &chunk : this->coefChunks) delete[] chunk; this->stackStatus.clear(); MRCPP_DESTROY_OMP_LOCK(); @@ -98,39 +100,46 @@ template T *NodeAllocator::getCoef_p(int sIdx) { template MWNode *NodeAllocator::getNodeNoLock(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr; - int chunk = sIdx / this->maxNodesPerChunk; - int cIdx = sIdx % this->maxNodesPerChunk; + int chunk = sIdx / this->maxNodesPerChunk; // which chunk + int cIdx = sIdx % this->maxNodesPerChunk; // position in chunk return this->nodeChunks[chunk] + cIdx; } template T *NodeAllocator::getCoefNoLock(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr; - int chunk = sIdx / this->maxNodesPerChunk; - int idx = sIdx % this->maxNodesPerChunk; + int chunk = sIdx / this->maxNodesPerChunk; // which chunk + int idx = sIdx % this->maxNodesPerChunk; // position in chunk return this->coefChunks[chunk] + idx * this->coefsPerNode; } template int NodeAllocator::alloc(int nNodes, bool coefs) { MRCPP_SET_OMP_LOCK(); if (nNodes <= 0 or nNodes > this->maxNodesPerChunk) MSG_ABORT("Cannot allocate " << nNodes << " nodes"); + // move topstack to start of next chunk if current chunk is too small int cIdx = this->topStack % (this->maxNodesPerChunk); bool chunkOverflow = ((cIdx + nNodes) > this->maxNodesPerChunk); if (chunkOverflow) this->topStack = this->maxNodesPerChunk * ((this->topStack + nNodes - 1) / this->maxNodesPerChunk); + // append chunk if necessary int chunk = this->topStack / this->maxNodesPerChunk; bool needNewChunk = (chunk >= this->nodeChunks.size()); if (needNewChunk) appendChunk(coefs); + // return value is index of first new node auto sIdx = this->topStack; + // we require that the index for first child is a multiple of 2**D + // so that we can find the sibling rank using rank=sIdx%(2**D) if (sIdx % nNodes != 0) MSG_WARN("Warning: recommended number of siblings is 2**D"); + // fill stack status auto 
&status = this->stackStatus; for (int i = sIdx; i < sIdx + nNodes; i++) { if (status[i] != 0) MSG_ERROR(" NodeStackStatus: not available [" << i << "] : " << status[i]); status[i] = 1; } + // advance stack pointers this->nNodes += nNodes; this->topStack += nNodes; this->last_p = getNodeNoLock(sIdx) + nNodes; @@ -144,12 +153,13 @@ template void NodeAllocator::dealloc(int sIdx) { if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx); auto *node_p = getNodeNoLock(sIdx); node_p->~MWNode(); - this->stackStatus[sIdx] = 0; - if (sIdx == this->topStack - 1) { + this->stackStatus[sIdx] = 0; // mark as available + if (sIdx == this->topStack - 1) { // top of stack while (this->stackStatus[this->topStack - 1] == 0) { this->topStack--; if (this->topStack < 1) break; } + // has to redefine last_p this->last_p = getNodeNoLock(this->topStack); } this->nNodes--; @@ -170,6 +180,7 @@ template void NodeAllocator::init(int nChunks, bool co if (nChunks <= 0) MSG_ABORT("Invalid number of chunks: " << nChunks); for (int i = getNChunks(); i < nChunks; i++) appendChunk(coefs); + // reinitialize stacks int nodeCount = this->nodeChunks.size() * this->maxNodesPerChunk; this->stackStatus.resize(nodeCount); std::fill(this->stackStatus.begin(), this->stackStatus.end(), 0); @@ -177,11 +188,14 @@ template void NodeAllocator::init(int nChunks, bool co } template void NodeAllocator::appendChunk(bool coefs) { + // make coeff chunk if (coefs) { T *c_chunk = nullptr; if (this->isShared()) { + // for coefficients, take from the shared memory block c_chunk = this->shmem_p->sh_end_ptr; this->shmem_p->sh_end_ptr += (this->coefsPerNode * this->maxNodesPerChunk); + // may increase size dynamically in the future if (this->shmem_p->sh_max_ptr < this->shmem_p->sh_end_ptr) MSG_ABORT("Shared block too small"); } else { c_chunk = new T[getCoefChunkSize() / sizeof(T)]; @@ -189,6 +203,7 @@ template void NodeAllocator::appendChunk(bool coefs) { 
this->coefChunks.push_back(c_chunk); } + // make node chunk auto n_chunk = (MWNode *)new char[getNodeChunkSize()]; for (int i = 0; i < this->maxNodesPerChunk; i++) { n_chunk[i].serialIx = -1; @@ -197,32 +212,35 @@ template void NodeAllocator::appendChunk(bool coefs) { } this->nodeChunks.push_back(n_chunk); + // append to stackStatus int oldsize = this->stackStatus.size(); int newsize = oldsize + this->maxNodesPerChunk; this->stackStatus.resize(newsize); std::fill(this->stackStatus.begin() + oldsize, this->stackStatus.end(), 0); } +/** Fill all holes in the chunks with occupied nodes, then remove all empty chunks */ template int NodeAllocator::compress() { MRCPP_SET_OMP_LOCK(); int nNodes = (1 << D); if (this->maxNodesPerChunk * this->nodeChunks.size() <= getTree().getNNodes() + this->maxNodesPerChunk + nNodes - 1) { MRCPP_UNSET_OMP_LOCK(); - return 0; + return 0; // nothing to compress } int posocc = 0; - int posavail = getTree().getRootBox().size(); + int posavail = getTree().getRootBox().size(); // start after root nodes while (true) { posavail = findNextAvailable(posavail, nNodes); - if (posavail >= this->topStack) break; + if (posavail >= this->topStack) break; // treated all nodes posocc = findNextOccupied(posavail); - if (posocc >= this->topStack) break; + if (posocc >= this->topStack) break; // treated all nodes moveNodes(nNodes, posocc, posavail); } + // find the last used node posocc = this->topStack - 1; while (this->stackStatus[posocc] == 0 and posocc > 0) posocc--; this->topStack = posocc + 1; @@ -236,18 +254,21 @@ template int NodeAllocator::compress() { } template int NodeAllocator::deleteUnusedChunks() { + // number of occupied chunks int nChunksTotal = getNChunks(); int nChunksUsed = getNChunksUsed(); - if (nChunksTotal == nChunksUsed) return 0; + if (nChunksTotal == nChunksUsed) return 0; // no unused chunks assert(nChunksTotal >= nChunksUsed); for (int i = nChunksUsed; i < nChunksTotal; i++) delete[](char *)(this->nodeChunks[i]); if (isShared()) { 
+ // shared coefficients cannot be fully deallocated, only pointer is moved. getMemory().sh_end_ptr -= (nChunksTotal - nChunksUsed) * this->coefsPerNode * this->maxNodesPerChunk; } else { for (int i = nChunksUsed; i < nChunksTotal; i++) delete[] this->coefChunks[i]; } + // shrink the stacks this->nodeChunks.resize(nChunksUsed); this->coefChunks.resize(nChunksUsed); this->stackStatus.resize(nChunksUsed * this->maxNodesPerChunk); @@ -263,27 +284,34 @@ template void NodeAllocator::moveNodes(int nNodes, int assert(srcNode != nullptr); assert(dstNode != nullptr); + // check that all siblings are consecutive. Should never be root node. for (int i = 0; i < nNodes; i++) assert(this->stackStatus[dstIdx + i] == 0); - for (int i = 1; i < nNodes; i++) assert((srcNode + i)->parent->serialIx == srcNode->parent->serialIx); + for (int i = 1; i < nNodes; i++) assert((srcNode + i)->parent->serialIx == srcNode->parent->serialIx); // siblings + // just copy everything "as is" for (int i = 0; i < nNodes * this->sizeOfNode; i++) ((char *)dstNode)[i] = ((char *)srcNode)[i]; + // coefs have new addresses T *coefs_p = getCoefNoLock(dstIdx); - if (coefs_p == nullptr) NOT_IMPLEMENTED_ABORT; + if (coefs_p == nullptr) NOT_IMPLEMENTED_ABORT; // Nodes without coefs not handled atm for (int i = 0; i < nNodes; i++) (dstNode + i)->coefs = coefs_p + i * getNCoefs(); + // copy coefs to new address if (not isShared()) { for (int i = 0; i < nNodes * this->coefsPerNode; i++) dstNode->coefs[i] = srcNode->coefs[i]; } else { - if (getMemory().rank == 0) + if (getMemory().rank == 0) // only master copies the data. 
careful with sync for (int i = 0; i < nNodes * this->coefsPerNode; i++) dstNode->coefs[i] = srcNode->coefs[i]; } + // update node for (int i = 0; i < nNodes; i++) (dstNode + i)->serialIx = dstIdx + i; + // update parent dstNode->parent->childSerialIx = dstIdx; for (int i = 0; i < nNodes; i++) dstNode->parent->children[i] = dstNode + i; + // update children for (int i = 0; i < nNodes; i++) { for (int j = 0; j < (dstNode + i)->getNChildren(); j++) { (dstNode + i)->children[j]->parentSerialIx = dstIdx + i; @@ -291,13 +319,16 @@ template void NodeAllocator::moveNodes(int nNodes, int } } + // mark moved nodes as occupied for (int i = 0; i < nNodes; i++) this->stackStatus[dstIdx + i] = 1; dstIdx += nNodes; + // delete "old" nodes for (int i = 0; i < nNodes; i++) this->stackStatus[srcIdx + i] = 0; for (int i = 0; i < nNodes; i++) (srcNode + i)->serialIx = -1; } +// Last positions on a chunk cannot be used if there is no place for nNodes siblings on the same chunk template int NodeAllocator::findNextAvailable(int sIdx, int nNodes) const { assert(sIdx >= 0); assert(sIdx < this->stackStatus.size()); @@ -331,6 +362,7 @@ template int NodeAllocator::findNextOccupied(int sIdx) return sIdx; } +/** Traverse tree and redefine pointer, counter and tables. 
*/ template void NodeAllocator::reassemble() { MRCPP_SET_OMP_LOCK(); this->nNodes = 0; @@ -362,6 +394,7 @@ template void NodeAllocator::reassemble() { if (node_p->isEndNode()) getTree().squareNorm += node_p->getSquareNorm(); if (node_p->isEndNode()) getTree().endNodeTable.push_back(node_p); + // normally (intel) the virtual table does not change, but we overwrite anyway *(char **)(node_p) = this->cvptr; node_p->initNodeLock(); @@ -378,7 +411,7 @@ template void NodeAllocator::reassemble() { stack.push(child_p); child_p++; } - this->stackStatus[sIdx] = 1; + this->stackStatus[sIdx] = 1; // occupied } this->last_p = getNodeNoLock(this->topStack); assert(this->last_p != nullptr); @@ -415,4 +448,4 @@ template class NodeAllocator<1, ComplexDouble>; template class NodeAllocator<2, ComplexDouble>; template class NodeAllocator<3, ComplexDouble>; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/OperatorNode.cpp b/src/trees/OperatorNode.cpp index 74679d0bd..37f576eac 100644 --- a/src/trees/OperatorNode.cpp +++ b/src/trees/OperatorNode.cpp @@ -42,6 +42,17 @@ void OperatorNode::dealloc() { this->tree->getNodeAllocator().dealloc(sIdx); } +/** + * @brief Calculate one specific component norm of the OperatorNode (TODO: needs to be specified more). + * + * @param[in] i: TODO: needs to be specified + * + * @details OperatorNorms are defined as matrix 2-norms that are expensive to calculate. + * Thus we calculate some cheaper upper bounds for this norm for thresholding. + * First a simple vector norm, then a product of the 1- and infinity-norm. + * (TODO: needs to be made more precise). 
+ * + */ double OperatorNode::calcComponentNorm(int i) const { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); @@ -53,7 +64,7 @@ double OperatorNode::calcComponentNorm(int i) const { int kp1 = this->getKp1(); int kp1_d = this->getKp1_d(); const VectorXd &comp_vec = coef_vec.segment(i * kp1_d, kp1_d); - const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1); + const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1); // one can use MatrixXd OperatorNode::getComponent(int i) double norm = 0.0; double vecNorm = comp_vec.norm(); @@ -68,6 +79,20 @@ double OperatorNode::calcComponentNorm(int i) const { return norm; } +/** @brief Matrix elements of the non-standard form. + * + * @param[in] i: Index enumerating the matrix type in the non-standard form. + * @returns A submatrix of \f$ (k + 1) \times (k + 1) \f$-size from the non-standard form. + * + * @details OperatorNode is uniquely associated with a scale \f$ n \f$ and translation + * \f$ l = -2^n + 1, \ldots, 2^n - 1 \f$. + * The non-standard form \f$ T_n, B_n, C_n, A_n \f$ defines matrices + * \f$ \sigma_l^n, \beta_l^n, \gamma_l^n, \alpha_l^n \f$ for a given pair \f$ (n, l) \f$. + * One of these matrices is returned by the method according to the choice of the index parameter + * \f$ i = 0, 1, 2, 3 \f$, respectively. + * For example, \f$ \alpha_l^n = \text{getComponent}(3) \f$. 
+ * + */ MatrixXd OperatorNode::getComponent(int i) { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); @@ -95,6 +120,7 @@ void OperatorNode::createChildren(bool coefs) { this->childSerialIx = sIdx; for (int cIdx = 0; cIdx < nChildren; cIdx++) { + // construct into allocator memory new (child_p) OperatorNode(this, cIdx); this->children[cIdx] = child_p; @@ -129,4 +155,4 @@ void OperatorNode::deleteChildren() { this->setIsEndNode(); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/trees/OperatorTree.cpp b/src/trees/OperatorTree.cpp index f91f13a47..890f2677c 100644 --- a/src/trees/OperatorTree.cpp +++ b/src/trees/OperatorTree.cpp @@ -98,6 +98,14 @@ void OperatorTree::clearBandWidth() { this->bandWidth = nullptr; } +/** @brief Calculates band widths of the non-standard form matrices. + * + * @param[in] prec: Precision used for thresholding + * + * @details It is starting from \f$ l = 0 \f$ and updating the band width value each time we encounter + * considerable value while keeping increasing \f$ l \f$, that stands for the distance to the diagonal. + * + */ void OperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -125,10 +133,32 @@ void OperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } +/** @brief Checks if the distance to diagonal is bigger than the operator band width. + * + * @param[in] oTransl: distance to diagonal + * @param[in] o_depth: scaling order + * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. + * + * @returns True if \b oTransl is outside of the band and False otherwise. + * + */ bool OperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { return abs(oTransl) > this->bandWidth->getWidth(o_depth, idx); } +/** @brief Cleans up end nodes. 
+ * + * @param[in] trust_scale: there is no cleaning down below \b trust_scale (it speeds up operator building). + * + * @details Traverses the tree and rewrites end nodes having branch node twins, + * i.e. identical with respect to scale and translation. + * This method is very handy, when an adaptive operator construction + * can make a significant noise at low scaling depth. + * Its need comes from the fact that mwTransform up cannot override + * rubbish that can potentially stick to end nodes at a particular level, + * and as a result spread further up to the root with mwTransform. + * + */ void OperatorTree::removeRoughScaleNoise(int trust_scale) { MWNode<2> *p_rubbish; // possibly inexact end node MWNode<2> *p_counterpart; // exact branch node @@ -161,6 +191,12 @@ void OperatorTree::getMaxTranslations(VectorXi &maxTransl) { } } +/** Make 1D lists, addressable from [-l, l] scale by scale, of operator node + * pointers for fast operator retrieval. This method is not thread safe, + * since it projects missing operator nodes on the fly. Hence, it must NEVER + * be called within a parallel region, or all hell will break loose. This is + * not really a problem, but you have been warned. + */ void OperatorTree::setupOperNodeCache() { int nScales = this->nodesAtDepth.size(); int rootScale = this->getRootScale(); @@ -209,6 +245,12 @@ void OperatorTree::clearOperNodeCache() { } } +/** Regenerate all s/d-coeffs by backtransformation, starting at the bottom and + * thus purifying all coefficients. Option to overwrite or add up existing + * coefficients of BranchNodes (can be used after operator application). + * Reimplementation of MWTree::mwTransform() without OMP, as calculation + * of OperatorNorm is done using random vectors, which is non-deterministic + * in parallel. FunctionTrees should be fine. 
*/ void OperatorTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -222,6 +264,12 @@ void OperatorTree::mwTransformUp() { } } +/** Regenerate all scaling coeffs by MW transformation of existing s/w-coeffs + * on coarser scales, starting at the rootNodes. Option to overwrite or add up + * existing scaling coefficients (can be used after operator application). + * Reimplementation of MWTree::mwTransform() without OMP, as calculation + * of OperatorNorm is done using random vectors, which is non-deterministic + * in parallel. FunctionTrees should be fine. */ void OperatorTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -239,4 +287,4 @@ std::ostream &OperatorTree::print(std::ostream &o) const { return MWTree<2>::print(o); } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp index b924b233d..f8c111a53 100644 --- a/src/utils/Bank.cpp +++ b/src/utils/Bank.cpp @@ -1,28 +1,3 @@ -/* - * MRCPP, a numerical library based on multiresolution analysis and - * the multiwavelet basis which provide low-scaling algorithms as well as - * rigorous error control in numerical computations. - * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. - * - * This file is part of MRCPP. - * - * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * MRCPP is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with MRCPP. If not, see . - * - * For information on the complete list of contributors to MRCPP, see: - * - */ - #include "Printer.h" #include "Timer.h" @@ -33,31 +8,35 @@ namespace mrcpp { using namespace Eigen; using namespace std; -int metadata_block[3]; +int metadata_block[3]; // can add more metadata in future int const size_metadata = 3; Bank::~Bank() { + // delete all data and accounts } struct Blockdata_struct { - std::vector data; - int N_rows = 0; - std::map id2data; - std::vector id; + std::vector data; // to store the incoming data. One column for each orbital on the same node. + int N_rows = 0; // the number of coefficients in one column of the block. + std::map id2data; // internal index of the data in the block + std::vector id; // the id of each column. Either nodeid, or orbid }; struct OrbBlock_struct { - std::vector data; - std::map id2data; - std::vector id; + std::vector data; // pointer to the data + std::map id2data; // internal index of the data in the block + std::vector id; // the nodeid of the data + // note that N_rows can be different inside the same orbblock: root node have scaling and wavelets, other nodes have only wavelets }; struct mem_struct { - std::vector chunk_p; - int p = -1; - int chunk_size = 1024 * 1024 * 4; + std::vector chunk_p; // vector with allocated chunks + int p = -1; // position of next available memory (not allocated if < 0) + // on Betzy 1024*1024*4 ok, 1024*1024*2 NOT ok: leads to memory fragmentation (on "Betzy" 2023) + int chunk_size = 1024 * 1024 * 4; // chunksize (in number of doubles). 
data_p[i]+chunk_size is end of chunk i int account = -1; double *get_mem(int size) { - if (p < 0 or size > chunk_size or p + size > chunk_size) { + if (p < 0 or size > chunk_size or p + size > chunk_size) { // allocate new chunk of memory if (size > 1024 * 1024) { + // make a special chunk just for this double *m_p = new double[size]; chunk_p.push_back(m_p); p = -1; @@ -73,12 +52,12 @@ struct mem_struct { return m_p; } }; -std::map *> get_nodeid2block; -std::map *> get_orbid2block; +std::map *> get_nodeid2block; // to get block from its nodeid (all coeff for one node) +std::map *> get_orbid2block; // to get block from its orbid std::map mem; -int const MIN_SCALE = -999; +int const MIN_SCALE = -999; // Smaller than smallest scale int naccounts = 0; void Bank::open() { @@ -96,6 +75,7 @@ void Bank::open() { int next_task = 0; int tot_ntasks = 0; std::map> readytasks; + // The bank never goes out of this loop until it receives a close message! while (true) { MPI_Recv(messages, message_size, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, comm_bank, &status); if (printinfo) @@ -103,23 +83,26 @@ void Bank::open() { << messages[2] << std::endl; int message = messages[0]; + // can be called directly: if (message == CLOSE_BANK) { if (is_bank and printinfo) std::cout << "Bank is closing" << std::endl; this->clear_bank(); - break; + break; // close bank, i.e stop listening for incoming messages } else if (message == GET_MAXTOTDATA) { - int maxsize_int = maxsize / 1024; + int maxsize_int = maxsize / 1024; // convert into MB MPI_Send(&maxsize_int, 1, MPI_INT, status.MPI_SOURCE, 1171, comm_bank); continue; } else if (message == GET_TOTDATA) { - int maxsize_int = totcurrentsize / 1024; + int maxsize_int = totcurrentsize / 1024; // convert into MB MPI_Send(&maxsize_int, 1, MPI_INT, status.MPI_SOURCE, 1172, comm_bank); continue; } else if (message == NEW_ACCOUNT) { + // we just have to pick out a number that is not already assigned int account = (max_account_id + 1) % 1000000000; - while 
(get_deposits.count(account)) account = (account + 1) % 1000000000; + while (get_deposits.count(account)) account = (account + 1) % 1000000000; // improbable this is used max_account_id = account; naccounts++; + // create default content get_deposits[account] = new std::vector; get_deposits[account]->resize(1); get_id2ix[account] = new std::map; @@ -137,6 +120,8 @@ void Bank::open() { continue; } + // the following is only accessible through an account + int account = messages[1]; auto it_dep = get_deposits.find(account); if (it_dep == get_deposits.end() || it_dep->second == nullptr) { @@ -144,7 +129,7 @@ void Bank::open() { MSG_ABORT("Account error"); } std::vector &deposits = *get_deposits[account]; - std::map &id2ix = *get_id2ix[account]; + std::map &id2ix = *get_id2ix[account]; // gives zero if id is not defined std::map &id2qu = *get_id2qu[account]; std::vector &queue = *get_queue[account]; std::map &orbid2block = *get_orbid2block[account]; @@ -159,6 +144,7 @@ void Bank::open() { if (message == CLOSE_ACCOUNT) { get_numberofclients[account]--; if (get_numberofclients[account] == 0) { + // all clients have closed the account. We remove the account. 
remove_account(account); } } @@ -167,29 +153,33 @@ void Bank::open() { this->clear_bank(); for (auto const &block : nodeid2block) { if (block.second.data.size() > 0) { - currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; - totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; + currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB + totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; // converted into kB } } nodeid2block.clear(); orbid2block.clear(); + // send message that it is ready (value of message is not used) MPI_Ssend(&message, 1, MPI_INT, status.MPI_SOURCE, 77, comm_bank); } else if (message == GET_NODEDATA or message == GET_NODEBLOCK) { - int nodeid = messages[2]; + // NB: has no queue system yet + int nodeid = messages[2]; // which block to fetch from if (nodeid2block.count(nodeid)) { Blockdata_struct &block = nodeid2block[nodeid]; - int dataindex = 0; + int dataindex = 0; // internal index of the data in the block int size = 0; if (message == GET_NODEDATA) { - int orbid = messages[3]; - dataindex = block.id2data[orbid]; - size = block.N_rows; + int orbid = messages[3]; // which part of the block to fetch + dataindex = block.id2data[orbid]; // column of the data in the block + size = block.N_rows; // number of doubles to fetch if (size != messages[4]) std::cout << "ERROR nodedata has wrong size" << std::endl; double *data_p = block.data[dataindex]; if (size > 0) MPI_Send(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } else { + // send entire block. First make one contiguous superblock + // Prepare the data as one contiguous block if (block.data.size() == 0) std::cout << "Zero size blockdata! 
" << nodeid << " " << block.N_rows << std::endl; MatrixXd DataBlock(block.N_rows, block.data.size()); size = block.N_rows * block.data.size(); @@ -197,43 +187,49 @@ void Bank::open() { for (int j = 0; j < block.data.size(); j++) { for (int i = 0; i < block.N_rows; i++) { DataBlock(i, j) = block.data[j][i]; } } - dataindex = 0; - metadata_block[0] = nodeid; - metadata_block[1] = block.data.size(); - metadata_block[2] = size; + dataindex = 0; // start from first column + // send info about the size of the superblock + metadata_block[0] = nodeid; // nodeid + metadata_block[1] = block.data.size(); // number of columns + metadata_block[2] = size; // total size = rows*columns MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); + // send info about the id of each column MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank); if (size > 0) MPI_Send(DataBlock.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } } else { if (printinfo) std::cout << " block " << nodeid << " does not exist " << std::endl; + // Block with this id does not exist. 
if (message == GET_NODEDATA) { - int size = messages[4]; + int size = messages[4]; // number of doubles to send if (size == 0) { std::cout << "WARNING: GET_NODEDATA asks for zero size data" << std::endl; metadata_block[2] = size; MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 3, comm_bank); } else { - std::vector zero(size, 0.0); + std::vector zero(size, 0.0); // send zeroes MPI_Ssend(zero.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } } else { metadata_block[0] = nodeid; - metadata_block[1] = 0; - metadata_block[2] = 0; + metadata_block[1] = 0; // number of columns + metadata_block[2] = 0; // total size = rows*columns MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); } } } else if (message == GET_ORBBLOCK) { - int orbid = messages[2]; + // NB: BLOCKDATA has no queue system yet + int orbid = messages[2]; // which block to fetch from if (orbid2block.count(orbid)) { OrbBlock_struct &block = orbid2block[orbid]; if (block.data.size() == 0) std::cout << "Zero size blockdata! C " << orbid << " " << std::endl; + // send entire block. First make one contiguous superblock + // Prepare the data as one contiguous block int size = 0; for (int j = 0; j < block.data.size(); j++) { int nodeid = block.id[j]; - int Nrows = nodeid2block[nodeid].N_rows; + int Nrows = nodeid2block[nodeid].N_rows; // note that root nodes have scaling and wavelets, while other nodes have only wavelets -> N_rows is not a constant. 
size += Nrows; } std::vector coeff(size); @@ -243,28 +239,33 @@ void Bank::open() { int Nrows = nodeid2block[nodeid].N_rows; for (int i = 0; i < Nrows; i++) { coeff[ij++] = block.data[j][i]; } } + // send info about the size of the superblock metadata_block[0] = orbid; - metadata_block[1] = block.data.size(); - metadata_block[2] = size; + metadata_block[1] = block.data.size(); // number of columns + metadata_block[2] = size; // total size = rows*columns MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank); MPI_Send(coeff.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank); } else { + // it is possible and allowed that the block has not been written if (printinfo) std::cout << " block does not exist " << orbid << " " << orbid2block.count(orbid) << std::endl; + // Block with this id does not exist. metadata_block[0] = orbid; - metadata_block[1] = 0; - metadata_block[2] = 0; + metadata_block[1] = 0; // number of columns + metadata_block[2] = 0; // total size = rows*columns MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank); } } else if (message == GET_FUNCTION or message == GET_FUNCTION_AND_WAIT or message == GET_FUNCTION_AND_DELETE or message == GET_FUNCTION or message == GET_DATA) { + // withdrawal int id = messages[2]; if (message == GET_DATA and messages[3] > MIN_SCALE) { NodeIndex<3> nIdx; nIdx.setScale(messages[4]); nIdx.setTranslation({messages[2], messages[5], messages[6]}); if (nIdx2id.count(nIdx) == 0) { + // data is not yet saved, but one can hope it will be created at some stage id = nIdx2id.size(); nIdx2id[nIdx] = id; } else { @@ -275,15 +276,18 @@ void Bank::open() { if (id2ix.count(id) == 0 or ix == 0) { if (printinfo) std::cout << world_rank << " not found " << id << " " << message << std::endl; if (message == GET_FUNCTION or message == GET_FUNCTION_AND_DELETE) { + // do not wait for the orbital to 
arrive int found = 0; if (printinfo) std::cout << world_rank << " sending found 0 to " << status.MPI_SOURCE << std::endl; MPI_Send(&found, 1, MPI_INT, status.MPI_SOURCE, 117, comm_bank); } else { + // the id does not exist. Put in queue and Wait until it is defined if (printinfo) std::cout << world_rank << " queuing " << id << " " << id2ix.count(id) << ix << std::endl; if (id2qu[id] == 0) { queue.push_back({id, {status.MPI_SOURCE}}); id2qu[id] = queue.size() - 1; } else { + // somebody is already waiting for this id. queue in queue queue[id2qu[id]].clients.push_back(status.MPI_SOURCE); } } @@ -306,16 +310,18 @@ void Bank::open() { if (message == GET_DATA) { MPI_Send(deposits[ix].data, deposits[ix].datasize, MPI_DOUBLE, status.MPI_SOURCE, 1, comm_bank); } } } else if (message == SAVE_NODEDATA) { - int nodeid = messages[2]; - int orbid = messages[3]; - int size = messages[4]; + int nodeid = messages[2]; // which block to write + int orbid = messages[3]; // which part of the block + int size = messages[4]; // number of doubles + // test if the block exists already if (printinfo) std::cout << world_rank << " save data nodeid " << nodeid << " size " << size << std::endl; + // append the incoming data Blockdata_struct &block = nodeid2block[nodeid]; - block.id2data[orbid] = nodeid2block[nodeid].data.size(); - double *data_p = mem[account]->get_mem(size); - currentsize[account] += size / 128; - totcurrentsize += size / 128; + block.id2data[orbid] = nodeid2block[nodeid].data.size(); // internal index of the data in the block + double *data_p = mem[account]->get_mem(size); // new double[size]; + currentsize[account] += size / 128; // converted into kB + totcurrentsize += size / 128; // converted into kB this->maxsize = std::max(totcurrentsize, this->maxsize); block.data.push_back(data_p); block.id.push_back(orbid); @@ -323,15 +329,18 @@ void Bank::open() { block.N_rows = size; OrbBlock_struct &orbblock = orbid2block[orbid]; - orbblock.id2data[nodeid] = orbblock.data.size(); 
+ orbblock.id2data[nodeid] = orbblock.data.size(); // internal index of the data in the block orbblock.data.push_back(data_p); orbblock.id.push_back(nodeid); + // orbblock.N_rows.push_back(size); MPI_Recv(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 1, comm_bank, &status); if (printinfo) std::cout << " written block " << nodeid << " id " << orbid << " subblocks " << nodeid2block[nodeid].data.size() << std::endl; } else if (message == SAVE_FUNCTION or message == SAVE_DATA) { + // make a new deposit int id = messages[2]; if (message == SAVE_DATA and messages[4] > MIN_SCALE) { + // has to find or create unique id from NodeIndex. Use the same internal mapping for all trees NodeIndex<3> nIdx; nIdx.setScale(messages[4]); nIdx.setTranslation({messages[2], messages[5], messages[6]}); @@ -346,26 +355,27 @@ void Bank::open() { if (id2ix[id]) { std::cout << "WARNING: id " << id << " exists already" << " " << status.MPI_SOURCE << " " << message << " " << messages[1] << std::endl; - ix = id2ix[id]; + ix = id2ix[id]; // the deposit exist from before. 
Will be overwritten exist_flag = 1; if (message == SAVE_DATA and !deposits[ix].hasdata) { datasize = messages[3]; exist_flag = 0; + // deposits[ix].data = new double[datasize]; deposits[ix].data = mem[account]->get_mem(datasize); - currentsize[account] += datasize / 128; - totcurrentsize += datasize / 128; + currentsize[account] += datasize / 128; // converted into kB + totcurrentsize += datasize / 128; // converted into kB this->maxsize = std::max(totcurrentsize, this->maxsize); deposits[ix].hasdata = true; } } else { - ix = deposits.size(); + ix = deposits.size(); // NB: ix is now index of last element + 1 deposits.resize(ix + 1); if (message == SAVE_FUNCTION) deposits[ix].orb = new CompFunction<3>(0); if (message == SAVE_DATA) { datasize = messages[3]; - deposits[ix].data = mem[account]->get_mem(datasize); - currentsize[account] += datasize / 128; - totcurrentsize += datasize / 128; + deposits[ix].data = mem[account]->get_mem(datasize); // new double[datasize]; + currentsize[account] += datasize / 128; // converted into kB + totcurrentsize += datasize / 128; // converted into kB this->maxsize = std::max(totcurrentsize, this->maxsize); deposits[ix].hasdata = true; } @@ -387,30 +397,33 @@ void Bank::open() { MPI_Recv(deposits[ix].data, datasize, MPI_DOUBLE, deposits[ix].source, 1, comm_bank, &status); } if (id2qu[deposits[ix].id] != 0) { + // someone is waiting for those data. 
Send to them int iq = id2qu[deposits[ix].id]; if (deposits[ix].id != queue[iq].id) std::cout << ix << " Bank queue accounting error " << std::endl; for (int iqq : queue[iq].clients) { if (message == SAVE_FUNCTION) { send_function(*deposits[ix].orb, iqq, 1, comm_bank); } if (message == SAVE_DATA) { MPI_Send(deposits[ix].data, messages[3], MPI_DOUBLE, iqq, 1, comm_bank); } } - queue[iq].clients.clear(); + queue[iq].clients.clear(); // cannot erase entire queue[iq], because that would require to shift all the + // id2qu value larger than iq queue[iq].id = -1; id2qu.erase(deposits[ix].id); } + // Task manager members: } else if (message == INIT_TASKS) { tot_ntasks = messages[2]; next_task = 0; } else if (message == GET_NEXTTASK) { int task = next_task; - if (next_task >= tot_ntasks) task = -1; + if (next_task >= tot_ntasks) task = -1; // flag to show all tasks are assigned MPI_Send(&task, 1, MPI_INT, status.MPI_SOURCE, 1, comm_bank); next_task++; } else if (message == PUT_READYTASK) { readytasks[messages[2]].push_back(messages[3]); } if (message == DEL_READYTASK) { - for (int i = 0; i < readytasks[messages[2]].size(); i++) { + for (int i = 0; i < readytasks[messages[2]].size(); i++) { // we expect small sizes if (readytasks[messages[2]][i] == messages[3]) { readytasks[messages[2]].erase(readytasks[messages[2]].begin() + i); break; @@ -432,6 +445,7 @@ void Bank::open() { #endif } +// Ask to close the Bank void Bank::close() { #ifdef MRCPP_HAS_MPI int messages[message_size]; @@ -470,7 +484,7 @@ void Bank::remove_account(int account) { currentsize[account] -= deposits[ix].datasize / 128; totcurrentsize -= deposits[ix].datasize / 128; } - if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; + if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; // indicate that it does not exist deposits[ix].hasdata = false; } deposits.clear(); @@ -488,8 +502,8 @@ void Bank::remove_account(int account) { std::map &orbid2block = *get_orbid2block[account]; 
for (auto const &block : nodeid2block) { - currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; - totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; + currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB + totcurrentsize -= block.second.N_rows * block.second.data.size() / 128; // converted into kB } nodeid2block.clear(); orbid2block.clear(); @@ -504,6 +518,7 @@ void Bank::remove_account(int account) { } int Bank::openAccount(int iclient, MPI_Comm comm) { + // NB: this is a collective call, since we need all the accounts to be synchronized int account_id[1] = {-1}; #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -530,6 +545,7 @@ int Bank::openAccount(int iclient, MPI_Comm comm) { } int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { + // NB: this is a collective call, since we need all the accounts to be synchronized int account_id = -1; #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -542,6 +558,7 @@ int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { MPI_Send(messages, 2, MPI_INT, task_bank, 0, comm_bank); MPI_Recv(&account_id, 1, MPI_INT, task_bank, 1, comm_bank, &status); if (tot_bank_size == bank_size) { + // make a dummy account so that all account_id are synchronized int account_id_i; for (int i = 0; i < bank_size; i++) { if (bankmaster[i] != task_bank) { @@ -564,6 +581,7 @@ int Bank::openTaskManager(int ntasks, int iclient, MPI_Comm comm) { } void Bank::closeAccount(int account_id) { +// The account will in reality not be removed before everybody has sent a close message #ifdef MRCPP_HAS_MPI MPI_Status status; int messages[message_size]; @@ -574,6 +592,7 @@ void Bank::closeAccount(int account_id) { } void Bank::closeTaskManager(int account_id) { +// The account will in reality not be removed before everybody has sent a close message #ifdef MRCPP_HAS_MPI MPI_Status status; int messages[message_size]; @@ -615,6 +634,13 @@ std::vector 
Bank::get_totalsize() { return tot; } +// Accounts: (clients) + +// save orbital in Bank with identity id + +// get orbital with identity id. +// If wait=0, return immediately with value zero if not available (default) +// else, wait until available int BankAccount::get_func(int id, CompFunction<3> &func, int wait) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -641,6 +667,8 @@ int BankAccount::get_func(int id, CompFunction<3> &func, int wait) { return 1; } +// get orbital with identity id, and delete from bank. +// return immediately with value zero if not available int BankAccount::get_func_del(int id, CompFunction<3> &orb) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -661,8 +689,10 @@ int BankAccount::get_func_del(int id, CompFunction<3> &orb) { return 1; } +// save function in Bank with identity id int BankAccount::put_func(int id, CompFunction<3> &func) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_FUNCTION; messages[1] = account_id; @@ -673,38 +703,44 @@ int BankAccount::put_func(int id, CompFunction<3> &func) { return 1; } +// save data in Bank with identity id . datasize MUST have been set already. NB:not tested int BankAccount::put_data(int id, int size, double *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = id; messages[3] = size; - messages[4] = MIN_SCALE; + messages[4] = MIN_SCALE; // to indicate that it is defined by id MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); #endif return 1; } +// save data in Bank with identity id . datasize MUST have been set already. 
NB:not tested int BankAccount::put_data(int id, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = id; - messages[3] = size * 2; - messages[4] = MIN_SCALE; + messages[3] = size * 2; // save as twice as many doubles + messages[4] = MIN_SCALE; // to indicate that it is defined by id MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); #endif return 1; } +// save data in Bank with identity nIdx. datasize MUST have been set already. NB:not tested int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; @@ -720,13 +756,15 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) { return 1; } +// save data in Bank with identity nIdx. datasize MUST have been set already. 
NB:not tested int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to id int messages[message_size]; messages[0] = SAVE_DATA; messages[1] = account_id; messages[2] = nIdx.getTranslation(0); - messages[3] = size * 2; + messages[3] = size * 2; // save as twice as many doubles messages[4] = nIdx.getScale(); messages[5] = nIdx.getTranslation(1); messages[6] = nIdx.getTranslation(2); @@ -737,6 +775,7 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { return 1; } +// get data with identity id int BankAccount::get_data(int id, int size, double *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -751,6 +790,7 @@ int BankAccount::get_data(int id, int size, double *data) { return 1; } +// get data with identity id int BankAccount::get_data(int id, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -760,11 +800,13 @@ int BankAccount::get_data(int id, int size, ComplexDouble *data) { messages[2] = id; messages[3] = MIN_SCALE; MPI_Send(messages, 4, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); + // fetch as twice as many doubles MPI_Recv(data, size * 2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status); #endif return 1; } +// get data with identity id int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -783,6 +825,7 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) { return 1; } +// get data with identity id int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -796,72 +839,84 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) { messages[5] = nIdx.getTranslation(1); messages[6] = nIdx.getTranslation(2); MPI_Send(messages, 7, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); + // fetch as twice as many doubles MPI_Recv(data, size * 2, MPI_DOUBLE, 
bankmaster[id % bank_size], 1, comm_bank, &status); #endif return 1; } +// save data in Bank with identity id as part of block with identity nodeid. int BankAccount::put_nodedata(int id, int nodeid, int size, double *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to nodeid int messages[message_size]; messages[0] = SAVE_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; - messages[3] = id; - messages[4] = size; + messages[2] = nodeid; // which block + messages[3] = id; // id within block + messages[4] = size; // size of this data MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Send(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank); #endif return 1; } +// save data in Bank with identity id as part of block with identity nodeid. +// NB: Complex is stored as two doubles int BankAccount::put_nodedata(int id, int nodeid, int size, ComplexDouble *data) { #ifdef MRCPP_HAS_MPI + // for now we distribute according to nodeid int messages[message_size]; messages[0] = SAVE_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; - messages[3] = id; - messages[4] = 2 * size; + messages[2] = nodeid; // which block + messages[3] = id; // id within block + messages[4] = 2 * size; // size of this data MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Send(data, 2 * size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank); #endif return 1; } +// get data with identity id int BankAccount::get_nodedata(int id, int nodeid, int size, double *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; + // get the column with identity id int messages[message_size]; messages[0] = GET_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; - messages[3] = id; - messages[4] = size; + messages[2] = nodeid; // which block + messages[3] = id; // id within block. 
+ messages[4] = size; // expected size of data MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Recv(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status); #endif return 1; } +// get data with identity id int BankAccount::get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; + // get the column with identity id int messages[message_size]; messages[0] = GET_NODEDATA; messages[1] = account_id; - messages[2] = nodeid; - messages[3] = id; - messages[4] = size; + messages[2] = nodeid; // which block + messages[3] = id; // id within block. + messages[4] = size; // expected size of data MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank); MPI_Recv(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status); #endif return 1; } +// get all data for nodeid (same nodeid, different orbitals) int BankAccount::get_nodeblock(int nodeid, double *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; + // get the entire superblock and also the id of each column int messages[message_size]; messages[0] = GET_NODEBLOCK; messages[1] = account_id; @@ -877,9 +932,11 @@ int BankAccount::get_nodeblock(int nodeid, double *data, std::vector &idVec return 1; } +// get all data for nodeid (same nodeid, different orbitals) int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector &idVec) { #ifdef MRCPP_HAS_MPI MPI_Status status; + // get the entire superblock and also the id of each column int messages[message_size]; messages[0] = GET_NODEBLOCK; messages[1] = account_id; @@ -895,10 +952,12 @@ int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector return 1; } +// get all data with identity orbid (same orbital, different nodes) int BankAccount::get_orbblock(int orbid, double *&data, std::vector &nodeidVec, int bankstart) { #ifdef MRCPP_HAS_MPI MPI_Status status; int nodeid = 
wrk_rank + bankstart; + // get the entire superblock and also the nodeid of each column int messages[message_size]; messages[0] = GET_ORBBLOCK; messages[1] = account_id; @@ -914,10 +973,12 @@ int BankAccount::get_orbblock(int orbid, double *&data, std::vector &nodeid return 1; } +// get all data with identity orbid (same orbital, different nodes) int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector &nodeidVec, int bankstart) { #ifdef MRCPP_HAS_MPI MPI_Status status; int nodeid = wrk_rank + bankstart; + // get the entire superblock and also the nodeid of each column int messages[message_size]; messages[0] = GET_ORBBLOCK; messages[1] = account_id; @@ -933,6 +994,7 @@ int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector return 1; } +// creator. NB: collective BankAccount::BankAccount(int iclient, MPI_Comm comm) { this->account_id = dataBank.openAccount(iclient, comm); #ifdef MRCPP_HAS_MPI @@ -940,14 +1002,18 @@ BankAccount::BankAccount(int iclient, MPI_Comm comm) { #endif } +// destructor BankAccount::~BankAccount() { + // The account will in reality not be removed before everybody has sent a delete message dataBank.closeAccount(this->account_id); } +// closes account and reopen a new empty account. NB: account_id will change void BankAccount::clear(int iclient, MPI_Comm comm) { this->account_id = dataBank.clearAccount(this->account_id, iclient, comm); } +// creator. 
NB: collective TaskManager::TaskManager(int ntasks, int iclient, MPI_Comm comm) { this->n_tasks = ntasks; if (bank_size == 0) return; @@ -957,7 +1023,9 @@ TaskManager::TaskManager(int ntasks, int iclient, MPI_Comm comm) { #endif } +// destructor TaskManager::~TaskManager() { + // The account will in reality not be removed before everybody has sent a delete message if (this->account_id < 0) return; dataBank.closeTaskManager(this->account_id); } @@ -1027,4 +1095,4 @@ std::vector TaskManager::get_readytask(int i, int del) { return readytasks; } -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp index ec80ce67b..bdec0f954 100644 --- a/src/utils/CompFunction.cpp +++ b/src/utils/CompFunction.cpp @@ -1,28 +1,3 @@ -/* - * MRCPP, a numerical library based on multiresolution analysis and - * the multiwavelet basis which provide low-scaling algorithms as well as - * rigorous error control in numerical computations. - * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. - * - * This file is part of MRCPP. - * - * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * MRCPP is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with MRCPP. If not, see . 
- * - * For information on the complete list of contributors to MRCPP, see: - * - */ - #include "CompFunction.h" #include "Bank.h" #include "Printer.h" @@ -34,6 +9,17 @@ #include "trees/FunctionNode.h" #include +/* Some rules for CompFunction: + * NComp is the number of components. If Ncomp>0, the corresponding trees must exist (can be only empty roots). + * The other trees should be set to nullptr. + * The trees and data can be shared among several CompFunction; this is managed automatically by "std::make_shared" + * Normally the CompFunction must be eiher real or complex (or none if noe is defined anyway). + * Though it is allowed in some cases to have both and the code should preferably allow this. (It is used temporary + * when we need a Complex type, but the trees are real: the tree is then copied as a complex tree in the same CompFunction). + * TreePtr (aka func_ptr) is the part potentially shared with others with "std::make_shared". It contains the pointers to the trees. + * The static data (number of components, real/complex, conjugaison, integers used for spin etc.) are store in func_ptr.data. 
+ */ + namespace mrcpp { template MultiResolutionAnalysis *defaultCompMRA = nullptr; // Global MRA @@ -55,6 +41,9 @@ template CompFunction::CompFunction() { for (int i = 0; i < 4; i++) CompC[i] = nullptr; } +/* + * Empty functions (no components defined) + */ template CompFunction::CompFunction(int n1) { func_ptr = std::make_shared>(false); CompD = func_ptr->real; @@ -70,6 +59,9 @@ template CompFunction::CompFunction(int n1) { func_ptr->data.shared = false; } +/* + * Empty functions (no components defined) + */ template CompFunction::CompFunction(int n1, bool share) { func_ptr = std::make_shared>(share); CompD = func_ptr->real; @@ -85,6 +77,9 @@ template CompFunction::CompFunction(int n1, bool share) { func_ptr->data.shared = share; } +/* + * Empty functions (trees defined but zero) + */ template CompFunction::CompFunction(const CompFunctionData &indata, bool alloc) { func_ptr = std::make_shared>(indata.shared); func_ptr->data = indata; @@ -96,12 +91,22 @@ template CompFunction::CompFunction(const CompFunctionData &indata this->free(); } +/** @brief Copy constructor + * + * Shallow copy: meta data is copied along with the component pointers, + * NO transfer of ownership. + */ template CompFunction::CompFunction(const CompFunction &compfunc) { func_ptr = compfunc.func_ptr; CompD = func_ptr->real; CompC = func_ptr->cplx; } +/** @brief Copy constructor + * + * Shallow copy: meta data is copied along with the component pointers, + * NO transfer of ownership. + */ template CompFunction &CompFunction::operator=(const CompFunction &compfunc) { if (this != &compfunc) { func_ptr = compfunc.func_ptr; @@ -112,8 +117,13 @@ template CompFunction &CompFunction::operator=(const CompFunction< } template +/** @brief Parameter copy + * + * Returns a copy without defined trees. 
+ */ CompFunction CompFunction::paramCopy(bool alloc) const { CompFunction out(func_ptr->data, alloc); + // we do not copy tree sizes: for (int i = 0; i < 4; i++) out.func_ptr->data.Nchunks[i] = 0; return out; } @@ -194,6 +204,10 @@ template double CompFunction::getSquareNorm() const { return norm; } +// Allocate empty trees. The tree must be defined as real or complex already. +// Allocates all ialloc trees, with indices 0,...ialloc-1 +// nalloc is the number of components allocated. ialloc=1 allocates one tree. +// deletes all old trees if found. template void CompFunction::alloc(int nalloc, bool zero) { if (defaultCompMRA == nullptr) MSG_ABORT("Default MRA not yet defined"); if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex"); @@ -213,6 +227,7 @@ template void CompFunction::alloc(int nalloc, bool zero) { func_ptr->Ncomp = std::max(Ncomp(), i + 1); } for (int i = nalloc; i < Ncomp(); i++) { + // delete possible remaining components delete CompD[i]; delete CompC[i]; CompD[i] = nullptr; @@ -220,6 +235,10 @@ template void CompFunction::alloc(int nalloc, bool zero) { } } +// Allocate one empty trees for one specific component. +// The tree must be defined as real or complex already. +// ialloc is index allocated. ialloc=0 allocates the tree with index zero. +// deletes old tree if found. 
template void CompFunction::alloc_comp(int ialloc) { if (defaultCompMRA == nullptr) MSG_ABORT("Default MRA not yet defined"); if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex"); @@ -252,7 +271,7 @@ template void CompFunction::free() { } template int CompFunction::getSizeNodes() const { - int size_mb = 0; + int size_mb = 0; // Memory size in kB for (int i = 0; i < Ncomp(); i++) { if (isreal() and CompD[i] != nullptr) size_mb += CompD[i]->getSizeNodes(); if (iscomplex() and CompC[i] != nullptr) size_mb += CompC[i]->getSizeNodes(); @@ -269,6 +288,11 @@ template int CompFunction::getNNodes() const { return nNodes; } +/** @brief Soft complex conjugate + * + * Will use complex conjugate in operations (add, multiply etc.) + * Does change the state (conj flag), but does not actively change all coefficients. + */ template void CompFunction::dagger() { func_ptr->data.conj = not(func_ptr->data.conj); for (int i = 0; i < Ncomp(); i++) { @@ -281,7 +305,7 @@ template FunctionTree &CompFunction::real(int i) { if (CompD[i] == nullptr) alloc_comp(i); return *CompD[i]; } -template +template // NB: should return CompC in the future FunctionTree &CompFunction::imag(int i) { MSG_ABORT("Must choose real or complex"); if (!iscomplex()) MSG_ABORT("not complex function"); @@ -298,7 +322,7 @@ template const FunctionTree &CompFunction::real(int i) con if (!isreal()) MSG_ABORT("not real function"); return *CompD[i]; } -template +template // NB: should use complex or real const FunctionTree &CompFunction::imag(int i) const { MSG_ABORT("Must choose real or complex"); if (!iscomplex()) MSG_ABORT("not complex function"); @@ -309,8 +333,10 @@ template const FunctionTree &CompFunction::complex( return *CompC[i]; } +/* for backwards compatibility */ template void CompFunction::setReal(FunctionTree *tree, int i) { func_ptr->isreal = 1; + // if (CompD[i] != nullptr) delete CompD[i]; CompD[i] = tree; if (tree != nullptr) { func_ptr->Ncomp = 
std::max(Ncomp(), i + 1); @@ -321,6 +347,7 @@ template void CompFunction::setReal(FunctionTree *tree, in template void CompFunction::setCplx(FunctionTree *tree, int i) { func_ptr->iscomplex = 1; + // if (CompC[i] != nullptr) delete CompC[i]; CompC[i] = tree; if (tree != nullptr) { func_ptr->Ncomp = std::max(Ncomp(), i + 1); @@ -329,6 +356,11 @@ template void CompFunction::setCplx(FunctionTree *t } } +/** @brief In place addition. + * + * Output is extended to union grid. + * + */ template void CompFunction::add(ComplexDouble c, CompFunction inp) { if (Ncomp() < inp.Ncomp()) { @@ -365,6 +397,7 @@ template int CompFunction::crop(double prec) { return nChunksremoved; } +/** @brief In place multiply with scalar. Fully in-place.*/ template void CompFunction::rescale(ComplexDouble c) { bool need_to_rescale = not(isShared()) or mpi::share_master(); if (need_to_rescale) { @@ -372,7 +405,7 @@ template void CompFunction::rescale(ComplexDouble c) { if (iscomplex()) { CompC[i]->rescale(c); } else { - if (abs(c.imag()) > MachineZero) { + if (abs(c.imag()) > MachineZero) { // works only only for NComp==1) CompD[i]->CopyTreeToComplex(CompC[i]); delete CompD[i]; CompD[i] = nullptr; @@ -395,6 +428,10 @@ template class CompFunction<1>; template class CompFunction<2>; template class CompFunction<3>; +/** @brief Deep copy that changes type from real to complex + * + * Deep copy: makes an exact copy with type complex from a real input + */ template void CopyToComplex(CompFunction &out, const CompFunction &inp) { out.func_ptr->data = inp.func_ptr->data; out.defcomplex(); @@ -410,6 +447,11 @@ template void CopyToComplex(CompFunction &out, const CompFunction } } + +/** @brief Deep copy + * + * Deep copy: meta data is copied along with the content of each component. 
+ */ template void deep_copy(CompFunction *out, const CompFunction &inp) { out->func_ptr->data = inp.func_ptr->data; out->alloc(inp.Ncomp()); @@ -423,6 +465,10 @@ template void deep_copy(CompFunction *out, const CompFunction &inp } } +/** @brief Deep copy + * + * Deep copy: meta func_ptr->data is copied along with the content of each component. + */ template void deep_copy(CompFunction &out, const CompFunction &inp) { out.func_ptr->data = inp.func_ptr->data; out.alloc(inp.Ncomp()); @@ -436,24 +482,33 @@ template void deep_copy(CompFunction &out, const CompFunction &inp } } +/** @brief out = a*inp_a + b*inp_b + * + * Recast into linear_combination. + * + */ template void add(CompFunction &out, ComplexDouble a, CompFunction inp_a, ComplexDouble b, CompFunction inp_b, double prec, bool conjugate) { std::vector coefs(2); coefs[0] = a; coefs[1] = b; - std::vector> funcs; + std::vector> funcs; // NB: not a CompFunctionVector, because not run in parallel! funcs.push_back(inp_a); funcs.push_back(inp_b); linear_combination(out, coefs, funcs, prec, conjugate); } +/** @brief out = c_0*inp_0 + c_1*inp_1 + ... 
+ c_N*inp_N + * + * OMP parallel, but not MPI parallel + */ template void linear_combination(CompFunction &out, const std::vector &c, std::vector> &inp, double prec, bool conjugate) { double thrs = MachineZero; bool need_to_add = not(out.isShared()) or mpi::share_master(); bool share = out.isShared(); out.func_ptr->data = inp[0].func_ptr->data; - out.func_ptr->data.shared = share; + out.func_ptr->data.shared = share; // we don' inherit the shareness bool iscomplex = false; for (int i = 0; i < inp.size(); i++) if (inp[i].iscomplex() or c[i].imag() > MachineZero) iscomplex = true; @@ -464,7 +519,7 @@ template void linear_combination(CompFunction &out, const std::vector out.alloc(out.Ncomp()); for (int comp = 0; comp < inp[0].Ncomp(); comp++) { if (not iscomplex) { - FunctionTreeVector fvec; + FunctionTreeVector fvec; // one component vector for (int i = 0; i < inp.size(); i++) { if (std::norm(c[i]) < thrs) continue; if (inp[i].getNNodes() == 0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue; @@ -483,7 +538,7 @@ template void linear_combination(CompFunction &out, const std::vector } } } else { - FunctionTreeVector fvec; + FunctionTreeVector fvec; // one component vector for (int i = 0; i < inp.size(); i++) { if (inp[i].isreal()) { inp[i].CompD[comp]->CopyTreeToComplex(inp[i].CompC[comp]); @@ -513,9 +568,15 @@ template void linear_combination(CompFunction &out, const std::vector } } +/** @brief out = conj(inp) * inp + * + * Note that output is always real + * + */ template void make_density(CompFunction &out, CompFunction inp, double prec) { multiply(prec, out, 1.0, inp, inp, -1, false, false, true); if (out.iscomplex()) { + // copy onto real components for (int i = 0; i < out.Ncomp(); i++) { out.CompD[i] = out.CompC[i]->Real(); delete out.CompC[i]; @@ -525,10 +586,18 @@ template void make_density(CompFunction &out, CompFunction inp, do } } + +/** @brief out = inp_a * inp_b + * + */ template void multiply(CompFunction &out, CompFunction inp_a, CompFunction 
inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate) { multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms, conjugate); } +/** @brief out = inp_a * inp_b + * Takes conjugate of inp_a if conjugate=true + * In case of mixed real/complex inputs, the real functions are converted into complex functions. + */ template void multiply(double prec, CompFunction &out, double coef, CompFunction inp_a, CompFunction inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) { if (inp_b.func_ptr->conj) MSG_ABORT("Not implemented"); if (inp_a.func_ptr->conj) conjugate = (not conjugate); @@ -537,26 +606,29 @@ template void multiply(double prec, CompFunction &out, double coef, C if (out.Ncomp() == 0) out_allocated = false; bool share = out.isShared(); out.func_ptr->data = inp_a.func_ptr->data; - out.func_ptr->data.shared = share; - out.func_ptr->conj = false; + out.func_ptr->data.shared = share; // we don't inherit the shareness + out.func_ptr->conj = false; // we don't inherit conjugaison if (inp_a.getNNodes() == 0 or inp_b.getNNodes() == 0) { if (!out_allocated) out.alloc(out.Ncomp()); return; } for (int comp = 0; comp < inp_a.Ncomp(); comp++) { - out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; + out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; // we could put this is coef if everything is real? 
if (inp_a.isreal() and inp_b.isreal()) { if (need_to_multiply) { if (!out_allocated) out.alloc(out.Ncomp()); if (prec < 0.0) { + // Union grid build_grid(*out.CompD[comp], *inp_a.CompD[comp]); build_grid(*out.CompD[comp], *inp_b.CompD[comp]); mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate); } else { + // Adaptive grid mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate); } } } else { + // if one of the input is real, we simply make a new complex copy of it bool inp_aisReal = inp_a.isreal(); bool inp_bisReal = inp_b.isreal(); if (inp_aisReal) { @@ -572,6 +644,7 @@ template void multiply(double prec, CompFunction &out, double coef, C ComplexDouble coef = 1.0; if (need_to_multiply) { if (prec < 0.0) { + // Union grid out.func_ptr->iscomplex = 1; out.func_ptr->isreal = 0; delete out.CompD[comp]; @@ -580,8 +653,9 @@ template void multiply(double prec, CompFunction &out, double coef, C build_grid(*out.CompC[comp], *inp_a.CompC[comp]); build_grid(*out.CompC[comp], *inp_b.CompC[comp]); mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate); - } else { - if (out.CompD[comp] != nullptr) { + } else { // note that this assumes Ncomp=1 + // Adaptive grid + if (out.CompD[comp] != nullptr) { // NB: func_ptr has alreadybeen overwritten! 
if (out.CompD[comp]->getNNodes() > 0) { out.CompD[comp]->CopyTreeToComplex(out.CompC[comp]); out.func_ptr->iscomplex = 1; @@ -601,6 +675,7 @@ template void multiply(double prec, CompFunction &out, double coef, C mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate); } } + // restore original tree if (inp_aisReal) { delete inp_a.CompC[comp]; inp_a.CompC[comp] = nullptr; @@ -618,28 +693,41 @@ template void multiply(double prec, CompFunction &out, double coef, C mpi::share_function(out, 0, 9911, mpi::comm_share); } +/** @brief out = inp_a * f + * + * Only one component is multiplied + */ template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented"); if (inp_a.isreal() != 1) MSG_ABORT("Not implemented"); if (conjugate) MSG_ABORT("Not implemented"); - CompFunctionVector CompVec; + CompFunctionVector CompVec; // Should use vector? CompVec.push_back(inp_a); CompFunctionVector CompVecOut; CompVecOut = multiply(CompVec, f, prec, nullptr, nrefine, true); out = CompVecOut[0]; + // multiply(out, *inp_a.CompD[0], f, prec, nrefine, conjugate); } +/** @brief out = inp_a * f + * + * Only one component is multiplied + */ template void multiply(CompFunction &out, CompFunction &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { MSG_ABORT("Not implemented"); if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented"); if (inp_a.iscomplex() != 1) MSG_ABORT("Not implemented"); if (conjugate) MSG_ABORT("Not implemented"); - CompFunctionVector CompVec; + CompFunctionVector CompVec; // Should use vector? 
CompVec.push_back(inp_a); CompFunctionVector CompVecOut; + // CompVecOut = multiply(CompVec, f, prec, nrefine, true); out = CompVecOut[0]; } +/** @brief out = inp_a * f + * + */ template void multiply(CompFunction &out, FunctionTree &inp_a, RepresentableFunction &f, double prec, int nrefine, bool conjugate) { CompFunction func_a; func_a.func_ptr->isreal = 1; @@ -658,6 +746,12 @@ template void multiply(CompFunction &out, FunctionTree = int bra^\dag(r) * ket(r) dr. + * + * Sum of component dots. + * Notice that the ComplexDouble dot(CompFunction bra, CompFunction ket) { if (bra.func_ptr->conj or ket.func_ptr->conj) MSG_ABORT("Not implemented"); ComplexDouble dotprodtot = 0.0; @@ -679,6 +773,10 @@ template ComplexDouble dot(CompFunction bra, CompFunction ket) { return dotprodtot; } +/** @brief Compute = int |bra^\dag(r)| * |ket(r)| dr. + * + * sum of components + */ template double node_norm_dot(CompFunction bra, CompFunction ket) { double dotprodtot = 0.0; for (int comp = 0; comp < bra.Ncomp(); comp++) { @@ -692,7 +790,7 @@ template double node_norm_dot(CompFunction bra, CompFunction ket) } else { dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]); } - dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); + dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); // for fully complex values this does not really give the norm dotprodtot += dotprod; } return dotprodtot; @@ -753,6 +851,8 @@ template void project(CompFunction &out, RepresentableFunction>(N) { for (int i = 0; i < N; i++) (*this)[i].func_ptr->rank = i; @@ -762,9 +862,21 @@ void CompFunctionVector::distribute() { for (int i = 0; i < this->size(); i++) (*this)[i].func_ptr->rank = i; } +/** @brief Make a linear combination of functions + * + * Uses "local" representation: treats one node at a time. + * For each node, all functions are transformed simultaneously + * by a dense matrix multiplication. 
+ * Phi input functions, Psi output functions + * Phi and Psi are complex. + */ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) { - bool serial = mpi::wrk_size == 1; + // The principle of this routine is that nodes for all orbitals are rotated one by one using matrix multiplication. + // The routine does avoid when possible to move data, but uses pointers and indices manipulation. + // MPI version does not use OMP yet, Serial version uses OMP + // size of input is N, size of output is M + bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch int N = Phi.size(); int M = Psi.size(); for (int i = 0; i < M; i++) { @@ -778,16 +890,18 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix"); if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix"); + // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff) FunctionTree<3> refTree(*Phi.vecMRA); mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); std::vector scalefac_ref; - std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; + std::vector coeffVec_ref; // not used! 
+ std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes int max_ix; + // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree); int max_n = indexVec_ref.size(); @@ -801,18 +915,25 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe Psi[i].func_ptr->data.iscomplex = 1; } - BankAccount nodesPhi; - BankAccount nodesRotated; + // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank + BankAccount nodesPhi; // to put the original nodes + BankAccount nodesRotated; // to put the rotated nodes + + // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); - std::map> node2orbVec; - std::vector> orb2node(N); + std::vector> indexVec(N); // serialIx of the nodes + std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the + // orbital given the node index in the reference tree if (serial) { - std::vector parindexVec; + // make list of all coefficients (coeffVec), and their reference indices (indexVec) + std::vector parindexVec; // serialIx of the parent nodes std::vector scalefac; for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid Phi[j].complex().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -820,167 +941,208 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe node2orbVec[ix].push_back(j); } } - } else { + } else { // MPI case + // send own nodes to bank, identifying them through the serialIx 
of refTree save_nodes(Phi, refTree, nodesPhi); - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. } - IntMatrix split_serial; - std::vector> coeffpVec(M); - std::vector> ix2coef(M); - int csize; - std::vector rotatedCoeffVec; + // 4) rotate all the nodes + IntMatrix split_serial; // in the serial case all split are stored in one array + std::vector> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case + std::vector> ix2coef(M); // to find the index in for example rotCoeffVec[] corresponding to a serialIx + int csize; // size of the current coefficients (different for roots and branches) + std::vector rotatedCoeffVec; // just to ensure that the data from rotatedCoeff is not deleted, since we point to it. + // j indices are for unrotated orbitals, i indices are for rotated orbitals if (serial) { - std::map ix2coef_ref; - split_serial.resize(M, max_n); + std::map ix2coef_ref; // to find the index n corresponding to a serialIx + split_serial.resize(M, max_n); // not use in the MPI case for (int n = 0; n < max_n; n++) { - int node_ix = indexVec_ref[n]; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree ix2coef_ref[node_ix] = n; for (int i = 0; i < M; i++) split_serial(i, n) = 1; } - std::vector nodeReady(max_n, 0); + std::vector nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits) + // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok. + // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding + // n is finished. 
#pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { int csize; - int node_ix = indexVec_ref[n]; - std::vector orbjVec; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n + std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); if (node2orbVec[node_ix].size() <= 0) continue; csize = sizecoeffW; - if (parindexVec_ref[n] < 0) csize = sizecoeff; + if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff - int shift = sizecoeff - sizecoeffW; + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part if (parindexVec_ref[n] < 0) shift = 0; ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { + for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbjVec.push_back(j); } + // 4b) make a list of rotated orbitals needed for this node + // OMP must wait until parent is ready while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) { #pragma omp flush }; std::vector orbiVec; - for (int i = 0; i < M; i++) { - if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; + for (int i = 0; i < M; i++) { // loop over all rotated orbitals + if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets orbiVec.push_back(i); } - ComplexMatrix Un(orbjVec.size(), orbiVec.size()); - for (int i = 0; i < orbiVec.size(); i++) { + // 4c) rotate this node + ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices + for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals 
for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); } } ComplexMatrix rotatedCoeff(csize, orbiVec.size()); - rotatedCoeff.noalias() = coeffBlock * Un; + // HERE IT HAPPENS! + // TODO: conjugaison + rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + // 4d) store and make rotated node pointers + // for now we allocate in buffer, in future could be directly allocated in the final trees double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; + // make all norms: for (int i = 0; i < orbiVec.size(); i++) { + // check if parent must be split if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) { + // mark this node for this orbital for later split #pragma omp critical { ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size(); - coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); + coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers } - double wnorm = 0.0; + // check norms for split + double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here int kstart = 0; - if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; + if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots for (int k = kstart; k < csize; k++) wnorm += std::real(rotatedCoeff(k, i) * std::conj(rotatedCoeff(k, i))); if (thres < wnorm or prec < 0) split_serial(orbiVec[i], n) = 1; else split_serial(orbiVec[i], n) = 0; } else { - ix2coef[orbiVec[i]][node_ix] = max_n + 1; - split_serial(orbiVec[i], n) = 0; + ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used + split_serial(orbiVec[i], n) = 0; // do not split if parent does not need to be split } } nodeReady[n] = 1; #pragma omp critical - { rotatedCoeffVec.push_back(std::move(rotatedCoeff)); } + { + // this ensures that rotatedCoeff is not deleted, when getting out of scope + rotatedCoeffVec.push_back(std::move(rotatedCoeff)); + } } - } else { + } else { // 
MPI case - std::vector split(M, -1.0); - std::vector needsplit(M, 1.0); + // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits. + std::vector split(M, -1.0); // which orbitals need splitting (at a given node). For now double for compatibilty with bank + std::vector needsplit(M, 1.0); // which orbitals need splitting BankAccount nodeSplits; - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. ComplexMatrix coeffBlock(sizecoeff, N); - max_ix++; + max_ix++; // largest node index + 1. to store rotated orbitals with different id TaskManager tasks(max_n); for (int nn = 0; nn < max_n; nn++) { int n = tasks.next_task(); if (n < 0) break; double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; + // 4a) make list of orbitals that should split the parent node, i.e. include this node int parentid = parindexVec_ref[n]; if (parentid == -1) { + // root node, split if output needed for (int i = 0; i < M; i++) { split[i] = 1.0; } csize = sizecoeff; } else { + // note that it will wait until data is available nodeSplits.get_data(parentid, M, split.data()); csize = sizecoeffW; } std::vector orbiVec; std::vector orbjVec; - for (int i = 0; i < M; i++) { - if (split[i] < 0.0) continue; + for (int i = 0; i < M; i++) { // loop over rotated orbitals + if (split[i] < 0.0) continue; // parent node has too small wavelets orbiVec.push_back(i); } - ComplexMatrix coeffBlock(csize, N); + // 4b) rotate this node + ComplexMatrix coeffBlock(csize, N); // largest possible used size nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part + // chunk of U, with reorganized indices and separate blocks for real and imag: ComplexMatrix Un(orbjVec.size(), orbiVec.size()); ComplexMatrix rotatedCoeff(csize, 
orbiVec.size()); - for (int i = 0; i < orbiVec.size(); i++) { - for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); } + for (int i = 0; i < orbiVec.size(); i++) { // loop over included rotated real and imag part of orbitals + for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts + Un(j, i) = U(orbjVec[j], orbiVec[i]); + } } - rotatedCoeff.noalias() = coeffBlock * Un; + // HERE IT HAPPENS + // TODO conjugaison + rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication - for (int i = 0; i < orbiVec.size(); i++) { - needsplit[orbiVec[i]] = -1.0; + // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while + // in cache). + for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals + needsplit[orbiVec[i]] = -1.0; // default, do not split + // check if this node/orbital needs further refinement double wnorm = 0.0; - int kwstart = csize - sizecoeffW; + int kwstart = csize - sizecoeffW; // do not include scaling for (int k = kwstart; k < csize; k++) wnorm += std::real(rotatedCoeff.col(i)[k] * std::conj(rotatedCoeff.col(i)[k])); if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0; nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data()); } nodeSplits.put_data(indexVec_ref[n], M, needsplit.data()); } - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready } + // 5) reconstruct trees using rotated nodes. + + // only serial case can use OMP, because MPI cannot be used by threads if (serial) { + // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. 
(the main + // operation is writing the coefficient into the tree) + #pragma omp parallel for schedule(static) for (int j = 0; j < M; j++) { if (coeffpVec[j].size() == 0) continue; - Psi[j].alloc(1); + Psi[j].alloc(1); // All data is stored in coeffpVec[j] Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec); } - } else { + } else { // MPI case for (int j = 0; j < M; j++) { if (not mpi::my_func(j)) continue; - std::vector coeffpVec; - std::map ix2coef; + // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) + std::vector coeffpVec; // + std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx int ix = 0; - std::vector pointerstodelete; + std::vector pointerstodelete; // list of temporary arrays to clean up for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - ComplexDouble *dataVec; + ComplexDouble *dataVec; // will be allocated by bank nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); + assert(nodeidVec[n] - max_ix >= 0); // unrotated nodes have been deleted + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once ix2coef[nodeidVec[n] - max_ix] = ix++; csize = sizecoeffW; if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff; - coeffpVec.push_back(&dataVec[shift]); + coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers shift += csize; } } @@ -994,6 +1156,14 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe } } +/** @brief Make a linear combination of functions + * + * Uses "local" representation: treats one node at a time. + * For each node, all functions are transformed simultaneously + * by a dense matrix multiplication. 
+ * Phi input functions, Psi output functions + * + */ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) { if (Phi[0].iscomplex()) { @@ -1001,21 +1171,27 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector return; } + // The principle of this routine is that nodes are rotated one by one using matrix multiplication. + // The routine does avoid when possible to move data, but uses pointers and indices manipulation. + // MPI version does not use OMP yet, Serial version uses OMP + // size of input is N, size of output is M int N = Phi.size(); int M = Psi.size(); if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix"); if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix"); + // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff) FunctionTree<3> refTree(*Phi.vecMRA); mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); std::vector scalefac_ref; - std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; + std::vector coeffVec_ref; // not used! + std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes int max_ix; + // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree); int max_n = indexVec_ref.size(); for (int i = 0; i < M; i++) { @@ -1023,20 +1199,27 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector Psi[i].func_ptr->data.iscomplex = 0; } - bool serial = mpi::wrk_size == 1; - BankAccount nodesPhi; - BankAccount nodesRotated; + // 3) In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank + bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch + BankAccount nodesPhi; // to put the original nodes + BankAccount nodesRotated; // to put the rotated nodes + + // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); - std::map> node2orbVec; - std::vector> orb2node(N); + std::vector> indexVec(N); // serialIx of the nodes + std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the + // orbital given the node index in the reference tree if (serial) { - std::vector parindexVec; + // make list of all coefficients (coeffVec), and their reference indices (indexVec) + std::vector parindexVec; // serialIx of the parent nodes std::vector scalefac; for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -1044,142 +1227,181 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector node2orbVec[ix].push_back(j); } } - } else { + } else { // MPI case + // send own nodes to bank, identifying them through the serialIx of refTree save_nodes(Phi, refTree, nodesPhi); - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. 
} - IntMatrix split_serial; - std::vector> coeffpVec(M); - std::vector> ix2coef(M); - int csize; - std::vector rotatedCoeffVec; + // 4) rotate all the nodes + IntMatrix split_serial; // in the serial case all split are stored in one array + std::vector> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case + std::vector> ix2coef(M); // to find the index in for example rotCoeffVec[] corresponding to a serialIx + int csize; // size of the current coefficients (different for roots and branches) + std::vector rotatedCoeffVec; // just to ensure that the data from rotatedCoeff is not deleted, since we point to it. + // j indices are for unrotated orbitals, i indices are for rotated orbitals if (serial) { - std::map ix2coef_ref; - split_serial.resize(M, max_n); + std::map ix2coef_ref; // to find the index n corresponding to a serialIx + split_serial.resize(M, max_n); // not use in the MPI case for (int n = 0; n < max_n; n++) { - int node_ix = indexVec_ref[n]; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree ix2coef_ref[node_ix] = n; for (int i = 0; i < M; i++) split_serial(i, n) = 1; } - std::vector nodeReady(max_n, 0); + std::vector nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits) + + // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok. + // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding + // n is finished. 
#pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { int csize; - int node_ix = indexVec_ref[n]; - std::vector orbjVec; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n + std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); if (node2orbVec[node_ix].size() <= 0) continue; csize = sizecoeffW; - if (parindexVec_ref[n] < 0) csize = sizecoeff; + if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff - int shift = sizecoeff - sizecoeffW; + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part if (parindexVec_ref[n] < 0) shift = 0; DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { + for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbjVec.push_back(j); } + // 4b) make a list of rotated orbitals needed for this node + // OMP must wait until parent is ready while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) { #pragma omp flush }; std::vector orbiVec; - for (int i = 0; i < M; i++) { - if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; + for (int i = 0; i < M; i++) { // loop over all rotated orbitals + if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets orbiVec.push_back(i); } - DoubleMatrix Un(orbjVec.size(), orbiVec.size()); - for (int i = 0; i < orbiVec.size(); i++) { + // 4c) rotate this node + DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices + for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals for 
(int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); } } DoubleMatrix rotatedCoeff(csize, orbiVec.size()); - rotatedCoeff.noalias() = coeffBlock * Un; + // HERE IT HAPPENS! + rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication + // 4d) store and make rotated node pointers + // for now we allocate in buffer, in future could be directly allocated in the final trees double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; + // make all norms: for (int i = 0; i < orbiVec.size(); i++) { + // check if parent must be split if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) { + // mark this node for this orbital for later split #pragma omp critical { ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size(); - coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); + coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers } - double wnorm = 0.0; + // check norms for split + double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here int kstart = 0; - if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; + if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i); if (thres < wnorm or prec < 0) split_serial(orbiVec[i], n) = 1; else split_serial(orbiVec[i], n) = 0; } else { - ix2coef[orbiVec[i]][node_ix] = max_n + 1; - split_serial(orbiVec[i], n) = 0; + ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used + split_serial(orbiVec[i], n) = 0; // do not split if parent does not need to be split } } nodeReady[n] = 1; #pragma omp critical - { rotatedCoeffVec.push_back(std::move(rotatedCoeff)); } + { + // this ensures that rotatedCoeff is not deleted, when getting out of scope + rotatedCoeffVec.push_back(std::move(rotatedCoeff)); + } } - } else { + } else { // MPI case - std::vector split(M, -1.0); - 
std::vector needsplit(M, 1.0); + // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits. + std::vector split(M, -1.0); // which orbitals need splitting (at a given node). For now double for compatibilty with bank + std::vector needsplit(M, 1.0); // which orbitals need splitting BankAccount nodeSplits; - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. DoubleMatrix coeffBlock(sizecoeff, N); - max_ix++; + max_ix++; // largest node index + 1. to store rotated orbitals with different id TaskManager tasks(max_n); for (int nn = 0; nn < max_n; nn++) { int n = tasks.next_task(); if (n < 0) break; double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n]; + // 4a) make list of orbitals that should split the parent node, i.e. include this node int parentid = parindexVec_ref[n]; if (parentid == -1) { + // root node, split if output needed for (int i = 0; i < M; i++) { split[i] = 1.0; } csize = sizecoeff; } else { + // note that it will wait until data is available nodeSplits.get_data(parentid, M, split.data()); csize = sizecoeffW; } std::vector orbiVec; std::vector orbjVec; - for (int i = 0; i < M; i++) { - if (split[i] < 0.0) continue; + for (int i = 0; i < M; i++) { // loop over rotated orbitals + if (split[i] < 0.0) continue; // parent node has too small wavelets orbiVec.push_back(i); } - DoubleMatrix coeffBlock(csize, N); + // 4b) rotate this node + DoubleMatrix coeffBlock(csize, N); // largest possible used size nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part + // chunk of U, with reorganized indices and separate blocks for real and imag: DoubleMatrix Un(orbjVec.size(), orbiVec.size()); DoubleMatrix rotatedCoeff(csize, orbiVec.size()); - for (int i = 0; i < orbiVec.size(); i++) 
{ - for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); } + for (int i = 0; i < orbiVec.size(); i++) { // loop over included rotated real and imag part of orbitals + for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts + Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); + } } - rotatedCoeff.noalias() = coeffBlock * Un; + // HERE IT HAPPENS + rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication - for (int i = 0; i < orbiVec.size(); i++) { - needsplit[orbiVec[i]] = -1.0; + // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while + // in cache). + for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals + needsplit[orbiVec[i]] = -1.0; // default, do not split + // check if this node/orbital needs further refinement double wnorm = 0.0; - int kwstart = csize - sizecoeffW; + int kwstart = csize - sizecoeffW; // do not include scaling for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k]; if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0; nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data()); } nodeSplits.put_data(indexVec_ref[n], M, needsplit.data()); } - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready } + // 5) reconstruct trees using rotated nodes. + + // only serial case can use OMP, because MPI cannot be used by threads if (serial) { + // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. 
(the main + // operation is writing the coefficient into the tree) + #pragma omp parallel for schedule(static) for (int j = 0; j < M; j++) { if (coeffpVec[j].size() == 0) continue; @@ -1188,27 +1410,28 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec); } - } else { + } else { // MPI case for (int j = 0; j < M; j++) { if (not mpi::my_func(j)) continue; - std::vector coeffpVec; - std::map ix2coef; + // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) + std::vector coeffpVec; // + std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx int ix = 0; - std::vector pointerstodelete; + std::vector pointerstodelete; // list of temporary arrays to clean up for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - double *dataVec; + double *dataVec; // will be allocated by bank nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); + assert(nodeidVec[n] - max_ix >= 0); // unrotated nodes have been deleted + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once ix2coef[nodeidVec[n] - max_ix] = ix++; csize = sizecoeffW; if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff; - coeffpVec.push_back(&dataVec[shift]); + coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers shift += csize; } } @@ -1226,6 +1449,9 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec) { return; } +/** @brief Save all nodes in bank; identify them using serialIx from refTree + * shift is a shift applied in the id + */ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) { int sizecoeff = (1 << 
refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); @@ -1233,34 +1459,40 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount & std::vector coeffVec; std::vector coeffVec_cplx; std::vector scalefac; - std::vector indexVec; - std::vector parindexVec; + std::vector indexVec; // SerialIx of the node in refOrb + std::vector parindexVec; // SerialIx of the parent node int N = Phi.size(); int max_ix; for (int j = 0; j < N; j++) { if (not mpi::my_func(j)) continue; + // make vector with all coef address and their index in the union grid if (Phi[j].isreal()) { Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree); int max_n = indexVec.size(); + // send node coefs from Phi[j] to bank + // except for the root nodes, only wavelets are sent for (int i = 0; i < max_n; i++) { - if (indexVec[i] < 0) continue; + if (indexVec[i] < 0) continue; // nodes that are not in refOrb int csize = sizecoeffW; if (parindexVec[i] < 0) csize = sizecoeff; - if (sizes > 0) { + if (sizes > 0) { // fixed size account.put_nodedata(j, indexVec[i], sizes, coeffVec[i]); } else { account.put_nodedata(j, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize])); } } } + // Complex components if (Phi[j].iscomplex()) { Phi[j].complex().makeCoeffVector(coeffVec_cplx, indexVec, parindexVec, scalefac, max_ix, refTree); int max_n = indexVec.size(); + // send node coefs from Phi[j] to bank for (int i = 0; i < max_n; i++) { - if (indexVec[i] < 0) continue; + if (indexVec[i] < 0) continue; // nodes that are not in refOrb + // NB: the identifier (indexVec[i]) must be shifted for not colliding with the nodes from the real part int csize = sizecoeffW; if (parindexVec[i] < 0) csize = sizecoeff; - if (sizes > 0) { + if (sizes > 0) { // fixed size account.put_nodedata(j, indexVec[i], sizes, coeffVec_cplx[i]); } else { account.put_nodedata(j, indexVec[i], csize, &(coeffVec_cplx[i][sizecoeff - csize])); 
@@ -1270,10 +1502,21 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount & } } +/** @brief Multiply all orbitals with a function + * + * @param Phi: orbitals to multiply + * @param f : function to multiply + * + * Computes the product of each orbital with a function + * in parallel using a local representation. + * Input trees are extended by one scale at most. + */ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) { int N = Phi.size(); const int D = 3; - bool serial = mpi::wrk_size == 1; + bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch + // 1a) extend grid where f is large (around nuclei) + // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree? for (int i = 0; i < N; i++) { if (!mpi::my_func(i)) continue; @@ -1281,9 +1524,12 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f while (Phi[i].isreal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++; if (Phi[i].iscomplex()) MSG_ABORT("Not yet implemented"); irefine = 0; + // while (Phi[i].iscomplex() and irefine < nrefine and refine_grid(Phi[i].complex(), f) > 0) irefine++; } + // 1b) make union tree without coefficients FunctionTree refTree(*Phi.vecMRA); + // refine_grid(refTree, f); //to test mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk); int kp1 = refTree.getKp1(); @@ -1300,33 +1546,40 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f CompFunctionVector out(N); for (int i = 0; i < N; i++) { out[0] = Phi[i].paramCopy(); } if (not PsihasReIm[0] and not PsihasReIm[1]) { - return out; + return out; // do nothing } std::vector scalefac_ref; - std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; - std::vector *> refNodes; + std::vector coeffVec_ref; // not used! 
+ std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes + std::vector *> refNodes; // pointers to nodes int max_ix; + // get a list of all nodes in union tree, identified by their serialIx indices refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes); int max_n = indexVec_ref.size(); - std::map ix2n; + std::map ix2n; // for a given serialIx, give index in vectors for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn; - BankAccount nodesPhi; - BankAccount nodesMultiplied; + // 2a) send own nodes to bank, identifying them through the serialIx of refTree + BankAccount nodesPhi; // to put the original nodes + BankAccount nodesMultiplied; // to put the multiplied nodes + // used for serial only: std::vector> coeffVec(N); - std::vector> indexVec(N); - std::map> node2orbVec; - std::vector> orb2node(N); + std::vector> indexVec(N); // serialIx of the nodes + std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in the + // orbital given the node index in the reference tree if (serial) { - std::vector parindexVec; + // make list of all coefficients (coeffVec), and their reference indices (indexVec) + std::vector parindexVec; // serialIx of the parent nodes std::vector scalefac; for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid if (Phi[j].hasReal()) { Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j]) { orb2node[j][ix] = orb_node_ix++; @@ -1336,6 +1589,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f } if (Phi[j].hasImag()) { Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j 
+ N], parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec[j + N]) { orb2node[j + N][ix] = orb_node_ix++; @@ -1346,16 +1600,19 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f } } else { save_nodes(Phi, refTree, nodesPhi, nCoefs); - mpi::barrier(mpi::comm_wrk); + mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet. } + // 2b) save Func in bank and remove its coefficients if (Func != nullptr and !serial) { + // put Func in local representation if not already done if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); } } - std::vector> coeffpVec(N); - std::vector multipliedCoeffVec; - std::vector> ix2coef(N); + // 3) mutiply for each node + std::vector> coeffpVec(N); // to put pointers to the multiplied coefficient for each orbital in serial case + std::vector multipliedCoeffVec; // just to ensure that the data from multipliedCoeff is not deleted, since we point to it. + std::vector> ix2coef(N); // to find the index in for example rotCoeffVec[] corresponding to a serialIx DoubleVector NODEP = DoubleVector::Zero(nCoefs); DoubleVector NODEF = DoubleVector::Zero(nCoefs); @@ -1363,65 +1620,78 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f #pragma omp parallel for schedule(dynamic) for (int n = 0; n < max_n; n++) { MWNode node(*(refNodes[n]), false); - int node_ix = indexVec_ref[n]; - Eigen::MatrixXd pts; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + + // 3a) make values for f at this node + // 3a1) get coordinates of quadrature points for this node + Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs); double fval[nCoefs]; Coord r; double *originalCoef = nullptr; MWNode<3> *Fnode = nullptr; if (Func == nullptr) { - node.getExpandedChildPts(pts); + node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). 
for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); + for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? fval[j] = f.evalf(r); } } else { Fnode = Func->real().findNode(node.getNodeIndex()); if (Fnode == nullptr) { - node.getExpandedChildPts(pts); + node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); + for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? fval[j] = f.evalf(r); } } else { originalCoef = Fnode->getCoefs(); for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j]; - Fnode->attachCoefs(fval); + Fnode->attachCoefs(fval); // note that each thread has its own copy Fnode->mwTransform(Reconstruction); Fnode->cvTransform(Forward); } } DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size()); int i = 0; - std::vector orbjVec; - for (int j : node2orbVec[node_ix]) { + // 3b) fetch all orbitals at this node + std::vector orbjVec; // to remember which orbital correspond to each orbVec.size(); + for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2node[j][node_ix]; orbjVec.push_back(j); for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k]; + // 3c) transform to grid node.attachCoefs(&(multipliedCoeff(0, i))); node.mwTransform(Reconstruction); node.cvTransform(Forward); - for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; + // 3d) multiply + for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication? 
+ // 3e) transform back to mw node.cvTransform(Backward); node.mwTransform(Compression); i++; } if (Func != nullptr and originalCoef != nullptr) { + // restablish original values Fnode->attachCoefs(originalCoef); } + // 3f) save multiplied nodes for (int i = 0; i < orbjVec.size(); i++) { #pragma omp critical { ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size(); - coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); + coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers } } #pragma omp critical - { multipliedCoeffVec.push_back(std::move(multipliedCoeff)); } - node.attachCoefs(nullptr); + { + // this ensures that multipliedCoeff is not deleted, when getting out of scope + multipliedCoeffVec.push_back(std::move(multipliedCoeff)); + } + node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor } } else { + // MPI int count1 = 0; int count2 = 0; TaskManager tasks(max_n); @@ -1429,55 +1699,70 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f int n = tasks.next_task(); if (n < 0) break; MWNode node(*(refNodes[n]), false); - Eigen::MatrixXd pts; - node.getExpandedChildPts(pts); + // 3a) make values for f + // 3a1) get coordinates of quadrature points for this node + Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs); + node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache). double fval[nCoefs]; Coord r; MWNode Fnode(*(refNodes[n]), false); if (Func == nullptr) { for (int j = 0; j < nCoefs; j++) { - for (int d = 0; d < D; d++) r[d] = pts(d, j); + for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]? 
fval[j] = f.evalf(r); } } else { int nIdx = Func->real().getIx(node.getNodeIndex()); count1++; if (nIdx < 0) { + // use the function f instead of Func count2++; for (int j = 0; j < nCoefs; j++) { for (int d = 0; d < D; d++) r[d] = pts(d, j); fval[j] = f.evalf(r); } } else { - Func->real().getNodeCoeff(nIdx, fval); + Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank Fnode.attachCoefs(fval); Fnode.mwTransform(Reconstruction); Fnode.cvTransform(Forward); } } - DoubleMatrix coeffBlock(nCoefs, N); + // 3b) fetch all orbitals at this node + DoubleMatrix coeffBlock(nCoefs, N); // largest possible used size std::vector orbjVec; nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec); - coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); + coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size()); - for (int j = 0; j < orbjVec.size(); j++) { + // 3c) transform to grid + for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ? + // TODO: select only nodes that are end nodes? node.attachCoefs(coeffBlock.col(j).data()); node.mwTransform(Reconstruction); node.cvTransform(Forward); + // 3d) multiply double *coefs = node.getCoefs(); for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i]; + // 3e) transform back to mw node.cvTransform(Backward); node.mwTransform(Compression); + // 3f) save multiplied nodes nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs); } - node.attachCoefs(nullptr); - Fnode.attachCoefs(nullptr); + node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor + Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor } - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! } + // 5) reconstruct trees using multiplied nodes. 
+ + // only serial case can use OMP, because MPI cannot be used by threads if (serial) { + // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main + // operation is writing the coefficient into the tree) + #pragma omp parallel for schedule(static) for (int j = 0; j < N; j++) { if (j < N) { @@ -1485,6 +1770,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].alloc(1); out[j].real().clear(); out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy"); + // 6) reconstruct trees from end nodes out[j].real().mwTransform(BottomUp); out[j].real().calcSquareNorm(); } @@ -1501,22 +1787,23 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f } else { for (int j = 0; j < N; j++) { if (not mpi::my_func(j) and not all) continue; - std::vector coeffpVec; - std::map ix2coef; + // traverse possible nodes, and stop descending when norm is zero (leaf in out[j]) + std::vector coeffpVec; // + std::map ix2coef; // to find the index in coeffVec[] corresponding to a serialIx in refTree int ix = 0; - std::vector pointerstodelete; + std::vector pointerstodelete; // list of temporary arrays to clean up for (int ibank = 0; ibank < mpi::bank_size; ibank++) { std::vector nodeidVec; - double *dataVec; + double *dataVec; // will be allocated by bank nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank); if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec); int shift = 0; for (int n = 0; n < nodeidVec.size(); n++) { - assert(nodeidVec[n] - max_ix >= 0); - assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); + assert(nodeidVec[n] - max_ix >= 0); // unmultiplied nodes have been deleted + assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once ix2coef[nodeidVec[n] - max_ix] = ix++; - coeffpVec.push_back(&dataVec[shift]); + coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers shift += nCoefs; } } @@ -1525,10 
+1812,12 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].alloc(1); out[j].real().clear(); out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy"); + // 6) reconstruct trees from end nodes out[j].real().mwTransform(BottomUp); out[j].real().calcSquareNorm(); out[j].real().resetEndNodeTable(); - if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); + // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped + if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi } } else { if (Phi[j].hasImag()) { @@ -1537,6 +1826,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f out[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy"); out[j].imag().mwTransform(BottomUp); out[j].imag().calcSquareNorm(); + // out[j].imag().crop(prec, 1.0, false); if (nrefine > 0) Phi[j].imag().crop(prec, 1.0, false); } } @@ -1556,6 +1846,7 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) { int N = Bra.size(); ComplexVector result = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { + // The bra is sent to the owner of the ket if (my_func(Bra[i]) != my_func(Ket[i])) { MSG_ABORT("same indices should have same ownership"); } result[i] = dot(Bra[i], Ket[i]); if (not mrcpp::mpi::my_func(i)) Bra[i].free(); @@ -1564,45 +1855,68 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) { return result; } +/** @brief Compute Löwdin orthonormalization matrix + * + * @param Phi: orbitals to orthonomalize + * + * Computes the inverse square root of the orbital overlap matrix S^(-1/2) + */ ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi) { ComplexMatrix S_tilde = calc_overlap_matrix(Phi); ComplexMatrix S_m12 = math_utils::hermitian_matrix_pow(S_tilde, -1.0 / 2.0); return S_m12; } +/** @brief Orbital transformation out_j = sum_i inp_i*U_ij + * + * NOTE: OrbitalVector is considered a ROW vector, so rotation 
+ * means matrix multiplication from the right + * + * MPI: Rank distribution of output vector is the same as input vector + * + */ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { int N = BraKet.size(); ComplexMatrix S = ComplexMatrix::Zero(N, N); DoubleMatrix Sreal = S.real(); MultiResolutionAnalysis<3> *mra = BraKet.vecMRA; - FunctionTree<3> refTree(*mra); + // 1) make union tree without coefficients + mrcpp::FunctionTree<3> refTree(*mra); mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); + // get a list of all nodes in union grid, as defined by their indices std::vector scalefac; std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; - int max_ix; + std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes + int max_ix; // largest index value (not used here) refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree); int max_n = indexVec_ref.size(); + // only used for serial case: std::vector> coeffVec(N); - std::map> node2orbVec; - std::vector> orb2node(N); + std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree - bool serial = mrcpp::mpi::wrk_size == 1; + bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch mrcpp::BankAccount nodesBraKet; + // In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank if (serial) { - std::vector parindexVec; - std::vector indexVec; + // 2) make list of all coefficients, and their reference indices + // for different orbitals, indexVec will give the same index for the same node in space + std::vector parindexVec; // serialIx of the parent nodes + std::vector indexVec; // serialIx of the nodes for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid BraKet[j].complex().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2node[j][ix] = orb_node_ix++; @@ -1610,33 +1924,36 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { node2orbVec[ix].push_back(j); } } - } else { + } else { // MPI case + // 2) send own nodes to bank, identifying them through the serialIx of refTree save_nodes(BraKet, refTree, nodesBraKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! } + // 3) make dot product for all the nodes and accumulate into S int ibank = 0; #pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); + ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - int node_ix = indexVec_ref[n]; - std::vector orbVec; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + std::vector orbVec; // identifies which orbitals use this node if (serial and node2orbVec[node_ix].size() <= 0) continue; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; + // In the serial case we copy the coeff coeffBlock. 
In the mpi case coeffBlock is provided by the bank if (serial) { - int shift = sizecoeff - sizecoeffW; + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part if (parindexVec_ref[n] < 0) shift = 0; ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { + for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbVec.push_back(j); @@ -1653,7 +1970,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { } } } - } else { + } else { // MPI case ComplexMatrix coeffBlock(csize, N); nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec); @@ -1682,11 +1999,13 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { for (int i = 0; i < N; i++) { for (int j = 0; j <= i; j++) { - if (i != j) S(j, i) = std::conj(S(i, j)); + if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri } } + // Assumes linearity: result is sum of all nodes contributions mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); + // multiply by CompFunction multiplicative factor ComplexVector Fac = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { @@ -1702,6 +2021,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) { return S; } ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { + // NB: should be spinseparated at this point! 
if (BraKet[0].iscomplex()) { return calc_overlap_matrix_cplx(BraKet); } int N = BraKet.size(); @@ -1709,33 +2029,42 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { MultiResolutionAnalysis<3> *mra = BraKet.vecMRA; - FunctionTree<3> refTree(*mra); + // 1) make union tree without coefficients + mrcpp::FunctionTree<3> refTree(*mra); mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk); int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); + // get a list of all nodes in union grid, as defined by their indices std::vector scalefac; std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; - int max_ix; + std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes + int max_ix; // largest index value (not used here) refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree); int max_n = indexVec_ref.size(); + // only used for serial case: std::vector> coeffVec(N); - std::map> node2orbVec; - std::vector> orb2node(N); + std::map> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2node(N); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree - bool serial = mrcpp::mpi::wrk_size == 1; + bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch mrcpp::BankAccount nodesBraKet; + // In the serial case we store the coeff pointers in coeffVec. 
In the mpi case the coeff are stored in the bank if (serial) { - std::vector parindexVec; - std::vector indexVec; + // 2) make list of all coefficients, and their reference indices + // for different orbitals, indexVec will give the same index for the same node in space + std::vector parindexVec; // serialIx of the parent nodes + std::vector indexVec; // serialIx of the nodes for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2node[j][ix] = orb_node_ix++; @@ -1743,33 +2072,36 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { node2orbVec[ix].push_back(j); } } - } else { + } else { // MPI case + // 2) send own nodes to bank, identifying them through the serialIx of refTree save_nodes(BraKet, refTree, nodesBraKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! } + // 3) make dot product for all the nodes and accumulate into S int ibank = 0; #pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); + ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - int node_ix = indexVec_ref[n]; - std::vector orbVec; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + std::vector orbVec; // identifies which orbitals use this node if (serial and node2orbVec[node_ix].size() <= 0) continue; if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; + // In the serial case we copy the coeff coeffBlock. 
In the mpi case coeffBlock is provided by the bank if (serial) { - int shift = sizecoeff - sizecoeffW; + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part if (parindexVec_ref[n] < 0) shift = 0; DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size()); - for (int j : node2orbVec[node_ix]) { + for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2node[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift]; orbVec.push_back(j); @@ -1786,7 +2118,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { } } } - } else { + } else { // MPI case DoubleMatrix coeffBlock(csize, N); nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec); @@ -1815,12 +2147,14 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { for (int i = 0; i < N; i++) { for (int j = 0; j <= i; j++) { - if (i != j) S(j, i) = std::conj(S(i, j)); + if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri } } + // Assumes linearity: result is sum of all nodes contributions mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); + // multiply by CompFunction multiplicative factor ComplexVector Fac = ComplexVector::Zero(N); for (int i = 0; i < N; i++) { if (!mrcpp::mpi::my_func(BraKet[i])) continue; @@ -1834,11 +2168,16 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) { return S; } +/** @brief Compute the overlap matrix S_ij = + * + * Will take the conjugate of bra before integrating + */ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket) { - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings bool braisreal = !Bra[0].iscomplex(); bool ketisreal = !Ket[0].iscomplex(); if (braisreal or ketisreal) { + // temporary solution: copy as complex trees if (braisreal) { for (int i = 0; i < Bra.size(); i++) { 
Bra[i].CompD[0]->CopyTreeToComplex(Bra[i].CompC[0]); @@ -1871,15 +2210,18 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk); - FunctionTree<3> refTree(*mra); + // 1) make union tree without coefficients for Bra (supposed smallest) + mrcpp::FunctionTree<3> refTree(*mra); mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk); + // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero. int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); + // get a list of all nodes in union grid, as defined by their indices std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; + std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes std::vector scalefac; int max_ix; @@ -1887,22 +2229,32 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect int max_n = indexVec_ref.size(); max_ix++; - bool serial = mrcpp::mpi::wrk_size == 1; + bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + // only used for serial case: std::vector> coeffVecBra(N); - std::map> node2orbVecBra; - std::vector> orb2nodeBra(N); + std::map> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree std::vector> coeffVecKet(M); - std::map> node2orbVecKet; - std::vector> orb2nodeKet(M); + std::map> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree 
mrcpp::BankAccount nodesBra; mrcpp::BankAccount nodesKet; + // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank if (serial) { - std::vector parindexVec; - std::vector indexVec; + // 2) make list of all coefficients, and their reference indices + // for different orbitals, indexVec will give the same index for the same node in space + // TODO? : do not copy coefficients, but use directly the pointers + // could OMP parallelize, but is fast anyway + std::vector parindexVec; // serialIx of the parent nodes + std::vector indexVec; // serialIx of the nodes for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid Bra[j].complex().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeBra[j][ix] = orb_node_ix++; @@ -1912,6 +2264,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } for (int j = 0; j < M; j++) { Ket[j].complex().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeKet[j][ix] = orb_node_ix++; @@ -1920,43 +2273,46 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } - } else { + } else { // MPI case + // 2) send own nodes to bank, identifying them through the serialIx of refTree save_nodes(Bra, refTree, nodesBra); save_nodes(Ket, refTree, nodesKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! } + // 3) make dot product for all the nodes and accumulate into S int totsiz = 0; int totget = 0; int mxtotsiz = 0; int ibank = 0; + // the omp crashes sometime for unknown reasons? 
#pragma omp parallel if (serial) { - ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); + ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread #pragma omp for schedule(dynamic) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - std::vector orbVecBra; - std::vector orbVecKet; + std::vector orbVecBra; // identifies which Bra orbitals use this node + std::vector orbVecKet; // identifies which Ket orbitals use this node if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; if (serial) { - int node_ix = indexVec_ref[n]; - int shift = sizecoeff - sizecoeffW; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part ComplexMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size()); ComplexMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size()); if (parindexVec_ref[n] < 0) shift = 0; - for (int j : node2orbVecBra[node_ix]) { + for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2nodeBra[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift]; orbVecBra.push_back(j); } - for (int j : node2orbVecKet[node_ix]) { + for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2nodeKet[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift]; orbVecKet.push_back(j); @@ -1983,12 +2339,12 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } } - } else { + } else { // MPI case ComplexMatrix coeffBlockBra(csize, N); ComplexMatrix coeffBlockKet(csize, M); - nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); - nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); + 
nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts + nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts totsiz += orbVecBra.size() * orbVecKet.size(); mxtotsiz += N * M; totget += orbVecBra.size() + orbVecKet.size(); @@ -2026,8 +2382,11 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect } } + // 4) collect results from all MPI. Linearity: result is sum of all node contributions + mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); + // multiply by CompFunction multiplicative factor ComplexVector FacBra = ComplexVector::Zero(N); ComplexVector FacKet = ComplexVector::Zero(M); for (int i = 0; i < N; i++) { @@ -2044,6 +2403,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect for (int j = 0; j < M; j++) { S(i, j) *= std::conj(FacBra[i]) * FacKet[j]; } } + // restore input if (braisreal) { for (int i = 0; i < Bra.size(); i++) { delete Bra[i].CompC[0]; @@ -2063,11 +2423,14 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect return S; } +/** @brief Compute the overlap matrix S_ij = + * + */ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) { if (Bra[0].iscomplex() or Ket[0].iscomplex()) { return calc_overlap_matrix_cplx(Bra, Ket); } - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings MultiResolutionAnalysis<3> *mra = Bra.vecMRA; @@ -2075,15 +2438,18 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K int M = Ket.size(); ComplexMatrix S = ComplexMatrix::Zero(N, M); - FunctionTree<3> refTree(*mra); + // 1) make union tree without coefficients for Bra (supposed smallest) + mrcpp::FunctionTree<3> refTree(*mra); mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk); + // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product 
is zero. int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d(); int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); + // get a list of all nodes in union grid, as defined by their indices std::vector coeffVec_ref; - std::vector indexVec_ref; - std::vector parindexVec_ref; + std::vector indexVec_ref; // serialIx of the nodes + std::vector parindexVec_ref; // serialIx of the parent nodes std::vector scalefac; int max_ix; @@ -2091,21 +2457,31 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K int max_n = indexVec_ref.size(); max_ix++; - bool serial = mrcpp::mpi::wrk_size == 1; + bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch + // only used for serial case: std::vector> coeffVecBra(N); - std::map> node2orbVecBra; - std::vector> orb2nodeBra(N); + std::map> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree std::vector> coeffVecKet(M); - std::map> node2orbVecKet; - std::vector> orb2nodeKet(M); + std::map> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node + std::vector> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in + // the orbital given the node index in the reference tree mrcpp::BankAccount nodesBra; mrcpp::BankAccount nodesKet; + // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank if (serial) { - std::vector parindexVec; - std::vector indexVec; + // 2) make list of all coefficients, and their reference indices + // for different orbitals, indexVec will give the same index for the same node in space + // TODO? 
: do not copy coefficients, but use directly the pointers + // could OMP parallelize, but is fast anyway + std::vector parindexVec; // serialIx of the parent nodes + std::vector indexVec; // serialIx of the nodes for (int j = 0; j < N; j++) { + // make vector with all coef pointers and their indices in the union grid Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeBra[j][ix] = orb_node_ix++; @@ -2115,6 +2491,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } for (int j = 0; j < M; j++) { Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree); + // make a map that gives j from indexVec int orb_node_ix = 0; for (int ix : indexVec) { orb2nodeKet[j][ix] = orb_node_ix++; @@ -2123,42 +2500,45 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } - } else { + } else { // MPI case + // 2) send own nodes to bank, identifying them through the serialIx of refTree save_nodes(Bra, refTree, nodesBra); save_nodes(Ket, refTree, nodesKet); - mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); + mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching! } + // 3) make dot product for all the nodes and accumulate into S int totsiz = 0; int totget = 0; int mxtotsiz = 0; int ibank = 0; #pragma omp parallel if (serial) { - DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); + DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread + // NB: dynamic does give strange errors? 
#pragma omp for schedule(static) for (int n = 0; n < max_n; n++) { if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; int csize; - std::vector orbVecBra; - std::vector orbVecKet; + std::vector orbVecBra; // identifies which Bra orbitals use this node + std::vector orbVecKet; // identifies which Ket orbitals use this node if (parindexVec_ref[n] < 0) csize = sizecoeff; else csize = sizecoeffW; if (serial) { - int node_ix = indexVec_ref[n]; - int shift = sizecoeff - sizecoeffW; + int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree + int shift = sizecoeff - sizecoeffW; // to copy only wavelet part DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size()); DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size()); if (parindexVec_ref[n] < 0) shift = 0; - for (int j : node2orbVecBra[node_ix]) { + for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2nodeBra[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift]; orbVecBra.push_back(j); } - for (int j : node2orbVecKet[node_ix]) { + for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node int orb_node_ix = orb2nodeKet[j][node_ix]; for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift]; orbVecKet.push_back(j); @@ -2177,12 +2557,12 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } } - } else { + } else { // MPI case DoubleMatrix coeffBlockBra(csize, N); DoubleMatrix coeffBlockKet(csize, M); - nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); - nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); + nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts + nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket 
parts totsiz += orbVecBra.size() * orbVecKet.size(); mxtotsiz += N * M; totget += orbVecBra.size() + orbVecKet.size(); @@ -2210,8 +2590,11 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K } } + // 4) collect results from all MPI. Linearity: result is sum of all node contributions + mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk); + // multiply by CompFunction multiplicative factor ComplexVector FacBra = ComplexVector::Zero(N); ComplexVector FacKet = ComplexVector::Zero(M); for (int i = 0; i < N; i++) { @@ -2231,7 +2614,11 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K return S; } +/** @brief Orthogonalize the functions in Bra against all orbitals in Ket + * + */ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket) { + // TODO: generalize for cases where Ket functions are not orthogonal to each other? ComplexMatrix S = calc_overlap_matrix(Bra, Ket); int N = Bra.size(); int M = Ket.size(); @@ -2251,6 +2638,9 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket } } +/** @brief Orthogonalize the Bra against Ket + * + */ template void orthogonalize(double prec, CompFunction &Bra, CompFunction &Ket) { ComplexDouble overlap = dot(Bra, Ket); double sq_norm = Ket.getSquareNorm(); diff --git a/src/utils/Plotter.cpp b/src/utils/Plotter.cpp index 524bd3631..c29b3ee2e 100644 --- a/src/utils/Plotter.cpp +++ b/src/utils/Plotter.cpp @@ -33,6 +33,10 @@ using namespace Eigen; namespace mrcpp { +/** @returns New Plotter object + * + * @param[in] o: Plot origin, default `(0, 0, ... 
, 0)` + */ template Plotter::Plotter(const Coord &o) : O(o) { @@ -42,20 +46,49 @@ Plotter::Plotter(const Coord &o) setSuffix(Plotter::Grid, ".grid"); } +/** @brief Set file extension for output file + * + * @param[in] t: Plot type (`Plotter::Line`, `::Surface`, `::Cube`, `::Grid`) + * @param[in] s: Extension string, default `.line`, `.surf`, `.cube`, `.grid` + * + * @details The file name you decide for the output will get a predefined + * suffix that differentiates between different types of plot. + */ template void Plotter::setSuffix(int t, const std::string &s) { this->suffix.insert(std::pair(t, s)); } +/** @brief Set the point of origin for the plot + * + * @param[in] o: Plot origin, default `(0, 0, ... , 0)` + */ template void Plotter::setOrigin(const Coord &o) { this->O = o; } +/** @brief Set boundary vectors A, B and C for the plot + * + * @param[in] a: A vector + * @param[in] b: B vector + * @param[in] c: C vector + */ template void Plotter::setRange(const Coord &a, const Coord &b, const Coord &c) { this->A = a; this->B = b; this->C = c; } +/** @brief Grid plot of a MWTree + * + * @param[in] tree: MWTree to plot + * @param[in] fname: File name for output, without extension + * + * @details Writes a file named fname + file extension (".grid" as default) + * to be read by geomview to visualize the grid (of endNodes) where the + * multiresolution function is defined. In MPI, each process will write a + * separate file, and will print only nodes owned by itself (pluss the + * rootNodes). 
+ */ template void Plotter::gridPlot(const MWTree &tree, const std::string &fname) { println(20, "----------Grid Plot-----------"); std::stringstream file; @@ -66,11 +99,21 @@ template void Plotter::gridPlot(const MWTree &tr printout(20, std::endl); } +/** @brief Parametric plot of a function + * + * @param[in] npts: Number of points along A + * @param[in] func: Function to plot + * @param[in] fname: File name for output, without extension + * + * @details Plots the function func parametrically with npts[0] along the + * vector A starting from the origin O to a file named fname + file extension + * (".line" as default). + */ template void Plotter::linePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "----------Line Plot-----------"); std::stringstream file; file << fname << this->suffix[Plotter::Line]; - if (verifyRange(1)) { + if (verifyRange(1)) { // Verifies only A vector Eigen::MatrixXd coords = calcLineCoordinates(npts[0]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -82,11 +125,21 @@ template void Plotter::linePlot(const std::array void Plotter::surfPlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "--------Surface Plot----------"); std::stringstream file; file << fname << this->suffix[Plotter::Surface]; - if (verifyRange(2)) { + if (verifyRange(2)) { // Verifies A and B vectors Eigen::MatrixXd coords = calcSurfCoordinates(npts[0], npts[1]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -98,11 +151,22 @@ template void Plotter::surfPlot(const std::array void Plotter::cubePlot(const std::array &npts, const RepresentableFunction &func, const std::string &fname) { println(20, "----------Cube Plot-----------"); std::stringstream file; file << fname << this->suffix[Plotter::Cube]; - if (verifyRange(3)) { + if (verifyRange(3)) { // Verifies A, B and C vectors Eigen::MatrixXd coords = 
calcCubeCoordinates(npts[0], npts[1], npts[2]); Eigen::Matrix values = evaluateFunction(func, coords); openPlot(file.str()); @@ -114,6 +178,11 @@ template void Plotter::cubePlot(const std::array Eigen::MatrixXd Plotter::calcLineCoordinates(int pts_a) const { MatrixXd coords; if (pts_a > 0) { @@ -128,6 +197,11 @@ template Eigen::MatrixXd Plotter::calcLineCoordinates( return coords; } +/** @brief Calculating coordinates to be evaluated + * + * @details Generating a vector of equidistant coordinates that makes up the + * area spanned by vectors A and B in D dimensions, starting from the origin O. + */ template Eigen::MatrixXd Plotter::calcSurfCoordinates(int pts_a, int pts_b) const { if (D < 2) MSG_ERROR("Cannot surfPlot less than 2D"); @@ -151,6 +225,12 @@ template Eigen::MatrixXd Plotter::calcSurfCoordinates( return coords; } +/** @brief Calculating coordinates to be evaluated + * + * @details Generating a vector of equidistant coordinates that makes up the + * volume spanned by vectors A, B and C in D dimensions, starting from + * the origin O. + */ template Eigen::MatrixXd Plotter::calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const { if (D < 3) MSG_ERROR("Cannot cubePlot less than 3D function"); @@ -177,6 +257,12 @@ template Eigen::MatrixXd Plotter::calcCubeCoordinates( return coords; } +/** @brief Evaluating a function in a set of predfined coordinates + * + * @details Given that the set of coordinates ("coords") has been calculated, + * this routine evaluates the function in these points and stores the results + * in the vector "values". 
+ */ template Eigen::Matrix Plotter::evaluateFunction(const RepresentableFunction &func, const Eigen::MatrixXd &coords) const { auto npts = coords.rows(); if (npts == 0) MSG_ERROR("Empty coordinates"); @@ -190,6 +276,13 @@ template Eigen::Matrix Plotter:: return values; } +/** @brief Writing plot data to file + * + * @details This will write the contents of the "coords" matrix along with the + * function values to the file stream fout. File will contain on each line the + * point number (between 0 and nPoints), coordinates 1 through D and the + * function value. + */ template void Plotter::writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix &values) { if (coords.rows() != values.size()) INVALID_ARG_ABORT; std::ofstream &o = *this->fout; @@ -203,18 +296,25 @@ template void Plotter::writeData(const Eigen::MatrixXd } } +// Specialized for D=3 below template void Plotter::writeCube(const std::array &npts, const Eigen::Matrix &values) { NOT_IMPLEMENTED_ABORT } +// Specialized for D=3 below template void Plotter::writeNodeGrid(const MWNode &node, const std::string &color) { NOT_IMPLEMENTED_ABORT } +// Specialized for D=3 below template void Plotter::writeGrid(const MWTree &tree) { NOT_IMPLEMENTED_ABORT } +/** @brief Opening file for output + * + * @details Opens a file output stream fout for file named fname. + */ template void Plotter::openPlot(const std::string &fname) { if (fname.empty()) { if (this->fout == nullptr) { @@ -235,11 +335,20 @@ template void Plotter::openPlot(const std::string &fna } } +/** @brief Closing file + * + * @details Closes the file output stream fout. + */ template void Plotter::closePlot() { if (this->fout != nullptr) this->fout->close(); this->fout = nullptr; } +/** @brief Writing plot data to file + * + * @details This will write a cube file (readable by blob) of the function + * values previously calculated (the "values" vector). 
+ */ template <> void Plotter<3>::writeCube(const std::array &npts, const Eigen::VectorXd &values) { std::ofstream &o = *this->fout; @@ -253,30 +362,35 @@ template <> void Plotter<3>::writeCube(const std::array &npts, const Eig o.setf(std::ios::scientific); o.precision(6); + // Origin o << std::setw(5) << 0; o << std::setw(15) << this->O[0]; o << std::setw(15) << this->O[1]; o << std::setw(15) << this->O[2] << std::endl; + // Vector A o << std::setw(5) << npts[0]; o << std::setw(15) << a[0]; o << std::setw(15) << a[1]; o << std::setw(15) << a[2] << std::endl; + // Vector B o << std::setw(5) << npts[1]; o << std::setw(15) << b[0]; o << std::setw(15) << b[1]; o << std::setw(15) << b[2] << std::endl; + // Vector C o << std::setw(5) << npts[2]; o << std::setw(15) << c[0]; o << std::setw(15) << c[1]; o << std::setw(15) << c[2] << std::endl; + // Function values o.precision(4); for (int n = 0; n < values.size(); n++) { o << std::setw(12) << values[n]; - if (n % 6 == 5) o << std::endl; + if (n % 6 == 5) o << std::endl; // Line break after 6 values } } @@ -305,6 +419,12 @@ template <> void Plotter<3>::writeNodeGrid(const MWNode<3> &node, const std::str << origin[0] << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] + length << color << std::endl; } +/** @brief Writing grid data to file + * + * @details This will write a grid file (readable by geomview) of the grid + * (of endNodes) where the multiresolution function is defined. Currently + * only working in 3D. 
+ */ template <> void Plotter<3>::writeGrid(const MWTree<3> &tree) { std::ostream &o = *this->fout; o << "CQUAD" << std::endl; @@ -321,7 +441,9 @@ template <> void Plotter<3>::writeGrid(const MWTree<3> &tree) { } } +/** @brief Checks the validity of the plotting range */ template bool Plotter::verifyRange(int dim) const { + auto is_len_zero = [](Coord vec) { double vec_sq = 0.0; for (auto d = 0; d < D; d++) vec_sq += vec[d] * vec[d]; @@ -340,6 +462,7 @@ template bool Plotter::verifyRange(int dim) const { return true; } +/** @brief Compute step length to cover vector with `pts` points, including edges */ template Coord Plotter::calcStep(const Coord &vec, int pts) const { Coord step; for (auto d = 0; d < D; d++) step[d] = vec[d] / (pts - 1.0); diff --git a/src/utils/Printer.cpp b/src/utils/Printer.cpp index c7fd7637e..24585feb3 100644 --- a/src/utils/Printer.cpp +++ b/src/utils/Printer.cpp @@ -45,6 +45,19 @@ int Printer::printRank = 0; int Printer::printSize = 1; std::ostream *Printer::out = &std::cout; +/** @brief Initialize print environment + * + * @param[in] level: Desired print level of output + * @param[in] rank: MPI rank of current process + * @param[in] size: Total number of MPI processes + * @param[in] file: File name for printed output, will get "-{rank}.out" extension + * + * @details Only print statements with lower printlevel than level will be + * displayed. If a file name is given, each process will print to a separate + * file called {file}-{rank}.out. If no file name is given, only processes + * which initialize the printer with rank=0 will print to screen. By default, + * all ranks initialize with rank=0, i.e. all ranks print to screen by default. 
+ */ void Printer::init(int level, int rank, int size, const char *file) { printLevel = level; printRank = rank; @@ -62,13 +75,19 @@ void Printer::init(int level, int rank, int size, const char *file) { } } else { if (printRank > 0) { - setPrintLevel(-1); + setPrintLevel(-1); // Higher ranks be quiet } } setScientific(); } +/** @brief Print information about MRCPP version and build configuration + * + * @param[in] level: Activation level for print statement + * + **/ void print::environment(int level) { + // clang-format off if (level > Printer::getPrintLevel()) return; printout(level, std::endl); @@ -105,14 +124,28 @@ void print::environment(int level) { printout(level, std::endl); print::separator(level, '-', 2); + // clang-format on } +/** @brief Print a full line of a single character + * + * @param[in] level: Activation level for print statement + * @param[in] c: Character to fill the line + * @param[in] newlines: Number of extra newlines + */ void print::separator(int level, const char &c, int newlines) { if (level > Printer::getPrintLevel()) return; printout(level, std::string(Printer::getWidth(), c)); for (int i = 0; i <= newlines; i++) printout(level, std::endl); } +/** @brief Print a text header + * + * @param[in] level: Activation level for print statement + * @param[in] txt: Header text + * @param[in] newlines: Number of extra newlines + * @param[in] c: Character to fill the first line + */ void print::header(int level, const std::string &txt, int newlines, const char &c) { if (level > Printer::getPrintLevel()) return; @@ -123,6 +156,13 @@ void print::header(int level, const std::string &txt, int newlines, const char & print::separator(level, '-', newlines); } +/** @brief Print a footer with elapsed wall time + * + * @param[in] level: Activation level for print statement + * @param[in] t: Timer to be evaluated + * @param[in] newlines: Number of extra newlines + * @param[in] c: Character to fill the last line + */ void print::footer(int level, const 
Timer &t, int newlines, const char &c) { if (level > Printer::getPrintLevel()) return; @@ -140,6 +180,14 @@ void print::footer(int level, const Timer &t, int newlines, const char &c) { print::separator(level, c, newlines); } +/** @brief Print a scalar value, including unit + * + * @param[in] level: Activation level for print statement + * @param[in] v: Scalar value to print + * @param[in] unit: Unit of scalar + * @param[in] p: Floating point precision + * @param[in] sci: Use scientific notation + */ void print::value(int level, const std::string &txt, double v, const std::string &unit, int p, bool sci) { if (level > Printer::getPrintLevel()) return; @@ -162,6 +210,14 @@ void print::value(int level, const std::string &txt, double v, const std::string println(level, o.str()); } +/** @brief Print tree parameters (nodes, memory) and wall time + * + * @param[in] level: Activation level for print statement + * @param[in] txt: Text string + * @param[in] n: Number of tree nodes + * @param[in] m: Memory usage (kB) + * @param[in] t: Wall time (sec) + */ void print::tree(int level, const std::string &txt, int n, int m, double t) { if (level > Printer::getPrintLevel()) return; @@ -202,6 +258,13 @@ void print::tree(int level, const std::string &txt, int n, int m, double t) { println(level, o.str()); } +/** @brief Print tree parameters (nodes, memory) and wall time + * + * @param[in] level: Activation level for print statement + * @param[in] txt: Text string + * @param[in] tree: Tree to be printed + * @param[in] timer: Timer to be evaluated + */ template void print::tree(int level, const std::string &txt, const MWTree &tree, const Timer &timer) { if (level > Printer::getPrintLevel()) return; @@ -211,6 +274,12 @@ template void print::tree(int level, const std::string &txt, print::tree(level, txt, n, m, t); } +/** @brief Print elapsed time from Timer + * + * @param[in] level: Activation level for print statement + * @param[in] txt: Text string + * @param[in] timer: Timer to be 
evaluated + */ void print::time(int level, const std::string &txt, const Timer &timer) { if (level > Printer::getPrintLevel()) return; @@ -228,6 +297,11 @@ void print::time(int level, const std::string &txt, const Timer &timer) { println(level, o.str()); } +/** @brief Print the current memory usage of this process, obtained from system + * + * @param[in] level: Activation level for print statement + * @param[in] txt: Text string + */ void print::memory(int level, const std::string &txt) { if (level > Printer::getPrintLevel()) return; @@ -260,4 +334,4 @@ template void print::tree<1>(int level, const std::string &txt, const MWTree<1> template void print::tree<2>(int level, const std::string &txt, const MWTree<2> &tree, const Timer &timer); template void print::tree<3>(int level, const std::string &txt, const MWTree<3> &tree, const Timer &timer); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/utils/Timer.cpp b/src/utils/Timer.cpp index 54fd802ac..67bd0c713 100644 --- a/src/utils/Timer.cpp +++ b/src/utils/Timer.cpp @@ -28,15 +28,24 @@ namespace mrcpp { +/** @returns New Timer object + * @param[in] start_timer: option to start timer immediately + */ Timer::Timer(bool start_timer) { if (start_timer) start(); } +/** @returns Copy of Timer object, including its current state + * @param[in] timer: Object to copy + */ Timer::Timer(const Timer &timer) : running(timer.running) , time_used(timer.time_used) , clock_start(timer.clock_start) {} +/** @returns Copy of Timer object, including its current state + * @param[in] timer: Object to copy + */ Timer &Timer::operator=(const Timer &timer) { if (this != &timer) { this->running = timer.running; @@ -46,24 +55,28 @@ Timer &Timer::operator=(const Timer &timer) { return *this; } +/** @brief Start timer from zero */ void Timer::start() { this->clock_start = now(); this->time_used = 0.0; this->running = true; } +/** @brief Resume timer from previous time */ void Timer::resume() { if 
(this->running) MSG_WARN("Timer already running"); this->clock_start = now(); this->running = true; } +/** @brief Stop timer */ void Timer::stop() { if (not this->running) MSG_WARN("Timer not running"); this->time_used += diffTime(now(), this->clock_start); this->running = false; } +/** @returns Current elapsed time, in seconds */ double Timer::elapsed() const { return (this->running) ? diffTime(now(), this->clock_start) : this->time_used; } diff --git a/src/utils/details.cpp b/src/utils/details.cpp index 3801fb91a..b5bed3998 100644 --- a/src/utils/details.cpp +++ b/src/utils/details.cpp @@ -37,15 +37,16 @@ namespace mrcpp { namespace details { - bool directory_exists(std::string path) { struct stat info; + int statRC = stat(path.c_str(), &info); if (statRC != 0) { - if (errno == ENOENT) { return 0; } - if (errno == ENOTDIR) { return 0; } + if (errno == ENOENT) { return 0; } // something along the path does not exist + if (errno == ENOTDIR) { return 0; } // something in path prefix is not a dir MSG_ABORT("Non-existent directory " + path); } + return (info.st_mode & S_IFDIR) ? 
true : false; } @@ -59,6 +60,7 @@ std::string find_filters() { break; } } + if (filters.empty()) { MSG_ABORT("Could not find a folder containing filters!"); } else { @@ -66,10 +68,12 @@ std::string find_filters() { } } +// helper function: parse a string and returns the nth integer number int get_val(char *line, int n) { char *p = line; int len = 0; for (int i = 0; i < n - 1; i++) { + // jump over n-1 first numbers while (*p < '0' || *p > '9') p++; while (*p >= '0' && *p <= '9') p++; } @@ -84,26 +88,27 @@ int get_val(char *line, int n) { return atoi(p); } +/** @brief returns the current memory usage of this process, in kB */ int get_memory_usage() { int mem_val = -1; FILE *file = fopen("/proc/self/statm", "r"); if (file != nullptr) { char line[80]; while (fgets(line, 80, file) != nullptr) { - mem_val = 4 * get_val(line, 6); + mem_val = 4 * get_val(line, 6); // sixth number is data+stack in pages (4kB) } fclose(file); } return mem_val; } -template -bool are_all_equal(const std::array &exponent) { +/** @brief checks if all elements of an array of doubles are equal */ +template bool are_all_equal(const std::array &exponent) { return std::all_of(exponent.begin(), exponent.end(), [ex = std::begin(exponent)](double i) { return i == *ex; }); } -template -std::array convert_to_std_array(T *arr) { +/** @brief converts c_type arrays to std::arrays */ +template std::array convert_to_std_array(T *arr) { auto ret_arr = std::array{}; for (auto d = 0; d < D; d++) { ret_arr[d] = arr[d]; } return ret_arr; @@ -120,6 +125,5 @@ template std::array convert_to_std_array(double *arr); template std::array convert_to_std_array(int *arr); template std::array convert_to_std_array(int *arr); template std::array convert_to_std_array(int *arr); - -} -} \ No newline at end of file +} // namespace details +} // namespace mrcpp diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp index 1fdb55f54..69a13f300 100644 --- a/src/utils/math_utils.cpp +++ b/src/utils/math_utils.cpp @@ -41,6 
+41,7 @@ using namespace Eigen; namespace mrcpp { +/** @brief Calculate \f$ m^e\f$ for integers (for convenience, not speed!) */ int math_utils::ipow(int m, int e) { if (e < 0) MSG_ABORT("Exponent cannot be negative: " << e) int result = 1; @@ -48,18 +49,35 @@ int math_utils::ipow(int m, int e) { return result; } +/** @brief Compute the norm of a matrix given as a vector + * + * The norm of the matrix is computed by iterating the following operation: + * \f$ x_n = M^t \cdot M \cdot x_{n-1} \f$ + * + * The norm of the matrix is obtained as: + * \f$ ||M|| \lim_{n \rightarrow \infty} ||x_n||/||x_{n-1}||\f$ + */ double math_utils::matrix_norm_2(const MatrixXd &M) { return M.lpNorm<2>(); } +/** Compute the norm of a matrix given as a vector. + * + * The norm of the matrix is obtained by taking the column with the + * largest norm. + */ double math_utils::matrix_norm_1(const MatrixXd &M) { return M.colwise().lpNorm<1>().maxCoeff(); } +/** Compute the infinity norm of a matrix given as a vector. + * The norm of the matrix is obtained by taking the row with the largest norm. + */ double math_utils::matrix_norm_inf(const MatrixXd &M) { return M.rowwise().lpNorm<1>().maxCoeff(); } +/** Compute the binomial coefficient n!/((n-j)! j!) */ double math_utils::binomial_coeff(int n, int j) { double binomial_n_j = 1.0; if (n < 0 || j < 0 || j > n) { @@ -77,10 +95,11 @@ double math_utils::binomial_coeff(int n, int j) { VectorXd math_utils::get_binomial_coefs(unsigned int order) { VectorXd coefs = VectorXd::Ones(order + 1); - for (int k = 0; k <= (int)order; k++) { coefs[k] = math_utils::binomial_coeff(order, k); } + for (int k = 0; k <= order; k++) { coefs[k] = math_utils::binomial_coeff(order, k); } return coefs; } +/** Compute k! 
= GAMMA(k+1) for integer argument k */ double math_utils::factorial(int n) { int k = 1; double fac_n = 1.0; @@ -98,6 +117,7 @@ double math_utils::factorial(int n) { return fac_n; } +/** Compute the tensor product of two matrices */ MatrixXd math_utils::tensor_product(const MatrixXd &A, const MatrixXd &B) { int Ar = A.rows(); int Ac = A.cols(); @@ -110,6 +130,7 @@ MatrixXd math_utils::tensor_product(const MatrixXd &A, const MatrixXd &B) { return tprod; } +/** Compute the tensor product of a matrix and a vector */ MatrixXd math_utils::tensor_product(const MatrixXd &A, const VectorXd &B) { int Ar = A.rows(); int Ac = A.cols(); @@ -119,6 +140,7 @@ MatrixXd math_utils::tensor_product(const MatrixXd &A, const VectorXd &B) { return tprod; } +/** Compute the tensor product of a matrix and a vector */ MatrixXd math_utils::tensor_product(const VectorXd &A, const MatrixXd &B) { int Ar = A.rows(); int Br = B.rows(); @@ -128,6 +150,7 @@ MatrixXd math_utils::tensor_product(const VectorXd &A, const MatrixXd &B) { return tprod; } +/** Compute the tensor product of a column vector and a row vector */ MatrixXd math_utils::tensor_product(const VectorXd &A, const VectorXd &B) { int Ar = A.rows(); int Br = B.rows(); @@ -136,18 +159,20 @@ MatrixXd math_utils::tensor_product(const VectorXd &A, const VectorXd &B) { return tprod; } +/** Compute the tensor product of a vector and itself */ void math_utils::tensor_self_product(const VectorXd &A, VectorXd &tprod) { int Ar = A.rows(); for (int i = 0; i < Ar; i++) { tprod.segment(i * Ar, Ar) = A(i) * A; } } +/** Compute the tensor product of a vector and itself */ void math_utils::tensor_self_product(const VectorXd &A, MatrixXd &tprod) { int Ar = A.rows(); for (int i = 0; i < Ar; i++) { tprod.block(i, 0, 1, Ar) = A(i) * A; } } -template -void math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) { +/** Matrix multiplication of the filter with the input coefficients */ +template void 
math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) { if constexpr (std::is_same::value) { Map f(in, kp1, kp1_dm1); Map g(out, kp1_dm1, kp1); @@ -168,6 +193,11 @@ void math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, in NOT_IMPLEMENTED_ABORT; } +/** Make a nD-representation from 1D-representations of separable functions. + * + * This method uses the "output" vector as initial input, in order to + * avoid the use of temporaries. + */ void math_utils::tensor_expand_coefs(int dim, int dir, int kp1, int kp1_d, const MatrixXd &primitive, VectorXd &expanded) { if (dir < dim - 1) { int idx = math_utils::ipow(kp1, dir + 1); @@ -204,13 +234,30 @@ void math_utils::tensor_expand_coords_3D(int kp1, const MatrixXd &primitive, Mat } } +/** @brief Compute the eigenvalues and eigenvectors of a Hermitian matrix + * + * @param A: matrix to diagonalize (not modified) + * @param diag: vector to store eigenvalues + * + * Returns the matrix of eigenvectors and stores the eigenvalues in the input vector. + */ ComplexMatrix math_utils::diagonalize_hermitian_matrix(const ComplexMatrix &A, DoubleVector &diag) { Eigen::SelfAdjointEigenSolver es(A.cols()); es.compute(A); - diag = es.eigenvalues(); - return es.eigenvectors(); + diag = es.eigenvalues(); // real + return es.eigenvectors(); // complex } +/** @brief Compute the power of a Hermitian matrix + * + * @param A: matrix + * @param b: exponent + * + * The matrix is first diagonalized, then the diagonal elements are raised + * to the given power, and the diagonalization is reversed. Sanity check for + * eigenvalues close to zero, necessary for negative exponents in combination + * with slightly negative eigenvalues.
+ */ ComplexMatrix math_utils::hermitian_matrix_pow(const ComplexMatrix &A, double b) { DoubleVector diag; ComplexMatrix U = diagonalize_hermitian_matrix(A, diag); @@ -226,6 +273,15 @@ ComplexMatrix math_utils::hermitian_matrix_pow(const ComplexMatrix &A, double b) return U * B * U.adjoint(); } +/** @brief Compute the eigenvalues and eigenvectors of a Hermitian matrix block + * + * @param A: matrix to diagonalize (updated in place) + * @param U: matrix of eigenvectors + * @param nstart: upper left corner of block + * @param nsize: size of block + * + * Assumes that the given block is a proper Hermitian sub matrix. + */ void math_utils::diagonalize_block(ComplexMatrix &A, ComplexMatrix &U, int nstart, int nsize) { Eigen::SelfAdjointEigenSolver es(nsize); es.compute(A.block(nstart, nstart, nsize, nsize)); @@ -235,15 +291,15 @@ void math_utils::diagonalize_block(ComplexMatrix &A, ComplexMatrix &U, int nstar A.block(nstart, nstart, nsize, nsize) = ei_val.asDiagonal(); } -template -double math_utils::calc_distance(const Coord &a, const Coord &b) { +/** Calculate the distance between two points in n-dimensions */ +template double math_utils::calc_distance(const Coord &a, const Coord &b) { double r = 0.0; for (int i = 0; i < D; i++) { r += std::pow(a[i] - b[i], 2.0); } return std::sqrt(r); } -template -std::vector> math_utils::cartesian_product(std::vector A, std::vector B) { +/** Calculate the cartesian_product A x B */ +template std::vector> math_utils::cartesian_product(std::vector A, std::vector B) { std::vector> output; for (auto &a : A) { for (auto &b : B) output.push_back(std::vector{a, b}); @@ -251,8 +307,8 @@ std::vector> math_utils::cartesian_product(std::vector A, std: return output; } -template -std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B) { +/** Calculate the cartesian product between a matrix l_A and the vector B */ +template std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B) { std::vector> output; for 
(auto A : l_A) { for (auto &b : B) { @@ -264,8 +320,9 @@ std::vector> math_utils::cartesian_product(std::vector -std::vector> math_utils::cartesian_product(std::vector A, int dim) { +/** Calculate the cartesian product of a vector A with itself, repeated dim times, + i.e. dim = 4 is equal to the cartesian product A x A x A x A */ +template std::vector> math_utils::cartesian_product(std::vector A, int dim) { std::vector> output; if (dim < 0) MSG_ABORT("Dimension has to be 1 or greater") if (dim == 1) { @@ -292,4 +349,4 @@ template std::vector> math_utils::cartesian_product(std::vec template std::vector> math_utils::cartesian_product(std::vector> l_A, std::vector B); template std::vector> math_utils::cartesian_product(std::vector A, int dim); -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp diff --git a/src/utils/mpi_utils.cpp b/src/utils/mpi_utils.cpp index d6d58a165..77526375b 100644 --- a/src/utils/mpi_utils.cpp +++ b/src/utils/mpi_utils.cpp @@ -31,6 +31,11 @@ namespace mrcpp { +/** @brief SharedMemory constructor + * + * @param[in] comm: Communicator sharing resources + * @param[in] sh_size: Memory size, in MB + */ template SharedMemory::SharedMemory(mrcpp::mpi_comm comm, int sh_size) : sh_start_ptr(nullptr) @@ -71,6 +76,19 @@ template SharedMemory::~SharedMemory() { #endif } +/** @brief Send FunctionTree to a given MPI rank using blocking communication + * + * @param[in] tree: FunctionTree to send + * @param[in] dst: MPI rank to send to + * @param[in] tag: unique identifier + * @param[in] comm: Communicator that defines ranks + * @param[in] nChunks: Number of memory chunks to send + * + * @details The number of memory chunks must be known before we can send the + * tree. This can be specified in the last argument if known a priori, in order + * to speed up communication, otherwise it will be communicated in a separate + * step before the main communication.
+ */ template void send_tree(FunctionTree &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) { #ifdef MRCPP_HAS_MPI auto &allocator = tree.getNodeAllocator(); @@ -90,6 +108,19 @@ template void send_tree(FunctionTree &tree, int dst, i #endif } +/** @brief Receive FunctionTree from a given MPI rank using blocking communication + * + * @param[in] tree: FunctionTree to write into + * @param[in] src: MPI rank to receive from + * @param[in] tag: unique identifier + * @param[in] comm: Communicator that defines ranks + * @param[in] nChunks: Number of memory chunks to receive + * + * @details The number of memory chunks must be known before we can receive the + * tree. This can be specified in the last argument if known a priori, in order + * to speed up communication, otherwise it will be communicated in a separate + * step before the main communication. + */ template void recv_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -115,6 +146,16 @@ template void recv_tree(FunctionTree &tree, int src, i #endif } +/** @brief Share a FunctionTree among MPI processes that share the same physical memory + * + * @param[in] tree: FunctionTree to write into + * @param[in] src: MPI rank that last updated the function + * @param[in] tag: unique identifier + * @param[in] comm: Communicator that defines ranks + * + * @details This function should be called every time a shared function is + * updated, in order to update the local memory of each MPI process. 
+ */ template void share_tree(FunctionTree &tree, int src, int tag, mrcpp::mpi_comm comm) { #ifdef MRCPP_HAS_MPI Timer t1; diff --git a/src/utils/omp_utils.cpp b/src/utils/omp_utils.cpp index 63c88aaaa..67ccd3069 100644 --- a/src/utils/omp_utils.cpp +++ b/src/utils/omp_utils.cpp @@ -27,6 +27,7 @@ #include namespace mrcpp { +// By default we get OMP_NUM_THREADS int max_threads = mrcpp_get_max_threads(); void set_max_threads(int threads) { diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp index b47ea1b4a..510e34a1e 100644 --- a/src/utils/parallel.cpp +++ b/src/utils/parallel.cpp @@ -1,28 +1,3 @@ -/* - * MRCPP, a numerical library based on multiresolution analysis and - * the multiwavelet basis which provide low-scaling algorithms as well as - * rigorous error control in numerical computations. - * Copyright (C) 2021 Stig Rune Jensen, Jonas Juselius, Luca Frediani and contributors. - * - * This file is part of MRCPP. - * - * MRCPP is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * MRCPP is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with MRCPP. If not, see . 
- * - * For information on the complete list of contributors to MRCPP, see: - * - */ - #include #include #include @@ -52,7 +27,9 @@ using namespace std; namespace mrcpp { namespace omp { + int n_threads = mrcpp_get_max_threads(); + } // namespace omp using namespace Eigen; @@ -64,6 +41,7 @@ namespace mpi { bool numerically_exact = false; int shared_memory_size = 1000; +// these parameters set by initialize() int world_size = 1; int world_rank = 0; int wrk_size = 1; @@ -74,24 +52,24 @@ int sh_group_rank = 0; int is_bank = 0; int is_centralbank = 0; int is_bankclient = 1; -int is_bankmaster = 0; +int is_bankmaster = 0; // only one bankmaster is_bankmaster int bank_size = 0; int bank_per_node = 0; -int omp_threads = -1; -int use_omp_num_threads = -1; -int tot_bank_size = 0; -int max_tag = 0; +int omp_threads = -1; // can be set to force number of threads +int use_omp_num_threads = -1; // can be set to use number of threads from env +int tot_bank_size = 0; // size of bank, including the task manager +int max_tag = 0; // max value allowed by MPI vector bankmaster; -int task_bank = -1; +int task_bank = -1; // world rank of the task manager MPI_Comm comm_wrk; MPI_Comm comm_share; MPI_Comm comm_sh_group; MPI_Comm comm_bank; -int id_shift; +int id_shift; // to ensure that nodes, orbitals and functions do not collide -extern int metadata_block[3]; +extern int metadata_block[3]; // can add more metadata in future extern int const size_metadata = 3; void initialize() { @@ -103,15 +81,22 @@ void initialize() { MPI_Comm_size(MPI_COMM_WORLD, &world_size); MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + // divide the world into groups + // each group has its own group communicator definition + + // count the number of process per node MPI_Comm node_comm; MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm); int node_rank, node_size; MPI_Comm_rank(node_comm, &node_rank); MPI_Comm_size(node_comm, &node_size); - comm_bank = MPI_COMM_WORLD; - MPI_Comm 
comm_remainder; + // define independent group of MPI processes, that are not part of comm_wrk + // for now the new group does not include comm_share + comm_bank = MPI_COMM_WORLD; // clients and master + MPI_Comm comm_remainder; // clients only + // set bank_size automatically if not defined by user if (world_size < 2) { bank_size = 0; } else if (bank_size < 0) { @@ -128,13 +113,15 @@ void initialize() { bankmaster.resize(bank_size); for (int i = 0; i < bank_size; i++) { - bankmaster[i] = world_size - i - 1; + bankmaster[i] = world_size - i - 1; // rank of the bankmasters } if (world_rank < world_size - bank_size) { + // everything which is left is_bank = 0; is_centralbank = 0; is_bankclient = 1; } else { + // special group of centralbankmasters is_bank = 1; is_centralbank = 1; is_bankclient = 0; @@ -142,41 +129,53 @@ void initialize() { } MPI_Comm_split(MPI_COMM_WORLD, is_bankclient, world_rank, &comm_remainder); + // split world into groups that can share memory MPI_Comm_split_type(comm_remainder, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share); MPI_Comm_rank(comm_share, &share_rank); MPI_Comm_size(comm_share, &share_size); + // define a rank of the group MPI_Comm_split(comm_remainder, share_rank, world_rank, &comm_sh_group); + // mpiShRank is color (same color->in same group) + // MPI_worldrank is key (orders rank within the groups) + // we define a new orbital rank, so that the orbitals within + // a shared memory group, have consecutive ranks MPI_Comm_rank(comm_sh_group, &sh_group_rank); wrk_rank = share_rank + sh_group_rank * world_size; MPI_Comm_split(comm_remainder, 0, wrk_rank, &comm_wrk); + // 0 is color (same color->in same group) + // mpiOrbRank is key (orders rank in the group) MPI_Comm_rank(comm_wrk, &wrk_rank); MPI_Comm_size(comm_wrk, &wrk_size); + // if bank_size is large enough, we reserve one as "task manager" tot_bank_size = bank_size; if (bank_size <= 2 and bank_size > 0) { + // use the first bank as task manager task_bank = bankmaster[0]; } 
else if (bank_size > 1) { + // reserve one bank for task management only bank_size--; - task_bank = bankmaster[bank_size]; + task_bank = bankmaster[bank_size]; // the last rank is reserved as task manager } + // determine the maximum value allowed for mpi tags void *val; int flag; - MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &val, &flag); + MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &val, &flag); // max value allowed by MPI for tags max_tag = *(int *)val / 2; - id_shift = max_tag / 2; + id_shift = max_tag / 2; // half is reserved for non orbital. - MPI_Comm comm_share_world; + MPI_Comm comm_share_world; // all that share the memory MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share_world); - int n_bank_thisnode; + int n_bank_thisnode; // number of banks on this node MPI_Allreduce(&is_bank, &n_bank_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world); - int n_wrk_thisnode; + int n_wrk_thisnode; // number of workers on this node MPI_Allreduce(&is_bankclient, &n_wrk_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world); int omp_threads_available = thread::hardware_concurrency(); @@ -184,13 +183,42 @@ void initialize() { int nthreads = 1; int my_OMP_NUM_THREADS = mrcpp_get_max_threads(); MPI_Bcast(&my_OMP_NUM_THREADS, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (use_omp_num_threads) { + if (use_omp_num_threads) { // we assume that the user has set the environment variable + // OMP_NUM_THREADS, such that the total number of threads that can be used on each node is + // OMP_NUM_THREADS * (number of MPI processes per node) + // NB: OMP_NUM_THREADS is the number of threads for all MPI processes on one node. + // The banks need only one thread, and can give "their" remaining share to workers.
int total_omp_threads_per_node = my_OMP_NUM_THREADS * (n_bank_thisnode + n_wrk_thisnode); nthreads = (total_omp_threads_per_node - n_bank_thisnode) / n_wrk_thisnode; } else { - if (is_bankclient) nthreads = (omp_threads_available / 2 - n_bank_thisnode) / n_wrk_thisnode; - nthreads = min(nthreads, mrcpp_get_num_procs() / 2); - if (is_bank) nthreads = 1; + // we determine the number of threads by detecting what is available + // determine the number of threads we can assign to each mpi worker. + // mrcpp_get_num_procs is total number of hardware logical threads accessible by this mpi + // NB: We assume that half of them are physical cores (not easily detectable). + // mrcpp_get_max_threads is OMP_NUM_THREADS (environment variable) but is NOT USED. + // omp_threads_available is the total number of logical threads available on this compute-node + // We assume that half of them are physical cores. + // + // five conditions should be satisfied: + // 1) the total number of threads used on the compute-node must not exceed thread::hardware_concurrency()/2 + // 2) no one use more than omp_get_num_procs()/2 + // 3) Bank needs only one thread + // 4) workers need as many threads as possible (but all workers use same number of threads) + // 5) at least one thread + if (is_bankclient) nthreads = (omp_threads_available / 2 - n_bank_thisnode) / n_wrk_thisnode; // 1) and 4) + // cout< 0) { if (omp_threads != nthreads and world_rank == 0) { cout << "Warning: recommended number of threads is " << nthreads << endl; @@ -199,7 +227,7 @@ void initialize() { nthreads = omp_threads; } } - nthreads = max(1, nthreads); + nthreads = max(1, nthreads); // 5) if (nthreads * n_wrk_thisnode + n_bank_thisnode < omp_threads_available / 3 and world_rank == 0) { std::cout << "WARNING: only " << nthreads * n_wrk_thisnode + n_bank_thisnode << " threads used per node while " << omp_threads_available << " logical cpus are accessible " << std::endl; @@ -211,6 +239,7 @@ void initialize() { 
mrcpp::set_max_threads(nthreads); if (is_bank) { + // bank is open until end of program if (is_centralbank) { dataBank.open(); } finalize(); exit(EXIT_SUCCESS); @@ -227,7 +256,7 @@ void finalize() { println(4, " max data in bank " << dataBank.get_maxtotalsize() << " MB "); dataBank.close(); } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(MPI_COMM_WORLD); // to ensure everybody got here MPI_Finalize(); #endif } @@ -238,6 +267,10 @@ void barrier(MPI_Comm comm) { #endif } +/********************************* + * Orbital related MPI functions * + *********************************/ + bool grand_master() { return (world_rank == 0 and is_bankclient) ? true : false; } @@ -246,24 +279,29 @@ bool share_master() { return (share_rank == 0) ? true : false; } +/** @brief Test if function belongs to this MPI rank */ bool my_func(int j) { return ((j) % wrk_size == wrk_rank) ? true : false; } +/** @brief Test if function belongs to this MPI rank */ bool my_func(const CompFunction<3> &func) { return my_func(func.rank()); } +/** @brief Test if function belongs to this MPI rank */ bool my_func(CompFunction<3> *func) { return my_func(func->rank()); } +/** @brief Free all function pointers not belonging to this MPI rank */ void free_foreign(CompFunctionVector &Phi) { for (CompFunction<3> &i : Phi) { if (not my_func(i)) i.free(); } } +/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(IntVector &vec, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -271,6 +309,7 @@ void allreduce_vector(IntVector &vec, MPI_Comm comm) { #endif } +/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(DoubleVector &vec, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -278,6 +317,7 @@ void allreduce_vector(DoubleVector &vec, MPI_Comm comm) { #endif } +/** @brief Add up each entry of the vector with contributions from all MPI ranks */ void allreduce_vector(ComplexVector &vec, 
MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = vec.size(); @@ -285,6 +325,7 @@ void allreduce_vector(ComplexVector &vec, MPI_Comm comm) { #endif } +/** @brief Add up each entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(IntMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -292,6 +333,7 @@ void allreduce_matrix(IntMatrix &mat, MPI_Comm comm) { #endif } +/** @brief Add up each entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -299,6 +341,7 @@ void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) { #endif } +/** @brief Add up each entry of the matrix with contributions from all MPI ranks */ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI int N = mat.size(); @@ -306,9 +349,11 @@ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) { #endif } +// send a component function with MPI void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI for (int i = 0; i < func.Ncomp(); i++) { + // make sure that Nchunks is up to date if (func.isreal()) func.Nchunks()[i] = func.CompD[i]->getNChunks(); else @@ -324,6 +369,7 @@ void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) #endif } +// receive a component function with MPI void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { #ifdef MRCPP_HAS_MPI MPI_Status status; @@ -339,6 +385,7 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { #endif } +/** Update a shared function after it has been changed by one of the MPI ranks. 
*/ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { if (func.isShared()) { #ifdef MRCPP_HAS_MPI @@ -352,31 +399,40 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) { } } +/** @brief Add all mpi function into rank zero */ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) { +/* 1) Each odd rank send to the left rank + 2) All odd ranks are "deleted" (can exit routine) + 3) new "effective" ranks are defined within the non-deleted ranks + effective rank = rank/fac , where fac are powers of 2 + 4) repeat + */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; + int fac = 1; // powers of 2 while (fac < comm_size) { if ((comm_rank / fac) % 2 == 0) { + // receive int src = comm_rank + fac; if (src < comm_size) { CompFunction<3> func_i; int tag = 3333 + src; recv_function(func_i, src, tag, comm); - func.add(1.0, func_i); + func.add(1.0, func_i); // add in place using union grid func.crop(prec); } } if ((comm_rank / fac) % 2 == 1) { + // send int dest = comm_rank - fac; if (dest >= 0) { int tag = 3333 + comm_rank; send_function(func, dest, tag, comm); - break; + break; // once data is sent we are done } } fac *= 2; @@ -385,30 +441,39 @@ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) { #endif } +/** @brief make union tree and send into rank zero */ template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) { +/* 1) Each odd rank send to the left rank + 2) All odd ranks are "deleted" (can exit routine) + 3) new "effective" ranks are defined within the non-deleted ranks + effective rank = rank/fac , where fac are powers of 2 + 4) repeat + */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; + int fac = 1; // powers of 2 while (fac < comm_size) { 
if ((comm_rank / fac) % 2 == 0) { + // receive int src = comm_rank + fac; if (src < comm_size) { int tag = 3333 + src; mrcpp::FunctionTree<3, T> tree_i(tree.getMRA()); mrcpp::recv_tree(tree_i, src, tag, comm, -1, false); - tree.appendTreeNoCoeff(tree_i); + tree.appendTreeNoCoeff(tree_i); // make union grid } } if ((comm_rank / fac) % 2 == 1) { + // send int dest = comm_rank - fac; if (dest >= 0) { int tag = 3333 + comm_rank; mrcpp::send_tree(tree, dest, tag, comm, -1, false); - break; + break; // once data is sent we are done } } fac *= 2; @@ -417,7 +482,14 @@ template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, #endif } +/** @brief make union tree without coeff and send to all + */ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector> &Phi, MPI_Comm comm) { + /* 1) make union grid of own orbitals + 2) make union grid with others orbitals (sent to rank zero) + 3) rank zero broadcast func to everybody + */ + int N = Phi.size(); for (int j = 0; j < N; j++) { if (not my_func(j)) continue; @@ -429,7 +501,14 @@ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tre #endif } +/** @brief make union tree without coeff and send to all + */ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector> &Phi, MPI_Comm comm) { + /* 1) make union grid of own orbitals + 2) make union grid with others orbitals (sent to rank zero) + 3) rank zero broadcast func to everybody + */ + int N = Phi.size(); for (int j = 0; j < N; j++) { if (not my_func(j)) continue; @@ -442,24 +521,28 @@ template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tre #endif } +/** @brief Distribute rank zero function to all ranks */ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) { +/* use same strategy as a reduce, but in reverse order */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; + int fac = 1; // powers of 2 
while (fac < comm_size) fac *= 2; fac /= 2; while (fac > 0) { if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) { + // receive int src = comm_rank - fac; int tag = 4334 + comm_rank; recv_function(func, src, tag, comm); } if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) { + // send int dst = comm_rank + fac; int tag = 4334 + dst; if (dst < comm_size) send_function(func, dst, tag, comm); @@ -470,24 +553,28 @@ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) { #endif } +/** @brief Distribute rank zero function to all ranks */ template void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) { +/* use same strategy as a reduce, but in reverse order */ #ifdef MRCPP_HAS_MPI int comm_size, comm_rank; MPI_Comm_rank(comm, &comm_rank); MPI_Comm_size(comm, &comm_size); if (comm_size == 1) return; - int fac = 1; + int fac = 1; // powers of 2 while (fac < comm_size) fac *= 2; fac /= 2; while (fac > 0) { if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) { + // receive int src = comm_rank - fac; int tag = 4334 + comm_rank; mrcpp::recv_tree(tree, src, tag, comm, -1, false); } if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) { + // send int dst = comm_rank + fac; int tag = 4334 + dst; if (dst < comm_size) mrcpp::send_tree(tree, dst, tag, comm, -1, false); diff --git a/src/utils/periodic_utils.cpp b/src/utils/periodic_utils.cpp index afc23d80d..af8f753e2 100644 --- a/src/utils/periodic_utils.cpp +++ b/src/utils/periodic_utils.cpp @@ -76,6 +76,8 @@ template void index_manipulation(NodeIndex &idx, const std::array void coord_manipulation(Coord &r, const std::array &periodic) { for (auto i = 0; i < D; i++) { r[i] *= 0.5; diff --git a/src/utils/tree_utils.cpp b/src/utils/tree_utils.cpp index 8cc22d564..333544f6e 100644 --- a/src/utils/tree_utils.cpp +++ b/src/utils/tree_utils.cpp @@ -39,6 +39,11 @@ namespace mrcpp { +/** Calculate the threshold for the wavelet norm. 
+ * + * Calculates the threshold that has to be met in the wavelet norm in order to + * guarantee the precision in the function representation. Depends on the + * square norm of the function and the requested relative accuracy. */ template bool tree_utils::split_check(const MWNode &node, double prec, double split_fac, bool abs_prec) { bool split = false; if (prec > 0.0) { @@ -59,6 +64,8 @@ template bool tree_utils::split_check(const MWNode &no return split; } +/** Traverse tree along the Hilbert path and find nodes of any rankId. + * Returns one nodeVector for the whole tree. GenNodes disregarded. */ template void tree_utils::make_node_table(MWTree &tree, MWNodeVector &table) { TreeIterator it(tree, TopDown, Hilbert); it.setReturnGenNodes(false); @@ -74,6 +81,8 @@ template void tree_utils::make_node_table(MWTree &tree } } +/** Traverse tree along the Hilbert path and find nodes of any rankId. + * Returns one nodeVector per scale. GenNodes disregarded. */ template void tree_utils::make_node_table(MWTree &tree, std::vector> &table) { TreeIterator it(tree, TopDown, Hilbert); it.setReturnGenNodes(false); @@ -81,6 +90,7 @@ template void tree_utils::make_node_table(MWTree &tree MWNode &node = it.getNode(); if (node.getDepth() == 0) continue; int depth = node.getDepth() + tree.getNNegScales(); + // Add one more element if (depth + 1 > table.size()) table.push_back(MWNodeVector()); table[depth].push_back(&node); } @@ -88,11 +98,18 @@ template void tree_utils::make_node_table(MWTree &tree while (it.next()) { MWNode &node = it.getNode(); int depth = node.getDepth() + tree.getNNegScales(); + // Add one more element if (depth + 1 > table.size()) table.push_back(MWNodeVector()); table[depth].push_back(&node); } } +/** Make children scaling coefficients from parent + * Other node info are not used/set + * coeff_in are not modified. + * The output is written directly into the 8 children scaling coefficients. 
+ * NB: ASSUMES that the children coefficients are separated by Children_Stride! + */ template void tree_utils::mw_transform(const MWTree &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite) { int operation = Reconstruction; int kp1 = tree.getKp1(); @@ -110,6 +127,8 @@ template void tree_utils::mw_transform(const MWTree &t ftlim = 1; ftlim2 = 2; ftlim3 = 4; + // NB: Careful: tmpcoeff tmpcoeff2 are not initialized to zero + // must not read these uninitialized values! } overwrite = 0.0; @@ -118,6 +137,9 @@ template void tree_utils::mw_transform(const MWTree &t for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff + gt * kp1_d; for (int ft = 0; ft < ftlim; ft++) { + // Operate in direction i only if the bits along other + // directions are identical. The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_in + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -131,10 +153,13 @@ template void tree_utils::mw_transform(const MWTree &t } if (D > 1) { i++; - mask = 2; + mask = 2; // 1 << i; for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff2 + gt * kp1_d; for (int ft = 0; ft < ftlim2; ft++) { + // Operate in direction i only if the bits along other + // directions are identical. The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -151,10 +176,13 @@ template void tree_utils::mw_transform(const MWTree &t overwrite = 1.0; if (b_overwrite) overwrite = 0.0; i++; - mask = 4; + mask = 4; // 1 << i; for (int gt = 0; gt < tDim; gt++) { - T *out = coeff_out + gt * stride; + T *out = coeff_out + gt * stride; // write right into children for (int ft = 0; ft < ftlim3; ft++) { + // Operate in direction i only if the bits along other + // directions are identical.
The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff2 + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -192,6 +220,12 @@ template void tree_utils::mw_transform(const MWTree &t // NOT_IMPLEMENTED_ABORT; //} +/** Make parent from children scaling coefficients + * Other node info are not used/set + * coeff_in are not modified. + * The output is read directly from the 8 children scaling coefficients. + * NB: ASSUMES that the children coefficients are separated by Children_Stride! + */ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride) { int operation = Compression; int kp1 = tree.getKp1(); @@ -211,6 +245,9 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * for (int gt = 0; gt < tDim; gt++) { T *out = coeff_out + gt * kp1_d; for (int ft = 0; ft < ftlim; ft++) { + // Operate in direction i only if the bits along other + // directions are identical. The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_in + ft * stride; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -223,10 +260,13 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * overwrite = 0.0; } i++; - mask = 2; + mask = 2; // 1 << i; for (int gt = 0; gt < tDim; gt++) { T *out = tmpcoeff + gt * kp1_d; for (int ft = 0; ft < ftlim2; ft++) { + // Operate in direction i only if the bits along other + // directions are identical. 
The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = coeff_out + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); @@ -239,10 +279,14 @@ template void tree_utils::mw_transform_back(MWTree<3, T> &tree, T * overwrite = 0.0; } i++; - mask = 4; + mask = 4; // 1 << i; for (int gt = 0; gt < tDim; gt++) { T *out = coeff_out + gt * kp1_d; + // T *out = coeff_out + gt * N_coeff; for (int ft = 0; ft < ftlim3; ft++) { + // Operate in direction i only if the bits along other + // directions are identical. The bit of the direction we + // operate on determines the appropriate filter/operator if ((gt | mask) == (ft | mask)) { T *in = tmpcoeff + ft * kp1_d; int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1); From 2c4a6552bd67c9ef3f8f6677bf48b771d53a90f0 Mon Sep 17 00:00:00 2001 From: Tarek Scheele Date: Thu, 6 Nov 2025 13:55:03 +0100 Subject: [PATCH 21/51] Start documenting MWNode --- src/core/ScalingBasis.h | 2 +- src/trees/MWNode.h | 327 ++++++++++++++++++++++++++++------------ src/trees/MWTree.h | 4 +- 3 files changed, 234 insertions(+), 99 deletions(-) diff --git a/src/core/ScalingBasis.h b/src/core/ScalingBasis.h index d5914a9b6..c44942877 100644 --- a/src/core/ScalingBasis.h +++ b/src/core/ScalingBasis.h @@ -104,7 +104,7 @@ class ScalingBasis { /** @return Const reference to the k-th basis polynomial φ_k. */ const Polynomial &getFunc(int k) const { return this->funcs[k]; } - /** @return Family tag (Legendre or Interpol; see MRCPP/constants.h). */ + /** @return The type of scaling basis (Legendre or Interpol; see MRCPP/constants.h) */ int getScalingType() const { return this->type; } /** @return Polynomial order k. 
*/ int getScalingOrder() const { return this->order; } diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h index de8558322..96955828b 100644 --- a/src/trees/MWNode.h +++ b/src/trees/MWNode.h @@ -37,155 +37,290 @@ namespace mrcpp { -/** - * @file MWNode.h - * @brief Base node for multiresolution (multiwavelet) trees. - * - * @details - * A node stores scaling/wavelet coefficients for one cell at scale `n` and - * translation `l` in `D` spatial dimensions. It also keeps structural - * information (parent/children, Hilbert path, status flags) and provides - * utilities to: - * - * - allocate/attach coefficient buffers, - * - compute and cache norms (total, per-component, maximum scaled norms), - * - perform CV/MW transforms on the node, - * - navigate and generate parts of the tree (parents/children), - * - fetch geometry (bounds, center) and quadrature/child evaluation points. - * - * This class is templated on spatial dimension `D` (1, 2, or 3) and on the - * scalar type `T` (e.g., `double` or `ComplexDouble`). - */ - /** * @class MWNode * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Scalar type of coefficients (e.g., double, ComplexDouble). + * @tparam T Coefficient type (e.g. double, ComplexDouble). * - * @brief Base class for multiwavelet tree nodes. + * @brief Base class for Multiwavelet nodes + * + * @details A MWNode will contain the scaling and wavelet coefficients + * to represent functions or operators within a Multiwavelet + * framework. The nodes are multidimensional. The dimensionality is + * set through the template parameter D=1,2,3. In addition to the + * coefficients, the node contains metadata such as the scale, the + * translation index, the norm, pointers to parent node and child + * nodes, pointer to the corresponding MWTree etc... See member and + * data descriptions for details. * * @note - * Nodes are created and managed by @ref MWTree and specialized trees - * (e.g., @ref FunctionTree). 
Most users should not instantiate nodes + * Nodes are created and managed by MWTree and specialized trees + * (e.g., FunctionTree). Most users should not instantiate nodes * directly; instead, operate at the tree level. */ template class MWNode { public: /** - * @brief Copy-construct a node. - * @param node Source node. - * @param allocCoef If true, allocate a new coefficient buffer. - * @param SetCoef If true and @p allocCoef is true, copy coefficients. + * @brief MWNode copy constructor + * @param[in] node The original node + * @param allocCoef If true, allocate MW coefficients and copy from the original node + * @param SetCoef If true and @p allocCoef is true, copy coefficients + * + * @details Creates loose nodes and optionally copy coefs. The node + * does not "belong" to the tree: It cannot be accessed by traversing + * the tree. */ MWNode(const MWNode &node, bool allocCoef = true, bool SetCoef = true); MWNode &operator=(const MWNode &node) = delete; - virtual ~MWNode(); - /// @name Basis/order and topology queries - ///@{ - int getKp1() const { return getMWTree().getKp1(); } - int getKp1_d() const { return getMWTree().getKp1_d(); } - int getOrder() const { return getMWTree().getOrder(); } - int getScalingType() const { return getMWTree().getMRA().getScalingBasis().getScalingType(); } - int getTDim() const { return (1 << D); } - int getDepth() const { return getNodeIndex().getScale() - getMWTree().getRootScale(); } - int getScale() const { return getNodeIndex().getScale(); } - int getNChildren() const { return (isBranchNode()) ? 
getTDim() : 0; } - int getSerialIx() const { return this->serialIx; } - void setSerialIx(int Ix) { this->serialIx = Ix; } - - const NodeIndex &getNodeIndex() const { return this->nodeIndex; } - const HilbertPath &getHilbertPath() const { return this->hilbertPath; } - ///@} + /// @brief Recursive deallocation of a node and all its descendants + virtual ~MWNode(); - /// @name Geometry - ///@{ - Coord getCenter() const; - Coord getUpperBounds() const; - Coord getLowerBounds() const; + /* + * Getters and setters + */ + int getOrder() const { return getMWTree().getOrder(); } ///< @return Polynomial order k + int getKp1() const { return getMWTree().getKp1(); } ///< @return k+1 + int getKp1_d() const { return getMWTree().getKp1_d(); } ///< @return (k+1)^D + int getScalingType() const { return getMWTree().getMRA().getScalingBasis().getScalingType(); } ///< @return The type of scaling basis (Legendre or Interpol; see MRCPP/constants.h) + int getTDim() const { return (1 << D); } ///< @return 2^D (number of children per internal node) + int getDepth() const { return getNodeIndex().getScale() - getMWTree().getRootScale(); } ///< @return The depth of this node + int getScale() const { return getNodeIndex().getScale(); } ///< @return The scale of this node + int getNChildren() const { return (isBranchNode()) ? 
getTDim() : 0; } ///< @return The number of children of this node + int getSerialIx() const { return this->serialIx; } ///< @return The index in the serial tree + void setSerialIx(int Ix) { this->serialIx = Ix; } ///< @param Ix The index in the serial tree + + const NodeIndex &getNodeIndex() const { return this->nodeIndex; } ///< @return The index (scale and translation) for this node + const HilbertPath &getHilbertPath() const { return this->hilbertPath; } // TODO document this + + Coord getCenter() const; ///< @return The coordinates of the centre of the node + Coord getUpperBounds() const; ///< @return The upper bounds of the D-interval defining the node + Coord getLowerBounds() const; ///< @return The lower bounds of the D-interval defining the node + /** + * @brief Test if a given coordinate is within the boundaries of the node + * @param[in] r Point coordinates + */ bool hasCoord(const Coord &r) const; - ///@} - /// @name Structural relations - ///@{ + /// @warning This method is currently not implemented. bool isCompatible(const MWNode &node); + + /** + * @brief Test if the node is descending from a given NodeIndex, that is, if they have + * overlapping support. + * @param[in] idx the NodeIndex of the requested node + */ bool isAncestor(const NodeIndex &idx) const; + + /// @warning This method is currently not implemented. bool isDecendant(const NodeIndex &idx) const; - ///@} - /// @name Norms - ///@{ - double getSquareNorm() const { return this->squareNorm; } - double getMaxSquareNorm() const { return (maxSquareNorm > 0.0) ? maxSquareNorm : calcScaledSquareNorm(); } - double getMaxWSquareNorm() const { return (maxWSquareNorm > 0.0) ? maxWSquareNorm : calcScaledWSquareNorm(); } + double getSquareNorm() const { return this->squareNorm; } ///< @return Squared norm of all 2^D (k+1)^D coefficients + double getMaxSquareNorm() const { return (maxSquareNorm > 0.0) ? 
maxSquareNorm : calcScaledSquareNorm(); } ///< @return Largest squared norm among itself and descendants. + double getMaxWSquareNorm() const { return (maxWSquareNorm > 0.0) ? maxWSquareNorm : calcScaledWSquareNorm(); } ///< @return Largest wavelet squared norm among itself and descendants. + /** + * @brief Calculate and return the squared scaling norm + * @return The scaling norm + */ double getScalingNorm() const; + /** + * @brief Calculate and return the squared wavelet norm + * @return The squared wavelet norm + */ virtual double getWaveletNorm() const; + /** + * @param i The component index + * @return The squared norm of the component at the given index + */ double getComponentNorm(int i) const { return this->componentNorms[i]; } - ///@} - /// @name Coefficients access - ///@{ - int getNCoefs() const { return this->n_coefs; } + int getNCoefs() const { return this->n_coefs; } ///< @return The number of coefficients + /** + * @brief Wraps the MW coefficients into an Eigen vector object + * @param[out] c The coefficient matrix + */ void getCoefs(Eigen::Matrix &c) const; - void printCoefs() const; - T *getCoefs() { return this->coefs; } - const T *getCoefs() const { return this->coefs; } - ///@} + void printCoefs() const; ///< @brief Printout of node coefficients - /// @name Evaluation points (quadrature / children) - ///@{ + T *getCoefs() { return this->coefs; } ///< @return The 2^D (k+1)^D MW coefficients + const T *getCoefs() const { return this->coefs; } ///< @return The 2^D (k+1)^D MW coefficients + + /** + * @brief Returns the quadrature points of this node + * + * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form. + * + * @details The original quadrature points are fetched and then + * dilated and translated. For each cartesian direction \f$ \alpha = + * x,y,z... \f$ the set of quadrature points becomes \f$ x^\alpha_i = + * 2^{-n} (x_i + l^\alpha) \f$. 
By taking all possible + * \f$(k+1)^d\f$ combinations, they will then define a d-dimensional + * grid of quadrature points. + */ void getPrimitiveQuadPts(Eigen::MatrixXd &pts) const; + + /** + * @brief Returns the quadrature points of this node + * + * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form. + * + * @details The original quadrature points are fetched and then + * dilated and translated to match the quadrature points in the + * children of this node. For each cartesian direction \f$ \alpha = x,y,z... \f$ + * the set of quadrature points becomes \f$ x^\alpha_i = 2^{-n-1} (x_i + 2 l^\alpha + t^\alpha) \f$, where \f$ t^\alpha = + * 0,1 \f$. By taking all possible \f$(k+1)^d\f$ combinations, they will + * then define a d-dimensional grid of quadrature points for the child + * nodes. + */ void getPrimitiveChildPts(Eigen::MatrixXd &pts) const; + + /** + * @brief Returns the quadrature points of this node + * + * @param[out] pts Expanded quadrature points in a \f$ d \times + * (k+1)^d \f$ matrix form. + * + * @details The primitive quadrature points are used to obtain a + * tensor-product representation collecting all \f$ (k+1)^d \f$ + * vectors of quadrature points. + */ void getExpandedQuadPts(Eigen::MatrixXd &pts) const; + + /** + * @brief Returns the quadrature points of this node + * + * @param[out] pts Expanded quadrature points in a \f$ d \times + * 2^d(k+1)^d \f$ matrix form. + * + * @details The primitive quadrature points of the children are used to obtain a + * tensor-product representation collecting all \f$ 2^d (k+1)^d \f$ + * vectors of quadrature points. 
+ */ void getExpandedChildPts(Eigen::MatrixXd &pts) const; - ///@} - /// @name Tree navigation (typed accessors) - ///@{ - MWTree &getMWTree() { return static_cast &>(*this->tree); } - MWNode &getMWParent() { return static_cast &>(*this->parent); } + MWTree &getMWTree() { return static_cast &>(*this->tree); } ///< @return The tree this node belongs to + MWNode &getMWParent() { return static_cast &>(*this->parent); } ///< @return The parent of this node + + /** + * @param i The index of the child + * @return The child at the given index + */ MWNode &getMWChild(int i) { return static_cast &>(*this->children[i]); } - const MWTree &getMWTree() const { return static_cast &>(*this->tree); } - const MWNode &getMWParent() const { return static_cast &>(*this->parent); } + const MWTree &getMWTree() const { return static_cast &>(*this->tree); } ///< @return The tree this node belongs to + const MWNode &getMWParent() const { return static_cast &>(*this->parent); } ///< @return The parent of this node + + /** + * @param i The index of the child + * @return The child at the given index + */ const MWNode &getMWChild(int i) const { return static_cast &>(*this->children[i]); } - ///@} - /// @name Coefficients editing (block-wise) - ///@{ + /// @brief Sets all MW coefficients and the norms to zero void zeroCoefs(); + + /** + * @brief Assigns values to a block of coefficients + * @param block The block index + * @param block_size Size of the block + * @param[in] c The input coefficients + * + * @details A block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. 
+ */ void setCoefBlock(int block, int block_size, const T *c); + + /** + * @brief Adds values to a block of coefficients + * @param block The block index + * @param block_size Size of the block + * @param[in] c The input coefficients + * + * @details A block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. + */ void addCoefBlock(int block, int block_size, const T *c); + + /** + * @brief Sets values of a block of coefficients to zero + * @param[in] block The block index + * @param[in] block_size Size of the block + * + * @details A block is typically containing one kind of coefficients + * (given scaling/wavelet in each direction). Its size is then \f$ + * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. + */ void zeroCoefBlock(int block, int block_size); + + /** + * @brief Attach a set of coefficients to this node. Only used locally (the tree is not aware of this). + * @param[in] coefs The coefficients to attach + * + * @note The number of coefficients must remain the same. + */ void attachCoefs(T *coefs); - ///@} - /// @name Norm bookkeeping - ///@{ - void calcNorms(); - void zeroNorms(); - void clearNorms(); - ///@} + void calcNorms(); ///< @brief Calculate and store square norm and component norms, if allocated. + void zeroNorms(); ///< @brief Set all norms to zero. + void clearNorms(); ///< @brief Set all norms to Undefined. - /// @name Topology modification - ///@{ + /* + * Implemented in child classes + */ virtual void createChildren(bool coefs); virtual void genChildren(); virtual void genParent(); + + /** + * @brief Recursive deallocation of children and all their descendants. + * + * @details Leaves node as LeafNode and children[] as null pointer. + */ virtual void deleteChildren(); + + /// @brief Recursive deallocation of parent and all their forefathers. 
virtual void deleteParent(); - ///@} - /// @name Local transforms - ///@{ - virtual void cvTransform(int kind, bool firstchild = false); - virtual void mwTransform(int kind); - ///@} + /** + * @brief Coefficient-Value transform + * @param operation Forward (coef->value) or backward (value->coef). + * + * @details This routine transforms the scaling coefficients of the node to the + * function values in the corresponding quadrature roots (of its children). + * + * @note This routine assumes a 0/1 (scaling on child 0 and 1) + * representation, instead of s/d (scaling and wavelet). + */ + virtual void cvTransform(int operation, bool firstchild = false); // TODO document firstchild parameter + + /** + * @brief Multiwavelet transform + * @param operation compression (s0,s1->s,d) or reconstruction (s,d->s0,s1). + * + * @details Application of the filters on one node to pass from a 0/1 (scaling + * on child 0 and 1) representation to an s/d (scaling and + * wavelet) representation. Bit manipulation is used in order to + * determine the correct filters and whether to apply them or just + * pass to the next couple of indexes. The starting coefficients are + * preserved until the application is terminated, then they are + * overwritten. With minor modifications this code can also be used + * for the inverse mw transform (just use the transpose filters) or + * for the application of an operator (using A, B, C and T parts of an + * operator instead of G1, G0, H1, H0). This is the version where the + * three directions are operated one after the other. Although this + * is formally faster than the other algorithm, the separation of the + * three dimensions prevent the possibility to use the norm of the + * operator in order to discard a priori negligible contributions. + * + */ + virtual void mwTransform(int operation); /** * @brief Node-norm at an arbitrary index. @@ -256,8 +391,8 @@ class MWNode { int n_coefs{0}; ///< Number of coefficients in @ref coefs. 
// -------- Serialization helpers -------- - int serialIx{-1}; ///< Index in a serialized traversal. - int parentSerialIx{-1}; ///< Serialized index of parent, or -1 for roots. + int serialIx{-1}; ///< Index in the serial tree + int parentSerialIx{-1}; ///< Index of parent in the serial tree, or -1 for roots // -------- Indexing and space-filling path -------- NodeIndex nodeIndex; ///< Scale and translation of this node. diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h index 8e96d47e1..cbbbafec0 100644 --- a/src/trees/MWTree.h +++ b/src/trees/MWTree.h @@ -60,7 +60,7 @@ class BankAccount; /** * @class MWTree * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient scalar type (e.g. double, ComplexDouble). + * @tparam T Coefficient type (e.g. double, ComplexDouble). * * @brief Base class for MW tree structures (e.g., FunctionTree, OperatorTree). * @@ -138,7 +138,7 @@ template class MWTree { int getDim() const { return D; } /// @return 2^D (number of children per internal node). int getTDim() const { return (1 << D); } - /// @return Total number of nodes currently allocated in the tree. + /// @return Total number of nodes currently in the tree. int getNNodes() const { return getNodeAllocator().getNNodes(); } /// @return Number of records kept for negative-depth counts. int getNNegScales() const { return this->nodesAtNegativeDepth.size(); } From 0d060b6f1e14de997cde664dcaec30f4ab674d74 Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Thu, 6 Nov 2025 14:54:56 +0100 Subject: [PATCH 22/51] Comments HilbertPath --- src/trees/HilbertPath.h | 48 +++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/src/trees/HilbertPath.h b/src/trees/HilbertPath.h index 007a7edcd..e18ab349c 100644 --- a/src/trees/HilbertPath.h +++ b/src/trees/HilbertPath.h @@ -49,7 +49,7 @@ namespace mrcpp { * @class HilbertPath * @tparam D Spatial dimension (e.g., 2 for quadtree, 3 for octree). 
* - * @brief Encapsulates the current Hilbert orientation state and child mappings. + * @brief Traverse the leaf nodes of a tree following the Hilbert space-filling curve. * * @details * Each node visit in a Hilbert traversal has an associated **state** that @@ -63,15 +63,17 @@ namespace mrcpp { template class HilbertPath final { public: - /** @brief Default constructor; initializes to the canonical root state. */ + /** + * @brief Default constructor + */ HilbertPath() = default; - - /** @brief Copy constructor. */ + /** + * @brief Copy constructor + */ HilbertPath(const HilbertPath &p) : path(p.path) {} - /** - * @brief Construct a child-state from a parent-state and a child index. + * @brief Construct a child path from a parent path and a child index. * * @param[in] p Parent @ref HilbertPath state. * @param[in] cIdx Child index expressed in **Morton (Z-order)** for this parent. @@ -85,34 +87,34 @@ class HilbertPath final { int hIdx = p.getHIndex(cIdx); this->path = p.getChildPath(hIdx); } - - /** @brief Assignment operator. */ + /** + * @brief Assignment operator + */ HilbertPath &operator=(const HilbertPath &p) { this->path = p.path; return *this; } - - /** @brief Return the current orientation state identifier. */ - short int getPath() const { return this->path; } - + short int getPath() const { return this->path; } ///< @return the current path */ /** - * @brief Transition: state after descending to Hilbert child @p hIdx. - * @param[in] hIdx Child index in **Hilbert** order for the current state. - * @return Orientation state identifier for the child. + * @brief Get path index of selected child + * + * @param hIdx Child index in **Hilbert** order for the current state. + * @return Path index for the selected child */ short int getChildPath(int hIdx) const { return this->pTable[this->path][hIdx]; } - /** - * @brief Map Hilbert child index to Morton (Z-order) child index. - * @param[in] hIdx Child index in **Hilbert** order for the current state. 
- * @return Corresponding **Morton** child index. + * @brief Map Hilbert child index to Morton (Z-order) child index + * + * @param hIdx Child index in **Hilbert** order + * @return **Morton** child index. */ int getZIndex(int hIdx) const { return this->zTable[this->path][hIdx]; } /** * @brief Map Morton (Z-order) child index to Hilbert child index. - * @param[in] zIdx Child index in **Morton** order for the current state. - * @return Corresponding **Hilbert** child index. + * + * @param zIdx Child index in **Morton** order + * @return **Hilbert** child index */ int getHIndex(int zIdx) const { return this->hTable[this->path][zIdx]; } @@ -121,9 +123,9 @@ class HilbertPath final { short int path{0}; /** - * @name Lookup tables (declared here, defined in the .cpp) + * @name Lookup tables (declared in header, defined in the .cpp) * Each table has 2^D columns (up to 8 for D=3) and one row per state. - * @{ + * */ static const short int pTable[][8]; ///< Next-state table: state × h -> state' static const int zTable[][8]; ///< Mapping: state × h -> z From 7111375e6a12a401d065a40e5605512f58c368de Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Thu, 6 Nov 2025 15:02:40 +0100 Subject: [PATCH 23/51] Comments HilbertPath --- src/trees/HilbertPath.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/trees/HilbertPath.h b/src/trees/HilbertPath.h index e18ab349c..4bb7092b9 100644 --- a/src/trees/HilbertPath.h +++ b/src/trees/HilbertPath.h @@ -40,25 +40,25 @@ namespace mrcpp { * - the corresponding **Z-order (Morton) index** \f$z\f$, * as well as the **next orientation state** after descending to child \f$h\f$. * - * The mappings are implemented via static lookup tables (declared here, defined - * in the corresponding translation unit). The template parameter @p D is the - * spatial dimension; typical values are 2 (quadtree) or 3 (octree). */ /** * @class HilbertPath * @tparam D Spatial dimension (e.g., 2 for quadtree, 3 for octree). 
* - * @brief Traverse the leaf nodes of a tree following the Hilbert space-filling curve. + * @brief Traverse the nodes of a tree following the Hilbert space-filling curve. * - * @details + * @details The Hilbert curve is a continuous fractal space-filling curve that + * has good locality properties. We use it to traverse the nodes of a tree. * Each node visit in a Hilbert traversal has an associated **state** that - * determines how the children are ordered. Given the current state: + * determines how the children are ordered. Alternatively, a Z-ordering can be used. * - @ref getZIndex maps a Hilbert child index to the corresponding Morton * (Z-order) child index; * - @ref getHIndex performs the inverse mapping (Morton to Hilbert); and * - @ref getChildPath returns the orientation state to use after descending * to a specific Hilbert child. + * + * The mappings are implemented via static lookup tables. */ template class HilbertPath final { From 7cdfeb8af7084884ab456b0f3d0aa95d064d31e6 Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Thu, 6 Nov 2025 15:04:38 +0100 Subject: [PATCH 24/51] Comments HilbertPath --- src/trees/HilbertPath.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/trees/HilbertPath.h b/src/trees/HilbertPath.h index 4bb7092b9..c6c005a14 100644 --- a/src/trees/HilbertPath.h +++ b/src/trees/HilbertPath.h @@ -27,21 +27,6 @@ namespace mrcpp { -/** - * @file HilbertPath.h - * @brief Lookup-based helper to traverse octree/quadtree children in Hilbert order. - * - * @details - * A Hilbert curve traversal depends on an **orientation state** that changes - * from parent to child. This lightweight class stores the current state and - * provides constant-time conversions between: - * - the **Hilbert child index** \f$h \in \{0,\dots,2^D-1\}\f$ for the - * current state, and - * - the corresponding **Z-order (Morton) index** \f$z\f$, - * as well as the **next orientation state** after descending to child \f$h\f$. 
- * - */ - /** * @class HilbertPath * @tparam D Spatial dimension (e.g., 2 for quadtree, 3 for octree). From 4a3b4c1dfb7e0bdd803de86fd02b8ee0bf12685e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Thu, 6 Nov 2025 15:38:52 +0100 Subject: [PATCH 25/51] MWTree documented --- src/trees/MWTree.cpp | 185 -------------- src/trees/MWTree.h | 560 ++++++++++++++++++++++++------------------- 2 files changed, 320 insertions(+), 425 deletions(-) diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp index 6a646d33f..4f26b9a48 100644 --- a/src/trees/MWTree.cpp +++ b/src/trees/MWTree.cpp @@ -40,15 +40,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief MWTree constructor. - * - * @param[in] mra: the multiresolution analysis object - * @param[in] n: the name of the tree (only for printing purposes) - * - * @details Creates an empty tree object, containing only the set of - * root nodes. The information for the root node configuration to use - * is in the mra object which is passed to the constructor. - */ template MWTree::MWTree(const MultiResolutionAnalysis &mra, const std::string &n) : MRA(mra) @@ -60,19 +51,12 @@ MWTree::MWTree(const MultiResolutionAnalysis &mra, const std::string &n this->nodesAtDepth.push_back(0); } -/** @brief MWTree destructor. */ template MWTree::~MWTree() { this->endNodeTable.clear(); if (this->nodesAtDepth.size() != 1) MSG_ERROR("Nodes at depth != 1 -> " << this->nodesAtDepth.size()); if (this->nodesAtDepth[0] != 0) MSG_ERROR("Nodes at depth 0 != 0 -> " << this->nodesAtDepth[0]); } -/** @brief Deletes all the nodes in the tree - * - * @details This method will recursively delete all the nodes, - * including the root nodes. Derived classes will call this method - * when the object is deleted. 
- */ template void MWTree::deleteRootNodes() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -82,14 +66,6 @@ template void MWTree::deleteRootNodes() { } } -/** @brief Remove all nodes in the tree - * - * @details Leaves the tree in the same state as after construction, - * i.e. undefined tree structure containing only root nodes without - * coefficients. The assigned memory, including branch and leaf - * nodes, (nodeChunks in NodeAllocator) is NOT released, but is - * immediately available to the new function. - */ template void MWTree::clear() { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -101,11 +77,6 @@ template void MWTree::clear() { this->clearSquareNorm(); } -/** @brief Calculate the squared norm \f$ ||f||^2_{\ldots} \f$ of a function represented as a tree. - * - * @details The norm is calculated using endNodes only. The specific - * type of norm which is computed will depend on the derived class - */ template void MWTree::calcSquareNorm(bool deep) { double treeNorm = 0.0; for (int n = 0; n < this->getNEndNodes(); n++) { @@ -117,29 +88,6 @@ template void MWTree::calcSquareNorm(bool deep) { this->squareNorm = treeNorm; } -/** @brief Full Multiwavelet transform of the tree in either directions - * - * @param[in] type: TopDown (from roots to leaves) or BottomUp (from - * leaves to roots) which specifies the direction of the MW transform - * @param[in] overwrite: if true, the result will overwrite - * preexisting coefficients. - * - * @details It performs a Multiwavlet transform of the whole tree. The - * input parameters will specify the direction (upwards or downwards) - * and whether the result is added to the coefficients or it - * overwrites them. See the documentation for the #mwTransformUp - * and #mwTransformDown for details. 
- * \f[ - * \pmatrix{ - * s_{nl}\\ - * d_{nl} - * } - * \rightleftarrows \pmatrix{ - * s_{n+1,2l}\\ - * s_{n+1,2l+1} - * } - * \f] - */ template void MWTree::mwTransform(int type, bool overwrite) { switch (type) { case TopDown: @@ -154,15 +102,6 @@ template void MWTree::mwTransform(int type, bool overw } } -/** @brief Regenerates all s/d-coeffs by backtransformation - * - * @details It starts at the bottom of the tree (scaling coefficients - * of the leaf nodes) and it generates the scaling and wavelet - * coefficients of the parent node. It then proceeds recursively all the - * way up to the root nodes. This is generally used after a function - * projection to purify the coefficients obtained by quadrature at - * coarser scales which are therefore not precise enough. - */ template void MWTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -180,17 +119,6 @@ template void MWTree::mwTransformUp() { } } -/** @brief Regenerates all scaling coeffs by MW transformation of existing s/w-coeffs - * on coarser scales - * - * @param[in] overwrite: if true the preexisting coefficients are overwritten - * - * @details The transformation starts at the rootNodes and proceeds - * recursively all the way to the leaf nodes. The existing scaling - * coefficeints will either be overwritten or added to. The latter - * operation is generally used after the operator application. - * - */ template void MWTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -215,12 +143,6 @@ template void MWTree::mwTransformDown(bool overwrite) } } -/** @brief Set the MW coefficients to zero, keeping the same tree structure - * - * @details Keeps the node structure of the tree, even though the zero - * function is representable at depth zero. One should then use \ref cropTree to remove - * unnecessary nodes. 
- */ template void MWTree::setZero() { TreeIterator it(*this); while (it.next()) { @@ -230,13 +152,6 @@ template void MWTree::setZero() { this->squareNorm = 0.0; } -/** @brief Increments node counter by one for non-GenNodes. - * - * @details TO BE DOCUMENTED - * \warning: This routine is not thread - * safe, and must NEVER be called outside a critical region in parallel. - * It's way. way too expensive to lock the tree, so don't even think - * about it. */ template void MWTree::incrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -254,14 +169,6 @@ template void MWTree::incrementNodeCount(int scale) { } } -/** @brief Decrements node counter by one for non-GenNodes. - * - * @details TO BE DOCUMENTED - * \warning: This routine is not thread - * safe, and must NEVER be called outside a critical region in parallel. - * It's way. way too expensive to lock the tree, so don't even think - * about it. - */ template void MWTree::decrementNodeCount(int scale) { int depth = scale - getRootScale(); if (depth < 0) { @@ -277,10 +184,6 @@ template void MWTree::decrementNodeCount(int scale) { } } -/** @returns Total number of nodes in the tree, at given depth (not in use) - * - * @param[in] depth: Tree depth (0 depth is the coarsest scale) to count. - */ template int MWTree::getNNodesAtDepth(int depth) const { int N = 0; if (depth < 0) { @@ -291,19 +194,11 @@ template int MWTree::getNNodesAtDepth(int depth) const return N; } -/** @returns Size of all MW coefs in the tree, in kB */ template int MWTree::getSizeNodes() const { auto nCoefs = 1ll * getNNodes() * getTDim() * getKp1_d(); return sizeof(T) * nCoefs / 1024; } -/** @brief Finds and returns the node pointer with the given \ref NodeIndex, const version. - * - * @details Recursive routine to find and return the node with a given - * NodeIndex. This routine returns the appropriate Node, or a NULL - * pointer if the node does not exist, or if it is a - * GenNode. 
Recursion starts at the appropriate rootNode. - */ template const MWNode *MWTree::findNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -313,13 +208,6 @@ template const MWNode *MWTree::findNode(NodeInde return root.retrieveNodeNoGen(idx); } -/** @brief Finds and returns the node pointer with the given \ref NodeIndex. - * - * @details Recursive routine to find and return the node with a given - * NodeIndex. This routine returns the appropriate Node, or a NULL - * pointer if the node does not exist, or if it is a - * GenNode. Recursion starts at the appropriate rootNode. - */ template MWNode *MWTree::findNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } int rIdx = getRootBox().getBoxIndex(idx); @@ -329,14 +217,6 @@ template MWNode *MWTree::findNode(NodeIndex i return root.retrieveNodeNoGen(idx); } -/** @brief Finds and returns the node reference with the given NodeIndex. - * - * @details This routine ALWAYS returns the node you ask for. If the - * node does not exist, it will be generated by MW - * transform. Recursion starts at the appropriate rootNode and descends - * from this. - * The nodes are permanently added to the tree if create = true - */ template MWNode &MWTree::getNode(NodeIndex idx, bool create) { if (getRootBox().isPeriodic()) periodic::index_manipulation(idx, getRootBox().getPeriodic()); @@ -351,14 +231,6 @@ template MWNode &MWTree::getNode(NodeIndex id return *out; } -/** @brief Finds and returns the node with the given NodeIndex. - * - * @details This routine returns the Node you ask for, or the EndNode - * on the path to the requested node, if the requested one is deeper - * than the leaf node ancestor. It will never create or return - * GenNodes. Recursion starts at the appropriate rootNode and decends - * from this. 
- */ template MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } MWNode &root = getRootBox().getNode(idx); @@ -366,13 +238,6 @@ template MWNode &MWTree::getNodeOrEndNode(NodeIn return *root.retrieveNodeOrEndNode(idx); } -/** @brief Finds and returns the node reference with the given NodeIndex. Const version. - * - * @details This routine ALWAYS returns the node you ask for. If the - * node does not exist, it will be generated by MW - * transform. Recursion starts at the appropriate rootNode and decends - * from this. - */ template const MWNode &MWTree::getNodeOrEndNode(NodeIndex idx) const { if (getRootBox().isPeriodic()) { periodic::index_manipulation(idx, getRootBox().getPeriodic()); } const MWNode &root = getRootBox().getNode(idx); @@ -380,15 +245,6 @@ template const MWNode &MWTree::getNodeOrEndNode( return *root.retrieveNodeOrEndNode(idx); } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. - * - * @param[in] depth: requested node depth from root scale. - * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine ALWAYS returns the node you ask for, and will - * generate nodes that do not exist. Recursion starts at the - * appropriate rootNode and decends from this. - */ template MWNode &MWTree::getNode(Coord r, int depth) { MWNode &root = getRootBox().getNode(r); if (depth >= 0) { @@ -398,15 +254,6 @@ template MWNode &MWTree::getNode(Coord r, int } } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. - * - * @param[in] depth: requested node depth from root scale. - * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine returns the Node you ask for, or the EndNode on - * the path to the requested node, and will never create or return GenNodes. - * Recursion starts at the appropriate rootNode and decends from this. 
- */ template MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) { if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } @@ -415,15 +262,6 @@ template MWNode &MWTree::getNodeOrEndNode(Coord< return *root.retrieveNodeOrEndNode(r, depth); } -/** @brief Finds and returns the node at a given depth that contains a given coordinate. Const version - * - * @param[in] depth: requested node depth from root scale. - * @param[in] r: coordinates of an arbitrary point in space - * - * @details This routine returns the Node you ask for, or the EndNode on - * the path to the requested node, and will never create or return GenNodes. - * Recursion starts at the appropriate rootNode and decends from this. - */ template const MWNode &MWTree::getNodeOrEndNode(Coord r, int depth) const { if (getRootBox().isPeriodic()) { periodic::coord_manipulation(r, getRootBox().getPeriodic()); } @@ -431,11 +269,6 @@ template const MWNode &MWTree::getNodeOrEndNode( return *root.retrieveNodeOrEndNode(r, depth); } -/** @brief Returns the list of all EndNodes - * - * @details copies the list of all EndNode pointers into a new vector - * and returns it. - */ template MWNodeVector *MWTree::copyEndNodeTable() { auto *nVec = new MWNodeVector; for (int n = 0; n < getNEndNodes(); n++) { @@ -445,12 +278,6 @@ template MWNodeVector *MWTree::copyEndNodeTable( return nVec; } -/** @brief Recreate the endNodeTable - * - * @details the endNodeTable is first deleted and then rebuilt from - * scratch. It makes use of the TreeIterator to traverse the tree. 
- * - */ template void MWTree::resetEndNodeTable() { clearEndNodeTable(); TreeIterator it(*this, TopDown, Hilbert); @@ -514,8 +341,6 @@ template int MWTree::countAllocNodes(int depth) { // return count; } -/** @brief Prints a summary of the tree structure on the output file - */ template std::ostream &MWTree::print(std::ostream &o) const { o << " square norm: " << this->squareNorm << std::endl; o << " root scale: " << this->getRootScale() << std::endl; @@ -528,11 +353,6 @@ template std::ostream &MWTree::print(std::ostream &o) return o; } -/** @brief sets values for maxSquareNorm in all nodes - * - * @details it defines the upper bound of the squared norm \f$ - * ||f||^2_{\ldots} \f$ in this node or its descendents - */ template void MWTree::makeMaxSquareNorms() { NodeBox &rBox = this->getRootBox(); MWNode **roots = rBox.getNodes(); @@ -542,11 +362,6 @@ template void MWTree::makeMaxSquareNorms() { } } -/** @brief gives serialIx of a node from its NodeIndex - * - * @details gives a unique integer for each nodes corresponding to the position - * of the node in the serialized representation - */ template int MWTree::getIx(NodeIndex nIdx) { if (this->isLocal == false) MSG_ERROR("getIx only implemented in local representation"); if (NodeIndex2serialIx.count(nIdx) == 0) diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h index cbbbafec0..aae6ded6e 100644 --- a/src/trees/MWTree.h +++ b/src/trees/MWTree.h @@ -23,23 +23,6 @@ * */ -/** - * @file MWTree.h - * @brief Base template for multiwavelet (MW) tree data structures. - * - * @details - * An MW tree stores a hierarchical collection of @ref MWNode "MWNode" - * objects arranged as a 2^D-ary tree over a @ref MultiResolutionAnalysis - * (computational domain + basis). 
It provides: - * - ownership and construction of the root nodes (via @ref NodeBox), - * - navigation and on-demand generation of nodes, - * - bookkeeping of per-depth node counts, - * - utilities for MW transforms, norms, and end-node tables, and - * - access to a @ref NodeAllocator for memory management. - * - * This class is a base for both **function** and **operator** trees. - */ - #pragma once #include @@ -59,280 +42,352 @@ class BankAccount; /** * @class MWTree - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (e.g. double, ComplexDouble). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * @brief Base class for MW tree structures (e.g., FunctionTree, OperatorTree). + * @brief Base class for Multiwavelet tree structures, such as FunctionTree and OperatorTree * - * @details - * A tree is defined over a @ref MultiResolutionAnalysis (MRA). The set of root - * nodes is determined by the MRA world box. Each node has up to 2^D children. - * Some accessors only *find* existing nodes, while others may *create* nodes - * lazily (e.g., by splitting and transferring coefficients). - * - * ### Node retrieval semantics - * - @ref findNode returns a pointer or `nullptr` if the node is missing. - * - @ref getNode and @ref getNodeOrEndNode return a reference and can - * create intermediate nodes if requested (see parameters). - * - * ### Norms - * @ref calcSquareNorm computes the global L2 norm (squared) either from - * the existing coefficients only, or by visiting descendants when `deep=true`. + * @details The MWTree class is the base class for all tree structures + * needed for Multiwavelet calculations. The MWTree is a D-dimensional + * tree structure of MWNodes. The tree starts from a set of root nodes + * at a common given scale, defining the world box. The most common + * settings are either a single root node or \f$ 2^D \f$ root + * nodes. Other configurations are however allowed. 
For example, in 3D + * one could have a total of 12 root nodes (a 2x2x3 set of root + * nodes). Once the tree structure is generated, each node will have a + * parent node (except for the root nodes) and \f$ 2^D \f$ child nodes + * (except for leaf nodes). Most of the methods deal with traversing + * the tree structure in different ways to fetch specific nodes. Some + * of them will return a node present in the tree; some other methods + * will generate the required node on the fly using the MW transform; + * some methods will return an empty pointer if the node is not + * present. See specific methods for details. */ template class MWTree { public: /** - * @brief Construct an empty tree bound to an MRA. - * @param mra Multi-resolution analysis (domain + basis). - * @param n A short name for logging/printing. - * - * @post Root nodes are created according to the MRA world box. - * No coefficients are computed; the tree is “undefined”. - */ + * @brief MWTree constructor. + * + * @param[in] mra The multiresolution analysis object + * @param[in] n The name of the tree (only for printing purposes) + * + * @details Creates an empty tree object, containing only the set of + * root nodes. The information for the root node configuration to use + * is in the mra object which is passed to the constructor. + */ MWTree(const MultiResolutionAnalysis &mra, const std::string &n); - /// Non-copyable. MWTree(const MWTree &tree) = delete; - /// Non-assignable. MWTree &operator=(const MWTree &tree) = delete; - /// Virtual destructor. + /// @brief MWTree destructor. virtual ~MWTree(); /** - * @brief Set all existing node coefficients to zero (structure unchanged). - * @note Does not refine/coarsen the tree, only zeroes values. + * @brief Set the MW coefficients to zero, keeping the same tree structure + * + * @details Keeps the node structure of the tree, even though the zero + * function is representable at depth zero. 
One should then use \ref cropTree to remove + * unnecessary nodes. */ void setZero(); - /** - * @brief Remove all nodes and reset to a freshly constructed state. - * @post Root nodes are recreated; end-node table and counters cleared. + /** @brief Remove all nodes in the tree + * + * @details Leaves the tree in the same state as after construction, + * i.e. undefined tree structure containing only root nodes without + * coefficients. The assigned memory, including branch and leaf + * nodes, (nodeChunks in NodeAllocator) is NOT released, but is + * immediately available to the new function. */ void clear(); - /** @name Norms */ - ///@{ - /// @return Global squared L2 norm of the representation (negative if undefined). - double getSquareNorm() const { return this->squareNorm; } + double getSquareNorm() const { return this->squareNorm; } ///< @return The squared L2 norm of the function - /** - * @brief Recompute the global squared L2 norm. - * @param deep If `true`, may traverse deeper to ensure accuracy. + /** @brief Calculate the squared norm \f$ ||f||^2_{\ldots} \f$ of a function represented as a tree. + * + * @details The norm is calculated using endNodes only. The specific + * type of norm which is computed will depend on the derived class. */ void calcSquareNorm(bool deep = false); - /// @brief Mark the norm as undefined (sets it to -1). - void clearSquareNorm() { this->squareNorm = -1.0; } - ///@} - - /** @name Basis/structure parameters */ - ///@{ - /// @return Polynomial order k. - int getOrder() const { return this->order; } - /// @return k+1. - int getKp1() const { return this->order + 1; } - /// @return (k+1)^D. - int getKp1_d() const { return this->kp1_d; } - /// @return Spatial dimension D. - int getDim() const { return D; } - /// @return 2^D (number of children per internal node). - int getTDim() const { return (1 << D); } - /// @return Total number of nodes currently in the tree. 
- int getNNodes() const { return getNodeAllocator().getNNodes(); } - /// @return Number of records kept for negative-depth counts. - int getNNegScales() const { return this->nodesAtNegativeDepth.size(); } - /// @return Root scale (MRA world scale). - int getRootScale() const { return this->rootBox.getScale(); } - /// @return Number of depth levels for which counters are stored. - int getDepth() const { return this->nodesAtDepth.size(); } - /// @return Number of nodes counted at a given depth. + void clearSquareNorm() { this->squareNorm = -1.0; } ///< @brief Mark the norm as undefined (sets it to -1) + + int getOrder() const { return this->order; } ///< @return Polynomial order k + int getKp1() const { return this->order + 1; } ///< @return k+1 + int getKp1_d() const { return this->kp1_d; } ///< @return (k+1)^D + int getDim() const { return D; } ///< @return The spatial dimension D + int getTDim() const { return (1 << D); } ///< @return 2^D (number of children per internal node) + int getNNodes() const { return getNodeAllocator().getNNodes(); } ///< @return The total number of nodes in this tree + int getNNegScales() const { return this->nodesAtNegativeDepth.size(); } ///< @return The number of negative scales in this tree + int getRootScale() const { return this->rootBox.getScale(); } ///< @return The root scale of this tree + int getDepth() const { return this->nodesAtDepth.size(); } ///< @return The maximum depth of this tree + int getSizeNodes() const; ///< @return The size of all MW coefficients in the tree (in kB) + /** + * @brief Returns the total number of nodes in the tree, at given depth (not in use) + * @param i Tree depth (0 depth is the coarsest scale) to count + * @return Number of nodes at depth i + */ int getNNodesAtDepth(int i) const; - /// @return Approximate memory footprint of nodes (kB). - int getSizeNodes() const; - ///@} - - /** @name MRA / root access */ - ///@{ - /// @return Mutable root-node container.
- NodeBox &getRootBox() { return this->rootBox; } - /// @return Const root-node container. - const NodeBox &getRootBox() const { return this->rootBox; } - /// @return MRA bound to this tree. - const MultiResolutionAnalysis &getMRA() const { return this->MRA; } - ///@} - /** - * @brief Perform a multiresolution transform. - * @param type Transform kind (implementation-defined selector). - * @param overwrite If `true`, may reuse buffers for speed. - * @note Typical directions are “top-down” and “bottom-up”; see implementation. + NodeBox &getRootBox() { return this->rootBox; } ///< @return The container of nodes + const NodeBox &getRootBox() const { return this->rootBox; } ///< @return The container of nodes + const MultiResolutionAnalysis &getMRA() const { return this->MRA; } ///< @return The MRA object used by this tree + + /** + * @brief Full Multiwavelet transform of the tree in either direction + * + * @param type TopDown (from roots to leaves) or BottomUp (from + * leaves to roots) which specifies the direction of the MW transform + * @param overwrite If true, the result will overwrite preexisting coefficients. + * + * @details It performs a Multiwavelet transform of the whole tree. The + * input parameters will specify the direction (upwards or downwards) + * and whether the result is added to the coefficients or it + * overwrites them. See the documentation for the #mwTransformUp + * and #mwTransformDown for details. + * \f[ + * \pmatrix{ + * s_{nl}\\ + * d_{nl} + * } + * \rightleftarrows \pmatrix{ + * s_{n+1,2l}\\ + * s_{n+1,2l+1} + * } + * \f] */ void mwTransform(int type, bool overwrite = true); - /** @name Naming */ - ///@{ - /// Set a short descriptive name (used in logs). + /** + * @brief Set the name of the tree + * @param n The new name + */ void setName(const std::string &n) { this->name = n; } - /// Get the current name.
- const std::string &getName() const { return this->name; } - ///@} + const std::string &getName() const { return this->name; } ///< @return The name of the tree - /** @name Root-index helpers */ - ///@{ - /// @return Root-box index containing coordinate @p r, or -1 if out-of-bounds (non-periodic). + /** + * @param r Spatial coordinates + * @return The index of the root box containing r + */ int getRootIndex(Coord r) const { return this->rootBox.getBoxIndex(r); } - /// @return Root-box index containing node @p nIdx, or -1 if out-of-bounds (non-periodic). + /** + * @param nIdx Index of a node + * @return The index of the root box containing nIdx + */ int getRootIndex(NodeIndex nIdx) const { return this->rootBox.getBoxIndex(nIdx); } - ///@} - /** @name Node lookup / retrieval */ - ///@{ /** - * @brief Find an existing node. - * @param nIdx Target node index. - * @return Pointer to the node if present, otherwise `nullptr`. - * @warning Does not create nodes. + * @brief Finds and returns the node pointer with the given NodeIndex + * @param nIdx The NodeIndex to search for + * + * @details Recursive routine to find and return the node with a given + * NodeIndex. This routine returns the appropriate Node, or a NULL + * pointer if the node does not exist, or if it is a + * GenNode. Recursion starts at the appropriate rootNode. + * + * @return Pointer to the required node. */ MWNode *findNode(NodeIndex nIdx); - - /// Const overload of @ref findNode. + /** + * @brief Finds and returns the node pointer with the given NodeIndex + * @param nIdx The NodeIndex to search for + * + * @details Recursive routine to find and return the node with a given + * NodeIndex. This routine returns the appropriate Node, or a NULL + * pointer if the node does not exist, or if it is a + * GenNode. Recursion starts at the appropriate rootNode. + * + * @return Pointer to the required node. + */ const MWNode *findNode(NodeIndex nIdx) const; /** - * @brief Get a node by index, optionally creating it.
- * @param nIdx Target node index. - * @param create If `true`, missing nodes may be generated on demand. - * @return Reference to the node. + * @brief Finds and returns the node reference with the given NodeIndex. + * @param nIdx The NodeIndex to search for + * @param create If true, previously non-existing nodes will be stored permanently in the tree + * + * @details This routine ALWAYS returns the node you ask for. If the + * node does not exist, it will be generated by MW + * transform. Recursion starts at the appropriate rootNode and descends + * from this. + * + * @return Reference to the required node. + * @note The nodes are permanently added to the tree if create = true. */ MWNode &getNode(NodeIndex nIdx, bool create = false); /** - * @brief Get a node or the “closest” end node containing it. - * @param nIdx Target node index. - * @return Reference to an existing node; may be an end node if exact match is absent. - * @note Never creates new nodes. + * @brief Finds and returns the node (or EndNode) for the given NodeIndex. + * @param nIdx The NodeIndex to search for + * + * @details This routine returns the Node you ask for, or the EndNode + * on the path to the requested node, if the requested one is deeper + * than the leaf node ancestor. It will never create or return + * GenNodes. Recursion starts at the appropriate rootNode and decends + * from this. + * + * @return Reference to the required node or EndNode. */ MWNode &getNodeOrEndNode(NodeIndex nIdx); - - /// Const overload of @ref getNodeOrEndNode(NodeIndex). + /** + * @brief Finds and returns the node (or EndNode) for the given NodeIndex. + * @param nIdx The NodeIndex to search for + * + * @details This routine returns the Node you ask for, or the EndNode + * on the path to the requested node, if the requested one is deeper + * than the leaf node ancestor. It will never create or return + * GenNodes. Recursion starts at the appropriate rootNode and decends + * from this. 
+ * + * @return Reference to the required node or EndNode. + */ const MWNode &getNodeOrEndNode(NodeIndex nIdx) const; - /** - * @brief Get a node by spatial coordinate. - * @param r Spatial coordinate. - * @param depth Desired depth; if negative, use current deepest. - * @return Reference to the node; may create if required by the implementation. + /** + * @brief Finds and returns the node at a given depth that contains a given coordinate. + * + * @param r Coordinates of an arbitrary point in space + * @param depth Requested node depth from root scale + * + * @details This routine ALWAYS returns the node you ask for, and will + * generate nodes that do not exist. Recursion starts at the + * appropriate rootNode and decends from this. + * + * @return Reference to the required node. */ MWNode &getNode(Coord r, int depth = -1); - /** - * @brief Get a node or containing end node by coordinate. - * @param r Spatial coordinate. - * @param depth Desired depth; if negative, use current deepest. - * @return Reference to an existing node; may be an end node. + /** + * @brief Finds and returns the node at a given depth that contains a given coordinate. + * + * @param r Coordinates of an arbitrary point in space + * @param depth Requested node depth from root scale. + * + * @details This routine returns the Node you ask for, or the EndNode on + * the path to the requested node, and will never create or return GenNodes. + * Recursion starts at the appropriate rootNode and decends from this. + * + * @return Reference to the required node or EndNode. */ MWNode &getNodeOrEndNode(Coord r, int depth = -1); - - /// Const overload of @ref getNodeOrEndNode(Coord,int). + /** + * @brief Finds and returns the node at a given depth that contains a given coordinate. + * + * @param r Coordinates of an arbitrary point in space + * @param depth Requested node depth from root scale. 
+ * + * @details This routine returns the Node you ask for, or the EndNode on + * the path to the requested node, and will never create or return GenNodes. + * Recursion starts at the appropriate rootNode and decends from this. + * + * @return Reference to the required node or EndNode. + */ const MWNode &getNodeOrEndNode(Coord r, int depth = -1) const; - ///@} - /** @name End-node table */ - ///@{ - /// @return Number of nodes currently listed as “end nodes”. - int getNEndNodes() const { return this->endNodeTable.size(); } - /// @return Number of root nodes. - int getNRootNodes() const { return this->rootBox.size(); } + int getNEndNodes() const { return this->endNodeTable.size(); } ///< @return The number of end nodes + int getNRootNodes() const { return this->rootBox.size(); } ///< @return The number of root nodes - /// @return Mutable reference to i-th end node. + /** + * @param i Index of the end node + * @return Reference to the i-th end node + */ MWNode &getEndMWNode(int i) { return *this->endNodeTable[i]; } - /// @return Mutable reference to i-th root node. + /** + * @param i Index of the root node + * @return Reference to the i-th root node + */ MWNode &getRootMWNode(int i) { return this->rootBox.getNode(i); } - - /// @return Const reference to i-th end node. + /** + * @param i Index of the end node + * @return Reference to the i-th end node + */ const MWNode &getEndMWNode(int i) const { return *this->endNodeTable[i]; } - /// @return Const reference to i-th root node. + /** + * @param i Index of the root node + * @return Reference to the i-th root node + */ const MWNode &getRootMWNode(int i) const { return this->rootBox.getNode(i); } - ///@} - /// @return `true` if the underlying world box has any periodic directions. - bool isPeriodic() const { return this->MRA.getWorldBox().isPeriodic(); } + bool isPeriodic() const { return this->MRA.getWorldBox().isPeriodic(); } ///< @return Whether the world is periodic - /** - * @brief Copy the current end-node table. 
- * @return New heap-allocated vector; caller takes ownership. + /** + * @brief Returns the list of all EndNodes + * @details Copies the list of all EndNode pointers into a new vector and returns it. + * @return The copied end-node table, allocated with new; the caller is responsible for deleting it. */ MWNodeVector *copyEndNodeTable(); + MWNodeVector *getEndNodeTable() { return &this->endNodeTable; } ///< @return The end-node table - /// @return Direct pointer to the internal end-node table. - MWNodeVector *getEndNodeTable() { return &this->endNodeTable; } - - /** @name Tree maintenance */ - ///@{ - /// Delete all root nodes and reset root structures. + /** + * @brief Deletes all the nodes in the tree + * @details This method will recursively delete all the nodes, + * including the root nodes. Derived classes will call this method + * when the object is deleted. + */ void deleteRootNodes(); - /// Rebuild the end-node table by traversing the tree. + /** + * @brief Recreate the endNodeTable + * @details the endNodeTable is first deleted and then rebuilt from + * scratch. It makes use of the TreeIterator to traverse the tree. + */ void resetEndNodeTable(); - /// Clear the end-node table without traversing. + /// @brief Clear the end-node table void clearEndNodeTable() { this->endNodeTable.clear(); } - ///@} - /** @name Node statistics (current tree) */ - ///@{ - /// Count branch (non-leaf) nodes; if depth < 0, count all depths. + /// @warning This method is currently not implemented. int countBranchNodes(int depth = -1); - /// Count leaf nodes; if depth < 0, count all depths. + /// @warning This method is currently not implemented. int countLeafNodes(int depth = -1); - /// Count allocated nodes; if depth < 0, count all depths. + /// @warning This method is currently not implemented. int countAllocNodes(int depth = -1); - /// Count nodes; if depth < 0, count all depths. + /// @warning This method is currently not implemented.
int countNodes(int depth = -1); - ///@} + /// @brief Whether the tree coefficients are stored in the Bank + bool isLocal = false; - /// If `true`, coefficients are stored externally (Bank); used by serialization tools. - bool isLocal = false; - - /** - * @brief Map a node index to its serial index (when stored locally). - * @param nIdx Node index. - * @return Serial index, or a negative value if not present. + /** + * @brief Gives serialIx of a node from its NodeIndex + * @param nIdx The NodeIndex of the node + * + * @details Gives a unique integer for each node corresponding to the position + * of the node in the serialized representation. Only works if isLocal == true. + * + * @return The serial index of the node. */ int getIx(NodeIndex nIdx); - /** - * @brief Precompute per-node maxima used by some adaptive algorithms. - * @details Fills `maxSquareNorm` and `maxWSquareNorm` for all nodes. + /** + * @brief Sets values for maxSquareNorm and maxWSquareNorm in all nodes + * @details It defines the upper bound of the squared norm \f$ + * ||f||^2_{\ldots} \f$ in this node or its descendants. */ void makeMaxSquareNorms(); - /** @name Allocator access */ - ///@{ - /// @return Mutable reference to the node allocator. - NodeAllocator &getNodeAllocator() { return *this->nodeAllocator_p; } - /// @return Const reference to the node allocator. - const NodeAllocator &getNodeAllocator() const { return *this->nodeAllocator_p; } - ///@} + NodeAllocator &getNodeAllocator() { return *this->nodeAllocator_p; } ///< @return Reference to the node allocator. + const NodeAllocator &getNodeAllocator() const { return *this->nodeAllocator_p; } ///< @return Reference to the node allocator. - /// Vector of final projected nodes (end nodes). - MWNodeVector endNodeTable; + MWNodeVector endNodeTable; ///< @brief Final projected nodes /** - * @brief Fetch coefficients of a specific node (when using a Bank). - * @param nIdx Node index. - * @param data Destination buffer of size (k+1)^D * 2^D. 
+ * @brief Fetch coefficients of a specific node stored in Bank + * @param nIdx Node index + * @param[out] data The node coefficients are copied into this array */ void getNodeCoeff(NodeIndex nIdx, T *data); - /// @return Whether the tree is marked as conjugated (used by some ops). - bool conjugate() const { return this->conj; } - /// Set or clear the conjugation flag. - void setConjugate(bool conjug) { this->conj = conjug; } - - /// Print tree summary to a stream. + bool conjugate() const { return this->conj; } ///< @return Whether the tree is conjugated + void setConjugate(bool conjug) { this->conj = conjug; } ///< @param conjug Set whether the tree is conjugated + + /** + * @brief Print a formatted summary of the tree + * @param o The output stream + * @param tree The tree to print + * @return The output stream + */ friend std::ostream &operator<<(std::ostream &o, const MWTree &tree) { return tree.print(o); } - // Friends that require access to internals + // Friend classes friend class MWNode; friend class FunctionNode; friend class OperatorNode; @@ -340,51 +395,76 @@ template class MWTree { friend class NodeAllocator; protected: - /** @name Immutable construction-time state */ - ///@{ - const MultiResolutionAnalysis MRA; ///< Domain and basis. - - const int order; ///< Polynomial order k. - const int kp1_d; ///< (k+1)^D. - ///@} - - /// Map node index -> serial index (used by local/banked storage). - std::map, int> NodeIndex2serialIx; - - /** @name User-settable metadata */ - ///@{ - std::string name; ///< Short name for diagnostics. - ///@} - - /// Node memory allocator. - std::unique_ptr> nodeAllocator_p{nullptr}; - - /** @name Tree data & counters */ - ///@{ - double squareNorm; ///< Global squared L2 norm (-1 if undefined). - NodeBox rootBox; ///< Container of root nodes. - std::vector nodesAtDepth; ///< Per-depth node counts (depth >= 0). - std::vector nodesAtNegativeDepth; ///< For negative-depth bookkeeping. 
- ///@} - - /** @name MW transforms (internals) */ - ///@{ + // Parameters that are set in construction and should never change + const MultiResolutionAnalysis MRA; ///< Domain and basis + + // Constant parameters that are derived internally + const int order; ///< Polynomial order k + const int kp1_d; ///< (k+1)^D + + std::map, int> NodeIndex2serialIx; ///< To store nodes serialIx + + // Parameters that are dynamic and can be set by user + std::string name; ///< Name of this tree + std::unique_ptr> nodeAllocator_p{nullptr}; ///< Node allocator + + // Tree data + double squareNorm; ///< Global squared L2 norm (-1 if undefined). + NodeBox rootBox; ///< The actual container of nodes + std::vector nodesAtDepth; ///< Node counter + std::vector nodesAtNegativeDepth; ///< Node counter + + /** + * @brief Regenerates all scaling coeffs by MW transformation of existing s/w-coeffs + * on coarser scales + * @param overwrite If true, the preexisting coefficients are overwritten + * + * @details The transformation starts at the rootNodes and proceeds + * recursively all the way to the leaf nodes. The existing scaling + * coefficients will either be overwritten or added to. The latter + * operation is generally used after the operator application. + */ virtual void mwTransformDown(bool overwrite); + + /** + * @brief Regenerates all s/d-coeffs by backtransformation + * + * @details It starts at the bottom of the tree (scaling coefficients + * of the leaf nodes) and it generates the scaling and wavelet + * coefficients of the parent node. It then proceeds recursively all the + * way up to the root nodes. This is generally used after a function + * projection to purify the coefficients obtained by quadrature at + * coarser scales which are therefore not precise enough. + */ virtual void mwTransformUp(); - ///@} - /// Increment per-depth counters for a node at the given scale. 
+ /** + * @brief Increments node counter by one for non-GenNodes + * @param scale Scale of the node + * @warning: This routine is not thread safe, and must NEVER be called + * outside a critical region in parallel. It's way, way too expensive to + * lock the tree, so don't even think about it. + */ void incrementNodeCount(int scale); - /// Decrement per-depth counters for a node at the given scale. + + /** + * @brief Decrements node counter by one for non-GenNodes + * @param scale Scale of the node + * @warning: This routine is not thread safe, and must NEVER be called + * outside a critical region in parallel. It's way, way too expensive to + * lock the tree, so don't even think about it. + */ void decrementNodeCount(int scale); - /// Optional external storage of coefficients. - BankAccount *NodesCoeff = nullptr; + BankAccount *NodesCoeff = nullptr; ///< Bank account for node coefficients - /// Conjugation flag for algorithms that need it. - bool conj{false}; + bool conj{false}; ///< Whether the tree is conjugated - /// Print a formatted summary (override in derived classes if needed). + /** + * @brief Prints a summary of the tree structure on the output file + * @param o The output stream + * @return The formatted output stream + */ virtual std::ostream &print(std::ostream &o) const; }; From e8887d37a615f5b4a9cac11b54de9b7faaa3c865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Thu, 6 Nov 2025 15:51:06 +0100 Subject: [PATCH 26/51] BandWidth documented --- src/trees/BandWidth.h | 58 ++++++++++++++----------------------------- 1 file changed, 18 insertions(+), 40 deletions(-) diff --git a/src/trees/BandWidth.h b/src/trees/BandWidth.h index fa520c26e..aa8976a4c 100644 --- a/src/trees/BandWidth.h +++ b/src/trees/BandWidth.h @@ -23,22 +23,6 @@ * */ -/** - * @file BandWidth.h - * @brief Lightweight storage for per-depth operator bandwidths. 
- * - * @details - * This class stores, for each tree depth, four component band widths plus - * a cached maximum among them. A negative width denotes “unset/empty”. - * - * - Rows correspond to depths in \f$\{0,\dots,\text{maxDepth}\}\f$. - * - Columns \f$0..3\f$ are per-component widths (e.g. for blocks T, C, B, A). - * - Column \f$4\f$ caches the **maximum** width at that depth. - * - * The class provides convenience accessors, mutation with automatic update of - * the per-depth maximum, and formatted printing. - */ - #pragma once #include @@ -49,15 +33,14 @@ namespace mrcpp { /** * @class BandWidth - * @brief Container for band widths over depths and components. + * @brief Container for band widths over depths and components */ class BandWidth final { public: /** - * @brief Construct with storage for @p depth + 1 rows. - * @param depth Maximum depth to allocate (inclusive). - * - * All entries are initialized to -1 (empty). + * @brief Constructor with storage for @p depth + 1 rows + * @param depth Maximum depth to allocate (inclusive) + * @details All entries are initialized to -1 (empty). */ BandWidth(int depth = 0) : widths(depth + 1, 5) { @@ -65,31 +48,28 @@ class BandWidth final { } /** - * @brief Copy-construct from another instance. + * @brief Copy-constructor + * @param bw Instance to copy from */ BandWidth(const BandWidth &bw) : widths(bw.widths) {} - /** - * @brief Copy-assign from another instance. - */ + /// @brief Copy-assign from another instance. BandWidth &operator=(const BandWidth &bw); - /** - * @brief Set all widths (including cached maxima) to -1. - */ + /// @brief Reset all width entries to -1 (empty). void clear() { this->widths.setConstant(-1); } /** - * @brief Check whether the row for @p depth is effectively empty. - * @param depth Depth to test. - * @return True if @p depth is out of range or the cached max is < 0. 
+ * @brief Check whether the row for @p depth is effectively empty + * @param depth Depth to test + * @return True if @p depth is out of range or the cached maximum (last column) of the row is < 0 */ bool isEmpty(int depth) const; /** - * @brief Highest valid depth index stored. - * @return The maximum depth (rows - 1). + * @brief Highest valid depth index stored + * @return The maximum depth (rows - 1) */ int getDepth() const { return this->widths.rows() - 1; } @@ -102,8 +82,8 @@ class BandWidth final { /** * @brief Component width accessor. - * @param depth Depth to query. - * @param index Component in {0,1,2,3}. + * @param depth Depth index + * @param index Component index * @return Width for (@p depth, @p index), or -1 if @p depth is out of range. */ int getWidth(int depth, int index) const { return (depth > getDepth()) ? -1 : this->widths(depth, index); } @@ -111,21 +91,19 @@ class BandWidth final { /** * @brief Set component width and update the cached per-depth maximum. * @param depth Depth to modify (0..getDepth()). - * @param index Component in {0,1,2,3}. + * @param index Component in {0, 1, 2, 3}. * @param wd Non-negative band width. */ void setWidth(int depth, int index, int wd); - /** - * @brief Stream pretty-printer. - */ + /// @brief Formatted printing of the BandWidth instance. friend std::ostream &operator<<(std::ostream &o, const BandWidth &bw) { return bw.print(o); } private: /// Matrix of widths; columns 0..3 = components, column 4 = cached max per depth. Eigen::MatrixXi widths; - /// Implementation of formatted printing. + /// @brief Formatted printing of the BandWidth instance. 
std::ostream &print(std::ostream &o) const; }; From 78a27a53220e0d14f091d3833a9aa703ff63830f Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Thu, 6 Nov 2025 16:15:13 +0100 Subject: [PATCH 27/51] Documents OperatorNode --- src/trees/OperatorNode.cpp | 25 -------- src/trees/OperatorNode.h | 127 ++++++++++++++++++------------------- 2 files changed, 63 insertions(+), 89 deletions(-) diff --git a/src/trees/OperatorNode.cpp b/src/trees/OperatorNode.cpp index 37f576eac..adcb3e0d1 100644 --- a/src/trees/OperatorNode.cpp +++ b/src/trees/OperatorNode.cpp @@ -42,17 +42,6 @@ void OperatorNode::dealloc() { this->tree->getNodeAllocator().dealloc(sIdx); } -/** - * @brief Calculate one specific component norm of the OperatorNode (TODO: needs to be specified more). - * - * @param[in] i: TODO: deens to be specified - * - * @details OperatorNorms are defined as matrix 2-norms that are expensive to calculate. - * Thus we calculate some cheaper upper bounds for this norm for thresholding. - * First a simple vector norm, then a product of the 1- and infinity-norm. - * (TODO: needs to be more presiced). - * - */ double OperatorNode::calcComponentNorm(int i) const { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); @@ -79,20 +68,6 @@ double OperatorNode::calcComponentNorm(int i) const { return norm; } -/** @brief Matrix elements of the non-standard form. - * - * @param[in] i: Index enumerating the matrix type in the non-standard form. - * @returns A submatrix of \f$ (k + 1) \times (k + 1) \f$-size from the non-standard form. - * - * @details OperatorNode is uniquely associted with a scale \f$ n \f$ and translation - * \f$ l = -2^n + 1, \ldots, 2^n = 1 \f$. - * The non-standard form \f$ T_n, B_n, C_n, A_n \f$ defines matrices - * \f$ \sigma_l^n, \beta_l^n, \gamma_l^n, \alpha_l^n \f$ for a given pair \f$ (n, l) \f$. - * One of these matrices is returned by the method according to the choice of the index parameter - * \f$ i = 0, 1, 2, 3 \f$, respectively. 
- * For example, \f$ \alpha_l^n = \text{getComponent}(3) \f$. - * - */ MatrixXd OperatorNode::getComponent(int i) { int depth = getDepth(); double prec = getOperTree().getNormPrecision(); diff --git a/src/trees/OperatorNode.h b/src/trees/OperatorNode.h index 406574ee8..842f35432 100644 --- a/src/trees/OperatorNode.h +++ b/src/trees/OperatorNode.h @@ -23,21 +23,6 @@ * */ -/** - * @file OperatorNode.h - * @brief Node type for operator trees (2D non-standard form blocks). - * - * @details - * This header declares @ref mrcpp::OperatorNode, a concrete node type used by - * @ref mrcpp::OperatorTree. It specializes @ref mrcpp::MWNode with spatial - * dimension @c D=2 to store and manage the coefficients of non-standard form - * operator blocks (typically the corner blocks \f$T, A, B, C\f$). - * - * The class offers typed accessors to the owning @ref mrcpp::OperatorTree and - * to parent/children nodes, and overrides a few hooks related to allocation, - * child generation, and norm computation that are specific to operator nodes. - */ - #pragma once #include // for Eigen::MatrixXd @@ -49,54 +34,51 @@ namespace mrcpp { /** * @class OperatorNode - * @brief Leaf/branch node used inside @ref OperatorTree (fixed to 2D). + * + * @brief Node of an @ref OperatorTree. * * @details - * An OperatorNode stores the \f$2^D(k+1)^D\f$ multiwavelet coefficients for an - * operator block at a given scale/translation and exposes: - * - typed getters for the owning @ref OperatorTree and relatives, - * - overrides for child creation/deletion and coefficient management, - * - an overridden component-norm calculation suitable for operator blocks, and - * - a helper to extract a single component block as an Eigen matrix. + * An operator node is formally a 2D node which stores the coefficients of an operator + * for a given scale and translation. 
The translation in this case corresponds to the difference + * in translation index between input and output nodes of the function to which the operator is applied. + * The scaling and wavelet structure of the nodes encodes which component of the operator the coefficients + * refer to (T scaling-scaling, C scaling-wavelet, B wavelet-scaling, A wavelet-wavelet) according to the non-standard form. + * + * The class offers typed accessors to the owning @ref mrcpp::OperatorTree and + * to parent/children nodes, and overrides a few hooks related to allocation, + * child generation, and norm computation that are specific to operator nodes. * * @note The spatial dimension is fixed to @c D=2 for operator trees. */ class OperatorNode final : public MWNode<2> { public: - /** @name Typed accessors to tree/relatives */ - ///@{ - /// @return Owning operator tree (non-const). - OperatorTree &getOperTree() { return static_cast(*this->tree); } - /// @return Parent node as @ref OperatorNode (non-const). - OperatorNode &getOperParent() { return static_cast(*this->parent); } - /// @return Child @p i as @ref OperatorNode (non-const). - OperatorNode &getOperChild(int i) { return static_cast(*this->children[i]); } - - /// @return Owning operator tree (const). - const OperatorTree &getOperTree() const { return static_cast(*this->tree); } - /// @return Parent node as @ref OperatorNode (const). - const OperatorNode &getOperParent() const { return static_cast(*this->parent); } - /// @return Child @p i as @ref OperatorNode (const). - const OperatorNode &getOperChild(int i) const { return static_cast(*this->children[i]); } - ///@} + OperatorTree &getOperTree() { return static_cast(*this->tree); } ///< @return Owning operator tree + OperatorNode &getOperParent() { return static_cast(*this->parent); } ///< @return Parent node + OperatorNode &getOperChild(int i) { return static_cast(*this->children[i]); } ///< @return Child @p i as @ref OperatorNode (non-const). 
+ const OperatorTree &getOperTree() const { return static_cast(*this->tree); } ///< @return Owning operator tree + const OperatorNode &getOperParent() const { return static_cast(*this->parent); } ///< @return Parent node as @ref OperatorNode (const) + const OperatorNode &getOperChild(int i) const { return static_cast(*this->children[i]); } ///< @return Child @p i as @ref OperatorNode (const). /** - * @brief Create children nodes. + * @brief Create child nodes + * * @param coefs If @c true, also allocate coefficient storage for each child. + * * @details Overrides @ref MWNode::createChildren to honor operator-specific * allocation and bookkeeping. */ void createChildren(bool coefs) override; /** - * @brief Generate children on demand (without necessarily allocating coefs). - * @details Overrides @ref MWNode::genChildren to implement the operator-tree - * generation policy. + * @brief Generate child nodes and populate their coefficients. + * + * @details Overrides @ref MWNode::genChildren to implement the operator-tree generation policy. */ void genChildren() override; /** - * @brief Delete all children nodes (and their coefficient storage). + * @brief Delete all child nodes (and their coefficient storage) + * * @details Overrides @ref MWNode::deleteChildren with operator-specific cleanup. */ void deleteChildren() override; @@ -105,25 +87,33 @@ class OperatorNode final : public MWNode<2> { friend class NodeAllocator<2>; protected: - /** @name Construction and assignment */ - ///@{ - /// Default constructor (used by allocators). + /** + * @brief Default constructor (used by allocators). + */ OperatorNode() : MWNode<2>(){}; - /// Root-node constructor (called by the owning tree). + /** + * @brief Root node constructor (used by allocators). + */ OperatorNode(MWTree<2> *tree, int rIdx) : MWNode<2>(tree, rIdx){}; - /// Child-node constructor (called when splitting a parent). + /** + * @brief Child node constructor (used by allocators). 
+ */ OperatorNode(MWNode<2> *parent, int cIdx) : MWNode<2>(parent, cIdx){}; - /// Non-copyable. + /** + * @brief Operator nodes cannot be copied. + */ OperatorNode(const OperatorNode &node) = delete; - /// Non-assignable. + /** + * @brief Operator nodes cannot be assigned. + */ OperatorNode &operator=(const OperatorNode &node) = delete; - /// Virtual destructor. + /** + * @brief Default destructor. + */ ~OperatorNode() = default; - ///@} - /** * @brief Release coefficient storage (if owned) and reset node state. * @details Overrides @ref MWNode::dealloc to ensure operator-node invariants. @@ -131,21 +121,30 @@ class OperatorNode final : public MWNode<2> { void dealloc() override; /** - * @brief Compute squared norm of a specific component (scaling/wavelet block). - * @param i Component index in \f$[0, 2^D)\f$. - * @return Squared L2 norm of the requested component. - * @details Overrides @ref MWNode::calcComponentNorm to match the operator - * interpretation of components (e.g., corner blocks in non-standard form). + * @brief Calculate the norm of a given component of the OperatorNode + * + * @param[in] i: component index in [0, 3] (2D operator node has 4 components) + * + * @details OperatorNorms are defined as matrix 2-norms that are expensive to calculate. + * Thus we calculate some cheaper upper bounds for this norm for thresholding. + * First a simple vector norm, then a product of the 1- and infinity-norm. */ double calcComponentNorm(int i) const override; - /** - * @brief Extract a component block as a dense matrix. - * @param i Component index in \f$[0, 2^D)\f$. - * @return A matrix view/copy of the component coefficients (size \f$(k+1)\times(k+1)\f$). - * @note Primarily intended for diagnostics and I/O. + /** @brief Gets a given component of the non-standard form. + * + * @param[in] i: Index enumerating the non-standard form component (A, B, C, T). + * @returns The requested \f$ (k + 1) \times (k + 1) \f$-size matrix of the non-standard form. 
+ * + * @details OperatorNode is uniquely associated with a scale \f$ n \f$ and translation + * \f$ l = -2^n + 1, \ldots, 2^n - 1 \f$. + * The non-standard form \f$ T_n, B_n, C_n, A_n \f$ defines matrices + * \f$ \sigma_l^n, \beta_l^n, \gamma_l^n, \alpha_l^n \f$ for a given pair \f$ (n, l) \f$. + * One of these matrices is returned by the method according to the choice of the index parameter + * \f$ i = 0, 1, 2, 3 \f$, respectively. + * For example, \f$ \alpha_l^n = \text{getComponent}(3) \f$. */ Eigen::MatrixXd getComponent(int i); }; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp From 6fd2a3e0173105ff0e9ad618544343299dc5f020 Mon Sep 17 00:00:00 2001 From: Jacopo Date: Thu, 6 Nov 2025 16:42:39 +0100 Subject: [PATCH 28/51] FuntionTree.h FunctionTree.cpp and FunctionTreeVector.h (yes, for the vector it was all in the header) --- src/trees/FunctionTree.cpp | 181 +++---------------- src/trees/FunctionTree.h | 320 +++++++++++++++++++++++---------- src/trees/FunctionTreeVector.h | 87 ++++----- 3 files changed, 287 insertions(+), 301 deletions(-) diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp index 72ca68e39..39a73b506 100644 --- a/src/trees/FunctionTree.cpp +++ b/src/trees/FunctionTree.cpp @@ -42,15 +42,7 @@ using namespace Eigen; namespace mrcpp { -/** @returns New FunctionTree object - * - * @param[in] mra: Which MRA the function is defined - * @param[in] sh_mem: Pointer to MPI shared memory block - * - * @details Constructs an uninitialized tree, containing only empty root nodes. - * If a shared memory pointer is provided the tree will be allocated in this - * shared memory window, otherwise it will be local to each MPI process. 
- */ + template FunctionTree::FunctionTree(const MultiResolutionAnalysis &mra, SharedMemory *sh_mem, const std::string &name) : MWTree(mra, name) @@ -107,11 +99,7 @@ template FunctionTree::~FunctionTree() { if (this->getNNodes() > 0) this->deleteRootNodes(); } -/** @brief Read a previously stored tree assuming text/ASCII format, - * in a representation using MADNESS conventions for n, l and index order. - * @param[in] file: File name - * @note This tree must have the exact same MRA the one that was saved(?) - */ + template void FunctionTree::loadTreeTXT(const std::string &file) { std::ifstream in(file); int NDIM, k; @@ -285,10 +273,7 @@ template void FunctionTree::loadTreeTXT(const std::str this->calcSquareNorm(); } -/** @brief Write the tree to disk in text/ASCII format in a representation - * using MADNESS conventions for n, l and index order. - * @param[in] file: File name - */ + template void FunctionTree::saveTreeTXT(const std::string &fname) { int nRoots = this->getRootBox().size(); MWNode **roots = this->getRootBox().getNodes(); @@ -357,9 +342,8 @@ template void FunctionTree::saveTreeTXT(const std::str } out.close(); } -/** @brief Write the tree structure to disk, for later use - * @param[in] file: File name, will get ".tree" extension - */ + + template void FunctionTree::saveTree(const std::string &file) { Timer t1; @@ -383,10 +367,7 @@ template void FunctionTree::saveTree(const std::string print::time(10, "Time write", t1); } -/** @brief Read a previously stored tree structure from disk - * @param[in] file: File name, will get ".tree" extension - * @note This tree must have the exact same MRA the one that was saved - */ + template void FunctionTree::loadTree(const std::string &file) { Timer t1; @@ -438,7 +419,7 @@ template T FunctionTree::integrate() const { return jacobian * result; } -/** @returns Integral of a representable function over the grid given by the tree */ + template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M 
&f) { // traverse tree, and treat end nodes only std::vector *> stack; // node from this @@ -473,20 +454,7 @@ template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunct return jacobian * result; } -/** @returns Function value in a point, out of bounds returns zero - * - * @param[in] r: Cartesian coordinate - * - * @note This will only evaluate the _scaling_ part of the - * leaf nodes in the tree, which means that the function - * values will not be fully accurate. - * This is done to allow a fast and const function evaluation - * that can be done in OMP parallel. If you want to include - * also the _final_ wavelet part you can call the corresponding - * evalf_precise function, _or_ you can manually extend - * the MW grid by one level before evaluating, using - * `mrcpp::refine_grid(tree, 1)` - */ + template T FunctionTree::evalf(const Coord &r) const { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -511,16 +479,7 @@ template T FunctionTree::evalf(const Coord &r) cons return coef * result; } -/** @returns Function value in a point, out of bounds returns zero - * - * @param[in] r: Cartesian coordinate - * - * @note This will evaluate the _true_ value (scaling + wavelet) of the - * leaf nodes in the tree. This requires an on-the-fly MW transform - * on the node which makes this function slow and non-const. If you - * need fast evaluation, use refine_grid(tree, 1) first, and then - * evalf. - */ + template T FunctionTree::evalf_precise(const Coord &r) { // Handle potential scaling const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors(); @@ -546,12 +505,7 @@ template T FunctionTree::evalf_precise(const Coord return coef * result; } -/** @brief In-place square of MW function representations, fixed grid - * - * @details The leaf node point values of the function will be in-place - * squared, no grid refinement. 
- * - */ + template void FunctionTree::square() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -575,14 +529,7 @@ template void FunctionTree::square() { this->calcSquareNorm(); } -/** @brief In-place power of MW function representations, fixed grid - * - * @param[in] p: Numerical power - * - * @details The leaf node point values of the function will be in-place raised - * to the given power, no grid refinement. - * - */ + template void FunctionTree::power(double p) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -606,14 +553,7 @@ template void FunctionTree::power(double p) { this->calcSquareNorm(); } -/** @brief In-place multiplication by a scalar, fixed grid - * - * @param[in] c: Scalar coefficient - * - * @details The leaf node point values of the function will be - * in-place multiplied by the given coefficient, no grid refinement. - * - */ + template void FunctionTree::rescale(T c) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads()) @@ -633,7 +573,7 @@ template void FunctionTree::rescale(T c) { this->calcSquareNorm(); } -/** @brief In-place rescaling by a function norm \f$ ||f||^{-1} \f$, fixed grid */ + template void FunctionTree::normalize() { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); double sq_norm = this->getSquareNorm(); @@ -641,15 +581,7 @@ template void FunctionTree::normalize() { this->rescale(1.0 / std::sqrt(sq_norm)); } -/** @brief In-place addition with MW function representations, fixed grid - * - * @param[in] c: Numerical coefficient of input function - * @param[in] inp: Input function to add - * - * @details The input function will be added in-place on the current grid of - * the function, i.e. no further grid refinement. 
- * - */ + template void FunctionTree::add(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -670,15 +602,7 @@ template void FunctionTree::add(T c, FunctionTreecalcSquareNorm(); inp.deleteGenerated(); } -/** @brief In-place addition with MW function representations, fixed grid - * - * @param[in] c: Numerical coefficient of input function - * @param[in] inp: Input function to add - * - * @details The input function will be added to the union of the current grid of - * and input the function grid. - * - */ + template void FunctionTree::add_inplace(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -701,15 +625,7 @@ template void FunctionTree::add_inplace(T c, FunctionT inp.deleteGenerated(); } -/** @brief In-place addition of absolute values of MW function representations - * - * @param[in] c Numerical coefficient of input function - * @param[in] inp Input function to add - * - * The absolute value of input function will be added in-place on the current grid of the output - * function, i.e. no further grid refinement. 
- * - */ + template void FunctionTree::absadd(T c, FunctionTree &inp) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads()) @@ -736,15 +652,7 @@ template void FunctionTree::absadd(T c, FunctionTree void FunctionTree::multiply(T c, FunctionTree &inp) { if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA"); if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); @@ -772,14 +680,7 @@ template void FunctionTree::multiply(T c, FunctionTree inp.deleteGenerated(); } -/** @brief In-place mapping with a predefined function f(x), fixed grid - * - * @param[in] fmap: mapping function - * - * @details The input function will be mapped in-place on the current grid - * of the function, i.e. no further grid refinement. - * - */ + template void FunctionTree::map(FMap fmap) { if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared"); { @@ -839,20 +740,7 @@ template std::ostream &FunctionTree::print(std::ostrea return MWTree::print(o); } -/** @brief Reduce the precision of the tree by deleting nodes - * - * @param prec: New precision criterion - * @param splitFac: Splitting factor: 1, 2 or 3 - * @param absPrec: Use absolute precision - * - * @details This will run the tree building algorithm in "reverse", starting - * from the leaf nodes, and perform split checks on each node based on the given - * precision and the local wavelet norm. - * - * @note The splitting factor appears in the threshold for the wavelet norm as - * \f$ ||w|| < 2^{-sn/2} ||f|| \epsilon \f$. In principal, `s` should be equal - * to the dimension; in practice, it is set to `s=1`. 
- */ + template int FunctionTree::crop(double prec, double splitFac, bool absPrec) { for (int i = 0; i < this->rootBox.size(); i++) { MWNode &root = this->getRootMWNode(i); @@ -864,10 +752,7 @@ template int FunctionTree::crop(double prec, double sp return nChunks; } -/** Traverse tree using BFS and returns an array with the address of the coefs. - * Also returns an array with the corresponding indices defined as the - * values of serialIx in refTree, and an array with the indices of the parent. - * Set index -1 for nodes that are not present in refTree */ + template void FunctionTree::makeCoeffVector(std::vector &coefs, std::vector &indices, @@ -918,10 +803,7 @@ void FunctionTree::makeCoeffVector(std::vector &coefs, } } -/** Traverse tree using DFS and reconstruct it using node info from the - * reference tree and a list of coefficients. - * It is the reference tree (refTree) which is traversed, but one does not descend - * into children if the norm of the tree is smaller than absPrec. */ + template void FunctionTree::makeTreefromCoeff(MWTree &refTree, std::vector coefpVec, std::map &ix2coef, double absPrec, const std::string &mode) { std::vector *> stack; std::map *> ix2node; // gives the nodes in this tree for a given ix @@ -998,9 +880,7 @@ template void FunctionTree::makeTreefromCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -1039,7 +919,7 @@ template void FunctionTree::appendTreeNoCoeff(MWTree void FunctionTree::appendTreeNoCoeff(MWTree &inTree) { std::vector *> instack; // node from inTree std::vector *> thisstack; // node from this Tree @@ -1131,17 +1011,13 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() { return this->NodeIndex2serialIx.size(); } -/** @brief Deep copy of tree - * - * @details Exact copy without any binding between old and new tree - */ + template void FunctionTree::deep_copy(FunctionTree *out) { 
copy_grid(*out, *this); copy_func(*out, *this); } -/** @brief New tree with only real part - */ + template FunctionTree *FunctionTree::Real() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); @@ -1165,8 +1041,7 @@ template FunctionTree *FunctionTree::Real() return out; } -/** @brief New tree with only imaginary part - */ + template FunctionTree *FunctionTree::Imag() { FunctionTree *out = new FunctionTree(this->getMRA(), this->getName()); out->setZero(); @@ -1188,10 +1063,6 @@ template FunctionTree *FunctionTree::Imag() return out; } -/* - * From real to complex tree. Copy everything, and convert double to ComplexDouble for the coefficents. - * Should use a deep_copy if generalized in the future. - */ template <> void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&outTree) { delete outTree; diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h index 0b4da8789..792313bf8 100644 --- a/src/trees/FunctionTree.h +++ b/src/trees/FunctionTree.h @@ -33,24 +33,10 @@ namespace mrcpp { -/** - * @file FunctionTree.h - * @brief Declaration of the FunctionTree class template. - * - * @details - * A FunctionTree represents a scalar field on a multiresolution (MW) grid. - * It owns the MW-node topology, coefficient storage, and basic utilities - * for evaluation, integration, and in–place algebra on the represented - * function. Construction initializes the tree structure (root nodes and - * allocator) according to a given MultiResolutionAnalysis (MRA), but does - * not compute coefficients; initially the function is undefined and the - * tree's square norm is negative to signal this state. - */ - /** * @class FunctionTree * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Scalar type (double or ComplexDouble). + * @tparam T Coefficient type (e.g. double, ComplexDouble). * @brief Function representation in the MW basis with adaptive topology. 
* * @details @@ -64,46 +50,51 @@ class FunctionTree final : public MWTree, public RepresentableFunction &mra, const std::string &name) : FunctionTree(mra, nullptr, name) {} /** * @brief Construct a tree bound to an MRA with optional shared memory and name. - * @param mra Multi-resolution analysis (domain and basis). - * @param sh_mem Optional shared-memory arena for coefficient storage. - * @param name Optional textual name of the function. + * @param[in] mra: Which MRA the function is defined + * @param[in] sh_mem: Pointer to MPI shared memory block + * @param name Optional textual name of the function + * + * @returns New FunctionTree object * - * @details Root nodes and allocators are created. The function is - * undefined until coefficients are computed by a builder/calculator. + * @details Constructs an uninitialized tree, containing only empty root nodes. + * If a shared memory pointer is provided the tree will be allocated in this + * shared memory window, otherwise it will be local to each MPI process. */ FunctionTree(const MultiResolutionAnalysis &mra, SharedMemory *sh_mem = nullptr, const std::string &name = "nn"); - /// Deleted copy semantics (trees are heavy objects). + FunctionTree(const FunctionTree &tree) = delete; FunctionTree &operator=(const FunctionTree &tree) = delete; - /// Virtual destructor. + /// FunctionTree destructor ~FunctionTree() override; /** - * @brief Integrate the represented function over the world domain. - * @return Integral value. + * @brief Integrate the represented function over the MRA box + * @returns Integral of the function over the entire computational domain */ T integrate() const; /** - * @brief Integrate only end nodes against a provided analytic function. - * @param f RepresentableFunction used as integrand partner. - * @return Integral value as double. 
+ * @brief Integrate a representable function using this tree's grid + * @param[in] f RepresentableFunction used as integrand partner + * @returns Integral of the representable function * - * @details Useful for quadrature-like post-processing on the current grid. + * @details You can evaluate the integral of any representable function + * over the most refined scale of the 'this' FunctionTree's grid. */ double integrateEndNodes(RepresentableFunction_M &f); @@ -114,88 +105,166 @@ class FunctionTree final : public MWTree, public RepresentableFunction &r); + /** - * @brief Evaluate the function at a given coordinate. - * @param r Physical coordinate. - * @return Function value. + * @brief Fast but approximate evaluation of this FunctionTree at a given coordinate. + * @param[in] r: Cartesian coordinate to be evaluated + * @return Approximate Function value + * + * @note This will only evaluate the _scaling_ part of the + * leaf nodes in the tree, which means that the function + * values will not be fully accurate. + * This is done to allow a fast and const function evaluation + * that can be done in OMP parallel. If you want to include + * also the _final_ wavelet part you can call the corresponding + * evalf_precise function, _or_ you can manually extend + * the MW grid by one level before evaluating, using + * `mrcpp::refine_grid(tree, 1)` */ T evalf(const Coord &r) const override; + /** + * @brief Slow but high-accuracy evaluation of the function at a given coordinate + * @param[in] r: Cartesian coordinate to be evaluated + * @returns Exact value of this FunctionTree in the point r + * @note This will evaluate the _true_ value (scaling + wavelet) of the + * leaf nodes in the tree. This requires an on-the-fly MW transform + * on the node which makes this function slow and non-const. If you + * need fast evaluation, use refine_grid(tree, 1) first, and then + * evalf.
+ */ + T evalf_precise(const Coord &r); + /** - * @brief Number of generated (non-root) nodes currently alive. - * @return Count of nodes managed by the generated-node allocator. + * @brief Number of generated (non-root) nodes currently alive + * @return Count of nodes (managed by the generated-node allocator) */ int getNGenNodes() const { return getGenNodeAllocator().getNNodes(); } /** - * @brief Collect values on end nodes into a dense vector. - * @param[out] data Column vector sized to the total number of end-node values. + * @brief Collect values on end nodes into a dense Eigen type vector + * @param[out] data Column vector sized to the total number of end-node values */ void getEndValues(Eigen::Matrix &data); /** - * @brief Set end-node values from a dense vector. - * @param[in] data Column vector holding values; its size must match. + * @brief Set end-node values as the components of the dense Eigen type vector + * @param[in] data Column vector holding values; its size must match */ void setEndValues(Eigen::Matrix &data); /** - * @brief Persist the tree (binary). + * @brief Write the tree to disk in text/ASCII format in a representation + * using MADNESS conventions for n, l and index order. * @param file Output filename. */ - void saveTree(const std::string &file); + void saveTreeTXT(const std::string &file); /** - * @brief Persist the tree (text). - * @param file Output filename. + * @brief Write the tree structure to disk, for later use + * @param[in] file: File name, will get ".tree" extension */ - void saveTreeTXT(const std::string &file); + void saveTree(const std::string &file); /** - * @brief Load the tree (binary). - * @param file Input filename. + * @brief Read a previously stored tree structure from disk + * @param[in] file File name, will get ".tree" extension + * @note This tree must have the exact same MRA as the one that was saved */ void loadTree(const std::string &file); /** - * @brief Load the tree (text). - * @param file Input filename.
+ * @brief Read a previously stored tree assuming text/ASCII format, using MADNESS conventions (n, l and index order) + * @param[in] file Input filename + * @note Make sure that the MRA of this tree matches the one used to create the file */ void loadTreeTXT(const std::string &file); - /** @name In-place algebra on the represented function */ - ///@{ - - /// Square the function pointwise. - void square(); + /** @brief In-place square of MW function representations, fixed grid + * + * @details The leaf node point values of the function will be in-place + * squared, no grid refinement. + * + */ + void square(); /// Raise the function to power p pointwise. + /** + * @brief In-place power of MW function representations, fixed grid + * + * @param p Exponent + * + * @details The leaf node point values of the function will be in-place raised + * to the given power, no grid refinement. + */ void power(double p); - - /// Multiply the function by a scalar c. - void rescale(T c); - - /// Normalize the function to unit norm (when meaningful). - void normalize(); - - /// Compute this := this + c * inp (alloc/refine as needed). + /** + * @brief In-place multiplication by a scalar, fixed grid + * + * @param c Scaling factor (with the same data type as the coefficients) + * + * @details The leaf node point values of the function will be + * in-place multiplied by the given coefficient, no grid refinement. + */ + void rescale(T c); + void normalize(); ///< In-place rescaling by a function norm \f$ ||f||^{-1} \f$, fixed grid + + /** + * @brief this + inp (fixed grid) + * + * @param c: Numerical coefficient of input function + * @param[in] inp: Input function to be added on this FunctionTree + * + * @details The input function will be added in-place on the current grid of + * the function, i.e. no further grid refinement. Addition done within the MW representations. + */ void add(T c, FunctionTree &inp); - /// In-place addition on the existing grid only. 
+ /** + * @brief this + inp (uniting the two grids) + + * @param c: Numerical coefficient of input function + * @param[in] inp: Input function to be added on this FunctionTree + * + * @details The input function will be added to the union of the current grid + * and the input function grid. Addition done within the MW representations. + */ void add_inplace(T c, FunctionTree &inp); - /// Compute this := this + c * |inp| (absolute values). + /** + * @brief this + abs(inp) (fixed grid) + + * @param c: Numerical coefficient of input function + * @param[in] inp: Input function to be added on this FunctionTree + * + * @details The absolute value of input function will be added in-place on the current grid of the output + * function, i.e. no further grid refinement. Addition done within the MW representations. + */ void absadd(T c, FunctionTree &inp); - /// Compute this := this * (c * inp) pointwise. + /** + * @brief this * (c * inp), fixed grid + * @param c: Numerical coefficient of input function + * @param[in] inp: Input function to be multiplied with this FunctionTree + * + * @details The input function will be multiplied in-place on the current grid + * of the function, i.e. no further grid refinement. + */ void multiply(T c, FunctionTree &inp); - /// Apply a scalar-to-scalar map pointwise. + /** + * @brief In-place mapping with a predefined function f(x), fixed grid + * + * @param[in] fmap: mapping function + * + * @details The input function will be mapped in-place on the current grid + * of the function, i.e. no further grid refinement. + */ void map(FMap fmap); - ///@} + + /** * @brief Number of memory chunks reserved for nodes. @@ -209,34 +278,41 @@ class FunctionTree final : public MWTree, public RepresentableFunctiongetNodeAllocator().getNChunksUsed(); } - /** - * @brief Prune small contributions and optionally refine slightly. - * @param prec Threshold used for pruning. - * @param splitFac Optional split factor for balancing.
- * @param absPrec If true, use absolute thresholding. - * @return Number of nodes removed or affected. + /** @brief Reduce the precision of the tree by deleting nodes + * + * @param prec: New precision criterion + * @param splitFac: Splitting factor: 1, 2 or 3 + * @param absPrec: Use absolute precision + * + * @details This will run the tree building algorithm in "reverse", starting + * from the leaf nodes, and perform split checks on each node based on the given + * precision and the local wavelet norm. + * + * @note The splitting factor appears in the threshold for the wavelet norm as + * \f$ ||w|| < 2^{-sn/2} ||f|| \epsilon \f$. In principle, `s` should be equal + * to the dimension; in practice, it is set to `s=1`. */ int crop(double prec, double splitFac = 1.0, bool absPrec = true); /** @name Typed access to nodes */ ///@{ - /// Get i-th end node cast to FunctionNode (non-const). + /// @return i-th end node cast to FunctionNode FunctionNode &getEndFuncNode(int i) { return static_cast &>(this->getEndMWNode(i)); } - /// Get i-th root node cast to FunctionNode (non-const). + /// @return i-th root node cast to FunctionNode FunctionNode &getRootFuncNode(int i) { return static_cast &>(this->rootBox.getNode(i)); } - /// Allocator for generated nodes (non-const). + /// @return Allocator for generated nodes NodeAllocator &getGenNodeAllocator() { return *this->genNodeAllocator_p; } - /// Allocator for generated nodes (const). + /// @return Allocator for generated nodes const NodeAllocator &getGenNodeAllocator() const { return *this->genNodeAllocator_p; } - /// Get i-th end node cast to FunctionNode (const). + /// @return i-th end node cast to FunctionNode const FunctionNode &getEndFuncNode(int i) const { return static_cast &>(this->getEndMWNode(i)); } - /// Get i-th root node cast to FunctionNode (const).
+ /// @return i-th root node cast to FunctionNode const FunctionNode &getRootFuncNode(int i) const { return static_cast &>(this->rootBox.getNode(i)); } ///@} @@ -244,7 +320,7 @@ class FunctionTree final : public MWTree, public RepresentableFunction, public RepresentableFunction, public RepresentableFunction &coefs, std::vector &indices, @@ -277,11 +359,16 @@ class FunctionTree final : public MWTree, public RepresentableFunction &refTree, std::vector coefpVec, @@ -289,50 +376,91 @@ class FunctionTree final : public MWTree, public RepresentableFunction &inTree); - /// @overload + /** + * @brief Append topology from another tree with real-type coefficients (no coefficients copied) + * @param[in] inTree Input tree. + * + * @note It will append only the nodes structure, without copying any coefficient data, + * therefore it won't matter the datatype of the input tree for the result. + */ void appendTreeNoCoeff(MWTree &inTree); /** - * @brief Copy topology and coefficients from a real-valued tree. - * @param inTree Source tree. + * @brief Copy topology AND coefficients from a real-valued tree + * @param[in] inTree Source tree. + * + * @note The copy process is a shallow copy for the coefficients, i.e. + * the new tree nodes will point to the same coefficient blocks as the input tree. + * Therefore, modifying the coefficients in one tree will affect the other. */ void CopyTree(FunctionTree &inTree); /** - * @brief Move all node coefficients to a bank and remove them from nodes. + * @brief Move all node coefficients to a bank and remove them from nodes * @return Number of nodes affected. */ int saveNodesAndRmCoeff(); /** - * @brief Deep-copy entire tree into out (topology and data). - * @param out Destination tree pointer (must be non-null and compatible). 
+ * @brief Deep-copy entire tree into out (topology and data) + * @param[out] out Destination tree pointer (must be non-null and compatible) + * + * @details Exact copy without any binding between old and new tree */ void deep_copy(FunctionTree *out); /** - * @brief Extract real part into a newly allocated real tree. - * @return Pointer to a new FunctionTree of type double. + * @brief Extract real part into a newly allocated real tree + * @return Pointer to a new FunctionTree of type double */ FunctionTree *Real(); /** - * @brief Extract imaginary part into a newly allocated real tree. - * @return Pointer to a new FunctionTree of type double. + * @brief Extract imaginary part into a newly allocated real tree + * @return Pointer to a new FunctionTree of type double */ FunctionTree *Imag(); /** @name Real/complex conversion helpers */ ///@{ + /** + * @brief Deep-copy this tree into a complex-valued tree. + * + * @param[out] out Destination tree pointer (must be non-null). + * @details Exact copy into a complex tree, with imaginary parts set to zero + */ void CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&out); + /** + * @brief Deep-copy this tree into a complex-valued tree. + * + * @param[out] out Destination tree pointer (must be non-null). + * @details Exact copy into a complex tree, with imaginary parts set to zero + */ void CopyTreeToComplex(FunctionTree<2, ComplexDouble> *&out); + /** + * @brief Deep-copy this tree into a complex-valued tree. + * + * @param[out] out Destination tree pointer (must be non-null). + * @details Exact copy into a complex tree, with imaginary parts set to zero + */ void CopyTreeToComplex(FunctionTree<1, ComplexDouble> *&out); + + /** + * @brief Deep-copy this tree into a real-valued tree. + * + * @param[out] out Destination tree pointer (must be non-null). 
+ * @details Exact copy into a real tree, taking only the real parts + */ void CopyTreeToReal(FunctionTree<3, double> *&out); // for testing ///@} diff --git a/src/trees/FunctionTreeVector.h b/src/trees/FunctionTreeVector.h index 332867d46..675def8e6 100644 --- a/src/trees/FunctionTreeVector.h +++ b/src/trees/FunctionTreeVector.h @@ -32,22 +32,12 @@ namespace mrcpp { -/** - * @file FunctionTreeVector.h - * @brief Lightweight helpers for working with collections of FunctionTree objects. - * - * @details - * Many high-level algorithms (addition, multiplication, dot products, etc.) - * operate on *lists* of trees paired with a numeric coefficient. This header - * provides two aliases for such lists and a few utility functions to manage - * them without introducing additional container classes. - */ /** * @brief Alias for a weighted FunctionTree pointer. * - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Scalar type of the trees (defaults to double). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * * @details * The tuple layout is: @@ -62,23 +52,15 @@ using CoefsFunctionTree = std::tuple *>; /** * @brief Alias for a vector of weighted FunctionTree pointers. * - * @tparam D Spatial dimension. - * @tparam T Scalar type (defaults to double). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) */ template using FunctionTreeVector = std::vector>; -/** - * @brief Remove all entries from a FunctionTreeVector. - * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in,out] fs Vector to clear. - * @param[in] dealloc If true, delete each stored FunctionTree pointer - * before clearing the vector. - * - * @note When @p dealloc is false (the default), this function does not - * take ownership of the pointers and will not delete them. 
+/** @brief Remove all entries in the vector + * @param[in] fs: Vector to clear + * @param[in] dealloc: Option to free FunctionTree pointer before clearing */ template void clear(FunctionTreeVector &fs, bool dealloc = false) { @@ -95,10 +77,11 @@ void clear(FunctionTreeVector &fs, bool dealloc = false) { /** * @brief Compute the total number of nodes across all trees in the vector. * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in] fs Vector to inspect. - * @return Sum of @c getNNodes() over all non-null trees. + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) + * @param[in] fs Vector to fetch from + * + * @returns Total number of nodes of all trees in the vector */ template int get_n_nodes(const FunctionTreeVector &fs) { @@ -112,11 +95,12 @@ int get_n_nodes(const FunctionTreeVector &fs) { /** * @brief Compute the total size of all trees in the vector (in kilobytes). - * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in] fs Vector to inspect. - * @return Sum of @c getSizeNodes() over all non-null trees. + * + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) + * @param[in] fs Vector to fetch from. + * + * @returns Total size of all trees in the vector, in kB */ template int get_size_nodes(const FunctionTreeVector &fs) { @@ -131,11 +115,12 @@ int get_size_nodes(const FunctionTreeVector &fs) { /** * @brief Access the numeric coefficient at a given position. * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in] fs Vector to access. - * @param[in] i Zero-based position. - * @return The coefficient stored at position @p i. + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. 
double, ComplexDouble) + * @param[in] fs Vector to fetch from + * @param[in] i Position in vector (zero-based) + * + * @returns Numerical coefficient at given position in vector * * @pre @p i must be a valid index in @p fs. */ @@ -147,12 +132,13 @@ T get_coef(const FunctionTreeVector &fs, int i) { /** * @brief Access the FunctionTree at a given position (non-const). * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in] fs Vector to access. - * @param[in] i Zero-based position. - * @return Reference to the @c FunctionTree at position @p i. + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) + * @param[in] fs Vector to fetch from + * @param[in] i Position in vector (zero-based) * + * @return FunctionTree at given position in vector + * * @pre The pointer stored at position @p i must be non-null. */ template @@ -161,14 +147,15 @@ FunctionTree &get_func(FunctionTreeVector &fs, int i) { } /** - * @brief Access the FunctionTree at a given position (const). + * @brief Access the FunctionTree at a given position (const). * - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @param[in] fs Vector to access. - * @param[in] i Zero-based position. - * @return Const reference to the @c FunctionTree at position @p i. + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) + * @param[in] fs Vector to fetch from + * @param[in] i Position in vector (zero-based) * + * @return FunctionTree at given position in vector + * * @pre The pointer stored at position @p i must be non-null.
*/ template From 6a14eedecd0e253004d07c15ff151dadcc955093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Thu, 6 Nov 2025 16:45:16 +0100 Subject: [PATCH 29/51] BoundingBox documented --- src/trees/BoundingBox.cpp | 168 ------------------ src/trees/BoundingBox.h | 353 +++++++++++++++++++++----------------- 2 files changed, 198 insertions(+), 323 deletions(-) diff --git a/src/trees/BoundingBox.cpp b/src/trees/BoundingBox.cpp index ff86abf2e..e0bc63f91 100644 --- a/src/trees/BoundingBox.cpp +++ b/src/trees/BoundingBox.cpp @@ -32,18 +32,6 @@ namespace mrcpp { -/** @brief Constructor for BoundingBox object. - * - * @param[in] box: [lower, upper] bound in all dimensions. - * @returns New BoundingBox object. - * - * @details Creates a box with appropriate root scale and scaling - * factor to fit the given bounds, which applies to _all_ dimensions. - * Root scale is chosen such that the scaling factor becomes 1 < sfac < 2. - * - * Limitations: Box must be _either_ [0,L] _or_ [-L,L], with L a positive integer. - * This is the most general constructor, which will create a world with no periodic boundary conditions. - */ template BoundingBox::BoundingBox(std::array box) { if (box[1] < 1) { MSG_ERROR("Invalid upper bound: " << box[1]); @@ -79,23 +67,6 @@ template BoundingBox::BoundingBox(std::array box) { setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] n: Length scale, default 0. - * @param[in] l: Corner translation, default [0, 0, ...]. - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @param[in] pbc: Periodic boundary conditions, default false. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. The parameter n defines the length scale, which, together with sf, determines the unit length of each side of the boxes by \f$ [2^{-n}]^D \f$. 
- * The parameter l defines the corner translation of the lower corner of the box relative to the world origin. - * The parameter nb defines the number of boxes in each dimension. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. - * This constructor is used for work in periodic systems. - * - */ template BoundingBox::BoundingBox(int n, const std::array &l, const std::array &nb, const std::array &sf, bool pbc) : cornerIndex(n, l) { @@ -105,19 +76,6 @@ BoundingBox::BoundingBox(int n, const std::array &l, const std::array setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] idx: index of the lower corner of the box. - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. - * The parameter idx defines the index of the lower corner of the box relative to the world origin. - * The parameter nb defines the number of boxes in each dimension. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * This constructor creates a world with no periodic boundary conditions. - */ template BoundingBox::BoundingBox(const NodeIndex &idx, const std::array &nb, const std::array &sf) : cornerIndex(idx) { @@ -127,16 +85,6 @@ BoundingBox::BoundingBox(const NodeIndex &idx, const std::array &n setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @param[in] pbc: Periodic boundary conditions, default true. - * - * @details Creates a box with given parameters. 
- * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. - * This construtor is used for work in periodic systems. - */ template BoundingBox::BoundingBox(const std::array &sf, bool pbc) : cornerIndex() { @@ -146,17 +94,6 @@ BoundingBox::BoundingBox(const std::array &sf, bool pbc) setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * @param[in] pbc: Periodic boundary conditions, default true. - * @returns New BoundingBox object. - * - * @details Creates a box with given parameters. - * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. - * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes specific dimensions periodic. - * This is used for work in periodic systems. - */ template BoundingBox::BoundingBox(const std::array &sf, std::array pbc) : cornerIndex() { @@ -166,14 +103,6 @@ BoundingBox::BoundingBox(const std::array &sf, std::array setDerivedParameters(); } -/** @brief Constructor for BoundingBox object. - * - * @param[in] box: Other BoundingBox object. - * @returns New BoundingBox object. - * - * @details Creates a box identical to the input box paramter. - * This constructor uses all parameters from the other BoundingBox to create a new one. - */ template BoundingBox::BoundingBox(const BoundingBox &box) : cornerIndex(box.cornerIndex) { @@ -183,13 +112,6 @@ BoundingBox::BoundingBox(const BoundingBox &box) setDerivedParameters(); } -/** @brief Assignment operator overload for BoundingBox object. - * - * @returns New BoundingBox object. - * @param[in] box: Other BoundingBox object. 
- * - * @details Allocates all parameters in this BoundingBox to be that of the other BoundingBox. - */ template BoundingBox &BoundingBox::operator=(const BoundingBox &box) { if (&box != this) { this->cornerIndex = box.cornerIndex; @@ -201,13 +123,6 @@ template BoundingBox &BoundingBox::operator=(const BoundingBox return *this; } -/** @brief Sets the number of boxes in each dimension. - * - * @param[in] nb: Number of boxes, default [1, 1, ...]. - * - * @details For each dimentions D it sets the number of boxes in that dimension in the nBoxes array and the total amount of boxes in the world in the totBoxes variable. - * This just sets counters for the number of boxes in each dimension. - */ template void BoundingBox::setNBoxes(const std::array &nb) { this->totBoxes = 1; for (int d = 0; d < D; d++) { @@ -216,16 +131,6 @@ template void BoundingBox::setNBoxes(const std::array &nb) { } } -/** @brief Computes and sets all derived parameters. - * - * @details For all parameters that have been initialized in the constructor, - * this function will compute the necessary derived parameters in each dimension. - * The unit length is set to \a sfac \f$ \cdot 2^{-n} \f$ where \a sfac is the scaling factor (default 1.0) and n is the length scale. - * The unit length is the base unit which is used for the size and positioning of the boxes around origin. - * The boxLength is the total length of the box in each dimension, which is the unit length times the number of boxes in that dimension. - * The lowerBound is computed from the index of the lower corner of the box and the unit length. - * The upperBound is computed to be the lower corner plus the total length in that dimension. - */ template void BoundingBox::setDerivedParameters() { assert(this->totBoxes > 0); const NodeIndex &cIdx = this->cornerIndex; @@ -238,12 +143,6 @@ template void BoundingBox::setDerivedParameters() { } } -/** @brief Sets the number of boxes in each dimension. 
- * - * @param[in] sf: Scaling factor, default [1.0, 1.0, ...]. - * - * @details This checks that the sf variable has sane values before assigning it to the member variable scalingFactor. - */ template void BoundingBox::setScalingFactors(const std::array &sf) { assert(this->totBoxes > 0); for (auto &x : sf) @@ -252,36 +151,14 @@ template void BoundingBox::setScalingFactors(const std::array{}) scalingFactor.fill(1.0); } -/** @brief Sets which dimensions are periodic. - * - * @param[in] pbc: Boolean which is used to set all dimension to either periodic or not - * - * @details this fills in the periodic array with the values from the input. - */ template void BoundingBox::setPeriodic(bool pbc) { this->periodic.fill(pbc); } -/** @brief Sets which dimensions are periodic. - * - * @param[in] pbs: D-dimensional array holding boolean values for each dimension. - * - * @details This fills in the periodic array with the values from the input array. - */ template void BoundingBox::setPeriodic(std::array pbc) { this->periodic = pbc; } -/** @brief Fetches a NodeIndex object from a given box index. - * - * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. - * @returns The NodeIndex object of the index given as it is in the Multiresolutoin analysis. - * - * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, - * each of which will be part of a specific node in the tree. These cells are divided adaptivelly. This function returns the NodeIndex - * object of the cell at the lower back corner of the box object indexed by bIdx. - * Specialized for D=1 below - */ template NodeIndex BoundingBox::getNodeIndex(int bIdx) const { assert(bIdx >= 0 and bIdx <= this->totBoxes); std::array l; @@ -300,13 +177,6 @@ template NodeIndex BoundingBox::getNodeIndex(int bIdx) const { return NodeIndex(getScale(), l); } -/** @brief Fetches the index of a box from a given coordinate. 
- * - * @param[in] r: D-dimensional array representaing a coordinate in the simulation box - * @returns The index value of the boxes in the position given as it is in the generated world. - * - * @details Specialized for D=1 below - */ template int BoundingBox::getBoxIndex(Coord r) const { if (this->isPeriodic()) { periodic::coord_manipulation(r, this->getPeriodic()); } @@ -334,15 +204,6 @@ template int BoundingBox::getBoxIndex(Coord r) const { return bIdx; } -/** @brief Fetches the index of a box from a given NodeIndex. - * - * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. - * @returns The index value of the boxes in which the NodeIndex object is mapping to. - * - * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. - * Each node will get its own index value, but will still be part of one of the original boxes of the world. - * Specialized for D=1 below - */ template int BoundingBox::getBoxIndex(NodeIndex nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation(nIdx, this->getPeriodic()); }; @@ -366,13 +227,6 @@ template int BoundingBox::getBoxIndex(NodeIndex nIdx) const { return bIdx; } -/** @brief Prints information about the BoundinBox object. - * - * @param[in] o: Output stream variable which will be used to print the information - * @returns The output stream variable. - * - * @details A function which prints information about the BoundingBox object. - */ template std::ostream &BoundingBox::print(std::ostream &o) const { int oldprec = Printer::setPrecision(5); o << std::fixed; @@ -401,15 +255,6 @@ template std::ostream &BoundingBox::print(std::ostream &o) const { return o; } -/** @brief Fetches a NodeIndex object from a given box index, specialiced for 1-D. - * - * @param[in] bIdx: Box index, the index of the box we want to fetch the cell index from. 
- * @returns The NodeIndex object of the index given as it is in the Multiresolutoin analysis. - * - * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, - * each of which will be part of a specific node in the tree. These cells are divided adaptivelly. This function returns the NodeIndex - * object of the cell at the lower back corner of the box object indexed by bIdx. - */ template <> NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { const NodeIndex<1> &cIdx = this->cornerIndex; int n = cIdx.getScale(); @@ -417,11 +262,6 @@ template <> NodeIndex<1> BoundingBox<1>::getNodeIndex(int bIdx) const { return NodeIndex<1>(n, {l}); } -/** @brief Fetches the index of a box from a given coordinate, specialized for 1D. - * - * @param[in] r: 1-dimensional array representaing a coordinate in the simulation box - * @returns The index value of the boxes in the position given as it is in the generated world. - */ template <> int BoundingBox<1>::getBoxIndex(Coord<1> r) const { if (this->isPeriodic()) { periodic::coord_manipulation<1>(r, this->getPeriodic()); } @@ -435,14 +275,6 @@ template <> int BoundingBox<1>::getBoxIndex(Coord<1> r) const { return static_cast(iint); } -/** @brief Fetches the index of a box from a given NodeIndex specialized for 1-D. - * - * @param[in] nIdx: NodeIndex object, representing the node and its index in the adaptive tree. - * @returns The index value of the boxes in which the NodeIndex object is mapping to. - * - * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. - * Each node will get its own index value, but will still be part of one of the original boxes of the world. 
- */ template <> int BoundingBox<1>::getBoxIndex(NodeIndex<1> nIdx) const { if (this->isPeriodic()) { periodic::index_manipulation<1>(nIdx, this->getPeriodic()); }; diff --git a/src/trees/BoundingBox.h b/src/trees/BoundingBox.h index 768a42c31..84cfa83d8 100644 --- a/src/trees/BoundingBox.h +++ b/src/trees/BoundingBox.h @@ -36,61 +36,49 @@ namespace mrcpp { -/** - * @file BoundingBox.h - * @brief Declaration of the @ref BoundingBox domain descriptor. - * - * @details - * The bounding box defines the computational “world” for multiresolution - * trees. In \(D\) dimensions it is described by: - * - a **corner index** (scale and integer translation), - * - a **count of boxes** per dimension (all on the same scale), - * - a **scaling factor** per dimension (physical unit lengths), - * - and optional **periodic boundary conditions** per dimension. - * - * From these fundamental parameters, derived quantities such as unit - * lengths, total box lengths and physical bounds are computed. - */ - -/** +/** * @class BoundingBox - * @tparam D Spatial dimension (1, 2, or 3). - * @brief Defines the \(D\)-dimensional computational domain (“world”). + * @tparam D Spatial dimension (1, 2, or 3) * - * @details - * The world is a Cartesian grid of equally-sized boxes at a given scale - * \(n\). Each box has edge length \(2^{-n}\) in grid units, optionally - * multiplied by a per-dimension scaling factor to reflect physical units. - * The lower-back corner of the world is given by an integer translation - * at the same scale. Periodicity can be enabled per dimension. + * @brief Class defining the computational domain + * + * @details The computational domain is made up of a collection of D-dimensional + * boxes on a particular length scale \f$ n \f$. The size of each box is then + * \f$ [2^{-n}]^D \f$, i.e. higher scale means smaller boxes, and the scale + * may be negative. 
The number of boxes can be different in each dimension + * \f$ [n_x, n_y, \dots] \f$, but they must all be on the same scale (size). + * Box translations relative to the world origin _must_ be an integer + * multiple of the given scale size \f$ 2^{-n} \f$. */ template class BoundingBox { public: - /** - * @brief Construct a non-periodic world from symmetric or half-open bounds. + /** + * @brief Constructor for BoundingBox object + * @param box [lower, upper] bound in all dimensions * - * @param box Two integers \{lower, upper\}. Supported forms are - * \{0, L\} or \{-L, L\} with \(L>0\). + * @details Creates a box with appropriate root scale and scaling + * factor to fit the given bounds, which applies to _all_ dimensions. + * Root scale is chosen such that the scaling factor becomes 1 < sfac < 2. * - * @details - * Chooses a root scale so that the per-dimension scaling factor - * satisfies \(1 < \text{sfac} < 2\). The same bounds apply to all - * dimensions. Periodicity is disabled. + * @note Limitations: Box must be _either_ [0,L] _or_ [-L,L], with L a positive integer. + * This is the most general constructor, which will create a world with no periodic boundary conditions. */ explicit BoundingBox(std::array box); - /** - * @brief Fully-specified constructor (all dimensions share the same scale). + /** + * @brief Constructor for BoundingBox object + * @param n Length scale, default 0 + * @param l Corner translation, default [0, 0, ...] + * @param nb Number of boxes, default [1, 1, ...] + * @param sf Scaling factor, default [1.0, 1.0, ...] + * @param pbc Periodic boundary conditions, default false * - * @param n Root scale (can be negative). - * @param l Integer translation (corner index) per dimension. - * @param nb Number of boxes per dimension (non-zero positives will be used; zeros mean 1). - * @param sf Scaling factor per dimension (non-positive entries are rejected). - * @param pbc If true, all dimensions are periodic. 
- * - * @details - * This is the most general constructor for rectangular worlds at a single - * multiresolution scale. Periodicity is global (all-or-nothing). + * @details Creates a box with given parameters. The parameter n defines the length scale, which, together with sf, determines the unit length of each side of the boxes by \f$ [2^{-n}]^D \f$. + * The parameter l defines the corner translation of the lower corner of the box relative to the world origin. + * The parameter nb defines the number of boxes in each dimension. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. + * This constructor is used for work in periodic systems. */ explicit BoundingBox(int n = 0, const std::array &l = {}, @@ -98,179 +86,237 @@ template class BoundingBox { const std::array &sf = {}, bool pbc = false); - /** - * @brief Construct from a corner @ref NodeIndex and per-dimension sizes. - * - * @param idx Corner node index (scale and integer translation). - * @param nb Number of boxes per dimension. - * @param sf Scaling factor per dimension. + /** + * @brief Constructor for BoundingBox object + * @param idx index of the lower corner of the box + * @param nb Number of boxes, default [1, 1, ...] + * @param sf Scaling factor, default [1.0, 1.0, ...] * - * @details - * Periodicity is disabled. Useful when the corner is already known - * in multiresolution units. + * @details Creates a box with given parameters + * The parameter idx defines the index of the lower corner of the box relative to the world origin. + * The parameter nb defines the number of boxes in each dimension. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. 
+ * This constructor creates a world with no periodic boundary conditions. */ explicit BoundingBox(const NodeIndex &idx, const std::array &nb = {}, const std::array &sf = {}); - /** - * @brief Construct periodic (all dimensions) world from scaling factors. + /** + * @brief Constructor for BoundingBox object + * @param sf Scaling factor, default [1.0, 1.0, ...] + * @param pbc Periodic boundary conditions, default true * - * @param sf Scaling factor per dimension. - * @param pbc If true, enables periodicity for all dimensions (default true). + * @details Creates a box with given parameters. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes all dimensions periodic. + * This constructor is used for work in periodic systems. */ explicit BoundingBox(const std::array &sf, bool pbc = true); - /** - * @brief Construct world with per-dimension periodicity flags. + /** + * @brief Constructor for BoundingBox object + * @param sf Scaling factor, default [1.0, 1.0, ...] + * @param pbc Periodic boundary conditions, one flag per dimension * - * @param sf Scaling factor per dimension. - * @param pbc Periodicity flags per dimension. + * @details Creates a box with given parameters. + * The parameter sf defines the scaling factor, which determines the box translations around the origin, i.e. the amount of boxes around origin. + * The parameter pbc defines whether the world is periodic or not. In this constructor this value makes specific dimensions periodic. + * This is used for work in periodic systems. */ BoundingBox(const std::array &sf, std::array pbc); - /** - * @brief Copy constructor. + /** + * @brief Copy constructor for BoundingBox object + * @param box Other BoundingBox object + * + * @details Creates a box identical to the input box parameter. 
+ * This constructor uses all parameters from the other BoundingBox to create a new one. */ BoundingBox(const BoundingBox &box); - /** - * @brief Copy assignment. + /** + * @brief Assignment operator overload for BoundingBox object + * @param box Other BoundingBox object + * @details Allocates all parameters in this BoundingBox to be that of the other BoundingBox. + * @return New BoundingBox object */ BoundingBox &operator=(const BoundingBox &box); - /// Defaulted virtual destructor. virtual ~BoundingBox() = default; /** - * @name Equality - * @brief Compare corner and per-dimension box counts. - * @{ + * @brief Equality: same corner index and per-dimension box counts + * @param box Other BoundingBox object + * @return True if equal, false otherwise */ inline bool operator==(const BoundingBox &box) const; + /** + * @brief Inequality: differs in corner index or in any per-dimension box count + * @param box Other BoundingBox object + * @return True if not equal, false otherwise + */ inline bool operator!=(const BoundingBox &box) const; - /// @} - /** - * @brief Convert a world-box index to a @ref NodeIndex at the root scale. - * @param bIdx Linear index of the box within the world. - * @return Corner node index for that box. + /** + * @brief Fetches a NodeIndex object from a given box index + * @param bIdx The index of the box we want to fetch the cell index from * - * @note Specializations provide efficient versions for \(D=1\). + * @details During the adaptive refinement, each original box will contain an increasing number of smaller cells, + * each of which will be part of a specific node in the tree. These cells are divided adaptivelly. This function returns the NodeIndex + * object of the cell at the lower back corner of the box object indexed by bIdx. 
+ * + * @return The NodeIndex object of the index given as it is in the Multiresolutoin analysis + * @note Specialized for D=1 below */ NodeIndex getNodeIndex(int bIdx) const; - /** - * @brief Map a physical coordinate to the enclosing world-box index. - * @param r Physical coordinate (scaled by @ref getScalingFactors()). - * @return Linear index of the box, or -1 if outside and non-periodic. + /** + * @brief Fetches the index of a box from a given coordinate + * @param r D-dimensional array representaing a coordinate in the simulation box + * @return The index value of the boxes in the position given as it is in the generated world + * @note Specialized for D=1 below */ int getBoxIndex(Coord r) const; - /** - * @brief Map a @ref NodeIndex to the enclosing world-box index. - * @param nIdx Node index (possibly at a finer scale). - * @return Linear index of the box, or -1 if outside or at coarser scale. + /** + * @brief Fetches the index of a box from a given NodeIndex + * @param nIdx NodeIndex object, representing the node and its index in the adaptive tree + * + * @details During the multiresolution analysis the boxes will be divided into smaller boxes, which means that each individual box will be part of a specific node in the tree. + * Each node will get its own index value, but will still be part of one of the original boxes of the world. + * + * @return The index value of the boxes in which the NodeIndex object is mapping to + * @note Specialized for D=1 below */ int getBoxIndex(NodeIndex nIdx) const; - /// @name Size and scale queries - /// @{ - int size() const { return this->totBoxes; } ///< Total number of boxes. - int size(int d) const { return this->nBoxes[d]; } ///< Number of boxes along dimension @p d. - int getScale() const { return this->cornerIndex.getScale(); } ///< Root scale \(n\). - /// @} - - /// @name Geometry (per-dimension) - /// @{ - double getScalingFactor(int d) const { return this->scalingFactor[d]; } ///< Physical scaling factor. 
- double getUnitLength(int d) const { return this->unitLengths[d]; } ///< Unit length \(= \text{sfac}\cdot 2^{-n}\). - double getBoxLength(int d) const { return this->boxLengths[d]; } ///< Total world length along @p d. - double getLowerBound(int d) const { return this->lowerBounds[d]; } ///< Physical lower bound. - double getUpperBound(int d) const { return this->upperBounds[d]; } ///< Physical upper bound. - /// @} - - /// @name Periodicity - /// @{ - bool isPeriodic() const { return details::are_any(this->periodic, true); } ///< Any dimension periodic? - const std::array &getPeriodic() const { return this->periodic; } ///< Per-dimension flags. - /// @} - - /// @name Bulk getters - /// @{ - const Coord &getUnitLengths() const { return this->unitLengths; } - const Coord &getBoxLengths() const { return this->boxLengths; } - const Coord &getLowerBounds() const { return this->lowerBounds; } - const Coord &getUpperBounds() const { return this->upperBounds; } - const NodeIndex &getCornerIndex() const { return this->cornerIndex; } - const std::array &getScalingFactors() const { return this->scalingFactor; } - /// @} - + int size() const { return this->totBoxes; } ///< @return Total number of boxes /** - * @brief Pretty-printer (human-readable). + * @param d Dimension index + * @return Number of boxes along dimension @p d */ - friend std::ostream &operator<<(std::ostream &o, const BoundingBox &box) { return box.print(o); } - -protected: - // ---------------- Fundamental parameters ---------------- + int size(int d) const { return this->nBoxes[d]; } + int getScale() const { return this->cornerIndex.getScale(); } ///< @return Root scale \(n\) - NodeIndex cornerIndex; ///< Lower-corner node (scale + integer translation). - std::array nBoxes{}; ///< Number of boxes per dimension. - std::array scalingFactor{}; ///< Physical scaling factors per dimension. - std::array periodic{}; ///< Periodicity flags per dimension. 
+ /** + * @param d Dimension index + * @return Scaling factor to scale this box by along dimension @p d + */ + double getScalingFactor(int d) const { return this->scalingFactor[d]; } + /** + * @param d Dimension index + * @return Unit length along dimension @p d + */ + double getUnitLength(int d) const { return this->unitLengths[d]; } + /** + * @param d Dimension index + * @return Box length along dimension @p d + */ + double getBoxLength(int d) const { return this->boxLengths[d]; } + /** + * @param d Dimension index + * @return Lower bound of this box coordinates along dimension @p d + */ + double getLowerBound(int d) const { return this->lowerBounds[d]; } + /** + * @param d Dimension index + * @return Upper bound of this box coordinates along dimension @p d + */ + double getUpperBound(int d) const { return this->upperBounds[d]; } - // ---------------- Derived parameters ---------------- + bool isPeriodic() const { return details::are_any(this->periodic, true); } ///< @return Is any dimension periodic? + const std::array &getPeriodic() const { return this->periodic; } ///< @return Periodicity flags per dimension - int totBoxes{1}; ///< Product of @ref nBoxes. - Coord unitLengths; ///< Per-dimension unit length (\( \text{sfac}\cdot 2^{-n} \)). - Coord boxLengths; ///< Total world length per dimension. - Coord lowerBounds; ///< Physical lower bounds. - Coord upperBounds; ///< Physical upper bounds. 
+ const Coord &getUnitLengths() const { return this->unitLengths; } ///< @return The unit lengths + const Coord &getBoxLengths() const { return this->boxLengths; } ///< @return The box lengths + const Coord &getLowerBounds() const { return this->lowerBounds; } ///< @return The lower bounds of the coordinates of this box + const Coord &getUpperBounds() const { return this->upperBounds; } ///< @return The upper bounds of the coordinates of this box + const NodeIndex &getCornerIndex() const { return this->cornerIndex; } ///< @return The corner index + const std::array &getScalingFactors() const { return this->scalingFactor; } ///< @return The scaling factors to scale this box by /** - * @brief Set number of boxes per dimension. - * @param nb If an entry is zero, it is treated as one. + * @brief Stream output operator + * @param o Output stream + * @param box BoundingBox object + * @return Reference to output stream */ - void setNBoxes(const std::array &nb = {}); + friend std::ostream &operator<<(std::ostream &o, const BoundingBox &box) { return box.print(o); } - /** - * @brief Compute all derived parameters from fundamentals. +protected: + // Fundamental parameters + NodeIndex cornerIndex; ///< Index defining the lower corner of the box + std::array nBoxes{}; ///< Number of boxes in each dim, last entry total + std::array scalingFactor{}; ///< Scaling factors to scale this box by, per dimension + std::array periodic{}; ///< Sets which dimension has Periodic boundary conditions + + // Derived parameters + + int totBoxes{1}; ///< Number of total boxes + Coord unitLengths; ///< 1/2^initialScale + Coord boxLengths; ///< Total length (unitLength times nBoxes) + Coord lowerBounds; ///< Box lower bound (not real) + Coord upperBounds; ///< Box upper bound (not real) + + /** + * @brief Sets the number of boxes in each dimension + * @param nb Number of boxes, default [1, 1, ...] 
* - * @details - * Uses @ref cornerIndex, @ref nBoxes and @ref scalingFactor to fill - * unit lengths, box lengths and physical bounds. + * @details For each of the D dimensions it sets the number of boxes in that dimension in the nBoxes array and the total amount of boxes in the world in the totBoxes variable. + * This just sets counters for the number of boxes in each dimension. + */ + void setNBoxes(const std::array &nb = {}); + + /** + * @brief Computes and sets all derived parameters + * + * @details For all parameters that have been initialized in the constructor, + * this function will compute the necessary derived parameters in each dimension. + * The unit length is set to \a sfac \f$ \cdot 2^{-n} \f$ where \a sfac is the scaling factor (default 1.0) and n is the length scale. + * The unit length is the base unit which is used for the size and positioning of the boxes around origin. + * The boxLength is the total length of the box in each dimension, which is the unit length times the number of boxes in that dimension. + * The lowerBound is computed from the index of the lower corner of the box and the unit length. + * The upperBound is computed to be the lower corner plus the total length in that dimension. */ void setDerivedParameters(); - /** - * @brief Set scaling factors per dimension, validating positivity. - * @param sf Per-dimension scaling factors. Empty value means all ones. + /** + * @brief Sets the scaling factors in each dimension + * @param sf Scaling factor, default [1.0, 1.0, ...] + * + * @details This checks that the sf variable has sane values before assigning it to the member variable scalingFactor. */ void setScalingFactors(const std::array &sf); - /** - * @brief Set periodicity per dimension. - * @param periodic Flags per dimension. 
+ /** + * @brief Sets which dimensions are periodic + * @param periodic D-dimensional array holding boolean values for each dimension + * + * @details This fills in the periodic array with the values from the input array. */ void setPeriodic(std::array periodic); - /** - * @brief Set global periodicity (all-or-nothing). - * @param periodic If true, all dimensions are periodic. + /** + * @brief Sets whether all dimensions are periodic + * @param periodic Boolean which is used to set all dimensions to either periodic or not + * + * @details This fills the whole periodic array with the input value. */ void setPeriodic(bool periodic); /** - * @brief Print a formatted summary to stream @p o. + * @brief Prints information about the BoundingBox object + * @param o Output stream variable which will be used to print the information + * + * @details A function which prints information about the BoundingBox object. + * + * @return The output stream variable */ std::ostream &print(std::ostream &o) const; }; -// ---------------- Inline comparisons ---------------- +// Inline comparisons -/** - * @brief Equality: same corner index and per-dimension box counts. - */ template bool BoundingBox::operator==(const BoundingBox &box) const { if (getCornerIndex() != box.getCornerIndex()) return false; for (int d = 0; d < D; d++) { @@ -279,9 +325,6 @@ template bool BoundingBox::operator==(const BoundingBox &box) cons return true; } -/** - * @brief Inequality: differs in corner index or in any per-dimension box count. 
- */ template bool BoundingBox::operator!=(const BoundingBox &box) const { if (getCornerIndex() != box.getCornerIndex()) return true; for (int d = 0; d < D; d++) { From efd88b4af2bbfeed31441c3933a19e4282d6c7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Thu, 6 Nov 2025 16:56:15 +0100 Subject: [PATCH 30/51] CornerOperatorTree documented --- src/trees/CornerOperatorTree.cpp | 18 ------- src/trees/CornerOperatorTree.h | 82 +++++++------------------------- 2 files changed, 17 insertions(+), 83 deletions(-) diff --git a/src/trees/CornerOperatorTree.cpp b/src/trees/CornerOperatorTree.cpp index 6de235dd3..9d4fa817c 100644 --- a/src/trees/CornerOperatorTree.cpp +++ b/src/trees/CornerOperatorTree.cpp @@ -32,15 +32,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief Calculates band widths of the non-standard form matrices. - * - * @param[in] prec: Precision used for thresholding - * - * @details It is starting from \f$ l = 2^n \f$ and updating the band width value each time we encounter - * considerable value while keeping decreasing down to \f$ l = 0 \f$, that stands for the distance to the diagonal. - * This procedure is repeated for each matrix \f$ A, B \f$ and \f$ C \f$. - * - */ void CornerOperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -71,15 +62,6 @@ void CornerOperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } -/** @brief Checks if the distance to diagonal is lesser than the operator band width. - * - * @param[in] oTransl: distance to diagonal - * @param[in] o_depth: scaling order - * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. - * - * @returns True if \b oTransl is outside of the corner band (close to diagonal) and False otherwise. 
- * - */ bool CornerOperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { return abs(oTransl) < this->bandWidth->getWidth(o_depth, idx); } diff --git a/src/trees/CornerOperatorTree.h b/src/trees/CornerOperatorTree.h index 8da4f5a11..545fd6813 100644 --- a/src/trees/CornerOperatorTree.h +++ b/src/trees/CornerOperatorTree.h @@ -29,87 +29,39 @@ namespace mrcpp { -/** - * @file CornerOperatorTree.h - * @brief Declaration of CornerOperatorTree, a specialization of OperatorTree - * for "corner" non-standard form operators. +/** @class CornerOperatorTree * - * @details - * Many MRCPP operators are represented in non-standard form and decompose - * into the four corner submatrices T, A, B, C. This helper class provides: - * - computation of per-depth band widths for those corner blocks, and - * - a fast band screen used during operator application. + * @brief Special case of OperatorTree class * - * The band width information is stored in a BandWidth object owned by - * the base class OperatorTree. - * - * @par Example - * @code - * CornerOperatorTree cot(mra, 10); // maxDepth = 10 - * cot.calcBandWidth(1e-8); // build band widths with a threshold - * bool within = cot.isOutsideBand(3, 4, 1); // oTransl=3, o_depth=4, idx=1 - * @endcode - */ - -/** - * @class CornerOperatorTree - * @brief Operator tree for non-standard form corner matrices. - * - * @details - * This final class only adds band-handling logic on top of @ref OperatorTree. - * Construction and storage are inherited from the base class; the only - * public operations exposed here are: - * - @ref calcBandWidth to build/update the band widths, and - * - @ref isOutsideBand for a quick test against the stored band. + * @details Tree structure of operators having corner matrices + * \f$ A, B, C \f$ in the non-standard form. */ class CornerOperatorTree final : public OperatorTree { public: - /// Inherit the valid constructor(s) from OperatorTree. + /// Inherit the valid constructorfrom OperatorTree. 
using OperatorTree::OperatorTree; CornerOperatorTree(const CornerOperatorTree &tree) = delete; CornerOperatorTree &operator=(const CornerOperatorTree &tree) = delete; ~CornerOperatorTree() override = default; - /** - * @brief Compute per-depth band widths for the corner matrices. - * - * @param prec Threshold used when scanning matrix entries. - * If negative, the implementation falls back to the - * tree’s internal default (e.g. @c normPrec ). + /** + * @brief Calculates band widths of the non-standard form matrices + * @param prec Precision used for thresholding * - * @details - * For each depth and for each corner component \f$\{T,A,B,C\}\f$, - * the routine scans along increasing distance from the diagonal and - * records the largest translation \f$\ell\f$ for which the component - * norm still exceeds the threshold. The resulting widths are stored - * in the underlying @ref BandWidth structure. - * - * @note Calling this will (re)allocate and overwrite the stored band - * widths for the whole tree. + * @details It is starting from \f$ l = 2^n \f$ and updating the band width value each time we encounter + * considerable value while keeping decreasing down to \f$ l = 0 \f$, that stands for the distance to the diagonal. + * This procedure is repeated for each matrix \f$ A, B \f$ and \f$ C \f$. */ void calcBandWidth(double prec = -1.0) override; - /** - * @brief Test an offset against the stored band width. - * - * @param oTransl Integer offset (translation) from the diagonal. - * @param o_depth Operator depth at which to query the band. - * @param idx Corner component selector in \f$\{0,1,2,3\}\f$ - * corresponding to \f$\{T,A,B,C\}\f$. - * - * @return - * **true** if \f$|oTransl| < \mathrm{width}(o\_depth, idx)\f$, - * **false** otherwise. - * - * @details - * Despite the historical name, this method returns @b true when the - * offset lies @em inside the retained band (i.e., strictly smaller - * than the stored width). 
Callers typically use it as a quick screen - * to decide whether a sparse block needs to be applied. + /** + * @brief Checks if the distance to diagonal is lesser than the operator band width + * @param oTransl distance to diagonal + * @param o_depth scaling order + * @param idx index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$ * - * @warning This assumes @ref calcBandWidth has been called at least - * once; otherwise widths may be unset or conservative. + * @returns True if @p oTransl is outside of the corner band (close to diagonal) and False otherwise. */ bool isOutsideBand(int oTransl, int o_depth, int idx) override; }; From 90965e4294ef1fb9d0dd258c27d6821d3545b9cb Mon Sep 17 00:00:00 2001 From: ylvao Date: Thu, 6 Nov 2025 16:28:17 +0100 Subject: [PATCH 31/51] documenting NodeIndex.h --- src/trees/NodeIndex.h | 182 +++++++++++++++++++++--------------------- 1 file changed, 90 insertions(+), 92 deletions(-) diff --git a/src/trees/NodeIndex.h b/src/trees/NodeIndex.h index f5236fa25..b7a984084 100644 --- a/src/trees/NodeIndex.h +++ b/src/trees/NodeIndex.h @@ -23,21 +23,6 @@ * */ -/** - * @file NodeIndex.h - * @brief Compact storage for multiresolution node indices (scale and translation). - * - * @details - * A NodeIndex encodes the position of a node in a multiresolution tree by: - * - an integer **scale** \f$N\f$ (node size \f$\propto 2^{-N}\f$), and - * - an integer **translation** vector \f$\mathbf{L}\in\mathbb{Z}^D\f$. - * - * The class provides helpers to obtain the parent/child indices, comparisons - * (including a strict weak ordering for associative containers), and utilities - * to test ancestry/sibling relations (see free functions @ref related and - * @ref siblings below). - */ - #pragma once #include @@ -48,70 +33,89 @@ namespace mrcpp { /** * @class NodeIndex - * @tparam D Spatial dimension (1, 2 or 3). - * @brief Scale–translation pair identifying a node in a MW tree. 
+ * @tparam D Spatial dimension (1, 2, or 3) + * @brief Storage class for scale and translation indexes * * @details + * A NodeIndex encodes the position of a node in a multiresolution tree by: + * - an integer **scale** N (node size proportional to 2^{-N}) + * - D-dimensional **translation** vector L og integers + * + * Provides helpers to obtain the parent/child indices, comparisons + * (including a strict weak ordering for associative containers), and utilities + * to test ancestry/sibling relations + * + * The usefulness of the class becomes evident when examining + * the parallel algorithms for projection & friends + * + * @note * The scale is stored as a short integer; the translation is a D-dimensional - * integer vector. The translation follows the standard dyadic refinement: - * children are obtained by doubling each component and adding the child-bit - * extracted from the child index. + * integer vector. The translation follows the standard dyadic refinement. */ template class NodeIndex final { public: /** - * @name Constructors - * @{ - */ - - /** - * @brief Construct from scale and translation. - * @param n Scale \f$N\f$. - * @param l Translation vector \f$\mathbf{L}\f$ (defaults to all zeros). + * @brief Regular constructor for NodeIndex + * @param[in] n Scale (defaults to zero) + * @param[in] l Translation vector with dimension D + * + * @details Casts n to a short int (N) and directly assigns L as l */ NodeIndex(int n = 0, const std::array &l = {}) : N(static_cast(n)) , L(l) {} - /** - * @brief Index of the parent node (one level coarser). - * @return Parent index \f$(N-1, \lfloor L/2 \rfloor)\f$ with correct rounding for negatives. + * @brief Relative constructor of the parent NodeIndex + * @return Parent NodeIndex + * + * @details Parents (N = N - 1) are obtained by floor rounding L/2 */ NodeIndex parent() const { std::array l; for (int d = 0; d < D; d++) l[d] = (this->L[d] < 0) ? 
(this->L[d] - 1) / 2 : this->L[d] / 2; return NodeIndex(this->N - 1, l); } - /** - * @brief Index of a child node (one level finer). - * @param cIdx Child linear index in \f$[0, 2^D)\f$; bit @c d selects the offset in dimension @c d. - * @return Child index \f$(N+1, 2L + b)\f$ with @c b given by the bits of @p cIdx. + * @brief Relative constructor of child NodeIndex + * @param cIdx Child linear index + * @return Child NodeIndex + * + * @details Children (N = N + 1) are obtained by L = 2L + b, where @c b is given by the bits of @p cIdx */ NodeIndex child(int cIdx) const { std::array l; for (int d = 0; d < D; d++) l[d] = (2 * this->L[d]) + ((cIdx >> d) & 1); return NodeIndex(this->N + 1, l); } - /// @} /** - * @name Comparisons - * @{ + * @brief Defines inequality operator + * @param[in] idx NodeIndex of the comparing node + * @return True if N and/or L are different */ - /// Inequality. bool operator!=(const NodeIndex &idx) const { return not(*this == idx); } - - /// Equality (same scale and same translation vector). + /** + * @brief Defines equality operator + * @param[in] idx NodeIndex of comparing node + * @return True if both N and L are equal + */ bool operator==(const NodeIndex &idx) const { bool out = (this->N == idx.N); for (int d = 0; d < D; d++) out &= (this->L[d] == idx.L[d]); return out; } - /** - * @brief Strict weak ordering (by scale, then lexicographically by translation). - * @details Enables usage as key in @c std::map / @c std::set. + * @brief Defines comparison operator + * @param[in] idy NodeIndex of comparing node + * @return True if *this is smaller than idy + * + * @details + * Comparison rules (by order): + * 1. NodeIndex with smallest N is considered smallest + * 2. 
For equal N, the NodeIndex with the smaller first differing component of L is considered smaller + * + * @note + * This is a strict weak ordering, which enables usage as key in std::map / std::set */ bool operator<(const NodeIndex &idy) const { const NodeIndex &idx = *this; @@ -120,48 +124,43 @@ template class NodeIndex final { if (idx.L[1] != idy.L[1] or D < 3) return idx.L[1] < idy.L[1]; return idx.L[2] < idy.L[2]; } - /// @} - /** - * @name Setters - * @{ + /* + * Getters and setters */ - /// Set the scale. - void setScale(int n) { this->N = static_cast(n); } - - /// Set the translation vector. - void setTranslation(const std::array &l) { this->L = l; } - /// @} - + int getScale() const { return this->N; } ///< @return Scale of node + std::array getTranslation() const { return this->L; } ///< @return Full translation vector + void setScale(int n) { this->N = static_cast(n); } ///< @param n Scale of node + void setTranslation(const std::array &l) { this->L = l; } ///< @param l Translation vector of dimension D + /** - * @name Getters (values) - * @{ - */ - /// @return The scale \f$N\f$. - int getScale() const { return this->N; } - - /// @return Component @p d of the translation vector. + * @brief Get a specific component of translation vector, L + * @param[in] d Index of wanted component + * @return Translation vector component @p d + */ int getTranslation(int d) const { return this->L[d]; } - /// @return Full translation vector. - std::array getTranslation() const { return this->L; } - /// @} - /** - * @name Getters (references) - * @{ - */ - /// Mutable access to translation component @p d. + * @brief Define indexing operator of translation vector, L + * @param[in] d Index of wanted component + * @return Translation vector component @p d + */ int &operator[](int d) { return this->L[d]; } - /// Const access to translation component @p d. 
+ /** + * @brief Const version of @ref operator[] + * @param[in] d Index of wanted component + * @return Translation vector component @p d + */ const int &operator[](int d) const { return this->L[d]; } - /// @} /** - * @brief Print as "[ N | L0, L1, ... ]". - * @param o Output stream. - * @return The stream @p o. + * @brief Creates output stream of NodeIndex in readable format + * @param o Output stream + * @return The output stream @p o + * + * @details + * Prints NodeIndex in the form "[ N | L0, L1, ... ]" */ std::ostream &print(std::ostream &o) const { o << "[ " << std::setw(3) << this->N << " | "; @@ -171,28 +170,28 @@ template class NodeIndex final { } private: - short int N{0}; ///< Length-scale index \f$N\f$ (node size \f$\propto 2^{-N}\f$). - std::array L{}; ///< Translation vector \f$\mathbf{L}\f$. + short int N{0}; ///< Length scale index N (node size proportional to 2^{-N}) + std::array L{}; ///< Translation index [x,y,z,...] }; /** - * @brief Stream inserter for @ref NodeIndex. - * @relates NodeIndex + * @brief Defines operator for print of a @ref NodeIndex + * @param o Output stream + * @param[in] idx NodeIndex of wanted node + * @return Print stream of @ref NodeIndex */ template std::ostream &operator<<(std::ostream &o, const NodeIndex &idx) { return idx.print(o); } /** - * @brief Test if two indices are on the same branch (ancestor/descendant relation). - * @tparam D Dimension. - * @param a First index. - * @param b Second index. - * @return @c true if the coarser index equals the finer index truncated to the coarser scale. + * @brief Check whether two NodeIndices are directly related + * @param[in] a First NodeIndex + * @param[in] b Second NodeIndex + * @return True if related * - * @details - * Let @c sr be the shallower (coarser) of @p a and @p b, and @c jr the deeper (finer). - * They are related if \f$\mathbf{L}_{\text{sr}} = \lfloor \mathbf{L}_{\text{jr}} / 2^{N_{\text{jr}}-N_{\text{sr}}}\rfloor\f$. 
+ * @details @p a and @p b are related if they follow the relation rules described + * in the @ref child() and @ref parent() constructors */ template bool related(const NodeIndex &a, const NodeIndex &b) { const auto &sr = (a.getScale() < b.getScale()) ? a : b; @@ -205,11 +204,10 @@ template bool related(const NodeIndex &a, const NodeIndex &b) { } /** - * @brief Test if two indices are siblings (share the same parent). - * @tparam D Dimension. - * @param a First index. - * @param b Second index. - * @return @c true if @p a.parent() == @p b.parent(). + * @brief Check whether two NodeIndices are siblings, i.e. same parent + * @param[in] a First NodeIndex + * @param[in] b Second NodeIndex + * @return True if siblings */ template bool siblings(const NodeIndex &a, const NodeIndex &b) { return (a.parent() == b.parent()); From 1d2ee866cc53d38098a6d53af1f9fa71ffcbd8ef Mon Sep 17 00:00:00 2001 From: ylvao Date: Thu, 6 Nov 2025 16:44:05 +0100 Subject: [PATCH 32/51] small fixes --- src/trees/NodeIndex.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/trees/NodeIndex.h b/src/trees/NodeIndex.h index b7a984084..e230afef6 100644 --- a/src/trees/NodeIndex.h +++ b/src/trees/NodeIndex.h @@ -38,8 +38,8 @@ namespace mrcpp { * * @details * A NodeIndex encodes the position of a node in a multiresolution tree by: - * - an integer **scale** N (node size proportional to 2^{-N}) - * - D-dimensional **translation** vector L og integers + * - N: Scale of node stored as a short integer + * - L: D-dimensional translation vector of integers * * Provides helpers to obtain the parent/child indices, comparisons * (including a strict weak ordering for associative containers), and utilities @@ -47,10 +47,6 @@ namespace mrcpp { * * The usefulness of the class becomes evident when examining * the parallel algorithms for projection & friends - * - * @note - * The scale is stored as a short integer; the translation is a D-dimensional - * integer vector. 
The translation follows the standard dyadic refinement. */ template class NodeIndex final { public: From e139a1460bcc00262a1c16b40282518d83662da2 Mon Sep 17 00:00:00 2001 From: Jacopo Date: Thu, 6 Nov 2025 17:20:24 +0100 Subject: [PATCH 33/51] OperatorTree.h OperatorTree.cpp documented! --- src/trees/OperatorTree.cpp | 54 ++------------ src/trees/OperatorTree.h | 140 ++++++++++++++++++++++--------------- 2 files changed, 90 insertions(+), 104 deletions(-) diff --git a/src/trees/OperatorTree.cpp b/src/trees/OperatorTree.cpp index 890f2677c..fb283a99a 100644 --- a/src/trees/OperatorTree.cpp +++ b/src/trees/OperatorTree.cpp @@ -98,14 +98,7 @@ void OperatorTree::clearBandWidth() { this->bandWidth = nullptr; } -/** @brief Calculates band widths of the non-standard form matrices. - * - * @param[in] prec: Precision used for thresholding - * - * @details It is starting from \f$ l = 0 \f$ and updating the band width value each time we encounter - * considerable value while keeping increasing \f$ l \f$, that stands for the distance to the diagonal. - * - */ + void OperatorTree::calcBandWidth(double prec) { if (this->bandWidth == nullptr) clearBandWidth(); this->bandWidth = new BandWidth(getDepth()); @@ -133,32 +126,12 @@ void OperatorTree::calcBandWidth(double prec) { println(100, "\nOperator BandWidth" << *this->bandWidth); } -/** @brief Checks if the distance to diagonal is bigger than the operator band width. - * - * @param[in] oTransl: distance to diagonal - * @param[in] o_depth: scaling order - * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. - * - * @returns True if \b oTransl is outside of the band and False otherwise. - * - */ + bool OperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) { return abs(oTransl) > this->bandWidth->getWidth(o_depth, idx); } -/** @brief Cleans up end nodes. - * - * @param[in] trust_scale: there is no cleaning down below \b trust_scale (it speeds up operator building). 
- * - * @details Traverses the tree and rewrites end nodes having branch node twins, - * i. e. identical with respect to scale and translation. - * This method is very handy, when an adaptive operator construction - * can make a significunt noise at low scaling depth. - * Its need comes from the fact that mwTransform up cannot override - * rubbish that can potentially stick to end nodes at a particular level, - * and as a result spread further up to the root with mwTransform. - * - */ + void OperatorTree::removeRoughScaleNoise(int trust_scale) { MWNode<2> *p_rubbish; // possibly inexact end node MWNode<2> *p_counterpart; // exact branch node @@ -191,12 +164,7 @@ void OperatorTree::getMaxTranslations(VectorXi &maxTransl) { } } -/** Make 1D lists, adressable from [-l, l] scale by scale, of operator node - * pointers for fast operator retrieval. This method is not thread safe, - * since it projects missing operator nodes on the fly. Hence, it must NEVER - * be called within a parallel region, or all hell will break loose. This is - * not really a problem, but you have been warned. - */ +// NOT THREAD SAFE void OperatorTree::setupOperNodeCache() { int nScales = this->nodesAtDepth.size(); int rootScale = this->getRootScale(); @@ -245,12 +213,7 @@ void OperatorTree::clearOperNodeCache() { } } -/** Regenerate all s/d-coeffs by backtransformation, starting at the bottom and - * thus purifying all coefficients. Option to overwrite or add up existing - * coefficients of BranchNodes (can be used after operator application). - * Reimplementation of MWTree::mwTransform() without OMP, as calculation - * of OperatorNorm is done using random vectors, which is non-deterministic - * in parallel. FunctionTrees should be fine. 
*/ + void OperatorTree::mwTransformUp() { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); @@ -264,12 +227,7 @@ void OperatorTree::mwTransformUp() { } } -/** Regenerate all scaling coeffs by MW transformation of existing s/w-coeffs - * on coarser scales, starting at the rootNodes. Option to overwrite or add up - * existing scaling coefficients (can be used after operator application). - * Reimplementation of MWTree::mwTransform() without OMP, as calculation - * of OperatorNorm is done using random vectors, which is non-deterministic - * in parallel. FunctionTrees should be fine. */ + void OperatorTree::mwTransformDown(bool overwrite) { std::vector> nodeTable; tree_utils::make_node_table(*this, nodeTable); diff --git a/src/trees/OperatorTree.h b/src/trees/OperatorTree.h index 792bcde2a..de80ce347 100644 --- a/src/trees/OperatorTree.h +++ b/src/trees/OperatorTree.h @@ -23,23 +23,6 @@ * */ -/** - * @file OperatorTree.h - * @brief Declaration of the multiwavelet operator tree (2D non-standard form). - * - * @details - * An @ref mrcpp::OperatorTree stores a bivariate (D=2) operator in - * multiwavelet (MW) **non-standard form**, i.e. split into corner blocks - * \f$T, A, B, C\f$ at each scale. It provides: - * - adaptive storage and traversal via the base @ref mrcpp::MWTree, - * - optional **band-width screening** of corner blocks through @ref BandWidth, - * - cached direct access to operator nodes to avoid repeated tree lookups, and - * - MW up/down transforms specialized for operator data. - * - * Only trees built from **compatible** MRAs (same domain, order, and depth) - * should be combined in further computations. - */ - #pragma once #include // for Eigen::VectorXi @@ -55,7 +38,7 @@ class OperatorNode; /** * @class OperatorTree - * @brief Base class for 2D operator trees in non-standard form. 
+ * @brief Base class for 2D operator trees in non-standard form * * @details * The tree is organized like any MW tree (roots/branches/leaves) but stores @@ -66,91 +49,136 @@ class OperatorNode; class OperatorTree : public MWTree<2> { public: /** - * @brief Construct an operator tree. - * @param mra Multi-resolution analysis (domain + basis) shared by the tree. - * @param np “Norm precision” used when estimating/screening norms. - * @param name Optional diagnostic name. + * @brief Construct an operator tree + * @param[in] mra Multi-resolution analysis (domain + basis) shared by the tree + * @param[in] np “Norm precision” used when estimating/screening norms + * @param[in] name Optional diagnostic name */ OperatorTree(const MultiResolutionAnalysis<2> &mra, double np, const std::string &name = "nn"); OperatorTree(const OperatorTree &tree) = delete; OperatorTree &operator=(const OperatorTree &tree) = delete; - /// Virtual destructor. + /// Virtual destructor virtual ~OperatorTree() override; - /// @return The precision value used for norm-based screening. + /// @return The precision value used for norm-based screening double getNormPrecision() const { return this->normPrec; } /** - * @brief Release any existing @ref BandWidth object and set the pointer to null. - * @details Call this if the operator has changed and band widths must be recomputed. + * @brief Release any existing @ref BandWidth object and set the pointer to null + * @details Call this if the operator has changed and band widths must be recomputed */ void clearBandWidth(); - /** - * @brief Estimate per-depth band widths for the corner matrices. - * @param prec Threshold used when deciding if a component is significant. - * If negative, the internal @ref getNormPrecision() is used. - * @details Populates the internally owned @ref BandWidth structure. 
+ /** @brief Calculates band widths of the non-standard form matrices + * + * @param[in] prec: Precision used for thresholding + * + * @details Starts from \f$ l = 0 \f$ and updates the band width value each time a + * significant value is encountered while increasing \f$ l \f$, which stands for the distance to the diagonal */ virtual void calcBandWidth(double prec = -1.0); - /** - * @brief Quick band-screening predicate. - * @param oTransl Distance from the diagonal in translation space (|l\_bra−l\_ket|). - * @param o_depth Depth/scale index where the test is performed. - * @param idx Corner block selector: 0 = T, 1 = C, 2 = B, 3 = A (convention as used internally). - * @return @c true if @p oTransl is **outside** the currently stored band at @p o_depth for block @p idx. - * @note Requires a previously computed @ref BandWidth (see @ref calcBandWidth()). + /** @brief Checks if the distance to diagonal is bigger than the operator band width. + * + * @param[in] oTransl: distance to diagonal + * @param[in] o_depth: scaling order + * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$. + * + * @returns True if \b oTransl is outside of the band and False otherwise */ virtual bool isOutsideBand(int oTransl, int o_depth, int idx); - /** - * @brief Dampen/remove rough-scale numerical noise in the operator. - * @param trust_scale Scales finer (greater or equal to this) are trusted and preserved. - * @details Useful after building operators from noisy input data. + /** @brief Cleans up end nodes. + * + * @param[in] trust_scale: there is no cleaning down below \b trust_scale (it speeds up operator building). + * + * @details Traverses the tree and rewrites end nodes having branch node twins, + * i. e. identical with respect to scale and translation. + * This method is very handy, when an adaptive operator construction + * can make a significant noise at low scaling depth. 
+ * Its need comes from the fact that mwTransform up cannot override + * rubbish that can potentially stick to end nodes at a particular level, + * and as a result spread further up to the root with mwTransform. */ void removeRoughScaleNoise(int trust_scale = 10); /** - * @brief Build cache tables for direct @ref OperatorNode access. + * @brief Make 1D lists, addressable from [-l, l] scale by scale, of operator node pointers for fast operator retrieval + * * @details Populates @ref nodePtrStore and @ref nodePtrAccess to avoid repeated lookups. + * + * @warning This method is not thread safe, + * since it projects missing operator nodes on the fly. Hence, it must NEVER + * be called within a parallel region, or all hell will break loose. This is + * not really a problem, but you have been warned. */ void setupOperNodeCache(); - /// @brief Clear the operator-node caches built by @ref setupOperNodeCache(). + /// @brief Clear the operator-node caches built by @ref setupOperNodeCache() void clearOperNodeCache(); - /// @return Mutable reference to the stored @ref BandWidth (must exist). + /// @return Mutable reference to the stored @ref BandWidth (must exist) BandWidth &getBandWidth() { return *this->bandWidth; } - /// @return Const reference to the stored @ref BandWidth (must exist). + /// @return Const reference to the stored @ref BandWidth (must exist) const BandWidth &getBandWidth() const { return *this->bandWidth; } /** - * @brief Fast accessor to a node by (scale, diagonal distance). - * @param n Scale (depth measured from the root scale). - * @param l Distance to the diagonal (translation difference); l=0 hits the diagonal. + * @brief Fast accessor to a node by indices (scale, diagonal distance) + * + * @param[in] n Scale (depth measured from the root scale). + * @param[in] l Distance to the diagonal (translation difference); l=0 hits the diagonal + * * @return Reference to the requested @ref OperatorNode. 
- * @warning Valid only after calling @ref setupOperNodeCache(). + * @warning Valid only after calling @ref setupOperNodeCache() */ OperatorNode &getNode(int n, int l) { return *nodePtrAccess[n][l]; } - /// Const overload of @ref getNode(int,int). + /// @overload const OperatorNode &getNode(int n, int l) const { return *nodePtrAccess[n][l]; } + + + + + + + + /** + * @brief Regenerate all s/d-coeffs by backtransformation, starting at the bottom and thus purifying all coefficients + * + * @note This method takes no arguments (signature: mwTransformUp()) + * + * @details Option to overwrite or add up existing + * coefficients of BranchNodes (can be used after operator application). + * Reimplementation of MWTree::mwTransform() without OMP, as calculation + * of OperatorNorm is done using random vectors, which is non-deterministic + * in parallel. FunctionTrees should be fine. + */ + void mwTransformUp() override; + + /** - * @brief Downward MW transform specialized for operator data. - * @param overwrite If @c true, child coefficients may overwrite parent storage. + * @brief Regenerate all scaling coeffs by MW transformation of existing s/w-coeffs on coarser scales, starting at the rootNodes + * + * @param overwrite If @c true, child coefficients may overwrite existing scaling coefficients + * + * @details Option to overwrite or add up existing + * coefficients of BranchNodes (can be used after operator application). + * Reimplementation of MWTree::mwTransform() without OMP, as calculation + * of OperatorNorm is done using random vectors, which is non-deterministic + * in parallel. FunctionTrees should be fine. 
+ + + /// @overload using MWTree<2>::getNode; + /// @overload using MWTree<2>::findNode; protected: From f9d19805bd0fc2beed1c6f03c2de862f6fdfac58 Mon Sep 17 00:00:00 2001 From: Tarek Scheele Date: Thu, 6 Nov 2025 17:24:34 +0100 Subject: [PATCH 34/51] Finish documenting MWNode --- src/trees/MWNode.cpp | 389 +------------------------------------- src/trees/MWNode.h | 438 +++++++++++++++++++++++++++++++++---------- 2 files changed, 340 insertions(+), 487 deletions(-) diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp index 2d521b468..7c952d8a6 100644 --- a/src/trees/MWNode.cpp +++ b/src/trees/MWNode.cpp @@ -41,10 +41,6 @@ using namespace Eigen; namespace mrcpp { -/** @brief MWNode default constructor. - * - * @details Should be used only by NodeAllocator to obtain - * virtual table pointers for the derived classes. */ template MWNode::MWNode() : tree(nullptr) @@ -59,13 +55,6 @@ MWNode::MWNode() MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. - * - * @param[in] tree: the MWTree the root node belongs to - * @param[in] idx: the NodeIndex defining scale and translation of the node - * - * @details Constructor for an empty node, given the corresponding MWTree and NodeIndex - */ template MWNode::MWNode(MWTree *tree, const NodeIndex &idx) : tree(tree) @@ -79,14 +68,6 @@ MWNode::MWNode(MWTree *tree, const NodeIndex &idx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. - * - * @param[in] tree: the MWTree the root node belongs to - * @param[in] rIdx: the integer specifying the corresponding root node - * - * @details Constructor for root nodes. It requires the corresponding - * MWTree and an integer to fetch the right NodeIndex - */ template MWNode::MWNode(MWTree *tree, int rIdx) : tree(tree) @@ -100,14 +81,6 @@ MWNode::MWNode(MWTree *tree, int rIdx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode constructor. - * - * @param[in] parent: parent node - * @param[in] cIdx: child index of the current node - * - * @details Constructor for leaf nodes. 
It requires the corresponding - * parent and an integer to identify the correct child. - */ template MWNode::MWNode(MWNode *parent, int cIdx) : tree(parent->tree) @@ -121,15 +94,6 @@ MWNode::MWNode(MWNode *parent, int cIdx) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode copy constructor. - * - * @param[in] node: the original node - * @param[in] allocCoef: if true MW coefficients are allocated and copied from the original node - * - * @details Creates loose nodes and optionally copy coefs. The node - * does not "belong" to the tree: it cannot be accessed by traversing - * the tree. - */ template MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) : tree(node.tree) @@ -159,18 +123,14 @@ MWNode::MWNode(const MWNode &node, bool allocCoef, bool SetCoef) MRCPP_INIT_OMP_LOCK(); } -/** @brief MWNode destructor. - * - * @details Recursive deallocation of a node and all its decendants - */ template MWNode::~MWNode() { if (this->isLooseNode()) this->freeCoefs(); MRCPP_DESTROY_OMP_LOCK(); } -/** @brief Dummy deallocation of MWNode coefficients. +/* Dummy deallocation of MWNode coefficients. * - * @details This is just to make sure this method never really gets + * This is just to make sure this method never really gets * called (derived classes must implement their own version). This was * to avoid having pure virtual methods in the base class. */ @@ -178,12 +138,6 @@ template void MWNode::dealloc() { NOT_REACHED_ABORT; } -/** @brief Allocate the coefs vector. - * - * @details This is only used by loose nodes, because the loose nodes - * are not treated by the NodeAllocator class. - * - */ template void MWNode::allocCoefs(int n_blocks, int block_size) { if (this->n_coefs != 0) MSG_ABORT("n_coefs should be zero"); if (this->isAllocated()) MSG_ABORT("Coefs already allocated"); @@ -196,12 +150,6 @@ template void MWNode::allocCoefs(int n_blocks, int blo this->setIsAllocated(); } -/** @brief Deallocate the coefs vector. 
- * - * @details This is only used by loose nodes, because the loose nodes - * are not treated by the NodeAllocator class. - * - */ template void MWNode::freeCoefs() { if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!"); @@ -214,8 +162,6 @@ template void MWNode::freeCoefs() { this->clearIsAllocated(); } -/** @brief Printout of node coefficients - */ template void MWNode::printCoefs() const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); println(0, "\nMW coefs"); @@ -226,8 +172,6 @@ template void MWNode::printCoefs() const { } } -/** @brief wraps the MW coefficients into an eigen vector object - */ template void MWNode::getCoefs(Eigen::Matrix &c) const { if (not this->isAllocated()) MSG_ABORT("Node is not allocated"); if (not this->hasCoefs()) MSG_ABORT("Node has no coefs"); @@ -236,9 +180,6 @@ template void MWNode::getCoefs(Eigen::Matrix::Map(this->coefs, this->n_coefs); } -/** @brief sets all MW coefficients and the norms to zero - * - */ template void MWNode::zeroCoefs() { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated " << *this); @@ -247,68 +188,26 @@ template void MWNode::zeroCoefs() { this->setHasCoefs(); } -/** @brief Attach a set of coefs to this node. Only used locally (the tree is not aware of this). - */ template void MWNode::attachCoefs(T *coefs) { this->coefs = coefs; this->setHasCoefs(); } -/** @brief assigns values to a block of coefficients - * - * @param[in] c: the input coefficients - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. 
- */ template void MWNode::setCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = c[i]; } } -/** @brief adds values to a block of coefficients - * - * @param[in] c: the input coefficients - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. - */ template void MWNode::addCoefBlock(int block, int block_size, const T *c) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] += c[i]; } } -/** @brief sets values of a block of coefficients to zero - * - * @param[in] block: the block index - * @param[in] block_size: size of the block - * - * @details a block is typically containing one kind of coefficients - * (given scaling/wavelet in each direction). Its size is then \f$ - * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$. - */ template void MWNode::zeroCoefBlock(int block, int block_size) { if (not this->isAllocated()) MSG_ABORT("Coefs not allocated"); for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = 0.0; } } -/** @brief forward MW transform from this node to its children - * - * @param[in] overwrite: if true the coefficients of the children are - * overwritten. If false the values are summed to the already present - * ones. - * - * @details it performs forward MW transform inserting the result - * directly in the right place for each child node. The children must - * already be present and its memory allocated for this to work - * properly. 
- */ template void MWNode::giveChildrenCoefs(bool overwrite) { assert(this->isBranchNode()); if (not this->isAllocated()) MSG_ABORT("Not allocated!"); @@ -334,17 +233,6 @@ template void MWNode::giveChildrenCoefs(bool overwrite } } -/** @brief forward MW transform to compute scaling coefficients of a single child - * - * @param[in] cIdx: child index - * @param[in] overwrite: if true the coefficients of the children are - * overwritten. If false the values are summed to the already present - * ones. - * - * @details it performs forward MW transform in place on a loose - * node. The scaling coefficients of the selected child are then - * copied/summed in the correct child node. - */ template void MWNode::giveChildCoefs(int cIdx, bool overwrite) { MWNode node_i = *this; @@ -365,12 +253,6 @@ template void MWNode::giveChildCoefs(int cIdx, bool ov child.calcNorms(); } -/** Takes a MWParent and generates coefficients, reverse operation from - * giveChildrenCoefs */ -/** @brief backward MW transform to compute scaling/wavelet coefficients of a parent - * - * \warning This routine is only used in connection with Periodic Boundary Conditions - */ template void MWNode::giveParentCoefs(bool overwrite) { MWNode node = *this; MWNode &parent = getMWParent(); @@ -387,12 +269,6 @@ template void MWNode::giveParentCoefs(bool overwrite) parent.calcNorms(); } -/** @brief Copy scaling coefficients from children to parent - * - * @details Takes the scaling coefficients of the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. 
- */ template void MWNode::copyCoefsFromChildren() { int kp1_d = this->getKp1_d(); int nChildren = this->getTDim(); @@ -403,14 +279,6 @@ template void MWNode::copyCoefsFromChildren() { } } -/** @brief Generates scaling coefficients of children - * - * @details If the node is a leafNode, it takes the scaling&wavelet - * coefficients of the parent and it generates the scaling - * coefficients for the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. - */ template void MWNode::threadSafeGenChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -421,14 +289,6 @@ template void MWNode::threadSafeGenChildren() { MRCPP_UNSET_OMP_LOCK(); } -/** @brief Creates scaling coefficients of children - * - * @details If the node is a leafNode, it takes the scaling&wavelet - * coefficients of the parent and it generates the scaling - * coefficients for the children and stores - * them consecutively in the corresponding block of the parent, - * following the usual bitwise notation. The new node is permanently added to the tree. - */ template void MWNode::threadSafeCreateChildren() { if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } MRCPP_SET_OMP_LOCK(); @@ -439,16 +299,6 @@ template void MWNode::threadSafeCreateChildren() { MRCPP_UNSET_OMP_LOCK(); } -/** @brief Coefficient-Value transform - * - * @details This routine transforms the scaling coefficients of the node to the - * function values in the corresponding quadrature roots (of its children). - * - * @param[in] operation: forward (coef->value) or backward (value->coef). - * - * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1) - * representation, instead of s/d (scaling and wavelet). 
- */ template void MWNode::cvTransform(int operation, bool firstchild) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -539,25 +389,6 @@ void MWNode::cvTransform(int operation) { } */ -/** @brief Multiwavelet transform - * - * @details Application of the filters on one node to pass from a 0/1 (scaling - * on child 0 and 1) representation to an s/d (scaling and - * wavelet) representation. Bit manipulation is used in order to - * determine the correct filters and whether to apply them or just - * pass to the next couple of indexes. The starting coefficients are - * preserved until the application is terminated, then they are - * overwritten. With minor modifications this code can also be used - * for the inverse mw transform (just use the transpose filters) or - * for the application of an operator (using A, B, C and T parts of an - * operator instead of G1, G0, H1, H0). This is the version where the - * three directions are operated one after the other. Although this - * is formally faster than the other algorithm, the separation of the - * three dimensions prevent the possibility to use the norm of the - * operator in order to discard a priori negligible contributions. - * - * * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1). - */ template void MWNode::mwTransform(int operation) { int kp1 = this->getKp1(); int kp1_dm1 = math_utils::ipow(kp1, D - 1); @@ -597,19 +428,16 @@ template void MWNode::mwTransform(int operation) { } } -/** @brief Set all norms to Undefined. */ template void MWNode::clearNorms() { this->squareNorm = -1.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = -1.0; } } -/** @brief Set all norms to zero. */ template void MWNode::zeroNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = 0.0; } } -/** @brief Calculate and store square norm and component norms, if allocated. 
*/ template void MWNode::calcNorms() { this->squareNorm = 0.0; for (int i = 0; i < this->getTDim(); i++) { @@ -619,7 +447,6 @@ template void MWNode::calcNorms() { } } -/** @brief Calculate and return the squared scaling norm. */ template double MWNode::getScalingNorm() const { double sNorm = this->getComponentNorm(0); if (sNorm >= 0.0) { @@ -629,7 +456,6 @@ template double MWNode::getScalingNorm() const { } } -/** @brief Calculate and return the squared wavelet norm. */ template double MWNode::getWaveletNorm() const { double wNorm = 0.0; for (int i = 1; i < this->getTDim(); i++) { @@ -643,7 +469,6 @@ template double MWNode::getWaveletNorm() const { return wNorm; } -/** @brief Calculate the norm of one component (NOT the squared norm!). */ template double MWNode::calcComponentNorm(int i) const { if (this->isGenNode() and i != 0) return 0.0; assert(this->isAllocated()); @@ -658,9 +483,6 @@ template double MWNode::calcComponentNorm(int i) const return std::sqrt(sq_norm); } -/** @brief Update the coefficients of the node by a mw transform of the scaling - * coefficients of the children. - */ template void MWNode::reCompress() { if (this->isGenNode()) NOT_IMPLEMENTED_ABORT; if (this->isBranchNode()) { @@ -672,12 +494,6 @@ template void MWNode::reCompress() { } } -/** @brief Recurse down until an EndNode is found, and then crop children below the given precision threshold - * - * @param[in] prec: precision required - * @param[in] splitFac: factor used in the split check (larger factor means tighter threshold for finer nodes) - * @param[in] absPrec: flag to switch from relative (false) to absolute (true) precision. - */ template bool MWNode::crop(double prec, double splitFac, bool absPrec) { if (this->isEndNode()) { return true; @@ -707,11 +523,6 @@ template void MWNode::genParent() { NOT_REACHED_ABORT; } -/** @brief Recursive deallocation of children and all their descendants. - * - * @details - * Leaves node as LeafNode and children[] as null pointer. 
- */ template void MWNode::deleteChildren() { if (this->isLeafNode()) return; for (int cIdx = 0; cIdx < getTDim(); cIdx++) { @@ -726,7 +537,6 @@ template void MWNode::deleteChildren() { this->setIsLeafNode(); } -/** @brief Recursive deallocation of parent and all their forefathers. */ template void MWNode::deleteParent() { if (this->parent == nullptr) return; MWNode &parent = getMWParent(); @@ -736,7 +546,6 @@ template void MWNode::deleteParent() { this->parent = nullptr; } -/** @brief Deallocation of all generated nodes . */ template void MWNode::deleteGenerated() { if (this->isBranchNode()) { if (this->isEndNode()) { @@ -747,7 +556,6 @@ template void MWNode::deleteGenerated() { } } -/** @brief returns the coordinates of the centre of the node */ template Coord MWNode::getCenter() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -757,7 +565,6 @@ template Coord MWNode::getCenter() const { return r; } -/** @brief returns the upper bounds of the D-interval defining the node */ template Coord MWNode::getUpperBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -767,7 +574,6 @@ template Coord MWNode::getUpperBounds() const { return ub; } -/** @brief returns the lower bounds of the D-interval defining the node */ template Coord MWNode::getLowerBounds() const { auto two_n = std::pow(2.0, -getScale()); auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors(); @@ -777,14 +583,6 @@ template Coord MWNode::getLowerBounds() const { return lb; } -/** @brief Routine to find the path along the tree. - * - * @param[in] nIdx: the sought after node through its NodeIndex - * - * @details Given the translation indices at the final scale, computes the child m - * to be followed at the current scale in oder to get to the requested - * node at the final scale. The result is the index of the child needed. 
- * The index is obtained by bit manipulation of of the translation indices. */ template int MWNode::getChildIndex(const NodeIndex &nIdx) const { assert(isAncestor(nIdx)); int cIdx = 0; @@ -799,12 +597,6 @@ template int MWNode::getChildIndex(const NodeIndex return cIdx; } -/** @brief Routine to find the path along the tree. - * - * @param[in] r: the sought after node through the coordinates of a point in space - * - * @detailsGiven a point in space, determines which child should be followed - * to get to the corresponding terminal node. */ template int MWNode::getChildIndex(const Coord &r) const { assert(hasCoord(r)); int cIdx = 0; @@ -818,18 +610,6 @@ template int MWNode::getChildIndex(const Coord &r) return cIdx; } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: quadrature points in a \f$ d \times (k+1) \f$ matrix form. - * - * @details The original quadrature points are fetched and then - * dilated and translated. For each cartesian direction \f$ \alpha = - * x,y,z... \f$ the set of quadrature points becomes \f$ x^\alpha_i = - * 2^{-n} (x_i + l^\alpha \f$. By taking all possible - * \f$(k+1)^d\f$ combinations, they will then define a d-dimensional - * grid of quadrature points. - * - */ template void MWNode::getPrimitiveQuadPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, kp1); @@ -842,19 +622,6 @@ template void MWNode::getPrimitiveQuadPts(MatrixXd &pt for (int d = 0; d < D; d++) pts.row(d) = sFac * (roots.array() + static_cast(l[d])); } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: quadrature points in a \f$ d \times (k+1) \f$ matrix form. - * - * @details The original quadrature points are fetched and then - * dilated and translated to match the quadrature points in the - * children of the given node. For each cartesian direction \f$ \alpha = x,y,z... 
\f$ - * the set of quadrature points becomes \f$ x^\alpha_i = 2^{-n-1} (x_i + 2 l^\alpha + t^\alpha) \f$, where \f$ t^\alpha = - * 0,1 \f$. By taking all possible \f$(k+1)^d\combinations \f$, they will - * then define a d-dimensional grid of quadrature points for the child - * nodes. - * - */ template void MWNode::getPrimitiveChildPts(MatrixXd &pts) const { int kp1 = this->getKp1(); pts = MatrixXd::Zero(D, 2 * kp1); @@ -870,16 +637,6 @@ template void MWNode::getPrimitiveChildPts(MatrixXd &p } } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: expanded quadrature points in a \f$ d \times - * (k+1)^d \f$ matrix form. - * - * @details The primitive quadrature points are used to obtain a - * tensor-product representation collecting all \f$ (k+1)^d \f$ - * vectors of quadrature points. - * - */ template void MWNode::getExpandedQuadPts(Eigen::MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveQuadPts(prim_pts); @@ -894,16 +651,6 @@ template void MWNode::getExpandedQuadPts(Eigen::Matrix if (D >= 4) NOT_IMPLEMENTED_ABORT; } -/** @brief Returns the quadrature points in a given node - * - * @param[in,out] pts: expanded quadrature points in a \f$ d \times - * 2^d(k+1)^d \f$ matrix form. - * - * @details The primitive quadrature points of the children are used to obtain a - * tensor-product representation collecting all \f$ 2^d (k+1)^d \f$ - * vectors of quadrature points. - * - */ template void MWNode::getExpandedChildPts(MatrixXd &pts) const { MatrixXd prim_pts; getPrimitiveChildPts(prim_pts); @@ -928,16 +675,6 @@ template void MWNode::getExpandedChildPts(MatrixXd &pt } } -/** @brief Const version of node retriever that NEVER generates. - * - * @param[in] idx: the requested NodeIndex - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine returns the appropriate Node, or a NULL pointer if - * the node does not exist, or if it is a GenNode. 
Recursion starts at at this - * node and ASSUMES the requested node is in fact decending from this node. - */ template const MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) const { if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); @@ -952,16 +689,6 @@ template const MWNode *MWNode::retrieveNodeNoGen return this->children[cIdx]->retrieveNodeNoGen(idx); } -/** @brief Node retriever that NEVER generates. - * - * @param[in] idx: the requested NodeIndex - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine returns the appropriate Node, or a NULL pointer if - * the node does not exist, or if it is a GenNode. Recursion starts at at this - * node and ASSUMES the requested node is in fact decending from this node. - */ template MWNode *MWNode::retrieveNodeNoGen(const NodeIndex &idx) { if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); @@ -976,18 +703,6 @@ template MWNode *MWNode::retrieveNodeNoGen(const return this->children[cIdx]->retrieveNodeNoGen(idx); } -/** @brief Node retriever that returns requested Node or EndNode (const version). - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. 
- */ template const MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) const { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -995,18 +710,6 @@ template const MWNode *MWNode::retrieveNodeOrEnd return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } -/** @brief Node retriever that returns requested Node or EndNode. - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. - */ template MWNode *MWNode::retrieveNodeOrEndNode(const Coord &r, int depth) { if (getDepth() == depth or this->isEndNode()) { return this; } int cIdx = getChildIndex(r); @@ -1014,17 +717,6 @@ template MWNode *MWNode::retrieveNodeOrEndNode(c return this->children[cIdx]->retrieveNodeOrEndNode(r, depth); } -/** @brief Node retriever that returns requested Node or EndNode (const version). - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. 
- */ template const MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) const { if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); @@ -1039,18 +731,6 @@ template const MWNode *MWNode::retrieveNodeOrEnd return this->children[cIdx]->retrieveNodeOrEndNode(idx); } -/** @brief Node retriever that returns requested Node or EndNode. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node given the - * coordinates of a point in space. This routine returns the - * appropriate Node, or the EndNode on the path to the requested node, - * and will never create or return GenNodes. Recursion starts at at - * this node and ASSUMES the requested node is in fact decending from - * this node. - */ template MWNode *MWNode::retrieveNodeOrEndNode(const NodeIndex &idx) { if (getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); @@ -1065,17 +745,6 @@ template MWNode *MWNode::retrieveNodeOrEndNode(c return this->children[cIdx]->retrieveNodeOrEndNode(idx); } -/** @brief Node retriever that ALWAYS returns the requested node. - * - * @param[in] r: the coordinates of a point in the node - * @param[in] depth: the depth which one needs to descend - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. - * This routine always returns the appropriate node, and will generate nodes - * that does not exist. Recursion starts at this node and ASSUMES the - * requested node is in fact decending from this node. - */ template MWNode *MWNode::retrieveNode(const Coord &r, int depth) { if (depth < 0) MSG_ABORT("Invalid argument"); @@ -1087,17 +756,6 @@ template MWNode *MWNode::retrieveNode(const Coor return this->children[cIdx]->retrieveNode(r, depth); } -/** @brief Node retriever that ALWAYS returns the requested node, possibly without coefs. 
- * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. This - * routine always returns the appropriate node, and will generate nodes that - * does not exist. Recursion starts at this node and ASSUMES the requested - * node is in fact descending from this node. - * If create = true, the nodes are permanently added to the tree. - */ template MWNode *MWNode::retrieveNode(const NodeIndex &idx, bool create) { if (getScale() == idx.getScale()) { // we're done if (tree->isLocal) { @@ -1123,18 +781,6 @@ template MWNode *MWNode::retrieveNode(const Node return this->children[cIdx]->retrieveNode(idx, create); } -/** Node retriever that ALWAYS returns the requested node. - * - * WARNING: This routine is NOT thread safe! Must be used within omp critical. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find and return the node with a given NodeIndex. This - * routine always returns the appropriate node, and will generate nodes that - * does not exist. Recursion starts at this node and ASSUMES the requested - * node is in fact related to this node. - */ template MWNode *MWNode::retrieveParent(const NodeIndex &idx) { if (getScale() < idx.getScale()) MSG_ABORT("Scale error") if (getScale() == idx.getScale()) return this; @@ -1145,15 +791,6 @@ template MWNode *MWNode::retrieveParent(const No return this->parent->retrieveParent(idx); } -/** @brief Gives the norm (absolute value) of the node at the given NodeIndex. - * - * @param[in] idx: the NodeIndex of the requested node - * - * @details - * Recursive routine to find the node with a given NodeIndex. When an EndNode is - * found, do not generate any new node, but rather give the value of the norm - * assuming the function is uniformly distributed within the node. 
- */ template double MWNode::getNodeNorm(const NodeIndex &idx) const { if (this->getScale() == idx.getScale()) { // we're done assert(getNodeIndex() == idx); @@ -1168,10 +805,6 @@ template double MWNode::getNodeNorm(const NodeIndex return this->children[cIdx]->getNodeNorm(idx); } -/** @brief Test if a given coordinate is within the boundaries of the node. - * - * @param[in] r: point coordinates - */ template bool MWNode::hasCoord(const Coord &r) const { double sFac = std::pow(2.0, -getScale()); const NodeIndex &l = getNodeIndex(); @@ -1203,11 +836,6 @@ template bool MWNode::isCompatible(const MWNode // return true; } -/** @brief Test if the node is decending from a given NodeIndex, that is, if they have - * overlapping support. - * - * @param[in] idx: the NodeIndex of the requested node - */ template bool MWNode::isAncestor(const NodeIndex &idx) const { int relScale = idx.getScale() - getScale(); if (relScale < 0) return false; @@ -1223,10 +851,6 @@ template bool MWNode::isDecendant(const NodeIndex & NOT_IMPLEMENTED_ABORT; } -/** @brief printout ofm the node content. - * - * @param[in] o: the output stream - */ template std::ostream &MWNode::print(std::ostream &o) const { std::string flags = " "; o << getNodeIndex(); @@ -1252,12 +876,6 @@ template std::ostream &MWNode::print(std::ostream &o) return o; } -/** @brief recursively set maxSquaredNorm and maxWSquareNorm of parent and descendants - * - * @details - * normalization is such that a constant function gives constant value, - * i.e. 
*not* same normalization as a squareNorm - */ template void MWNode::setMaxSquareNorm() { auto n = this->getScale(); this->maxWSquareNorm = calcScaledWSquareNorm(); @@ -1272,8 +890,7 @@ template void MWNode::setMaxSquareNorm() { } } } -/** @brief recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1 - */ + template void MWNode::resetMaxSquareNorm() { auto n = this->getScale(); this->maxSquareNorm = -1.0; diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h index 96955828b..8b032698e 100644 --- a/src/trees/MWNode.h +++ b/src/trees/MWNode.h @@ -39,8 +39,8 @@ namespace mrcpp { /** * @class MWNode - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Coefficient type (e.g. double, ComplexDouble). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * * @brief Base class for Multiwavelet nodes * @@ -53,8 +53,7 @@ namespace mrcpp { * nodes, pointer to the corresponding MWTree etc... See member and * data descriptions for details. * - * @note - * Nodes are created and managed by MWTree and specialized trees + * @note Nodes are created and managed by MWTree and specialized trees * (e.g., FunctionTree). Most users should not instantiate nodes * directly; instead, operate at the tree level. */ @@ -110,7 +109,7 @@ class MWNode { /** * @brief Test if the node is decending from a given NodeIndex, that is, if they have - * overlapping support. + * overlapping support * @param[in] idx the NodeIndex of the requested node */ bool isAncestor(const NodeIndex &idx) const; @@ -153,7 +152,7 @@ class MWNode { /** * @brief Returns the quadrature points of this node * - * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form. + * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form * * @details The original quadrature points are fetched and then * dilated and translated. 
For each cartesian direction \f$ \alpha = @@ -167,7 +166,7 @@ class MWNode { /** * @brief Returns the quadrature points of this node * - * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form. + * @param[out] pts Quadrature points in a \f$ d \times (k+1) \f$ matrix form * * @details The original quadrature points are fetched and then * dilated and translated to match the quadrature points in the @@ -183,7 +182,7 @@ class MWNode { * @brief Returns the quadrature points of this node * * @param[out] pts Expanded quadrature points in a \f$ d \times - * (k+1)^d \f$ matrix form. + * (k+1)^d \f$ matrix form * * @details The primitive quadrature points are used to obtain a * tensor-product representation collecting all \f$ (k+1)^d \f$ @@ -195,7 +194,7 @@ class MWNode { * @brief Returns the quadrature points of this node * * @param[out] pts Expanded quadrature points in a \f$ d \times - * 2^d(k+1)^d \f$ matrix form. + * 2^d(k+1)^d \f$ matrix form * * @details The primitive quadrature points of the children are used to obtain a * tensor-product representation collecting all \f$ 2^d (k+1)^d \f$ @@ -279,9 +278,9 @@ class MWNode { virtual void genParent(); /** - * @brief Recursive deallocation of children and all their descendants. + * @brief Recursive deallocation of children and all their descendants * - * @details Leaves node as LeafNode and children[] as null pointer. + * @details Leaves node as LeafNode and children[] as null pointer */ virtual void deleteChildren(); @@ -290,7 +289,7 @@ class MWNode { /** * @brief Coefficient-Value transform - * @param operation Forward (coef->value) or backward (value->coef). + * @param operation Forward (coef->value) or backward (value->coef) * * @details This routine transforms the scaling coefficients of the node to the * function values in the corresponding quadrature roots (of its children). 
@@ -302,7 +301,7 @@ class MWNode { /** * @brief Multiwavelet transform - * @param operation compression (s0,s1->s,d) or reconstruction (s,d->s0,s1). + * @param operation Compression (s0,s1->s,d) or reconstruction (s,d->s0,s1) * * @details Application of the filters on one node to pass from a 0/1 (scaling * on child 0 and 1) representation to an s/d (scaling and @@ -318,44 +317,53 @@ class MWNode { * is formally faster than the other algorithm, the separation of the * three dimensions prevent the possibility to use the norm of the * operator in order to discard a priori negligible contributions. - * */ virtual void mwTransform(int operation); /** - * @brief Node-norm at an arbitrary index. - * @param idx Target index (may be at a finer scale). - * @return A node-wise norm consistent with the basis and scale. + * @brief Gives the norm (absolute value) of the node at the given NodeIndex + * @param[in] idx the NodeIndex of the requested node + * + * @details + * Recursive routine to find the node with a given NodeIndex. When an EndNode is + * found, do not generate any new node, but rather give the value of the norm + * assuming the function is uniformly distributed within the node. */ double getNodeNorm(const NodeIndex &idx) const; - /// @name Status flags - ///@{ - bool hasParent() const { return (parent != nullptr) ? true : false; } - bool hasCoefs() const { return (this->status & FlagHasCoefs); } - bool isEndNode() const { return (this->status & FlagEndNode); } - bool isGenNode() const { return (this->status & FlagGenNode); } - bool isRootNode() const { return (this->status & FlagRootNode); } - bool isLeafNode() const { return not(this->status & FlagBranchNode); } - bool isAllocated() const { return (this->status & FlagAllocated); } - bool isBranchNode() const { return (this->status & FlagBranchNode); } - bool isLooseNode() const { return (this->status & FlagLooseNode); } + /* + * Getters and setters + */ + bool hasParent() const { return (parent != nullptr) ? 
true : false; } ///< @return Whether the node has a parent + bool hasCoefs() const { return (this->status & FlagHasCoefs); } ///< @return Whether the node has coefficients + bool isEndNode() const { return (this->status & FlagEndNode); } ///< @return Whether the node is an end node + bool isGenNode() const { return (this->status & FlagGenNode); } ///< @return Whether the node is a generated node + bool isRootNode() const { return (this->status & FlagRootNode); } ///< @return Whether the node is a root node + bool isLeafNode() const { return not(this->status & FlagBranchNode); } ///< @return Whether the node is a leaf node + bool isAllocated() const { return (this->status & FlagAllocated); } ///< @return Whether the node is fully allocated + bool isBranchNode() const { return (this->status & FlagBranchNode); } ///< @return Whether the node is a branch node + bool isLooseNode() const { return (this->status & FlagLooseNode); } ///< @return Whether the node is a loose node + + /** + * @brief Allows checking the state of a node against a state mask + * @param mask The status mask to compare against + * @return Whether the state of the node matches the given mask + */ bool checkStatus(unsigned char mask) const { return (mask == (this->status & mask)); } - void setHasCoefs() { SET_BITS(status, FlagHasCoefs | FlagAllocated); } - void setIsEndNode() { SET_BITS(status, FlagEndNode); } - void setIsGenNode() { SET_BITS(status, FlagGenNode); } - void setIsRootNode() { SET_BITS(status, FlagRootNode); } - void setIsLeafNode() { CLEAR_BITS(status, FlagBranchNode); } - void setIsAllocated() { SET_BITS(status, FlagAllocated); } - void setIsBranchNode() { SET_BITS(status, FlagBranchNode); } - void setIsLooseNode() { SET_BITS(status, FlagLooseNode); } - void clearHasCoefs() { CLEAR_BITS(status, FlagHasCoefs); } - void clearIsEndNode() { CLEAR_BITS(status, FlagEndNode); } - void clearIsGenNode() { CLEAR_BITS(status, FlagGenNode); } - void clearIsRootNode() { CLEAR_BITS(status,
FlagRootNode); } - void clearIsAllocated() { CLEAR_BITS(status, FlagAllocated); } - ///@} + void setHasCoefs() { SET_BITS(status, FlagHasCoefs | FlagAllocated); } ///< @brief Marks the node as having coefficients (also sets the allocated flag) + void setIsEndNode() { SET_BITS(status, FlagEndNode); } ///< @brief Marks the node as an end node + void setIsGenNode() { SET_BITS(status, FlagGenNode); } ///< @brief Marks the node as a generated node + void setIsRootNode() { SET_BITS(status, FlagRootNode); } ///< @brief Marks the node as a root node + void setIsLeafNode() { CLEAR_BITS(status, FlagBranchNode); } ///< @brief Marks the node as a leaf node + void setIsAllocated() { SET_BITS(status, FlagAllocated); } ///< @brief Marks the node as allocated + void setIsBranchNode() { SET_BITS(status, FlagBranchNode); } ///< @brief Marks the node as a branch node + void setIsLooseNode() { SET_BITS(status, FlagLooseNode); } ///< @brief Marks the node as a loose node + void clearHasCoefs() { CLEAR_BITS(status, FlagHasCoefs); } ///< @brief Clears the mark for having coefficients + void clearIsEndNode() { CLEAR_BITS(status, FlagEndNode); } ///< @brief Clears the mark for being an end node + void clearIsGenNode() { CLEAR_BITS(status, FlagGenNode); } ///< @brief Clears the mark for being a generated node + void clearIsRootNode() { CLEAR_BITS(status, FlagRootNode); } ///< @brief Clears the mark for being a root node + void clearIsAllocated() { CLEAR_BITS(status, FlagAllocated); } ///< @brief Clears the mark for being allocated friend std::ostream &operator<<(std::ostream &o, const MWNode &nd) { return nd.print(o); } @@ -369,135 +377,363 @@ class MWNode { friend class FunctionNode; friend class OperatorNode; friend class DerivativeCalculator; - bool isComplex = false; ///< Helper flag for mixed-real/complex workflows. - friend class FunctionTree; ///< Allows complex trees to access real nodes when needed.
+ bool isComplex = false; // TODO put as one of the flags + friend class FunctionTree; // required if a ComplexDouble tree access a double node from another tree! friend class FunctionTree; - int childSerialIx{-1}; ///< Index of first child in a serialized view, or -1 for leaves. + int childSerialIx{-1}; ///< index of first child in a serial tree, or -1 for leaf nodes/end nodes protected: - // -------- Ownership and hierarchy -------- - MWTree *tree{nullptr}; ///< Tree the node belongs to. - MWNode *parent{nullptr}; ///< Parent node (nullptr for roots). - MWNode *children[1 << D]; ///< Array of 2^D children (valid if branch node). - - // -------- Norms (cached) -------- - double squareNorm{-1.0}; ///< Squared norm of all 2^D (k+1)^D coefficients. - double componentNorms[1 << D]; ///< Squared norms of the 2^D components. - double maxSquareNorm{-1.0}; ///< Maximum scaled squared norm among node and descendants. - double maxWSquareNorm{-1.0}; ///< Maximum scaled wavelet squared norm among node and descendants. - - // -------- Coefficients -------- - T *coefs{nullptr}; ///< Buffer of size 2^D (k+1)^D with MW coefficients. - int n_coefs{0}; ///< Number of coefficients in @ref coefs. - - // -------- Serialization helpers -------- - int serialIx{-1}; ///< Index in the serial tree - int parentSerialIx{-1}; ///< Index of parent in the serial tree, or -1 for roots - - // -------- Indexing and space-filling path -------- - NodeIndex nodeIndex; ///< Scale and translation of this node. - HilbertPath hilbertPath; ///< Current Hilbert path state for child ordering. 
- - // -------- Construction helpers -------- + MWTree *tree{nullptr}; ///< Tree the node belongs to + MWNode *parent{nullptr}; ///< Parent node (nullptr for root nodes) + MWNode *children[1 << D]; ///< Array of 2^D children (valid if branch node) + + double squareNorm{-1.0}; ///< Squared norm of all 2^D (k+1)^D coefficients + double componentNorms[1 << D]; ///< Squared norms of the separated 2^D components + double maxSquareNorm{-1.0}; ///< Maximum squared norm among the node and descendants + double maxWSquareNorm{-1.0}; ///< Maximum wavelet squared norm among the node and descendants + ///< NB: must be set before used. + T *coefs{nullptr}; ///< The 2^D (k+1)^D MW coefficients + ///< For example, in case of a one dimensional function \f$ f \f$ + ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$, + ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$ + ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$. + ///< Here \f$ n, l \f$ are unique for every node. + int n_coefs{0}; ///< Number of coefficients in @ref coefs. + + int serialIx{-1}; ///< Index in the serial tree + int parentSerialIx{-1}; ///< Index of the parent in the serial tree, or -1 for root nodes + + NodeIndex nodeIndex; ///< Scale and translation of this node. + HilbertPath hilbertPath; ///< Current Hilbert path state for child ordering. + + /** + * @brief MWNode default constructor + * + * @details Should be used only by NodeAllocator to obtain + * virtual table pointers for the derived classes + */ MWNode(); + + /** + * @brief MWNode constructor + * @param[in] tree The MWTree the root node belongs to + * @param[in] rIdx The integer specifying the corresponding root node + * + * @details Constructor for root nodes. It requires the corresponding + * MWTree and an integer to fetch the right NodeIndex. 
+ */ MWNode(MWTree *tree, int rIdx); + + /** + * @brief MWNode constructor + * @param[in] tree The MWTree the root node belongs to + * @param[in] idx The NodeIndex defining scale and translation of the node + * + * @details Constructor for an empty node, given the corresponding MWTree and NodeIndex + */ MWNode(MWTree *tree, const NodeIndex &idx); + + /** + * @brief MWNode constructor + * @param[in] parent Parent node + * @param[in] cIdx Child index of the current node + * + * @details Constructor for leaf nodes. It requires the corresponding + * parent and an integer to identify the correct child. + */ MWNode(MWNode *parent, int cIdx); - /// Free coefficient buffer and reset counters. + // Implemented in child classes virtual void dealloc(); - /// Crop node based on precision; may trigger refinement. + /** + * @brief Recurse down until an EndNode is found, and then crop children below the given precision threshold + * @param prec The required precision + * @param splitFac Factor used in the split check (larger factor means tighter threshold for finer nodes) + * @param absPrec Flag to switch from relative (false) to absolute (true) precision. + * @return Whether the crop was successful + */ bool crop(double prec, double splitFac, bool absPrec); - /// Initialize thread lock (when OpenMP is enabled). + /// @brief Initialize thread lock (when OpenMP is enabled). void initNodeLock() { MRCPP_INIT_OMP_LOCK(); } - /// Allocate coefficient buffer as `n_blocks * block_size`. + /** + * @brief Allocate the coefs vector + * @param n_blocks The number of blocks + * @param block_size The size of a block + * + * @details This is only used by loose nodes, because the loose nodes + * are not treated by the NodeAllocator class. + */ virtual void allocCoefs(int n_blocks, int block_size); - /// Release coefficient buffer. 
+ /** + * @brief Deallocate the coefs vector + * + * @details This is only used by loose nodes, because the loose nodes + * are not treated by the NodeAllocator class. + */ virtual void freeCoefs(); - /// Update cached maxima from descendants. + /** + * @brief recursively set maxSquaredNorm and maxWSquareNorm of parent and descendants + * + * @details + * normalization is such that a constant function gives constant value, + * i.e. *not* same normalization as a squareNorm + */ void setMaxSquareNorm(); - /// Invalidate cached maxima for this branch. + /// @brief Recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1 void resetMaxSquareNorm(); - /// Scaled total norm \f$ 2^{D n}\|c\|^2 \f$ (lazy). + /// @return The scaled square norm. double calcScaledSquareNorm() const { return std::pow(2.0, D * getScale()) * getSquareNorm(); } - /// Scaled wavelet norm \f$ 2^{D n}\|d\|^2 \f$ (lazy). + /// @return The scaled wavelet square norm. double calcScaledWSquareNorm() const { return std::pow(2.0, D * getScale()) * getWaveletNorm(); } - /// Component-wise norm computation hook. + /** + * @brief Calculate the norm of one component (NOT the squared norm!) + * @param i The component index + * @return The single component norm + */ virtual double calcComponentNorm(int i) const; - /// Recompress local representation after edits. + /** + * @brief Update the coefficients of the node by a MW transform of the scaling + * coefficients of the children. + */ virtual void reCompress(); - /// Push coefficients from parent to all children. + /** + * @brief Forward MW transform from this node to its children + * @param overwrite If true, the coefficients of the children are + * overwritten. If false, the values are summed to the already present + * ones. + * + * @details It performs forward MW transform inserting the result + * directly in the right place for each child node. 
The children must + * already be present and their memory allocated for this to work + * properly. + */ virtual void giveChildrenCoefs(bool overwrite = true); - /// Push coefficients from parent to a specific child. + /** + * @brief Forward MW transform to compute scaling coefficients of a single child + * @param[in] cIdx The child index + * @param[in] overwrite If true, the coefficients of the children are + * overwritten. If false, the values are summed to the already present + * ones. + * + * @details It performs forward MW transform in place on a loose + * node. The scaling coefficients of the selected child are then + * copied/summed in the correct child node. + */ virtual void giveChildCoefs(int cIdx, bool overwrite = true); - /// Pull coefficients from children to parent. + /** @brief Backward MW transform to compute scaling/wavelet coefficients of a parent + * + * @details Takes a MWParent and generates coefficients, reverse operation from + * giveChildrenCoefs. + * + * @note This routine is only used in connection with Periodic Boundary Conditions + */ virtual void giveParentCoefs(bool overwrite = true); - /// Rebuild local buffer from children (inverse of giveChildrenCoefs). + /** + * @brief Copy scaling coefficients from children to parent + * + * @details Takes the scaling coefficients of the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. + */ virtual void copyCoefsFromChildren(); - /// Child index for a target node index (same scale or finer). + /** + * @brief Routine to find the path along the tree + * @param[in] nIdx The sought after node through its NodeIndex + * + * @details Given the translation indices at the final scale, computes the child m + * to be followed at the current scale in order to get to the requested + * node at the final scale. The result is the index of the child needed. + * The index is obtained by bit manipulation of the translation indices.
+ */ int getChildIndex(const NodeIndex &nIdx) const; - /// Child index for a spatial coordinate. + /** + * @brief Routine to find the path along the tree + * @param[in] r The sought after node through the coordinates of a point in space + * + * @details Given a point in space, determines which child should be followed + * to get to the corresponding terminal node. + */ int getChildIndex(const Coord &r) const; - /// Whether two nodes lie in different branches (fast check). + /** + * @brief Fast check whether two nodes lie in different branches + * @param rhs The node to compare against + * @return true if two nodes lie in different branches + */ bool diffBranch(const MWNode &rhs) const; - /// Retrieve node owning coordinate @p r at given depth (may create). + /** + * @brief Node retriever that ALWAYS returns the requested node + * + * @param[in] r The coordinates of a point in the node + * @param depth The depth to descend + * @return The node at the given coordinates + * + * @details Recursive routine to find and return the node at the given coordinates. + * This routine always returns the appropriate node, and will generate nodes + * that do not exist. Recursion starts at this node and ASSUMES the + * requested node is in fact descending from this node. + */ MWNode *retrieveNode(const Coord &r, int depth); - /// Retrieve node at index @p idx (may create). + /** + * @brief Node retriever that ALWAYS returns the requested node, possibly without coefs + * @param[in] idx The NodeIndex of the requested node + * @return The node at the given node index + * + * @details Recursive routine to find and return the node with a given NodeIndex. This + * routine always returns the appropriate node, and will generate nodes that + * do not exist. Recursion starts at this node and ASSUMES the requested + * node is in fact descending from this node. + * If create = true, the nodes are permanently added to the tree.
+ */ MWNode *retrieveNode(const NodeIndex &idx, bool create = false); - /// Retrieve parent node for index @p idx (may create ancestors). + /** + * @brief Node retriever that ALWAYS returns the requested node + * @param[in] idx The NodeIndex of the requested node + * @return The node at the given node index + * + * @details Recursive routine to find and return the node with a given NodeIndex. This + * routine always returns the appropriate node, and will generate nodes that + * do not exist. Recursion starts at this node and ASSUMES the requested + * node is in fact related to this node. + * + * @warning This routine is NOT thread safe! Must be used within omp critical. + */ MWNode *retrieveParent(const NodeIndex &idx); - /// Lookup without generation (const). + /** + * @brief Const version of node retriever that NEVER generates + * @param[in] idx The requested NodeIndex + * @returns The requested node + * + * @details Recursive routine to find and return the node with a given NodeIndex. + * This routine returns the appropriate Node, or a NULL pointer if + * the node does not exist, or if it is a GenNode. Recursion starts at this + * node and ASSUMES the requested node is in fact descending from this node. + */ const MWNode *retrieveNodeNoGen(const NodeIndex &idx) const; - /// Lookup without generation (mutable). + /** + * @brief Node retriever that NEVER generates. + * @param[in] idx The requested NodeIndex + * @returns The requested node + * + * @details Recursive routine to find and return the node with a given NodeIndex. + * This routine returns the appropriate Node, or a NULL pointer if + * the node does not exist, or if it is a GenNode. Recursion starts at this + * node and ASSUMES the requested node is in fact descending from this node. + */ MWNode *retrieveNodeNoGen(const NodeIndex &idx); - /// Find node or end node by coordinate (const).
+ /** + * @brief Node retriever that returns requested Node or EndNode (const version) + * @param[in] r The coordinates of a point in the node + * @param depth The depth to descend + * @return The node at the given coordinates + * + * @details Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at + * this node and ASSUMES the requested node is in fact descending from + * this node. + */ const MWNode *retrieveNodeOrEndNode(const Coord &r, int depth) const; - /// Find node or end node by coordinate (mutable). + /** + * @brief Node retriever that returns requested Node or EndNode + * @param[in] r The coordinates of a point in the node + * @param depth The depth to descend + * @return The node at the given coordinates + * + * @details Recursive routine to find and return the node given the + * coordinates of a point in space. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at + * this node and ASSUMES the requested node is in fact descending from + * this node. + */ MWNode *retrieveNodeOrEndNode(const Coord &r, int depth); - /// Find node or end node by index (const). + /** + * @brief Node retriever that returns requested Node or EndNode (const version) + * @param[in] idx The NodeIndex of the requested node + * @return The requested node + * + * @details Recursive routine to find and return the node given + * its NodeIndex. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at + * this node and ASSUMES the requested node is in fact descending from + * this node.
+ */ const MWNode *retrieveNodeOrEndNode(const NodeIndex &idx) const; - /// Find node or end node by index (mutable). + /** + * @brief Node retriever that returns requested Node or EndNode + * @param[in] idx The NodeIndex of the requested node + * @return The requested node + * + * @details Recursive routine to find and return the node given + * its NodeIndex. This routine returns the + * appropriate Node, or the EndNode on the path to the requested node, + * and will never create or return GenNodes. Recursion starts at + * this node and ASSUMES the requested node is in fact descending from + * this node. + */ MWNode *retrieveNodeOrEndNode(const NodeIndex &idx); - /// Thread-safe child creation. + /** + * @brief Creates scaling coefficients of children + * + * @details If the node is a leaf node, it takes the scaling and wavelet + * coefficients of the parent and it generates the scaling + * coefficients for the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. The new node is permanently added to the tree. + */ void threadSafeCreateChildren(); - /// Thread-safe generation of children. + /** + * @brief Generates scaling coefficients of children + * + * @details If the node is a leaf node, it takes the scaling and wavelet + * coefficients of the parent and it generates the scaling + * coefficients for the children and stores + * them consecutively in the corresponding block of the parent, + * following the usual bitwise notation. + */ void threadSafeGenChildren(); - /// Remove nodes generated during adaptive build. + + /// @brief Deallocation of all generated nodes void deleteGenerated(); - /// Printable diagnostics for a node.
+ /** + * @brief Prints of the node content + * @param[in,out] o The output stream + */ virtual std::ostream &print(std::ostream &o) const; - // --- Bit flags describing node state (see status member) --- + // Bit flags describing node state static const unsigned char FlagBranchNode = B8(00000001); static const unsigned char FlagGenNode = B8(00000010); static const unsigned char FlagHasCoefs = B8(00000100); From 1a0c0bd38cf2fbbe5e1442a0a0c35edf761e9dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Thu, 6 Nov 2025 17:30:57 +0100 Subject: [PATCH 35/51] NodeBox documented --- src/trees/NodeBox.h | 119 +++++++++++++++----------------------------- 1 file changed, 41 insertions(+), 78 deletions(-) diff --git a/src/trees/NodeBox.h b/src/trees/NodeBox.h index 732cee06b..ceb83853c 100644 --- a/src/trees/NodeBox.h +++ b/src/trees/NodeBox.h @@ -23,19 +23,6 @@ * */ -/** - * @file NodeBox.h - * @brief Container that associates a regular grid of boxes with pointers to MW nodes. - * - * @details - * A NodeBox is a thin wrapper around @ref BoundingBox that, in addition to the - * geometric information (bounds, scale, periodicity), keeps a dense array of - * pointers to @ref MWNode objects—one slot per box at the underlying scale. - * It is used by @ref MWTree to store and access the set of **root nodes** - * at the world scale, and by other components whenever a compact mapping - * from box indices to nodes is required. - */ - #pragma once #include "BoundingBox.h" @@ -45,123 +32,99 @@ namespace mrcpp { /** * @class NodeBox - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Scalar type of the associated @ref MWNode (e.g., double, ComplexDouble). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * @brief Bounding box with node-pointer storage. - * - * @details - * The class allocates and owns a contiguous array of pointers, one per box - * defined by the base @ref BoundingBox. 
Pointers are not owned by NodeBox - * (ownership stays with the corresponding @ref MWTree allocator); NodeBox only - * stores and clears them. The counter @ref nOccupied tracks how many slots - * are non-null. + * @brief Bounding box with node-pointer storage */ template class NodeBox final : public BoundingBox { public: /** - * @brief Construct a NodeBox from a lower-corner index and number of boxes. - * @param idx Lower-corner @ref NodeIndex at the world scale. - * @param nb Number of boxes per dimension (defaults to all ones). - * - * @details The geometric information is taken from @p idx and @p nb. - * Internal pointer storage is allocated and initialized to `nullptr`. + * @brief Construct a NodeBox from a lower-corner index and number of boxes + * @param idx Lower-corner @ref NodeIndex at the world scale + * @param nb Number of boxes per dimension (defaults to all ones) */ NodeBox(const NodeIndex &idx, const std::array &nb = {}); /** - * @brief Copy-construct from another NodeBox. - * @param box Source NodeBox. - * - * @details Copies the underlying @ref BoundingBox state and recreates - * pointer storage; node pointers themselves are copied (shallow). + * @brief Copy-construct from another NodeBox + * @param box Source NodeBox */ NodeBox(const NodeBox &box); /** - * @brief Construct from a plain @ref BoundingBox. - * @param box Geometric box to take as base. - * - * @details Creates an equivalent NodeBox and allocates empty pointer storage. + * @brief Construct from a plain @ref BoundingBox + * @param box Geometric box to take as base */ NodeBox(const BoundingBox &box); - /// Non-assignable (pointer storage is managed per-instance). NodeBox &operator=(const NodeBox &box) = delete; - /// Destructor; releases the internal pointer array (not the nodes). + /// @brief Destructor, deletes all nodes ~NodeBox() override; /** - * @brief Store a node pointer in slot @p idx. - * @param idx Linear box index in `[0, size())`. 
- * @param node Address of the node pointer to store (double pointer). - * - * @details The stored value is `*node`. If it was previously `nullptr` - * and the new value is non-null, @ref nOccupied is incremented. If it was - * non-null and is reset to `nullptr`, @ref nOccupied is decremented. + * @brief Store a node pointer in index @p idx + * @param idx Linear box index in `[0, size())` + * @param node Address of the node pointer to store (double pointer) */ void setNode(int idx, MWNode **node); /** - * @brief Clear the node pointer in slot @p idx (set it to `nullptr`). - * @param idx Linear box index. + * @brief Clear the node pointer stored at index @p idx + * @param idx Linear box index in `[0, size())` */ void clearNode(int idx) { this->nodes[idx] = nullptr; } /** - * @name Node access (mutable) - * @{ - */ - - /** - * @brief Get the node stored at the box corresponding to @p idx. - * @param idx Node index at the world scale. - * @return Reference to the node. - * @pre The slot must contain a non-null pointer. + * @brief Get the node stored at the given index @p idx + * @param idx Node index at the world scale + * @return Reference to the node */ MWNode &getNode(NodeIndex idx); - /** * @brief Get the node stored at the box containing coordinate @p r. - * @param r A point in world coordinates. - * @return Reference to the node. - * @pre The slot must contain a non-null pointer. + * @param r Coordinates of a point + * @return Reference to the node */ MWNode &getNode(Coord r); - /** - * @brief Get the node stored at linear index @p i. - * @param i Linear box index (default 0). - * @return Reference to the node. - * @pre The slot must contain a non-null pointer. 
+ * @brief Get the node stored at the given index @p i + * @param i Linear box index (default 0) + * @return Reference to the node */ MWNode &getNode(int i = 0); - ///@} /** - * @name Node access (const) - * @{ + * @brief Get the node stored at the given index @p idx + * @param idx Node index at the world scale + * @return Reference to the node */ const MWNode &getNode(NodeIndex idx) const; + /** + * @brief Get the node stored at the box containing coordinate @p r. + * @param r Coordinates of a point + * @return Reference to the node + */ const MWNode &getNode(Coord r) const; + /** + * @brief Get the node stored at the given index @p i + * @param i Linear box index (default 0) + * @return Reference to the node + */ const MWNode &getNode(int i = 0) const; - ///@} - - /// @return Number of slots with non-null pointers. - int getNOccupied() const { return this->nOccupied; } - /// @return Raw pointer to the internal node-pointer array (size == size()). - MWNode **getNodes() { return this->nodes; } + int getNOccupied() const { return this->nOccupied; } ///< @return The number of occupied node slots + MWNode **getNodes() { return this->nodes; } ///< @return The nodes stored in this box protected: int nOccupied; ///< Number of non-null entries in @ref nodes. MWNode **nodes; ///< Dense array of node pointers (size equals number of boxes). - /// Allocate and zero-initialize the @ref nodes array. + /// @brief Allocate the node double pointers void allocNodePointers(); - /// Clear all stored pointers (does not delete nodes). + /// @brief Clear and delete all nodes void deleteNodes(); }; From 7c536cd1f2099616509741bc8b102bfb356f8d81 Mon Sep 17 00:00:00 2001 From: Bin Gao Date: Fri, 7 Nov 2025 20:29:15 +0100 Subject: [PATCH 36/51] add doxygen comments for FunctionNode There are a few functions I am uncertain if my comments are correct. I will post them on pull request so that someone can review them. 
There are also some template specializations in FunctionNode.cpp, which I am not sure if we need to make doxygen comment for them. --- src/trees/FunctionNode.cpp | 21 +- src/trees/FunctionNode.h | 397 ++++++++++++++++++++++++++----------- 2 files changed, 279 insertions(+), 139 deletions(-) diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp index ff23fb394..03503d3f9 100644 --- a/src/trees/FunctionNode.cpp +++ b/src/trees/FunctionNode.cpp @@ -42,8 +42,6 @@ using namespace Eigen; namespace mrcpp { -/** Function evaluation. - * Evaluate all polynomials defined on the node. */ template T FunctionNode::evalf(Coord r) { if (not this->hasCoefs()) MSG_ERROR("Evaluating node without coefs"); @@ -87,11 +85,6 @@ template T FunctionNode::evalScaling(const Coord &r return two_n * result; } -/** Function integration. - * - * Wrapper for function integration, that requires different methods depending - * on scaling type. Integrates the function represented on the node on the - * full support of the node. */ template T FunctionNode::integrate() const { if (not this->hasCoefs()) { return 0.0; } switch (this->getScalingType()) { @@ -106,15 +99,6 @@ template T FunctionNode::integrate() const { } } -/** Function integration, Legendre basis. - * - * Integrates the function represented on the node on the full support of the - * node. The Legendre basis is particularly easy to integrate, as the work is - * already done when calculating its coefficients. The coefficients of the - * node is defined as the projection integral - * s_i = int f(x)phi_i(x)dx - * and since the first Legendre function is the constant 1, the first - * coefficient is simply the integral of f(x). 
*/ template T FunctionNode::integrateLegendre() const { double n = (D * this->getScale()) / 2.0; double two_n = std::pow(2.0, -n); @@ -234,10 +218,9 @@ template void FunctionNode::getValues(Matrix void FunctionNode::getAbsCoefs(T *absCoefs) { diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h index d4ed4d645..9e5d9cf74 100644 --- a/src/trees/FunctionNode.h +++ b/src/trees/FunctionNode.h @@ -32,221 +32,378 @@ namespace mrcpp { -/** - * @file FunctionNode.h - * @brief Leaf/branch node type that stores function coefficients on a - * multiresolution tree. - * - * @details - * A FunctionNode is a concrete MWNode specialized for function representations. - * It holds scaling and wavelet coefficients, provides allocation and refinement - * helpers, and exposes utilities for evaluation, coefficient access and - * basic per-node operations such as integration and local dot products. - * - * Template parameters: - * - D: spatial dimension (1, 2 or 3) - * - T: scalar type (double or ComplexDouble) - */ - /** * @class FunctionNode - * @tparam D Spatial dimension. - * @tparam T Scalar type. - * @brief Node of a FunctionTree that stores coefficients and implements - * function-specific operations. + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * @note Construction is managed by FunctionTree and NodeAllocator. Users do not - * construct FunctionNode directly. + * @brief Node of a @ref FunctionTree that stores coefficients and implements + * function-specific operations + * + * @details A FunctionNode is a concrete @ref MWNode specialized for function + * representations. It holds scaling and wavelet coefficients, provides + * allocation and refinement helpers, and exposes utilities for evaluation, + * coefficient access and basic per-node operations such as integration and + * local dot products. + * + * @note FunctionNodes are managed by @ref FunctionTree and @ref NodeAllocator. 
+ * Most users should not construct FunctionNode directly. */ template class FunctionNode final : public MWNode { public: - /** @name Typed accessors */ - ///@{ - - /** @brief Return the owning FunctionTree (non-const). */ - FunctionTree &getFuncTree() { return static_cast &>(*this->tree); } + FunctionTree &getFuncTree() { return static_cast &>(*this->tree); } ///< @return A reference to the tree this node belongs to, cast to a non-const @ref FunctionTree + FunctionNode &getFuncParent() { return static_cast &>(*this->parent); } ///< @return A reference to the parent of this node, cast to a non-const @ref FunctionNode - /** @brief Return the parent node cast to FunctionNode (non-const). */ - FunctionNode &getFuncParent() { return static_cast &>(*this->parent); } - - /** @brief Return the i-th child cast to FunctionNode (non-const). */ + /** + * @param i The index of the child + * @return A reference to the child at the given index, cast to a non-const @ref FunctionNode + */ FunctionNode &getFuncChild(int i) { return static_cast &>(*this->children[i]); } - /** @brief Return the owning FunctionTree (const). */ - const FunctionTree &getFuncTree() const { return static_cast &>(*this->tree); } + const FunctionTree &getFuncTree() const { return static_cast &>(*this->tree); } ///< @return A reference to the tree this node belongs to, cast to a const @ref FunctionTree + const FunctionNode &getFuncParent() const { return static_cast &>(*this->parent); } ///< @return A reference to the parent of this node, cast to a const @ref FunctionNode - /** @brief Return the parent node cast to FunctionNode (const). */ - const FunctionNode &getFuncParent() const { return static_cast &>(*this->parent); } - - /** @brief Return the i-th child cast to FunctionNode (const). 
*/ + /** + * @param i The index of the child + * @return A reference to the child at the given index, cast to a const @ref FunctionNode + */ const FunctionNode &getFuncChild(int i) const { return static_cast &>(*this->children[i]); } - ///@} - - /** @name Tree-structure overrides */ - ///@{ - /** - * @brief Create children of this node. - * @param coefs If true, initialize children by transferring coefficients - * from this node as appropriate for the basis. + * @brief Create child nodes for this node and abort if it is already a + * branch node (see @ref FlagBranchNode) + * @param coefs If `true`, allocate coefficient chunk for child nodes * - * @details Allocates child nodes through the node allocator and updates - * the internal topology. When coefs is true, scaling/wavelet blocks are - * propagated so that the represented function is unchanged by the split. + * @details This routine allocates child nodes via the tree's @ref NodeAllocator. + * The tree's node counter is incremented by @ref MWTree::incrementNodeCount. + * Finally, this node is marked as both a branch node (see @ref FlagBranchNode) + * and a non-end node (see @ref FlagEndNode). */ void createChildren(bool coefs) override; /** - * @brief Generate (allocate) children if absent. - * @details Convenience wrapper that creates children without coefficient - * transfer. Intended for topology building when coefficients are filled - * later by a calculator. + * @brief Generates child nodes with the @ref FlagGenNode bit flag set, and + * abort if this node is already a branch node (see @ref FlagBranchNode) + * + * @details This routine creates general or redundant child nodes for + * temporary use. As a result, the tree's node counter remains unchanged, + * and this node is marked only as a branch node (see @ref FlagBranchNode). */ void genChildren() override; /** - * @brief Ensure a parent exists and is allocated. 
- * @details Creates the parent node if missing and links this node into the - * parent children array. + * @brief Generate a parent for this node and abort if it already has one + * + * @details This routine allocates the parent node via the tree's @ref + * NodeAllocator and links this node into the parent children array. The + * tree's node counter is incremented by @ref MWTree::incrementNodeCount. */ void genParent() override; /** - * @brief Delete children of this node. - * @details Deallocates child nodes and updates internal state. Coefficients - * in this node remain untouched. + * @brief Recursive deallocation of children and all their descendants. + * + * @details This routine uses base class function @ref MWTree::deleteChildren + * for the deallocation. Finally, this node is marked as an end node (see + * @ref FlagEndNode). */ void deleteChildren() override; - ///@} - /** - * @brief Integrate the node contribution over its spatial support. - * @return The integral of the locally represented function on this node. + * @brief Function integration + * @return The integral of type @p T * - * @details Uses the current scaling basis to compute the exact contribution - * from scaling and wavelet parts confined to this node. For orthonormal - * wavelets the integral often reduces to the scaling block. + * @details Wrapper for function integration, that requires different + * methods depending on scaling type @ref FuncType. Integrates the function + * represented on the node on the full support of the node. This routine + * will return zero if the node does not have coefficients, and abort if the + * node has invalid type of scaling basis (Legendre or Interpol; see + * MRCPP/constants.h). */ T integrate() const; - /** @name Value and coefficient access */ - ///@{ - /** - * @brief Set nodal values from a vector. - * @param vec Column vector of size getNCoefs(). 
+ * @brief Set values from a vector to the node's coefficients, and update + * metadata of the node + * @param vec Column vector * - * @details The vector is interpreted in the node's value layout used by - * the scaling basis. Typical use is for interpolating bases, where values - * correspond to quadrature or interpolation points. Internally, node - * coefficients are updated accordingly. + * @details This routine calls @ref MWTree::setCoefBlock to set + * values from the vector, and update metadata of the node by calling + * @ref MWTree::cvTransform, and @ref MWTree::mwTransform. The + * node is marked as having coefficients, and its square norm and component + * norms are also computed by @ref MWTree::calcNorms. */ void setValues(const Eigen::Matrix &vec); /** - * @brief Extract nodal values into a vector. - * @param[out] vec Column vector resized to getNCoefs(). - * - * @details The returned values correspond to the basis-specific value - * layout for this node (e.g. interpolation/expanded points). + * @brief Extract the node's coefficients into a vector + * @param[out] vec Column vector resized to the number of coefficients of + * the node, see @ref MWTree::getNCoefs */ void getValues(Eigen::Matrix &vec); /** - * @brief Write absolute values of coefficients into a raw buffer. - * @param[out] absCoefs Pointer to memory of length getNCoefs(). + * @brief Get coefficients corresponding to absolute value of function + * @param[out] absCoefs Coefficients of type @p T * - * @details Fills absCoefs[i] = abs(coef[i]). For complex T this is the - * magnitude; for real T this is std::abs. The ordering matches the node's - * internal coefficient layout. + * @note The absolute value of function is computed using std::norm(). 
+ */ void getAbsCoefs(T *absCoefs); - ///@} - friend class FunctionTree; friend class NodeAllocator; protected: - /** @name Constructors and assignment (managed by the tree) */ - ///@{ - + /** + * @brief FunctionNode constructor + * + * @note This routine uses @ref MWNode default constructor. + */ FunctionNode() : MWNode() {} + /** + * @brief FunctionNode constructor + * @param[in] tree The MWTree the root node belongs to + * @param[in] rIdx The integer specifying the corresponding root node + * + * @details Constructor for root nodes. It actually calls @ref MWNode + * constructor MWNode(tree, rIdx). + */ explicit FunctionNode(MWTree *tree, int rIdx) : MWNode(tree, rIdx) {} - FunctionNode(MWNode *parent, int cIdx) - : MWNode(parent, cIdx) {} - + /** + * @brief FunctionNode constructor + * @param[in] tree The MWTree the root node belongs to + * @param[in] idx The NodeIndex defining scale and translation of the node + * + * @details Constructor for an empty node, which calls @ref MWNode + * constructor MWNode(tree, idx). + */ FunctionNode(MWTree *tree, const NodeIndex &idx) : MWNode(tree, idx) {} + /** + * @brief FunctionNode constructor + * @param[in] parent Parent node + * @param[in] cIdx Child index of the current node + * + * @details Constructor for leaf nodes. It invokes @ref MWNode constructor + * MWNode(parent, cIdx). + */ + FunctionNode(MWNode *parent, int cIdx) + : MWNode(parent, cIdx) {} + FunctionNode(const FunctionNode &node) = delete; FunctionNode &operator=(const FunctionNode &node) = delete; - ~FunctionNode() = default; - ///@} + /// @brief Default destructor of FunctionNode + ~FunctionNode() = default; - /** @brief Evaluate the reconstructed function at r (using this node only). 
*/ + /** + * @brief Function evaluation + * @param[in,out] r The sought after node through the coordinates of a point in space + * @return The evaluated result of type @p T + * + * @details Evaluate all polynomials defined on the child node found by + * @ref MWTree::getChildIndex. Trigger an error if the node does not + * have coefficients, or failed to find the child node. For periodic + * systems, the coordinate r will be mapped to the [-1, 1] periodic cell if + * it is outside the unit cell, see @ref periodic::coord_manipulation. + */ + //FIXME I guess the evaluation is performed for the child node, not this + // node, because this routine calls getFuncChild(cIdx).evalScaling(r), + // where cIdx = this->getChildIndex(r) T evalf(Coord r); - /** @brief Evaluate the scaling part at r. */ + /** + * @brief Function evaluation + * @param[in] r Coordinate where the evaluation is performed at + * @return The evaluated result of type @p T + */ T evalScaling(const Coord &r) const; - /** @brief Deallocate node-owned memory and reset local state. */ + /// @brief Deallocate the node and detach it from the tree it belongs to + //FIXME this routine calls dealloc of NodeAllocator, which frees the memory + // of the node by calling ~MWNode() and it seems that coefficients are freed + // only for LooseNode. void dealloc() override; - /** @brief Recompress local coefficients after updates. */ + /** + * @brief Update the coefficients of the node by an MW transform of the + * scaling coefficients of the children + * @note There is a specialization for @p D = 3, + * see @ref FunctionNode<3>::reCompress. + */ + //FIXME It is written in FunctionNode.cpp that, "Option to overwrite or add + // up existing coefficients". Not sure what it means, in particular "add up + // existing coefficients". + //FIXME not sure if @ref FunctionNode<3>::reCompress works void reCompress() override; - /** @brief Integration helper for Legendre scaling basis. 
+ /** + * @brief Function integration, Legendre basis + * @return The integral of type @p T + * + * @details Integrate the function represented on the node on the full + * support of the node. The Legendre basis is particularly easy to + * integrate, as the work is already done when calculating its + * coefficients. The coefficients of the node are defined as the projection + * integral \f$ s_i = \int f(x)\phi_i(x)\mathrm{d}x \f$ and since the first + * Legendre function is the constant 1, the first coefficient is simply the + * integral of \f$ f(x) \f$. + */ T integrateLegendre() const; - /** @brief Integration helper for interpolating scaling basis. */ + /** + * @brief Function integration, Interpolating basis + * @return The integral of type @p T + * + * @details Integrate the function represented on the node on the full + * support of the node. A bit more involved than in the Legendre basis, as + * it requires some coupling of quadrature weights. + */ T integrateInterpolating() const; - /** @brief Integration helper when values representation is active. */ + /** + * @brief Function integration, Interpolating basis + * @return The integral of type @p T + * + * @details Integrate the function represented on the node on the full + * support of the node. A bit more involved than in the Legendre basis, as + * it requires some coupling of quadrature weights. + */ + //FIXME This routine has exactly the same documentation comment as + // integrateInterpolating() in FunctionNode.cpp. T integrateValues() const; }; -/** @name Per-node local dot-product helpers (double) */ -///@{ +//FIXME All comments of dot_scaling() are exactly the same in FunctionNode.cpp, +// a bit boilerplate. But another important thing is the conjugate is actually +// taken for bra and ket in case of complex values, instead of only bra. +//FIXME Comments of dot_wavelet() have the same problem. /** - * @brief Dot product of scaling parts on matching nodes (double). 
- * @return Sum over matching scaling blocks within the two nodes. + * @brief Inner product of the functions represented by the scaling basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the scaling + * basis on the node on the full support of the nodes. The scaling basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket); /** - * @brief Dot product of wavelet parts on matching nodes (double). - * @return Sum over matching wavelet blocks within the two nodes. + * @brief Inner product of the functions represented by the scaling basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the scaling + * basis on the node on the full support of the nodes. The scaling basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugates of bra and ket will be taken. */ -template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); +template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); -///@} +/** + * @brief Inner product of the functions represented by the scaling basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the scaling + * basis on the node on the full support of the nodes. 
The scaling basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of bra will be taken. + */ +template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); -/** @name Per-node local dot-product helpers (complex-complex) */ -///@{ +/** + * @brief Inner product of the functions represented by the scaling basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the scaling + * basis on the node on the full support of the nodes. The scaling basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of ket will be taken. + */ +template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); -template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); -template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -///@} +/** + * @brief Inner product of the functions represented by the wavelet basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the wavelet + * basis on the node on the full support of the nodes. The wavelet basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. 
+ */ +template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -/** @name Per-node local dot-product helpers (complex-real and real-complex) */ -///@{ +/** + * @brief Inner product of the functions represented by the wavelet basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the wavelet + * basis on the node on the full support of the nodes. The wavelet basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugates of bra and ket will be taken. + */ +template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); +/** + * @brief Inner product of the functions represented by the wavelet basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the wavelet + * basis on the node on the full support of the nodes. The wavelet basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of bra will be taken. 
+ */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); +/** + * @brief Inner product of the functions represented by the wavelet basis of + * the nodes + * @param[in] bra FunctionNode on bra + * @param[in] ket FunctionNode on ket + * @return The computed inner product + * + * @details Integrates the product of the functions represented by the wavelet + * basis on the node on the full support of the nodes. The wavelet basis is + * fully orthonormal, and the inner product is simply the dot product of the + * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of ket will be taken. + */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); -///@} +//FIXME There are template specializations in FunctionNode.cpp, do we +// need to document them as well? -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp From c3c897e9958552310e67db1916d7ce002afecf97 Mon Sep 17 00:00:00 2001 From: Bin Gao Date: Mon, 10 Nov 2025 14:54:58 +0100 Subject: [PATCH 37/51] resolve comments of evalf, dealloc, reCompress, dot_scaling and dot_wavelet --- src/trees/FunctionNode.cpp | 67 -------------------------------------- src/trees/FunctionNode.h | 43 +++++++++--------------- 2 files changed, 15 insertions(+), 95 deletions(-) diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp index 03503d3f9..0233fd1fb 100644 --- a/src/trees/FunctionNode.cpp +++ b/src/trees/FunctionNode.cpp @@ -364,9 +364,6 @@ template void FunctionNode::dealloc() { } } -/** Update the coefficients of the node by a mw transform of the scaling - * coefficients of the children. Option to overwrite or add up existing - * coefficients. Specialized for D=3 below. 
*/ template void FunctionNode::reCompress() { MWNode::reCompress(); } @@ -391,14 +388,6 @@ template <> void FunctionNode<3>::reCompress() { } } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -416,14 +405,6 @@ template double dot_scaling(const FunctionNode &bra, const Fu #endif } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -450,14 +431,6 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. 
- */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -476,14 +449,6 @@ template ComplexDouble dot_scaling(const FunctionNode return result; } -/** Inner product of the functions represented by the scaling basis of the nodes. - * - * Integrates the product of the functions represented by the scaling basis on - * the node on the full support of the nodes. The scaling basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket) { assert(bra.hasCoefs()); assert(ket.hasCoefs()); @@ -502,14 +467,6 @@ template ComplexDouble dot_scaling(const FunctionNode &bra, c return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -530,14 +487,6 @@ template double dot_wavelet(const FunctionNode &bra, const Fu #endif } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. 
- * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -566,14 +515,6 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. - */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; @@ -594,14 +535,6 @@ template ComplexDouble dot_wavelet(const FunctionNode return result; } -/** Inner product of the functions represented by the wavelet basis of the nodes. - * - * Integrates the product of the functions represented by the wavelet basis on - * the node on the full support of the nodes. The wavelet basis is fully - * orthonormal, and the inner product is simply the dot product of the - * coefficient vectors. Assumes the nodes have identical support. - * NB: will take conjugate of bra in case of complex values. 
- */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket) { if (bra.isGenNode() or ket.isGenNode()) return 0.0; diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h index 9e5d9cf74..b3d02f213 100644 --- a/src/trees/FunctionNode.h +++ b/src/trees/FunctionNode.h @@ -202,19 +202,16 @@ template class FunctionNode final : public MWNode { ~FunctionNode() = default; /** - * @brief Function evaluation - * @param[in,out] r The sought after node through the coordinates of a point in space + * @brief Evaluate function at a point + * @param[in,out] r The point in space * @return The evaluated result of type @p T * - * @details Evaluate all polynomials defined on the child node found by + * @details Evaluate all polynomials defined on the node found by * @ref MWTree::getChildIndex. Trigger an error if the node does not - * have coefficients, or failed to find the child node. For periodic - * systems, the coordinate r will be mapped to the [-1, 1] periodic cell if - * it is outside the unit cell, see @ref periodic::coord_manipulation. + * have coefficients. For periodic systems, the coordinate r will be mapped + * to the [-1, 1] periodic cell if it is outside the unit cell, see + * @ref periodic::coord_manipulation. */ - //FIXME I guess the evaluation is performed for the child node, not this - // node, because this routine calls getFuncChild(cIdx).evalScaling(r), - // where cIdx = this->getChildIndex(r) T evalf(Coord r); /** @@ -224,10 +221,7 @@ template class FunctionNode final : public MWNode { */ T evalScaling(const Coord &r) const; - /// @brief Deallocate the node and detach it from the tree it belongs to - //FIXME this routine calls dealloc of NodeAllocator, which frees the memory - // of the node by calling ~MWNode() and it seems that coefficients are freed - // only for LooseNode. 
+ /// @brief Deallocate the node from the tree void dealloc() override; /** @@ -236,10 +230,6 @@ template class FunctionNode final : public MWNode { * @note There is a specialization for @p D = 3, * see @ref FunctionNode<3>::reCompress. */ - //FIXME It is written in FunctionNode.cpp that, "Option to overwrite or add - // up existing coefficients". Not sure what it means, in particular "add up - // existing coefficients". - //FIXME not sure if @ref FunctionNode<3>::reCompress works void reCompress() override; /** @@ -279,11 +269,6 @@ template class FunctionNode final : public MWNode { T integrateValues() const; }; -//FIXME All comments of dot_scaling() are exactly the same in FunctionNode.cpp, -// a bit boilerplate. But another important thing is the conjugate is actually -// taken for bra and ket in case of complex values, instead of only bra. -//FIXME Comments of dot_wavelet() have the same problem. - /** * @brief Inner product of the functions represented by the scaling basis of * the nodes @@ -295,6 +280,7 @@ template class FunctionNode final : public MWNode { * basis on the node on the full support of the nodes. The scaling basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of bra will be taken in case of complex values. */ template double dot_scaling(const FunctionNode &bra, const FunctionNode &ket); @@ -309,7 +295,7 @@ template double dot_scaling(const FunctionNode &bra, const Fu * basis on the node on the full support of the nodes. The scaling basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugates of bra and ket will be taken. + * @note Conjugate of bra will be taken in case of complex values. 
*/ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); @@ -324,7 +310,7 @@ template ComplexDouble dot_scaling(const FunctionNode * basis on the node on the full support of the nodes. The scaling basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugate of bra will be taken. + * @note Conjugate of bra will be taken in case of complex values. */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); @@ -339,7 +325,7 @@ template ComplexDouble dot_scaling(const FunctionNode * basis on the node on the full support of the nodes. The scaling basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugate of ket will be taken. + * @note Conjugate of bra will be taken in case of complex values. */ template ComplexDouble dot_scaling(const FunctionNode &bra, const FunctionNode &ket); @@ -355,6 +341,7 @@ template ComplexDouble dot_scaling(const FunctionNode &bra, c * basis on the node on the full support of the nodes. The wavelet basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. + * @note Conjugate of bra will be taken in case of complex values. */ template double dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); @@ -369,7 +356,7 @@ template double dot_wavelet(const FunctionNode &bra, const Fu * basis on the node on the full support of the nodes. The wavelet basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugates of bra and ket will be taken. + * @note Conjugate of bra will be taken in case of complex values. 
*/ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); @@ -384,7 +371,7 @@ template ComplexDouble dot_wavelet(const FunctionNode * basis on the node on the full support of the nodes. The wavelet basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugate of bra will be taken. + * @note Conjugate of bra will be taken in case of complex values. */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); @@ -399,7 +386,7 @@ template ComplexDouble dot_wavelet(const FunctionNode * basis on the node on the full support of the nodes. The wavelet basis is * fully orthonormal, and the inner product is simply the dot product of the * coefficient vectors. Assumes the nodes have identical support. - * @note Conjugate of ket will be taken. + * @note Conjugate of bra will be taken in case of complex values. */ template ComplexDouble dot_wavelet(const FunctionNode &bra, const FunctionNode &ket); From 77a7a9d0f5ad3bf1567b791657f283f74e0c1316 Mon Sep 17 00:00:00 2001 From: Bin Gao Date: Tue, 11 Nov 2025 14:05:29 +0100 Subject: [PATCH 38/51] add comment for BoysFunction --- src/functions/BoysFunction.h | 65 +++++++----------------------------- 1 file changed, 12 insertions(+), 53 deletions(-) diff --git a/src/functions/BoysFunction.h b/src/functions/BoysFunction.h index 237707f27..3bd29f89a 100644 --- a/src/functions/BoysFunction.h +++ b/src/functions/BoysFunction.h @@ -32,76 +32,35 @@ namespace mrcpp { /** * @class BoysFunction - * @brief Adaptive multiresolution evaluator for the 1D Boys-type integral - * \f$F_n(x) = \int_{0}^{1} t^{2n}\,e^{-x\,t^2}\,dt\f$. - * - * What this class does - * -------------------- - * This class provides an implementation of MRCPP's @ref RepresentableFunction - * interface for the scalar function \f$F_n(x)\f$ of a single variable \f$x\f$. 
- * Given an input abscissa \f$x\f$, `evalf()`: - * 1. builds the integrand \f$g_x(t)=e^{-x t^2}\,t^{2n}\f$ on \f$t\in[0,1]\f$, - * 2. projects it adaptively into a 1D multiresolution basis (using the - * `MultiResolutionAnalysis<1>` member), - * 3. integrates the resulting @ref FunctionTree over the unit interval. - * - * Notes on conventions - * -------------------- - * - In quantum-chemistry literature, the “Boys function” is often defined with - * an integral to \f$\infty\f$. Here it is the *unit-interval* variant - * \f$[0,1]\f$, consistent with the implementation in the corresponding .cpp. - * - The basis family and order used by the `MRA` are chosen in the .cpp - * definition (currently an interpolating basis of order 13). - * - * Accuracy and performance - * ------------------------ - * - The tolerance passed at construction (`prec`) controls the adaptive - * projection target. Smaller values yield higher accuracy at greater cost. - * - The multiresolution approach concentrates degrees of freedom where the - * integrand has structure (e.g., near \f$t=0\f$ for large \f$x\f$). + * @brief Adaptive multiresolution evaluator for the Boys function + * \f$ F_n(x) = \int_{0}^{1} t^{2n}\mathrm{e}^{-xt^2}\mathrm{d}t \f$, + * where \f$ x\ge0 \f$ and \f$ n\ge0 \f$ */ class BoysFunction final : public RepresentableFunction<1, double> { public: /** - * @brief Construct an evaluator for \f$F_n(x)\f$. + * @brief Construct an evaluator for \f$ F_n(x) \f$ + * @param n The order (\f$ \ge0 \f$) of the Boys function + * @param prec Projection precision for the adaptive MRA (default \f$ 10^{-10} \f$) * - * @param n Non-negative integer order in \f$F_n(x)\f$ (power \f$t^{2n}\f$). - * @param prec Target projection precision for the adaptive MRA - * (default \f$10^{-10}\f$). - * - * Implementation detail: - * The `MRA` member is initialised in the .cpp with a default 1D bounding - * box and a fixed scaling basis; this header does not constrain that choice. 
+ * @details The `MRA` member is initialised in the .cpp with a default 1D bounding + * box and a fixed scaling basis (currently an interpolating basis of order 13); + * this header does not constrain that choice. */ BoysFunction(int n, double prec = 1.0e-10); /** - * @brief Evaluate \f$F_n(x)\f$ at the given abscissa. - * - * @param r Coordinate container with a single component: \f$x = r[0]\f$. - * @return The numerical value of \f$F_n(x)\f$ obtained by adaptively - * projecting and integrating on \f$[0,1]\f$. - * - * Semantics: - * - Satisfies the @ref RepresentableFunction contract. - * - Internally constructs the integrand lambda and invokes the MRCPP - * `project` + `integrate` pipeline on the stored `MRA`. + * @brief Evaluate \f$ F_n(x) \f$ at the given abscissa + * @param r Coordinate container with a single component: \f$ x = r[0] \f$ + * @return The numerical value of \f$ F_n(x) \f$ */ double evalf(const Coord<1> &r) const override; private: - /** @brief Integer order \f$n\f$ in \f$F_n(x)\f$ (kept constant for the lifetime). */ const int order; - /** @brief Target projection tolerance for adaptive representation. */ const double prec; - /** - * @brief Multiresolution context used to project/integrate the integrand. - * - * The concrete basis family and order are configured in the .cpp file. - * The same `MRA` instance is reused across evaluations for efficiency. 
- */ MultiResolutionAnalysis<1> MRA; }; From d6c5451b85becaabfa14233c2cd86e485786ec1c Mon Sep 17 00:00:00 2001 From: ylvao Date: Wed, 12 Nov 2025 11:31:13 +0100 Subject: [PATCH 39/51] documentation for MRA.h and .cpp (empty line fix in nodeindex.h) --- src/trees/MultiResolutionAnalysis.cpp | 89 -------------- src/trees/MultiResolutionAnalysis.h | 166 +++++++++++--------------- src/trees/NodeIndex.h | 1 + 3 files changed, 70 insertions(+), 186 deletions(-) diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp index 43b39c32d..4378ce01d 100644 --- a/src/trees/MultiResolutionAnalysis.cpp +++ b/src/trees/MultiResolutionAnalysis.cpp @@ -32,22 +32,6 @@ namespace mrcpp { -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) - * - * @param[in] bb: 2-element integer array [Lower, Upper] defining the bounds for a BoundingBox object representing the computational domain - * @param[in] order: Maximum polynomial order of the multiwavelet basis, - * immediately used in the constructor of an InterPolatingBasis object which becomes an attribute of the MRA - * @param[in] maxDepth: Exponent of the node refinement in base 2, relative to root scale. - * In other words, it is the maximum amount of refinement that we allow in a node, in other to avoid overflow of values. - * - * @details Constructor of the MultiResolutionAnalysis class from scratch, without requiring any pre-existing complex structure. - * The constructor calls the InterpolatingBasis basis constructor to generate the MultiWavelets basis of functions, - * then the BoundingBox constructor to create the computational domain. The constructor then checks if the generated node depth, or - * node refinement is beyond the root scale or the maximum depth allowed, in which case it will abort the process. 
- * Otherwise, the process goes on to setup the filters with the class' setupFilter method. - */ template MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int order, int depth) : maxDepth(depth) @@ -58,18 +42,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(std::array bb, int o setupFilter(); } -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Constructs a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) from a pre-existing BoundingBox object - * - * @param[in] bb: BoundingBox object representing the computational domain - * @param[in] order: (integer) Maximum polynomial order of the multiwavelet basis, - * immediately used in the constructor of an InterPolatingBasis object which becomes an attribute of the MRA - * @param[in] maxDepth: (integer) Exponent of the node refinement in base 2, relative to root scale. - * In other words, it is the maximum amount of refinement that we allow in a node, in other to avoid overflow of values. - * - * @details Constructor of the MultiResolutionAnalysis class from a BoundingBox object. For more details see the first constructor. - */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, int order, int depth) : maxDepth(depth) @@ -80,14 +52,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, in setupFilter(); } -/** @returns New MultiResolutionAnalysis (MRA) object - * - * @brief Copy constructor for a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) - * - * @param[in] mra: Pre-existing MRA object - * - * @details Copy a MultiResolutionAnalysis object without modifying the original. For more details see the first constructor. 
- */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysis &mra) : maxDepth(mra.maxDepth) @@ -98,17 +62,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const MultiResolutionAnalysi setupFilter(); } -/** @returns New MultiResolutionAnalysis object - * - * @brief Constructor for a MultiResolutionAnalysis object from a pre-existing BoundingBox (computational domain) and a ScalingBasis (Multiwavelet basis) objects - * - * @param[in] bb: Computational domain as a BoundingBox object, taken by constant reference - * @param[in] sb: Polynomial basis (MW) as a ScalingBasis object - * @param[in] depth: Maximum allowed resolution depth, relative to root scale - * - * @details Creates a MRA object from pre-existing BoundingBox and ScalingBasis objects. These objects are taken as reference. For more details about the constructor itself, see the first - * constructor. - */ template MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, const ScalingBasis &sb, int depth) : maxDepth(depth) @@ -119,16 +72,6 @@ MultiResolutionAnalysis::MultiResolutionAnalysis(const BoundingBox &bb, co setupFilter(); } -/** @returns Whether the two MRA objects are equal. - * - * @brief Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis, computational domain and maximum depth, and false otherwise - * - * @param[in] mra: MRA object, taken by constant reference - * - * @details Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis - * object) and maximum depth (integer), and false otherwise. Computations on different MRA cannot be combined, this operator can be used to make sure that the multiple MRAs are compatible. For more - * information about the meaning of equality for BoundingBox and ScalingBasis objets, see their respective classes. 
- */ template bool MultiResolutionAnalysis::operator==(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return false; if (this->world != mra.world) return false; @@ -136,16 +79,6 @@ template bool MultiResolutionAnalysis::operator==(const MultiResoluti return true; } -/** @returns Whether the two MRA objects are not equal. - * - * @brief Inequality operator for the MultiResolutionAnalysis class, returns false if both MRAs have the same polynomial basis, computational domain and maximum depth, and true otherwise - * - * @param[in] mra: MRA object, taken by constant reference - * - * @details Inequality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis - * object) and maximum depth (integer), and false otherwise. Opposite of the == operator. For more information about the meaning of equality for BoundingBox and ScalingBasis objets, see their - * respective classes. - */ template bool MultiResolutionAnalysis::operator!=(const MultiResolutionAnalysis &mra) const { if (this->basis != mra.basis) return true; if (this->world != mra.world) @@ -157,14 +90,6 @@ template bool MultiResolutionAnalysis::operator!=(const MultiResoluti return false; } -/** - * - * @brief Displays the MRA's attributes in the outstream defined in the Printer class - * - * @details This function displays the attributes of the MRA in the using the Printer class. - * By default, the Printer class writes all information in the output file, not the terminal. - * - */ template void MultiResolutionAnalysis::print() const { print::separator(0, ' '); print::header(0, "MultiResolution Analysis"); @@ -174,15 +99,6 @@ template void MultiResolutionAnalysis::print() const { print::separator(0, '=', 2); } -/** - * - * @brief Initializes the MW filters for the given MW basis. 
- * - * @details By calling the get() function for the appropriate MW basis, the global - * FilterCache Singleton object is initialized. Any subsequent reference to this - * particular filter will point to the same unique global object. - * - */ template void MultiResolutionAnalysis::setupFilter() { getLegendreFilterCache(lfilters); getInterpolatingFilterCache(ifilters); @@ -200,11 +116,6 @@ template void MultiResolutionAnalysis::setupFilter() { } } -/** @returns Maximum possible distance between two points in the MRA domain - * - * @brief Computes the difference between the lower and upper bounds of the computational domain - * - */ template double MultiResolutionAnalysis::calcMaxDistance() const { const Coord &lb = getWorldBox().getLowerBounds(); const Coord &ub = getWorldBox().getUpperBounds(); diff --git a/src/trees/MultiResolutionAnalysis.h b/src/trees/MultiResolutionAnalysis.h index 891a3fd6e..c27dc8c42 100644 --- a/src/trees/MultiResolutionAnalysis.h +++ b/src/trees/MultiResolutionAnalysis.h @@ -33,158 +33,130 @@ namespace mrcpp { -/** - * @file MultiResolutionAnalysis.h - * @brief Declaration of the MultiResolutionAnalysis class template. - * - * @details - * A MultiResolutionAnalysis (MRA) bundles the information that must be - * shared by compatible functions and operators: - * - the computational domain (see @ref BoundingBox), - * - the multiresolution scaling basis (see @ref ScalingBasis), and - * - a maximum refinement depth. - * - * Two objects (functions/operators) can only be combined if their MRAs are - * equal, i.e. identical domain, basis order/type, and depth. 
- * - * @par Example - * @code{.cpp} - * using MRA3 = mrcpp::MultiResolutionAnalysis<3>; - * - * // Domain: [-4, 4]^3 with automatically chosen root scale - * mrcpp::BoundingBox<3> world({-4.0, 4.0}); - * - * // Build a 3D MRA with Legendre, order = 7, depth = 12 - * mrcpp::ScalingBasis basis(Legendre, 7); - * MRA3 mra(world, basis, 12); - * - * // Query information - * int order = mra.getOrder(); - * int maxScale = mra.getMaxScale(); - * auto &box = mra.getWorldBox(); - * auto &sbasis = mra.getScalingBasis(); - * @endcode - */ - /** * @class MultiResolutionAnalysis - * @tparam D Spatial dimension (1, 2, or 3). + * @tparam D Spatial dimension (1, 2, or 3) * - * @brief Collects the computational domain and multiresolution basis. + * @brief Class for MultiResolutionAnalysis templates * * @details - * The MRA fixes: - * - the **world box** (domain tiling and scaling), - * - the **scaling basis** (type and polynomial order), and - * - the **maximum depth** of refinement relative to the world’s root scale. - * - * The combination of these parameters determines the finest admissible scale - * via @ref getMaxScale. + * The MultiResolutionAnalysis (MRA) object bundles information that must be shared for + * compatible functions and operators: + * - Computational domain (see @ref BoundingBox) + * - MultiResolution scaling basis, as a polynomial order (see @ref ScalingBasis) + * - Maximum refinement depth, relative to the world’s root scale (= @ref MaxDepth by default) + * + * The class also contains useful functions to compare MRA objects, + * find max and min box sizes and print a human-readable diagnostic for the MRA. */ template class MultiResolutionAnalysis final { public: /** - * @brief Construct from a symmetric domain and a basis order. + * @brief Construct from a symmetric domain and a basis order * - * @param[in] bb Domain bounds as either [0,L] or [-L,L] (L>0). - * @param[in] order Polynomial order of the scaling basis.
- * @param[in] depth Maximum refinement depth (relative to root scale). + * @param[in] bb 2-element integer array defining domain bounds + * @param[in] order Polynomial order of the multiwavelet basis + * @param[in] depth Maximum refinement depth (relative to root scale). Default is \ref MaxDepth * * @details + * Constructor of the MultiResolutionAnalysis class from scratch. * The scaling basis type is chosen by MRCPP defaults for the given @p order. - * The root scale is inferred from @p bb to keep the per-dimension scaling - * factor in (1, 2). + * The root scale is inferred from @p bb to keep the per-dimension scaling factor in (1, 2). */ MultiResolutionAnalysis(std::array bb, int order, int depth = MaxDepth); /** - * @brief Construct from a preconfigured @ref BoundingBox and basis order. + * @brief Constructs MultiResolutionAnalysis object from a pre-existing @ref BoundingBox object + * + * @param[in] bb BoundingBox object representing the computational domain + * @param[in] order Polynomial order of the multiwavelet basis + * @param[in] depth Maximum refinement depth (relative to root scale). Default is \ref MaxDepth * - * @param[in] bb Computational domain (possibly periodic). - * @param[in] order Polynomial order of the scaling basis. - * @param[in] depth Maximum refinement depth. + * @details + * Creates an MRA object from a pre-existing BoundingBox object, @p bb, with a polynomial order, @p order, to set the basis + * and the maximum amount of allowed refinement in a node, @p depth. */ MultiResolutionAnalysis(const BoundingBox &bb, int order, int depth = MaxDepth); /** * @brief Construct from a @ref BoundingBox and a fully specified @ref ScalingBasis. * - * @param[in] bb Computational domain. - * @param[in] sb Scaling basis (type and order). - * @param[in] depth Maximum refinement depth.
+ * @param[in] bb BoundingBox object representing the computational domain + * @param[in] sb Polynomial basis (MW) as a ScalingBasis object + * @param[in] depth Maximum refinement depth (relative to root scale). Default is \ref MaxDepth + + * @details + * Creates a MRA object from pre-existing BoundingBox, @p bb, and ScalingBasis, @p sb, objects + * and the maximum amount of allowed refinement in a node, @p depth. */ MultiResolutionAnalysis(const BoundingBox &bb, const ScalingBasis &sb, int depth = MaxDepth); - /** @brief Copy constructor. */ + /** + * @brief Copy constructor for a MultiResolutionAnalysis object composed of computational domain (world) and a polynomial basis (Multiwavelets) + * @param[in] mra Pre-existing MRA object + * @details Copy a MultiResolutionAnalysis object without modifying the original + */ MultiResolutionAnalysis(const MultiResolutionAnalysis &mra); /** @brief Deleted assignment (MRAs are intended to be immutable after construction). */ MultiResolutionAnalysis &operator=(const MultiResolutionAnalysis &mra) = delete; - /** @brief Return polynomial order of the scaling basis. */ - int getOrder() const { return this->basis.getScalingOrder(); } + /* + * Getters + */ - /** @brief Maximum refinement depth relative to the world’s root scale. */ - int getMaxDepth() const { return this->maxDepth; } + int getOrder() const { return this->basis.getScalingOrder(); } ///< @return Polynomial order of the scaling basis + int getMaxDepth() const { return this->maxDepth; } ///< @return Maximum refinement depth relative to the world’s root scale + int getMaxScale() const { return this->world.getScale() + this->maxDepth; } ///< @return Sum of world root scale and maximum refinement depth, @ref getMaxDepth + int getRootScale() const { return this->world.getScale(); } ///< @return World root scale - /** - * @brief Absolute finest scale index. - * - * @details - * This is the sum of the world root scale and @ref getMaxDepth, i.e. 
- * the maximum scale the MRA allows trees to reach. - */ - int getMaxScale() const { return this->world.getScale() + this->maxDepth; } - - /** @brief Low-level filter associated with the current basis. */ - const MWFilter &getFilter() const { return *this->filter; } - - /** @brief The scaling basis specification (type and order). */ - const ScalingBasis &getScalingBasis() const { return this->basis; } - - /** @brief The computational domain (world box). */ - const BoundingBox &getWorldBox() const { return this->world; } + const MWFilter &getFilter() const { return *this->filter; } ///< @return Low-level filter associated with the current basis + const ScalingBasis &getScalingBasis() const { return this->basis; } ///< @return Scaling basis type and order + const BoundingBox &getWorldBox() const { return this->world; } ///< @return Computational domain (world box) /** - * @brief Convenience: compute a minimal length scale from a tolerance. - * - * @param[in] epsilon Target tolerance. + * @brief Convenience: compute a minimal length scale from a tolerance + * @param[in] epsilon Target tolerance * @return A distance proportional to \f$\sqrt{\epsilon\,2^{-\mathrm{maxScale}}}\f$. */ double calcMinDistance(double epsilon) const { return std::sqrt(epsilon * std::pow(2.0, -getMaxScale())); } /** - * @brief Convenience: compute a maximal relevant distance. - * - * @details The exact definition is basis-dependent and implemented in - * the corresponding source file. + * @brief Convenience: compute a maximal relevant distance + * @return Maximum distance of computational (world) domain + * @note The exact definition is basis-dependent */ double calcMaxDistance() const; - /** @brief Root (coarsest) scale index of the world. */ - int getRootScale() const { return this->world.getScale(); } - /** - * @brief Equality: same world, same basis (type & order), same depth. 
+ * @brief Equality operator for the MultiResolutionAnalysis class (basis, domain, depth) * - * @note Two MRAs must compare equal to allow mixing functions/operators. + * @param[in] mra MRA object, taken by constant reference + * @returns True if both MRAs have the same polynomial basis, computational domain and maximum depth + * + * @note Two MRAs must be equal to allow mixing functions/operators */ bool operator==(const MultiResolutionAnalysis &mra) const; - /** @brief Inequality. */ + /** + * @brief Inequality operator for the MultiResolutionAnalysis class (basis, domain, depth) + * @param[in] mra MRA object, taken by constant reference + * @returns True if MRAs have a different polynomial basis, computational domain or maximum depth + */ bool operator!=(const MultiResolutionAnalysis &mra) const; - /** @brief Human-readable diagnostics to stdout. */ - void print() const; + void print() const; ///< @brief Displays human-readable diagnostics of MRA to the output file protected: - const int maxDepth; ///< Maximum refinement depth permitted by this MRA. - const ScalingBasis basis; ///< Scaling basis (type and polynomial order). - const BoundingBox world; ///< Computational domain description. - MWFilter *filter; ///< Low-level filter derived from @ref basis. + const int maxDepth; ///< Maximum refinement depth permitted by this MRA + const ScalingBasis basis; ///< Scaling basis (type and polynomial order) + const BoundingBox world; ///< Computational domain description + MWFilter *filter; ///< Low-level filter derived from @ref basis - /** @brief Internal helper to instantiate @ref filter based on @ref basis.
*/ + /** + * @brief Internal helper to instantiate @ref filter based on @ref basis + */ void setupFilter(); }; diff --git a/src/trees/NodeIndex.h b/src/trees/NodeIndex.h index e230afef6..7c7cf1081 100644 --- a/src/trees/NodeIndex.h +++ b/src/trees/NodeIndex.h @@ -124,6 +124,7 @@ template class NodeIndex final { /* * Getters and setters */ + int getScale() const { return this->N; } ///< @return Scale of node std::array getTranslation() const { return this->L; } ///< @return Full translation vector void setScale(int n) { this->N = static_cast(n); } ///< @param n Scale of node From f3c26173d4725ef46ee094293b387ec3a2c5ff08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Wed, 12 Nov 2025 16:43:21 +0100 Subject: [PATCH 40/51] AnalyticFunction documented --- src/functions/AnalyticFunction.h | 99 +++++++------------------------- 1 file changed, 21 insertions(+), 78 deletions(-) diff --git a/src/functions/AnalyticFunction.h b/src/functions/AnalyticFunction.h index 2705b7ddc..938718d72 100644 --- a/src/functions/AnalyticFunction.h +++ b/src/functions/AnalyticFunction.h @@ -34,67 +34,26 @@ namespace mrcpp { /** * @class AnalyticFunction - * @tparam D Spatial dimension (1, 2, 3, …). - * @tparam T Numeric value type (defaults to double). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * @brief Thin adapter that turns a C++ callable `std::function&)>` - * into a @ref RepresentableFunction suitable for MRCPP algorithms. - * - * Motivation - * ---------- - * Many MRCPP routines operate on the abstract interface `RepresentableFunction` - * (which provides domain bounds and an `evalf()` method). `AnalyticFunction` lets - * users plug in any analytic lambda or function pointer without writing a full - * derived class. - * - * Domain handling - * --------------- - * The base class @ref RepresentableFunction stores lower/upper bounds for each - * coordinate dimension. 
`AnalyticFunction::evalf` first checks - * `RepresentableFunction::outOfBounds(r)` and **returns 0** for points outside - * the domain; otherwise it forwards to the user-supplied callable. - * - * Typical usage - * ------------- - * @code - * using F = AnalyticFunction<2>; - * std::vector a = {0.0, 0.0}; - * std::vector b = {1.0, 2.0}; - * F f( - * [](const Coord<2>& x) -> double { - * // x[0] = x, x[1] = y - * return std::sin(x[0]) * std::exp(-x[1]); - * }, - * a, b - * ); - * Coord<2> p; p[0] = 0.3; p[1] = 1.5; - * double v = f.evalf(p); // evaluates lambda if p within [a,b] - * @endcode - * - * Thread-safety - * ------------- - * `AnalyticFunction` itself holds only an immutable std::function after construction. - * It is safe to call `evalf` concurrently *iff your callable is thread-safe* and - * does not mutate shared state. + * @brief Implementation of @ref RepresentableFunction that wraps a user-supplied analytic callable */ template class AnalyticFunction : public RepresentableFunction { public: - /** @brief Default constructor; leaves the callable empty. */ + /** @brief Default constructor; leaves the callable empty */ AnalyticFunction() = default; - /** @brief Virtual destructor to match the base class interface. */ + /** @brief Virtual destructor to match the base class interface */ ~AnalyticFunction() override = default; /** - * @brief Construct with a callable and optional raw-pointer bounds. - * - * @param f Callable of signature `T(const Coord&)`. - * @param a Optional pointer to an array of D lower bounds (can be nullptr). - * @param b Optional pointer to an array of D upper bounds (can be nullptr). + * @brief Constructor with raw pointers for the bounds * - * The bounds are forwarded to the @ref RepresentableFunction base; if both - * are nullptr the base uses its defaults (implementation-defined).
+ * @param f The analytic function which is evaluated in this class + * @param a Optional raw pointer to an array of D lower bounds (can be nullptr) + * @param b Optional raw pointer to an array of D upper bounds (can be nullptr) */ AnalyticFunction(std::function &r)> f, const double *a = nullptr, @@ -103,14 +62,11 @@ class AnalyticFunction : public RepresentableFunction { , func(f) {} /** - * @brief Construct with a callable and STL vector bounds. - * - * @param f Callable of signature `T(const Coord&)`. - * @param a Vector of D lower bounds. - * @param b Vector of D upper bounds. + * @brief Overload constructor with std::vector for the bounds * - * Convenience overload that forwards raw pointers of the vectors to the - * other constructor. The vectors must have length D. + * @param f The analytic function which is evaluated in this class + * @param a Vector of D lower bounds. + * @param b Vector of D upper bounds. */ AnalyticFunction(std::function &r)> f, const std::vector &a, @@ -118,24 +74,18 @@ class AnalyticFunction : public RepresentableFunction { : AnalyticFunction(f, a.data(), b.data()) {} /** - * @brief Replace the underlying callable at runtime. - * - * @param f New callable `T(const Coord&)`. - * - * No synchronization is performed; if other threads may call `evalf` - * concurrently, arrange external synchronization. + * @brief Set the analytic function to be evaluated + * @param f New analytic function */ void set(std::function &r)> f) { this->func = f; } /** - * @brief Evaluate the function at coordinate @p r. - * - * Behavior: - * - If @p r lies outside the domain bounds (per `outOfBounds(r)`), return 0. - * - Otherwise, return `func(r)`. + * @brief Evaluate the analytic function at coordinate @p r. + * @param r Coordinate where to evaluate the function * - * @note Returning 0 outside the domain is consistent with how many MRCPP - * integrators and projectors treat functions on bounded supports. 
+ * @details Checks if the point is within bounds before evaluating + * + * @return The function value at point @p r */ T evalf(const Coord &r) const override { T val = T(0); @@ -144,14 +94,7 @@ class AnalyticFunction : public RepresentableFunction { } protected: - /** - * @brief Stored analytic callable. - * - * The signature uses `Coord` (MRCPP’s fixed-size coordinate array). - * The callable should be side-effect free or externally synchronized if - * used from multiple threads. - */ - std::function &r)> func; + std::function &r)> func; ///< User-provided analytic function }; } // namespace mrcpp \ No newline at end of file From 7d0a0e324cee027753acbcb7cdf6580506ff14a5 Mon Sep 17 00:00:00 2001 From: Luca Frediani Date: Thu, 13 Nov 2025 12:46:48 +0100 Subject: [PATCH 41/51] Documents TreeIterator.h --- src/trees/TreeIterator.h | 166 ++++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/src/trees/TreeIterator.h b/src/trees/TreeIterator.h index b9c913570..33eee6647 100644 --- a/src/trees/TreeIterator.h +++ b/src/trees/TreeIterator.h @@ -28,21 +28,28 @@ * @brief Iteration helpers for traversing multiwavelet trees. * * @details - * This header provides a generic depth-aware iterator over @ref MWTree nodes. + * This header provides a depth-aware iterator over the nodes of a @ref MWTree. 
* It supports different **traversal directions** and **node-ordering schemes**, * selected via constants defined in @c MRCPP/constants.h: * - Traversal mode: @c TopDown or @c BottomUp - * - Iterator type: @c Lebesgue (Z-order) or @c Hilbert (space-filling) + * In the @c TopDown mode, one iterates from the first root node and recursively + * over the children. + * In the @c BottomUp mode, one first traverses the tree all the way down to the + * leaves and then starts iterating from there. + * - Iterator type: @c Lebesgue (Z-order) or @c Hilbert * - * The iterator yields @ref MWNode instances from one or more root nodes, - * honoring a user-provided maximum depth and whether *generated* (non-end) - * nodes should be returned. + * The iterator yields @ref MWNode instances in the requested sequence determined by + * the parameters above + * + * The file contains two classes: @ref TreeIterator and @ref IteratorNode. + * The @ref TreeIterator is the main interface for users, while the @ref IteratorNode + * is mainly a placeholder for a few node-specific flags. * * @par Example * @code{.cpp} * using namespace mrcpp; * TreeIterator<3,double> it(tree, TopDown, Lebesgue); - * it.setReturnGenNodes(true); // include generated/branch nodes + * it.setReturnGenNodes(true); // include generated nodes * it.setMaxDepth(5); // restrict to depth <= 5 * * while (it.next()) { @@ -61,98 +68,90 @@ namespace mrcpp { /** * @class TreeIterator - * @brief Stateful iterator for traversing an @ref MWTree. * - * @tparam D Spatial dimensionality (1, 2, or 3). - * @tparam T Coefficient type (e.g., @c double or @c ComplexDouble). + * @brief Iterator for traversing an @ref MWTree. + * + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g.
double, ComplexDouble) * * @details - * The iterator walks the tree starting from each root node, producing nodes + * The iterator traverses the tree starting from the root node(s), producing nodes * according to: - * - a **traversal direction** (@c TopDown or @c BottomUp), and - * - an **ordering scheme** within siblings (@c Lebesgue or @c Hilbert). + * - a **traversal direction** ( @c TopDown or @c BottomUp), and + * - an **ordering scheme** within siblings ( @c Lebesgue or @c Hilbert). * * The behavior can be refined with: * - @ref setReturnGenNodes() to toggle inclusion of generated (non-leaf) nodes, * - @ref setMaxDepth() to limit the traversal depth, * - @ref setTraverse() / @ref setIterator() to change policies at runtime. * - * The iteration state is represented by a small internal linked stack of - * @ref IteratorNode frames. + * The iteration state is represented by an internal linked stack of + * @ref IteratorNode instances. */ template class TreeIterator { public: /** * @brief Construct a detached iterator (no tree bound yet). + * * @param traverse Traversal mode (e.g., @c TopDown or @c BottomUp). * @param iterator Node-ordering mode (e.g., @c Lebesgue or @c Hilbert). * * @note Call @ref init() before the first @ref next() if you use this ctor. */ TreeIterator(int traverse = TopDown, int iterator = Lebesgue); - /** * @brief Construct an iterator bound to a tree. + * * @param tree Tree to traverse. * @param traverse Traversal mode (e.g., @c TopDown or @c BottomUp). * @param iterator Node-ordering mode (e.g., @c Lebesgue or @c Hilbert). */ TreeIterator(MWTree &tree, int traverse = TopDown, int iterator = Lebesgue); - /// @brief Destructor (releases internal traversal state). virtual ~TreeIterator(); - - /** - * @brief Include/exclude generated (non-end) nodes in the iteration stream. - * @param i If @c true, generated nodes are returned by @ref next(). - * If @c false, only end (leaf) nodes are produced.
- */ - void setReturnGenNodes(bool i = true) { this->returnGenNodes = i; } - - /** - * @brief Set maximum depth measured from the root scale. - * @param depth Non-negative maximum depth; if negative, no limit is applied. - */ - void setMaxDepth(int depth) { this->maxDepth = depth; } - - /** - * @brief Change traversal mode at runtime. - * @param traverse @c TopDown or @c BottomUp (see @c MRCPP/constants.h). - * @warning Changing mode invalidates in-flight assumptions; call before @ref init(). - */ - void setTraverse(int traverse); - - /** - * @brief Change sibling-ordering policy at runtime. - * @param iterator @c Lebesgue or @c Hilbert (see @c MRCPP/constants.h). - * @warning Changing mode invalidates in-flight assumptions; call before @ref init(). - */ - void setIterator(int iterator); - + void setReturnGenNodes(bool i = true) { this->returnGenNodes = i; } ///< @param i If true, generated nodes are included in the sequence. + void setMaxDepth(int depth) { this->maxDepth = depth; } ///< @param depth Non-negative maximum depth; if negative, no limit is applied. + void setTraverse(int traverse);///< @param traverse set Traversal mode (@c TopDown or @c BottomUp). + void setIterator(int iterator);///< @param iterator set Iterator type (@c Lebesgue or @c Hilbert). + MWNode &getNode() { return *this->state->node; } ///< @return Reference to the node yielded by the last successful @ref next() / @ref nextParent(). /** * @brief Bind the iterator to a tree and reset traversal state. + * * @param tree Tree to traverse. */ void init(MWTree &tree); - /** * @brief Advance to the next node according to the current policy. + * * @return @c true if a node is available (use @ref getNode()), @c false when finished. + * + * @details + * if the current @ref IteratorNode is null, return false. + * In @c TopDown mode, try to return the current node first. + * If successful, return true. 
+ * If not, check if the current node has children, and try to return + * the next child node according to the ordering scheme. + * If successful, return true. + * If not, try to move to the next root node, and return its first node + * according to the ordering scheme. + * If successful, return true. + * If not, in @c BottomUp mode, try to return the current node. + * If successful, return true. + * If not, remove the current state and recur invoking a new @ref next(). */ bool next(); - /** - * @brief Move the cursor to the parent of the current node (if any). - * @return @c true if the parent exists and becomes current, otherwise @c false. + * @brief Advance to the next parent node according to the current policy. + * + * @return @c true if the parent node is available, @c false when finished. + * + * @details + * Returns the current node or the parent of the current node. The logic makes sure the correct + * parent is returned according to the traversal mode and ordering scheme. In case of PBC calculations, + * the parent may be above the root nodes defining the unit cell. */ bool nextParent(); - /** - * @brief Access the current node. - * @return Reference to the node yielded by the last successful @ref next() / @ref nextParent(). - */ - MWNode &getNode() { return *this->state->node; } - friend class IteratorNode; protected: @@ -165,52 +164,57 @@ template class TreeIterator { IteratorNode *state{nullptr}; ///< Current traversal frame. IteratorNode *initialState{nullptr}; ///< Initial frame for the current root. - /// @brief Map logical child order [0..2^D) to physical child index based on @ref type. 
- int getChildIndex(int i) const; - - /// @name Traversal helpers - ///@{ - bool tryParent(); - bool tryChild(int i); - bool tryNode(); - bool tryNextRoot(); - bool tryNextRootParent(); - void removeState(); - bool checkDepth(const MWNode &node) const; - bool checkGenerated(const MWNode &node) const; - ///@} + int getChildIndex(int i) const; ///< @brief Map logical child order [0..2^D) to actual child index based on @ref type. +/** + * @name try... methods + * @brief The following methods test if the node of a given type should be returned. + * @details In addition to returning @c true or @c false, these methods also update the internal + * traversal state accordingly. + * @{ + */ + bool tryParent(); ///< @return @c true if the parent node should be returned. + bool tryChild(int i);///< @return @c true if the child at index @p i should be returned. + bool tryNode(); ///< @return @c true if the current node should be returned. + bool tryNextRoot(); ///< @return @c true if the next root node should be returned. + bool tryNextRootParent(); ///< @return @c true if the parent of the next root node is available and should be returned. +/** @} */ + void removeState(); ///< @brief Remove the current traversal frame from the stack. + bool checkDepth(const MWNode &node) const; ///< @return @c true if the node is within the max depth limit. + bool checkGenerated(const MWNode &node) const; ///< @return @c true if the generated nodes should be included. }; /** * @class IteratorNode - * @brief Lightweight frame holding traversal state for one MW node. + * @brief Iterator representing a node in the traversal stack. * - * @tparam D Spatial dimensionality (1, 2, or 3). - * @tparam T Coefficient type (e.g., @c double or @c ComplexDouble). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * * @details - * The iterator maintains a small linked list (stack) of these frames while - * walking the tree. 
Each frame keeps: + * This is an internal placeholder which contains both the pointer to the actual node to return and + * flags to determine if itself, its parent and its children have been already returned. + * It contains: * - a pointer to the node, - * - a link to the previous frame, + * - a link to the next node in the stack * - completion flags for the current node, its parent, and its children. */ template class IteratorNode final { public: MWNode *node; ///< Current node. - IteratorNode *next; ///< Previous frame in the stack. - bool doneNode; ///< Whether the node itself has been yielded. - bool doneParent; ///< Whether the parent transition has been attempted. - bool doneChild[1 << D]; ///< Whether each child has been attempted. + IteratorNode *next; ///< Next node in the stack. + bool doneNode; ///< Whether the node itself has been used. + bool doneParent; ///< Whether the parent node has been used. + bool doneChild[1 << D]; ///< Whether each child has been used. /** - * @brief Construct a traversal frame. + * @brief Construct a new iterator + * * @param nd Pointer to the MW node represented by this frame. - * @param nx Link to the previous frame (can be @c nullptr). + * @param nx Link to the next iterator (can be @c nullptr). */ IteratorNode(MWNode *nd, IteratorNode *nx = nullptr); - /// @brief Recursively delete the linked frames that follow this one. + /// @brief Recursively delete the linked iterators that follow this one. 
~IteratorNode() { delete this->next; } }; From 5c05207a681a164dd3497e6bb9e0ef9aa9020208 Mon Sep 17 00:00:00 2001 From: Jacopo_Masotti Date: Tue, 11 Nov 2025 16:03:52 +0100 Subject: [PATCH 42/51] function_utils both .h .cpp: BUT in the .h there was no instance of the function ObaraSaika_ab, so I added it with documentation --- src/functions/function_utils.cpp | 17 ------- src/functions/function_utils.h | 85 ++++++++++++++------------------ 2 files changed, 38 insertions(+), 64 deletions(-) diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp index 598c9b12a..2d6d8280e 100644 --- a/src/functions/function_utils.cpp +++ b/src/functions/function_utils.cpp @@ -38,24 +38,7 @@ template double function_utils::calc_overlap(const GaussFunc &a, cons return S; } -/** Compute the monodimensional overlap integral between two - gaussian distributions by means of the Obara-Saika recursiive - scheme - \f[ S_{ij} = \int_{-\infty}^{+\infty} \,\mathrm{d} x - (x-x_a)^{p_a} - (x-x_b)^{p_b} - e^{-c_a (x-x_a)^2} - e^{-c_b (x-x_b)^2}\f] - - @param power_a \f$ p_a \f$ - @param power_b \f$ p_b \f$ - @param pos_a \f$ x_a \f$ - @param pos_b \f$ x_b \f$ - @param expo_a \f$ c_a \f$ - @param expo_b \f$ c_b \f$ - - */ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) { int i, j; double expo_p, mu, pos_p, x_ab, x_pa, x_pb, s_00; diff --git a/src/functions/function_utils.h b/src/functions/function_utils.h index 36e2e0521..09c27e5b7 100644 --- a/src/functions/function_utils.h +++ b/src/functions/function_utils.h @@ -23,53 +23,7 @@ * */ -/** - * @file GaussExp.cpp (lightweight connector) - * - * @brief Ties together Gaussian primitives and exponential utilities, and - * exposes (via forward declaration) the templated overlap routine - * without pulling in heavier headers that could cause cycles. 
- * - * What this TU does - * ----------------- - * - Includes: - * - "GaussExp.h": utilities for Gaussian/exponential expressions used - * elsewhere in MRCPP (e.g., Boys integrals, screened interactions). - * - "Gaussian.h": the definition of `GaussFunc`, i.e., a Cartesian - * Gaussian primitive storing powers, center, exponent(s), and a coefficient. - * - Declares (but does not define) the templated function - * `function_utils::calc_overlap(const GaussFunc&, const GaussFunc&)`. - * The definition lives in the function-utils implementation unit - * (see `function_utils.cpp`), which provides the Obara–Saika-based 1D core. - * - * Why only a forward declaration here? - * ------------------------------------ - * - To avoid including a potentially heavy implementation header (and risking - * circular dependencies), we forward-declare the template in the *same* - * namespace `mrcpp::function_utils`. This enables use sites that only need - * the signature to compile quickly, while the actual template definition - * will be instantiated by the linker when the corresponding .cpp is linked. - * - * Notes on templates and linkage - * ------------------------------ - * - Because this is only a declaration, any translation unit that actually - * *uses* `calc_overlap` must see the template **definition** (e.g., by - * including the proper header or by relying on explicit instantiations - * provided in the implementation TU). MRCPP provides common explicit - * instantiations (e.g., D = 1, 2, 3) in `function_utils.cpp`. 
- * - * Example usage - * ------------- - * @code - * #include "Gaussian.h" - * // (this file is included transitively somewhere) - * using mrcpp::GaussFunc; - * using mrcpp::function_utils::calc_overlap; - * - * GaussFunc<3> gA(...), gB(...); - * double S = calc_overlap<3>(gA, gB); // calls Obara–Saika-backed routine - * @endcode - */ + #include "GaussExp.h" #include "Gaussian.h" @@ -80,7 +34,44 @@ namespace mrcpp { // Keeping this here avoids heavy includes and potential include cycles. namespace function_utils { template + +/** + * @brief Compute the monodimensional overlap integral between two + * gaussian distributions by means of the Obara-Saika recursive + * scheme + * + * \f[ S_{ij} = \int_{-\infty}^{+\infty} \,\mathrm{d} x + * (x-x_a)^{p_a} + * (x-x_b)^{p_b} + * e^{-c_a (x-x_a)^2} + * e^{-c_b (x-x_b)^2} \f] + * + * @param power_a \f$ p_a \f$ + * @param power_b \f$ p_b \f$ + * @param pos_a \f$ x_a \f$ + * @param pos_b \f$ x_b \f$ + * @param expo_a \f$ c_a \f$ + * @param expo_b \f$ c_b \f$ + * + * @return The value of the overlap integral as a double + */ +double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) + +/** + * @brief Compute the overlap integral between two Gaussian functions. 
+ * + * @param[in] a The first Gaussian function + * @param[in] b The second Gaussian function + * + * @return The value of the overlap integral + */ double calc_overlap(const GaussFunc &a, const GaussFunc &b); + + + + + + } // namespace function_utils } // namespace mrcpp \ No newline at end of file From fd8332d64e6cfb374e9cba9d1d636ae049342652 Mon Sep 17 00:00:00 2001 From: Jacopo_Masotti Date: Tue, 11 Nov 2025 16:13:27 +0100 Subject: [PATCH 43/51] special_function .h and .cpp documented --- src/functions/special_functions.cpp | 51 +------------------ src/functions/special_functions.h | 78 +++++++++++++---------------- 2 files changed, 38 insertions(+), 91 deletions(-) diff --git a/src/functions/special_functions.cpp b/src/functions/special_functions.cpp index 555528a58..8c8c21972 100644 --- a/src/functions/special_functions.cpp +++ b/src/functions/special_functions.cpp @@ -29,31 +29,7 @@ namespace mrcpp { -/** @brief Free-particle time evolution on real line. - * - * @param[in] x: space coordinate in \f$ \mathbb R \f$. - * @param[in] x0: \f$ x_0 \f$ center of gaussian function at zero time moment. - * @param[in] t: time moment. - * @param[in] sigma: \f$ \sigma \f$ width of the initial gaussian wave. - * - * @details Analytical solution of a one dimensional free-particle - * movement - * \f[ - * \psi(x, t) - * = - * \sqrt{ - * \frac{ \sigma }{ 4it + \sigma } - * } - * e^{ - \frac { (x - x_0)^2 }{ 4it + \sigma } } - * \f] - * where \f$ t, \sigma > 0 \f$. - * - * @returns The complex-valued wave function - * \f$ \psi(x, t) \f$ - * at the specified space coordinate and time. - * - * - */ + std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma) { std::complex i(0.0, 1.0); // Imaginary unit @@ -64,35 +40,12 @@ std::complex free_particle_analytical_solution(double x, double x0, doub return std::sqrt(sigma) / sqrt_denom * std::exp(exponent); } - - -/** @brief A smooth compactly supported non-negative function. 
- * - * @param[in] x: space coordinate in \f$ \mathbb R \f$. - * @param[in] a: the left support boundary. - * @param[in] b: the right support boundary. - * - * @details Smooth function on the real line \f$ \mathbb R \f$ - * defined by the formula - * \f[ - * g_{a,b} (x) = \exp \left( - \frac{b - a}{(x - a)(b - x)} \right) - * , \quad - * a < x < b - * \f] - * and \f$ g_{a,b} (x) = 0 \f$ elsewhere. - * - * @returns The non-negative value - * \f$ g_{a,b} (x) \f$ - * at the specified space coordinate \f$ x \in \mathbb R \f$. - * - * - */ double smooth_compact_function(double x, double a, double b) { double res = 0; if (a < x && x < b) { res = exp((a - b) / (x - a) / (b - x)); } return res; -} +} } // namespace mrcpp \ No newline at end of file diff --git a/src/functions/special_functions.h b/src/functions/special_functions.h index 41a5d31a4..d2043cb20 100644 --- a/src/functions/special_functions.h +++ b/src/functions/special_functions.h @@ -30,59 +30,53 @@ namespace mrcpp { -/** - * # Free-particle Gaussian propagation (analytic form) +/** @brief Free-particle time evolution on real line. * - * @brief Analytic solution \f$\psi(x,t)\f$ of the 1D free-particle Schrödinger equation - * for a Gaussian initially centered at \f$x_0\f$ with width parameter \f$\sigma>0\f$. + * @param[in] x: space coordinate in \f$ \mathbb R \f$. + * @param[in] x0: \f$ x_0 \f$ center of gaussian function at zero time moment. + * @param[in] t: time moment. + * @param[in] sigma: \f$ \sigma \f$ width of the initial gaussian wave. * - * This declaration corresponds to the definition in `special_functions.cpp`. 
The solution used is + * @details Analytical solution of a one dimensional free-particle + * movement * \f[ - * \psi(x,t) - * = - * \sqrt{\frac{\sigma}{\,\sigma + 4\, i\, t\,}}\; - * \exp\!\left(-\,\frac{(x-x_0)^2}{\,\sigma + 4\, i\, t\,}\right), + * \psi(x, t) + * = + * \sqrt{ + * \frac{ \sigma }{ 4it + \sigma } + * } + * e^{ - \frac { (x - x_0)^2 }{ 4it + \sigma } } * \f] - * which matches the conventional free propagator with units chosen such that \f$\hbar=1\f$ - * and mass \f$m=\tfrac12\f$ (hence the factor \f$4it\f$ in the denominator). - * - * @param x Real-space coordinate \f$x \in \mathbb{R}\f$. - * @param x0 Initial center \f$x_0\f$ of the Gaussian at \f$t=0\f$. - * @param t Time \f$t \in \mathbb{R}\f$. - * @param sigma Positive width parameter \f$\sigma>0\f$ of the initial Gaussian. - * - * @return Complex value of \f$\psi(x,t)\f$ at the requested point. - * - * @note The complex square root in the prefactor is taken on the principal branch - * by `std::sqrt(std::complex)`. - * @note For \f$t=0\f$, the expression reduces to \f$\psi(x,0)=\exp\!\big(-\tfrac{(x-x_0)^2}{\sigma}\big)\f$. + * where \f$ t, \sigma > 0 \f$. + * + * @returns The complex-valued wave function + * \f$ \psi(x, t) \f$ + * at the specified space coordinate and time. + * + * */ std::complex free_particle_analytical_solution(double x, double x0, double t, double sigma); -/** - * # Smooth compactly supported bump +/** @brief A smooth compactly supported non-negative function. * - * @brief A smooth (\f$C^\infty\f$) non-negative function supported on the open interval \f$(a,b)\f$. + * @param[in] x: space coordinate in \f$ \mathbb R \f$. + * @param[in] a: the left support boundary. + * @param[in] b: the right support boundary. 
* - * The function is defined by + * @details Smooth function on the real line \f$ \mathbb R \f$ + * defined by the formula * \f[ - * g_{a,b}(x) = - * \begin{cases} - * \exp\!\Big(-\dfrac{b-a}{(x-a)(b-x)}\Big), & a < x < b,\\[6pt] - * 0, & \text{otherwise}, - * \end{cases} + * g_{a,b} (x) = \exp \left( - \frac{b - a}{(x - a)(b - x)} \right) + * , \quad + * a < x < b * \f] - * and vanishes to **all orders** at the endpoints \f$a\f$ and \f$b\f$. - * - * @param x Real-space coordinate \f$x \in \mathbb{R}\f$. - * @param a Left endpoint of support (default `0`). - * @param b Right endpoint of support (default `1`). - * - * @return The value \f$g_{a,b}(x)\f$. - * - * @note If \f$a \ge b\f$, the function is identically zero for all \f$x\f$. - * @warning Near the endpoints, the denominator \f$(x-a)(b-x)\f$ becomes small; - * the exponent is large and negative so the result underflows smoothly to zero. + * and \f$ g_{a,b} (x) = 0 \f$ elsewhere. + * + * @returns The non-negative value + * \f$ g_{a,b} (x) \f$ + * at the specified space coordinate \f$ x \in \mathbb R \f$. 
+ * + * */ double smooth_compact_function(double x, double a = 0, double b = 1); From d42b601157e8341eb285d75fe70f134c3662c3e4 Mon Sep 17 00:00:00 2001 From: Jacopo_Masotti Date: Tue, 11 Nov 2025 17:00:58 +0100 Subject: [PATCH 44/51] Gaussian .h and .cpp documented --- src/functions/Gaussian.cpp | 11 +- src/functions/Gaussian.h | 222 +++++++++++++++---------------------- 2 files changed, 91 insertions(+), 142 deletions(-) diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp index 6dbfa7c5b..e6431d971 100644 --- a/src/functions/Gaussian.cpp +++ b/src/functions/Gaussian.cpp @@ -171,16 +171,7 @@ template double Gaussian::calcOverlap(const Gaussian &inp) const { return S; } -/** @brief Generates a GaussExp that is semi-periodic around a unit-cell - * - * @returns Semi-periodic version of a Gaussian around a unit-cell - * @param[in] period: The period of the unit cell - * @param[in] nStdDev: Number of standard diviations covered in each direction. Default 4.0 - * - * @details nStdDev = 1, 2, 3 and 4 ensures atleast 68.27%, 95.45%, 99.73% and 99.99% of the - * integral is conserved with respect to the integration limits. - * - */ + template GaussExp Gaussian::periodify(const std::array &period, double nStdDev) const { GaussExp gauss_exp; auto pos_vec = std::vector>(); diff --git a/src/functions/Gaussian.h b/src/functions/Gaussian.h index 5048c66b2..7683f4aca 100644 --- a/src/functions/Gaussian.h +++ b/src/functions/Gaussian.h @@ -23,51 +23,6 @@ * */ -/** - * Base class for Gaussian type functions - * - * High-level overview - * ------------------- - * This header declares the abstract template class Gaussian, a common base - * for concrete Gaussian primitives used throughout MRCPP. A Gaussian here is - * a separable Cartesian function in D dimensions of the form - * - * f(x) = coef * Π_{d=0..D-1} (x_d - pos[d])^{power[d]} * exp(-alpha[d] * (x_d - pos[d])^2), - * - * where: - * - coef : global scalar amplitude (double). 
- * - power[d] : non-negative integer power of the monomial factor in dim d. - * - alpha[d] : strictly positive exponent in dim d (width parameter). - * - pos[d] : center coordinate in dim d. - * - * The class only provides *common infrastructure* (storage, screening helpers, - * normalization by norm, simple algebra on prefactors, batch evaluation stubs, - * etc.). Concrete subclasses implement the analytic pieces that depend on the - * exact Gaussian flavor (e.g. GaussFunc, GaussPoly), such as: - * - evalf(...) : the actual evaluation at a point. - * - evalf1D(...) : 1D component evaluation used in tensorized loops. - * - calcSquareNorm() : exact L2 norm. - * - differentiate() : derivative producing a polynomial × Gaussian (GaussPoly). - * - asGaussExp() : expansion into sum of pure Gaussians if needed. - * - * Screening and visibility - * ------------------------ - * Gaussian supports optional *screening* (axis-aligned bounding boxes) to skip - * work on dyadic tiles that are provably negligible. See: - * - calcScreening(stdDeviations) : builds [A,B] bounds as ± nσ around pos. - * - checkScreen(n, l) : tile-level cull test at dyadic scale n. - * - isVisibleAtScale(...) : heuristic visibility vs. resolution. - * - isZeroOnInterval(...) : quick interval culling via ±5σ rule. - * - * Relations to other types - * ------------------------ - * - GaussExp: an expansion (sum) of Gaussian-like terms. - * - GaussFunc: Gaussian with a *single* monomial factor (derived class). - * - GaussPoly: Gaussian multiplied by a *polynomial* (derivative results). - * - * Thread-safety: instances are regular value objects; no shared state. - */ - #pragma once #include @@ -81,47 +36,33 @@ namespace mrcpp { /** - * @tparam D Spatial dimension (1, 2, or 3 in MRCPP usage). - * * @class Gaussian - * @brief Abstract base for separable Cartesian Gaussians in D dimensions. 
- * - * Interface summary - * ----------------- - * Construction: - * - Gaussian(a, c, r, p) : isotropic exponent (alpha[d]=a). - * - Gaussian(alpha[], c, r, p) : anisotropic exponents per axis. - * - * Core virtuals (must be implemented by derived classes): - * - copy() : virtual clone (CRTP alternative). - * - evalf(r) : value at point r (D-vector). - * - evalf1D(x, d) : 1D factor along axis d (helper). - * - calcSquareNorm() : exact ∥f∥². - * - asGaussExp() : expansion into GaussExp. - * - differentiate(dir) : analytic derivative → GaussPoly. + * + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * Utilities provided here: - * - evalf(points, values) : batch evaluation per-axis (column-wise). - * - calcOverlap(inp) : ⟨this|inp⟩ via GaussExp + Obara–Saika. - * - periodify(period, nσ) : semi-periodic replication into a GaussExp. - * - calcScreening(nσ) : build ±nσ bounds and enable screening. - * - checkScreen(n, l) : dyadic tile cull test when screening on. - * - normalize() : rescale by 1/∥f∥ (uses calcSquareNorm()). - * - multPureGauss(lhs,rhs): complete-the-square product of *pure* Gaussians - * (monomials handled by derived classes). - * - multConstInPlace(c) : scale the global coefficient. - * - * Accessors/mutators: - * - get/set for coef, alpha, pos, power; toggle screen flag. + * @brief Represent and manipulate Gaussian-type functions (GTFs) + * + * @details The Gaussian class is an abstract base class for + * representing Gaussian-type functions (GTFs) in D dimensions. + * A GTF is defined as + * \f$ f(\mathbf{r}) = C \prod_{d=0}^{D-1} g_d(r_d) \f$, where + * \f$ g_d(r_d) = (r_d - p_d)^{\alpha_d} \exp[-\beta_d (r_d - p_d)^2] \f$. + * Here, \f$ C \f$ is a global coefficient, \f$ p_d \f$ is the center position along axis d, + * \f$ \alpha_d \f$ is the exponent for the monomial term, and \f$ \beta_d \f$ is the exponent for the Gaussian envelope. 
+ * + * The class provides methods for evaluating the function at given points, + * computing overlap integrals with other Gaussian functions, differentiating + * the function, and converting to Gaussian expansions suitable for pairwise operations. */ template class Gaussian : public RepresentableFunction { public: /** - * @brief Isotropic constructor. - * @param a Exponent value α to be replicated on all axes (α[d] = a). - * @param c Global scalar coefficient. - * @param r Center position (Coord), defaults to origin. - * @param p Per-axis monomial powers (non-negative). + * @brief Isotropic constructor (same exponent on all axes) + * @param a Exponent value α to be replicated on all axes (α[d] = a) + * @param c Global scalar coefficient + * @param r Center position (Coord), defaults to origin + * @param p Per-axis monomial powers (non-negative), stored as array * * @warning This ctor does not check positivity of @p a; callers are expected * to pass α>0 (required for square integrability and σ = 1/√(2α)). @@ -129,8 +70,8 @@ template class Gaussian : public RepresentableFunction { Gaussian(double a, double c, const Coord &r, const std::array &p); /** - * @brief Anisotropic constructor (per-axis exponents). - * @param a Exponent array α[d] per axis. + * @brief Anisotropic constructor (different set of coefficients and exponents per each axis) + * @param a Exponent ARRAY α[d] per axis. * @param c Global scalar coefficient. * @param r Center position (Coord). * @param p Per-axis monomial powers (non-negative). @@ -144,24 +85,25 @@ template class Gaussian : public RepresentableFunction { /** @name Evaluation API (to be implemented by subclasses) */ ///@{ /** - * @brief Evaluate f(r) at a D-dimensional coordinate. - * @param r Point (Coord) in physical space. + * @brief Evaluate the gaussian f(r) at a D-dimensional coordinate + * @param[in] r Point (Coord) in physical space in the MRA box * @return Function value f(r). 
*/ virtual double evalf(const Coord &r) const = 0; /** - * @brief Evaluate the *1D* separable factor along axis @p dim. - * @param r Coordinate along axis @p dim. + * @brief Evaluate the *1D* separable factor along axis @p dim + * @param r Coordinate along axis @p dim * @param dim Axis index in [0, D-1]. - * @return g_dim(r) = (r-pos[dim])^{power[dim]} exp[-α[dim](r-pos[dim])²], possibly scaled. + * + * @return The value of the 1D Gaussian factor g_dim(r), dim = {0, .., D-1} -> x,y,z... */ virtual double evalf1D(double r, int dim) const = 0; /** - * @brief Batch evaluation helper. - * @param points Matrix (N×D): column d holds all coordinates along axis d. - * @param values Matrix (N×D): on return, values(i,d) = evalf1D(points(i,d), d). + * @brief Evaluate a set of points in D dimensions, arranged in the matrix form + * @param[in] points Matrix (N×D): column d holds all coordinates along axis d. + * @param[out] values Matrix (N×D): on return, values(i,d) = evalf1D(points(i,d), d). * * @note This does *not* multiply across dimensions; it only fills the * per-axis factors column-wise for later tensor products. @@ -172,25 +114,29 @@ template class Gaussian : public RepresentableFunction { /** @name Integral properties and expansions */ ///@{ /** - * @brief Overlap ⟨this|inp⟩ computed via GaussExp reduction and - * Obara–Saika 1D recurrences per dimension. + * @brief Overlap ⟨this|inp⟩ of two gaussians + * + * @param[in] inp The other Gaussian instance + * + * @return The value of the overlap integral ⟨this|inp⟩ as a double */ double calcOverlap(const Gaussian &inp) const; - /** @brief Exact L2 norm squared ∥f∥² (implemented by subclass). */ + /// @return Exact L2 norm squared ∥f∥² (implemented by subclass) virtual double calcSquareNorm() const = 0; - /** - * @brief Represent as a sum of Gaussians (pure or polynomial-times-Gaussian), - * suitable for pairwise operations; implemented by subclass. 
- */ + /// @brief Represent as a sum of Gaussians (pure or polynomial-times-Gaussian), suitable for pairwise operations; implemented by subclass virtual GaussExp asGaussExp() const = 0; - /** - * @brief Create a semi-periodic expansion by replicating the function on a - * Cartesian lattice so that most of its mass lies within a unit cell. - * @param period Per-axis period lengths. - * @param nStdDev Number of standard deviations to preserve (default 4.0). + /** @brief Generates a GaussExp that is semi-periodic around a unit-cell + * + * @returns Semi-periodic version of a Gaussian around a unit-cell + * @param[in] period: The period of the unit cell + * @param[in] nStdDev: Number of standard deviations covered in each direction. Default 4.0 + * + * @details nStdDev = 1, 2, 3 and 4 ensures at least 68.27%, 95.45%, 99.73% and 99.99% of the + * integral is conserved with respect to the integration limits. */ GaussExp periodify(const std::array &period, double nStdDev = 4.0) const; ///@} @@ -198,9 +144,10 @@ template class Gaussian : public RepresentableFunction { /** @name Differential operators */ ///@{ /** - * @brief Analytic derivative ∂/∂x_dir (Cartesian direction). - * @param dir Axis index in [0, D-1]. - * @return A GaussPoly representing the derivative (polynomial×Gaussian). + * @brief Analytic derivative d/dx_dir (Cartesian direction) of the Gaussian + * @param dir Axis index in [0, D-1] + * + * @return A GaussPoly representing the derivative (polynomial×Gaussian) */ virtual GaussPoly differentiate(int dir) const = 0; ///@} @@ -208,15 +155,17 @@ template class Gaussian : public RepresentableFunction { /** @name Screening and normalization */ ///@{ /** - * @brief Build ±nσ bounds around the center on each axis and enable screening. - * Used to cheaply cull tiles/intervals that cannot contribute. - * @param stdDeviations Number of standard deviations n used for the box. 
+ * @brief Build ±nσ bounds around the center on each axis and enable screening + * + * @param stdDeviations Number of standard deviations n used for the box + * + * @note Used to cheaply cull tiles/intervals that cannot contribute */ void calcScreening(double stdDeviations); /** - * @brief Normalize in place by dividing by the L2 norm. - * @note Calls calcSquareNorm() from the derived class. + * @brief Rescale the Gaussian so that its L2 norm equals 1. + * @note Calls calcSquareNorm() from the derived class */ void normalize() { double norm = std::sqrt(calcSquareNorm()); @@ -232,10 +181,10 @@ template class Gaussian : public RepresentableFunction { */ void multPureGauss(const Gaussian &lhs, const Gaussian &rhs); - /** @brief Scale the global coefficient by a constant. */ + /// @brief Scale the global coefficient by a constant void multConstInPlace(double c) { this->coef *= c; } - /** @brief Shorthand for multConstInPlace. */ + /// @brief Shorthand for multConstInPlace void operator*=(double c) { multConstInPlace(c); } ///@} @@ -243,20 +192,22 @@ template class Gaussian : public RepresentableFunction { ///@{ bool getScreen() const { return screen; } /** - * @brief Tile-level culling test for dyadic box at scale n and translation l. - * @return true if the box is completely outside the screening bounds and can be skipped. 
+ * @brief Tile-level culling test for dyadic box at scale n and translation l + * @return True if the box is completely outside the screening bounds and can be skipped */ bool checkScreen(int n, const int *l) const; ///@} + + // some getters and setters /** @name Parameter accessors */ ///@{ - int getPower(int i) const { return power[i]; } - double getCoef() const { return coef; } - double getExp(int i) const { return alpha[i]; } - const std::array &getPower() const { return power; } - const std::array &getPos() const { return pos; } - std::array getExp() const { return alpha; } + int getPower(int i) const { return power[i]; } ///< Get monomial power on axis i + double getCoef() const { return coef; } ///< Get monomial coefficient + double getExp(int i) const { return alpha[i]; } ///< Get monomial exponent on axis i + const std::array &getPower() const { return power; } ///< Get monomial powers on the axis in an array + const std::array &getPos() const { return pos; } ///< Get monomial positions on the axis in an array + std::array getExp() const { return alpha; } ///< Get monomial exponent on the axis in an array ///@} /** @name Parameter mutators */ @@ -278,38 +229,45 @@ template class Gaussian : public RepresentableFunction { protected: /** @name Core parameters (POD) */ ///@{ - bool screen; ///< If true, use [A,B] screening in fast checks (set via calcScreening / setScreen). - double coef; /**< Global scale factor (α in the docs above). */ - std::array power; /**< Monomial powers per axis (non-negative integers). */ - std::array alpha; /**< Exponents per axis (>0). Controls width: σ_d = 1/√(2 α_d). */ - Coord pos; /**< Center coordinates. */ + bool screen; ///< If true, use [A,B] screening in fast checks (set via calcScreening / setScreen) + double coef; ///< Global scale factor (α in the docs above) + std::array power; ///< Monomial powers per axis (non-negative integers) + std::array alpha; ///< Exponents per axis (>0). 
Controls width: σ_d = 1/√(2 α_d) + Coord pos; ///< Center coordinates ///@} /** @name Visibility / culling helpers used by trees and projection */ ///@{ /** - * @brief Heuristic visibility vs. resolution scale and quadrature sampling. - * @param scale Dyadic scale (tile size ~ 2^{-scale}). - * @param nQuadPts Number of quadrature points per tile edge. - * @return false if the Gaussian is “too narrow” to be represented at this scale. + * @brief Heuristic visibility vs. resolution scale and quadrature sampling + * @param scale Dyadic scale (tile size ~ 2^{-scale}) + * @param nQuadPts Number of quadrature points per tile edge + * @return false if the Gaussian is “too narrow” to be represented at this scale */ bool isVisibleAtScale(int scale, int nQuadPts) const; /** * @brief Quick check whether the function is essentially zero on [a,b] per axis, - * using a ±5σ bounding rule (implementation in the .cpp). + * using a ±5σ bounding rule (implementation in the .cpp) + * @param a Lower bounds array of length D + * @param b Upper bounds array of length D + * @return true if the function is effectively zero on [a,b] */ bool isZeroOnInterval(const double *a, const double *b) const; ///@} /** * @brief Maximum standard deviation across axes: max_d 1/√(2 α_d). - * @details Used by periodify() to decide how many neighboring images to include. + * @details Used by periodify() to decide how many neighboring images to include + * + * @return The maximum standard deviation among all axes */ double getMaximumStandardDiviation() const; /** - * @brief Subclass hook for stream output; should print parameters in a readable way. 
+ * @brief Subclass hook for stream output; should print parameters in a readable way + * @param o The output stream + * @return The output stream */ virtual std::ostream &print(std::ostream &o) const = 0; }; From f59a4973910a2ad07f2a4f0a5ea9ad1d4140a7cd Mon Sep 17 00:00:00 2001 From: Jacopo_Masotti Date: Mon, 17 Nov 2025 10:13:02 +0100 Subject: [PATCH 45/51] NodeAllocator.* --- src/trees/NodeAllocator.cpp | 3 +- src/trees/NodeAllocator.h | 187 ++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 84 deletions(-) diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp index f4f72061b..990101e40 100644 --- a/src/trees/NodeAllocator.cpp +++ b/src/trees/NodeAllocator.cpp @@ -219,7 +219,6 @@ template void NodeAllocator::appendChunk(bool coefs) { std::fill(this->stackStatus.begin() + oldsize, this->stackStatus.end(), 0); } -/** Fill all holes in the chunks with occupied nodes, then remove all empty chunks */ template int NodeAllocator::compress() { MRCPP_SET_OMP_LOCK(); int nNodes = (1 << D); @@ -362,7 +361,7 @@ template int NodeAllocator::findNextOccupied(int sIdx) return sIdx; } -/** Traverse tree and redefine pointer, counter and tables. */ + template void NodeAllocator::reassemble() { MRCPP_SET_OMP_LOCK(); this->nNodes = 0; diff --git a/src/trees/NodeAllocator.h b/src/trees/NodeAllocator.h index 433e6a3c6..a4b8f52b0 100644 --- a/src/trees/NodeAllocator.h +++ b/src/trees/NodeAllocator.h @@ -23,22 +23,6 @@ * */ -/** - * @file NodeAllocator.h - * @brief Chunked allocator for MWNode objects and their coefficient storage. - * - * @details - * The allocator handles: - * - contiguous chunk allocation for **nodes** and **coefficients**, - * - a simple stack-like free list for fast allocation/deallocation, - * - optional backing via a shared memory block (@ref SharedMemory), - * - utility routines for compaction (@ref compress) and reassembly - * after structural edits, and - * - query helpers for chunk sizes and usage. 
- * - * It is used by both @ref FunctionTree and @ref OperatorTree. - */ - #pragma once #include @@ -50,10 +34,10 @@ namespace mrcpp { /** * @class NodeAllocator - * @tparam D Spatial dimension (1, 2, or 3). - * @tparam T Scalar coefficient type (e.g., double, ComplexDouble). + * @tparam D Spatial dimension (1, 2, or 3) + * @tparam T Coefficient type (e.g. double, ComplexDouble) * - * @brief Chunked memory manager for @ref MWNode objects and their coefficients. + * @brief Chunked memory manager for @ref MWNode objects and their coefficients * * @details * Nodes and their coefficient arrays are organized in **chunks** to reduce @@ -70,22 +54,30 @@ namespace mrcpp { template class NodeAllocator final { public: /** - * @brief Construct an allocator bound to an operator tree. - * @param tree Owning @ref OperatorTree instance. - * @param mem Optional shared-memory provider for coefficients (may be `nullptr`). - * @param coefsPerNode Number of coefficients per node. - * @param nodesPerChunk Maximum number of nodes per chunk. + * @brief Construct an allocator bound to a function tree + * @param[in] tree Owning @ref FunctionTree instance + * @param[in] mem Optional shared-memory provider for coefficients (may be `nullptr`) + * @param[in] coefsPerNode Number of coefficients per node + * @param[in] nodesPerChunk Maximum number of nodes per chunk + * + * @details Reserves space for chunk pointers to avoid excessive reallocation, + * but does not allocate any chunks until needed. */ - NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + NodeAllocator(FunctionTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + /** - * @brief Construct an allocator bound to a function tree. - * @param tree Owning @ref FunctionTree instance. - * @param mem Optional shared-memory provider for coefficients (may be `nullptr`). - * @param coefsPerNode Number of coefficients per node. - * @param nodesPerChunk Maximum number of nodes per chunk. 
+ * @brief Construct an allocator bound to an operator tree + * @param[in] tree Owning @ref OperatorTree instance + * @param[in] mem Optional shared-memory provider for coefficients (may be `nullptr`) + * @param[in] coefsPerNode Number of coefficients per node + * @param[in] nodesPerChunk Maximum number of nodes per chunk + * + * @details Reserves space for chunk pointers to avoid excessive reallocation, + * but does not allocate any chunks until needed. */ - NodeAllocator(FunctionTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk); + /// Non-copyable. NodeAllocator(const NodeAllocator &tree) = delete; @@ -95,92 +87,123 @@ template class NodeAllocator final { /// Destructor; releases all owned chunks (nodes and coefficients). ~NodeAllocator(); + /** - * @brief Allocate a consecutive block of nodes. - * @param nNodes Number of nodes to allocate. - * @param coefs If `true`, also ensure coefficient storage is available. - * @return Serial index (`sIdx`) of the first newly allocated node. - * + * @brief Get pointer to a node object by serial index + * @param[in] sIdx Serial index of the node + * @return Pointer to the @ref MWNode instance. + */ + MWNode *getNode_p(int sIdx); + + + /** + * @brief Get pointer to the coefficient array for a node + * @param[in] sIdx Serial index of the node + * @return Pointer to 'T[coefsPerNode]' or 'nullptr' if unavailable. + */ + T *getCoef_p(int sIdx); + + + + /** + * @brief Allocate a consecutive block of nodes + * @param[in] nNodes Number of nodes to allocate + * @param[in] coefs If 'true', also ensure coefficient storage is available + * @return Serial index ('sIdx') of the first newly allocated node (the top of the stack) + * + * @details Allocates a block of @p nNodes consecutive nodes, returning + * the serial index of the first node in the block. If `coefs` is true, + * coefficient arrays are also allocated for each node. 
If there is not + * enough space in existing chunks, new chunks are allocated to satisfy + * the request + * + * @warning Does not initialize the node objects; caller is responsible + * for placement-new or similar + * + * @warning If insufficient space is available, and allocation of new + * chunks fails, an exception is thrown and no nodes are allocated. + * + * @throw std::bad_alloc if memory allocation fails. + * @throw std::runtime_error if insufficient space is available after + * attempting to allocate new chunks. * @note May grow the underlying chunk arrays if space is exhausted. */ int alloc(int nNodes, bool coefs = true); /** - * @brief Deallocate a node at serial index. - * @param sIdx Serial index of the node to free. + * @brief Deallocate a node at serial index + * @param[in] sIdx Serial index of the node to free + * @details Marks the node at serial index @p sIdx as free for future + * allocations. Does not destroy the node object or its coefficient array. + * It also updates the number of allocated nodes. * * @warning Does not shrink chunks; it only marks the slot as free. + * + * @throw std::out_of_range if @p sIdx is invalid. */ void dealloc(int sIdx); /** - * @brief Deallocate coefficient arrays for all nodes. + * @brief Deallocate coefficient arrays for all nodes * @note Node objects remain allocated; only their coefficient buffers are freed. */ void deallocAllCoeff(); /** - * @brief Pre-allocate a number of chunks. - * @param nChunks Number of chunks to append. - * @param coefs If `true`, allocate coefficient chunks as well. + * @brief Pre-allocate a number of chunks + * @param[in] nChunks Number of chunks to append + * @param[in] coefs If 'true', allocate coefficient chunks as well + * + * @details It reinitializes the allocator, allocating @p nChunks chunks + * (both nodes and coefficients, if @p coefs is true). It resized the + * stackStatus vectors with the new total capacity, and resets the + * allocation stack. 
+ * + * @note This method clears any previously allocated nodes and + * their coefficient buffers. * - * @details Useful to avoid repeated growth when the final size is known. + * @throw If nChunks <= 0 */ void init(int nChunks, bool coefs = true); /** - * @brief Compact allocated nodes to reduce fragmentation. - * @return Number of nodes moved during compaction. + * @brief Fill all holes in the chunks with occupied nodes, then remove all empty chunks + * @return Number of nodes deleted during compaction * * @details After compaction, serial indices may change internally; users - * should refresh any external mappings that depend on `sIdx`. + * should refresh any external mappings that depend on 'sIdx'. */ int compress(); + /** + * @brief Drop trailing unused chunks to release memory. + * @return Number of chunks deleted + * + * @details Scans chunks from the end towards the beginning, deleting any + * chunks that are completely unused. Stops when a chunk with at least + * one occupied node is found. + */ + int deleteUnusedChunks(); + /** - * @brief Rebuild internal pointers after external moves/shuffling. + * @brief Traverse tree and redefine pointer, counter and tables * @details Typically invoked after operations that reorder nodes without - * using @ref compress. + * using @ref compress */ void reassemble(); - /** - * @brief Drop trailing unused chunks to release memory. - * @return Number of chunks deleted. - */ - int deleteUnusedChunks(); - /** @name Introspection */ - ///@{ - /// @return Number of nodes currently in use (allocated and not freed). - int getNNodes() const { return this->nNodes; } - /// @return Number of coefficients per node. - int getNCoefs() const { return this->coefsPerNode; } - /// @return Total number of allocated node chunks. - int getNChunks() const { return this->nodeChunks.size(); } - /// @return Number of chunks currently used by active nodes. 
- int getNChunksUsed() const { return (this->topStack + this->maxNodesPerChunk - 1) / this->maxNodesPerChunk; } - /// @return Size in bytes of one node chunk (nodes only). - int getNodeChunkSize() const { return this->maxNodesPerChunk * this->sizeOfNode; } - /// @return Size in bytes of one coefficient chunk. - int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); } - /// @return Maximum number of nodes that fit in a single chunk. - int getMaxNodesPerChunk() const { return this->maxNodesPerChunk; } - ///@} - /** - * @brief Get pointer to the coefficient array for a node. - * @param sIdx Serial index of the node. - * @return Pointer to `T[coefsPerNode]` or `nullptr` if unavailable. - */ - T *getCoef_p(int sIdx); + + int getNNodes() const { return this->nNodes; } ///< @return Number of nodes currently in use (allocated and not freed). + int getNCoefs() const { return this->coefsPerNode; } ///< @return Number of coefficients per node. + int getNChunks() const { return this->nodeChunks.size(); } ///< @return Total number of allocated node chunks. + int getNChunksUsed() const { return (this->topStack + this->maxNodesPerChunk - 1) / this->maxNodesPerChunk; } ///< @return Number of chunks currently used by active nodes. + int getNodeChunkSize() const { return this->maxNodesPerChunk * this->sizeOfNode; } ///< @return Size in bytes of one node chunk (nodes only). + int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); } ///< @return Size in bytes of one coefficient chunk. + int getMaxNodesPerChunk() const { return this->maxNodesPerChunk; } ///< @return Maximum number of nodes that fit in a single chunk. + - /** - * @brief Get pointer to a node object by serial index. - * @param sIdx Serial index of the node. - * @return Pointer to the @ref MWNode instance. - */ - MWNode *getNode_p(int sIdx); /// @return Pointer to the i-th coefficient chunk (contiguous block). 
T *getCoefChunk(int i) { return this->coefChunks[i]; } From f1b4adaf1dcd076357cb3fce0cddaa7b1045b25f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Wed, 19 Nov 2025 14:21:10 +0100 Subject: [PATCH 46/51] GaussFunc documented --- src/functions/GaussFunc.cpp | 17 ---- src/functions/GaussFunc.h | 182 ++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 100 deletions(-) diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp index 28736be58..06fa8904b 100644 --- a/src/functions/GaussFunc.cpp +++ b/src/functions/GaussFunc.cpp @@ -143,11 +143,6 @@ template void GaussFunc::multInPlace(const GaussFunc &rhs) { this->setPow(newPow); } -/** @brief Multiply two GaussFuncs - * @param[in] this: Left hand side of multiply - * @param[in] rhs: Right hand side of multiply - * @returns New GaussPoly - */ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { GaussFunc &lhs = *this; GaussPoly result; @@ -163,10 +158,6 @@ template GaussPoly GaussFunc::mult(const GaussFunc &rhs) { return result; } -/** @brief Multiply GaussFunc by scalar - * @param[in] c: Scalar to multiply - * @returns New GaussFunc - */ template GaussFunc GaussFunc::mult(double c) { GaussFunc g = *this; g.coef *= c; @@ -195,14 +186,6 @@ template std::ostream &GaussFunc::print(std::ostream &o) const { return o; } -/** @brief Compute Coulomb repulsion energy between two GaussFuncs - * @param[in] this: Left hand GaussFunc - * @param[in] rhs: Right hand GaussFunc - * @returns Coulomb energy - * - * @note Both Gaussians must be normalized to unit charge - * \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct! 
- */ template double GaussFunc::calcCoulombEnergy(const GaussFunc &gf) const { NOT_IMPLEMENTED_ABORT; } diff --git a/src/functions/GaussFunc.h b/src/functions/GaussFunc.h index a983feabc..94d371400 100644 --- a/src/functions/GaussFunc.h +++ b/src/functions/GaussFunc.h @@ -32,62 +32,36 @@ namespace mrcpp { -/** @class GaussFunc - * @tparam D Spatial dimension (1,2,3,…). +/** + * @class GaussFunc + * @tparam D Spatial dimension (1, 2, or 3) * - * @brief Single Cartesian Gaussian primitive (optionally with monomial powers) - * in D dimensions. + * @brief Gaussian function in D dimensions with a simple monomial in front * - * Mathematical form - * ----------------- - * In D dimensions the function is separable: - * \f[ - * G(\mathbf{x}) - * = \alpha \prod_{d=0}^{D-1} (x_d - R_d)^{p_d}\, - * \exp\!\big(-\beta_d\,(x_d - R_d)^2\big), - * \f] - * where: - * - \f$ \alpha \f$ is a scalar coefficient (amplitude), - * - \f$ \mathbf{R} = (R_0,\dots,R_{D-1}) \f$ is the center, - * - \f$ \mathbf{p} = (p_0,\dots,p_{D-1}) \f$ are non-negative integers (Cartesian powers), - * - \f$ \boldsymbol{\beta} = (\beta_0,\dots,\beta_{D-1}) \f$ are positive exponents; they - * can be isotropic (\f$\beta_d=\beta\f$) or anisotropic (per-axis). + * - Monodimensional Gaussian (GaussFunc<1>): + * \f$ g(x) = \alpha (x-x_0)^a e^{-\beta (x-x_0)^2} \f$ * - * Relationship to @ref Gaussian - * ----------------------------- - * This class *derives* from @ref Gaussian, which stores the common state - * (coefficient, center, exponents, powers) and provides a polymorphic interface. - * @c GaussFunc implements operations specific to “pure Gaussian × monomial” - * terms (e.g., evaluation, in-place multiplication with same-center terms). - * - * Typical usage - * ------------- - * - Build analytic functions and evaluate them at given points (@ref evalf). - * - Construct @ref GaussExp (expansions) by appending multiple @c GaussFunc. 
- * - Form products using @ref mult (returns @ref GaussPoly) or scale by a scalar. - * - Differentiate analytically with respect to a coordinate (@ref differentiate). + * - Multidimensional Gaussian (GaussFunc): + * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$ */ template class GaussFunc : public Gaussian { public: - /** @name Constructors - * @{ - */ - /** @brief Construct with isotropic exponent. - * @param beta Isotropic exponent \f$\beta\f$ (used on all axes). - * @param alpha Coefficient \f$\alpha\f$. - * @param pos Center \f$\mathbf{R}\f$ (defaults to origin). - * @param pow Powers \f$\mathbf{p}\f$ (defaults to all zeros). - * - * This forwards to the @ref Gaussian base constructor. + /** + * @brief Constructor which forwads to the Gaussian constructor + * @param beta Exponent, \f$ e^{-\beta r^2} \f$ + * @param alpha Coefficient, \f$ \alpha e^{-r^2} \f$ + * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ + * @param[in] pow Monomial power, \f$ x^{pow[0]}, y^{pow[1]}, ... \f$ */ GaussFunc(double beta, double alpha, const Coord &pos = {}, const std::array &pow = {}) : Gaussian(beta, alpha, pos, pow) {} - /** @brief Construct with anisotropic exponents (per-axis @p beta). - * @param beta Array of exponents \f$(\beta_0,\dots,\beta_{D-1})\f$. - * @param alpha Coefficient \f$\alpha\f$. - * @param pos Center \f$\mathbf{R}\f$. - * @param pow Powers \f$\mathbf{p}\f$. + /** + * @brief Constructor which forwads to the Gaussian constructor + * @param[in] beta List of exponents, \f$ e^{-\beta r^2} \f$ + * @param alpha Coefficient, \f$ \alpha e^{-r^2} \f$ + * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ + * @param[in] pow Monomial power, \f$ x^{pow[0]}, y^{pow[1]}, ... \f$ */ GaussFunc(const std::array &beta, double alpha, @@ -95,79 +69,121 @@ template class GaussFunc : public Gaussian { const std::array &pow = {}) : Gaussian(beta, alpha, pos, pow) {} - /** @brief Copy constructor (shallow copy of POD members, as expected). 
*/ + /// @brief Copy constructor. GaussFunc(const GaussFunc &gf) : Gaussian(gf) {} - /** @brief Deleted assignment for safety (use copy-construct as needed). */ + GaussFunc &operator=(const GaussFunc &rhs) = delete; - /** @brief Polymorphic copier (virtual constructor idiom). */ - Gaussian *copy() const override; - /** @} */ - /** @name Physics / analysis helpers - * @{ + /** + * @brief Performs a deep copy + * @return Pointer to a new GaussFunc copy of this instance */ - /** @brief Coulomb repulsion with another Gaussian (specialized for D=3). - * @details For D=3 and isotropic exponents, a closed form using Boys @f$F_0@f$ is used. - * Other D are not implemented and trigger a runtime abort. + Gaussian *copy() const override; + + /** + * @brief Compute Coulomb repulsion energy between this GaussFunc and another + * @param rhs Other GaussFunc + * @return Coulomb energy + * + * @note Implemented only for D = 3 + * @note Both Gaussians must be normalized to unit charge + * \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct! */ double calcCoulombEnergy(const GaussFunc &rhs) const; - /** @brief \f$\|G\|_2^2 = \int |G|^2 \, d\mathbf{x}\f$ (separable product of 1D integrals). */ + /** + * @brief Calculates the squared norm of this GaussFunc + * @return The squared norm + */ double calcSquareNorm() const override; - /** @} */ - /** @name Evaluation - * @{ + /** + * @brief Evaluate the gaussian f(r) at a D-dimensional coordinate + * @param r Point (Coord) in physical space in the MRA box + * @return Function value f(r). */ - /** @brief Full D-dimensional evaluation at coordinate @p r. */ double evalf(const Coord &r) const override; - /** @brief 1D factor evaluation for axis @p dir (used in separable algorithms). */ + /** + * @brief Evaluate the *1D* separable factor along axis @p dir + * @param r Coordinate along axis @p dir + * @param dir Axis index in [0, D-1]. + * + * @return The value of the 1D Gaussian factor g_dir(r), dir = {0, .., D-1} -> x,y,z... 
+ */ double evalf1D(double r, int dir) const override; - /** @} */ - /** @name Transformations and algebra - * @{ + /** + * @brief Convert this GaussFunc to a GaussExp object + * @return A GaussExp representing this GaussFunc */ - /** @brief Wrap this single Gaussian as a length-1 Gaussian expansion. */ GaussExp asGaussExp() const override; - /** @brief Analytic derivative w.r.t. @p dir, returns a @ref GaussPoly. */ + /** + * @brief Analytic derivative d/dx_dir (Cartesian direction) of the GaussFunc + * @param dir Axis index in [0, D-1] + * + * @return A GaussPoly representing the derivative (polynomial×Gaussian) + */ GaussPoly differentiate(int dir) const override; - /** @brief In-place product with another Gaussian at the *same center*. - * @details Exponents and powers add; coefficients multiply. - * Fails fast if centers differ (cannot keep a pure GaussFunc). + /** + * @brief Multiplies this GaussFunc in-place with another GaussFunc + * @param rhs The GaussFunc to multiply with + * @note The result is stored in this GaussFunc, thus overwriting its previous values */ void multInPlace(const GaussFunc &rhs); - /** @brief Alias for @ref multInPlace. */ + + /** + * @brief Operator overload forwarding to multInPlace + * @param rhs The GaussFunc to multiply with + */ void operator*=(const GaussFunc &rhs) { multInPlace(rhs); } - /** @brief Product with another Gaussian (same or different center). - * @details Returns a @ref GaussPoly (Gaussian times polynomial) obtained by - * completing the square and combining monomial factors. */ + /** + * @brief Multiply another GaussFunc with this GaussFunc + * @param rhs Other GaussFunc + * @return Resulting GaussPoly + */ GaussPoly mult(const GaussFunc &rhs); - /** @brief Scalar multiplication (returns a scaled copy). 
*/ + /** + * @brief Multiply this GaussFunc with a scalar + * @param c Scalar to multiply + * @returns Resulting GaussFunc + */ GaussFunc mult(double c); - /** @brief Operator overloads forwarding to the methods above. */ + /** + * @brief Operator overload forwarding to mult + * @param rhs The GaussFunc to multiply with + * @return Resulting GaussPoly + */ GaussPoly operator*(const GaussFunc &rhs) { return this->mult(rhs); } + + /** + * @brief Operator overload forwarding to mult + * @param c Scalar to multiply with + * @return Resulting GaussFunc + */ GaussFunc operator*(double c) { return this->mult(c); } - /** @} */ - /** @name Power setters - * @{ + /** + * @brief Set the power in dimension d + * @param d Dimension index + * @param power Power to set */ - /** @brief Set a single Cartesian power component @p power on axis @p d. */ void setPow(int d, int power) override { this->power[d] = power; } - /** @brief Set the full power vector \f$\mathbf{p}\f$. */ + + /** + * @brief Set the powers in all dimensions + * @param power Array of powers to set + */ void setPow(const std::array &power) override { this->power = power; } - /** @} */ private: - /** @brief Pretty-printer used by stream insertion (see implementation). 
*/ + /// @brief Print GaussFunc to output stream std::ostream &print(std::ostream &o) const override; }; From c0e5175fe82536b24b54f12c8bc87a9ff2de4ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Fri, 21 Nov 2025 11:41:49 +0100 Subject: [PATCH 47/51] fixed compilation issues in function_utils --- src/functions/function_utils.cpp | 44 ++++++++++++++------------------ src/functions/function_utils.h | 16 +++--------- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp index 2d6d8280e..23806285d 100644 --- a/src/functions/function_utils.cpp +++ b/src/functions/function_utils.cpp @@ -27,10 +27,6 @@ namespace mrcpp { -namespace function_utils { -double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b); -} // namespace function_utils - template double function_utils::calc_overlap(const GaussFunc &a, const GaussFunc &b) { double S = 1.0; for (int d = 0; d < D; d++) { S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]); } @@ -38,12 +34,10 @@ template double function_utils::calc_overlap(const GaussFunc &a, cons return S; } - double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) { int i, j; double expo_p, mu, pos_p, x_ab, x_pa, x_pb, s_00; - /* The highest angular momentum combination is l=20 for a and b - * simulatnelusly */ + // The highest angular momentum combination is l=20 for a and b simultaneously double s_coeff[64]; // if (out_of_bounds(power_a, 0, MAX_GAUSS_POWER) || @@ -53,36 +47,36 @@ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, dou // INVALID_ARG_EXIT; // } - /* initialization of a hell of a lot of coefficients.... 
*/ - expo_p = expo_a + expo_b; /* total exponent */ - mu = expo_a * expo_b / (expo_a + expo_b); /* reduced exponent */ - pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; /* center of charge */ - x_ab = pos_a - pos_b; /* X_{AB} */ - x_pa = pos_p - pos_a; /* X_{PA} */ - x_pb = pos_p - pos_b; /* X_{PB} */ + // initialization of a hell of a lot of coefficients.... + expo_p = expo_a + expo_b; // total exponent + mu = expo_a * expo_b / (expo_a + expo_b); // reduced exponent + pos_p = (expo_a * pos_a + expo_b * pos_b) / expo_p; // center of charge + x_ab = pos_a - pos_b; // X_{AB} + x_pa = pos_p - pos_a; // X_{PA} + x_pb = pos_p - pos_b; // X_{PB} s_00 = pi / expo_p; - s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); /* overlap of two spherical gaussians */ - // int n_0j_coeff = 1 + power_b; /* n. of 0j coefficients needed */ - // int n_ij_coeff = 2 * power_a; /* n. of ij coefficients needed (i > 0) */ + s_00 = std::sqrt(s_00) * std::exp(-mu * x_ab * x_ab); // overlap of two spherical gaussians + // int n_0j_coeff = 1 + power_b; // n. of 0j coefficients needed + // int n_ij_coeff = 2 * power_a; // n. of ij coefficients needed (i > 0) - /* we add 3 coeffs. to avoid a hell of a lot of if statements */ - /* n_tot_coeff = n_0j_coeff + n_ij_coeff + 3; */ - /* s_coeff = (double *) calloc(n_tot_coeff, sizeof(double));*/ + // we add 3 coeffs. 
to avoid a hell of a lot of if statements + // n_tot_coeff = n_0j_coeff + n_ij_coeff + 3; + // s_coeff = (double *) calloc(n_tot_coeff, sizeof(double)); - /* generate first two coefficients */ + // generate first two coefficients s_coeff[0] = s_00; s_coeff[1] = x_pb * s_00; j = 1; - /* generate the rest of the first row */ + // generate the rest of the first row while (j < power_b) { s_coeff[j + 1] = x_pb * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); j++; } - /* generate the first two coefficients with i > 0 */ + // generate the first two coefficients with i > 0 s_coeff[j + 1] = s_coeff[j] - x_ab * s_coeff[j - 1]; s_coeff[j + 2] = x_pa * s_coeff[j] + j * s_coeff[j - 1] / (2.0 * expo_p); i = 1; - /* generate the remaining coefficients with i > 0 */ + // generate the remaining coefficients with i > 0 while (i < power_a) { int i_l = j + 2 * i + 1; int i_r = j + 2 * i + 2; @@ -91,7 +85,7 @@ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, dou i++; } - /* free(s_coeff);*/ + // free(s_coeff); return s_coeff[power_b + 2 * power_a]; } diff --git a/src/functions/function_utils.h b/src/functions/function_utils.h index 09c27e5b7..c04baff22 100644 --- a/src/functions/function_utils.h +++ b/src/functions/function_utils.h @@ -23,8 +23,6 @@ * */ - - #include "GaussExp.h" #include "Gaussian.h" @@ -33,8 +31,6 @@ namespace mrcpp { // Forward declaration only: definition is provided in function_utils.cpp. // Keeping this here avoids heavy includes and potential include cycles. 
namespace function_utils { -template - /** * @brief Compute the monodimensional overlap integral between two * gaussian distributions by means of the Obara-Saika recursive @@ -53,9 +49,9 @@ template * @param expo_a \f$ c_a \f$ * @param expo_b \f$ c_b \f$ * - * @return The value of the overlap integral as a d + * @return The value of the overlap integral as a double */ -double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b) +double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b); /** * @brief Compute the overlap integral between two Gaussian functions. @@ -65,13 +61,7 @@ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, dou * * @return The value of the overlap integral */ -double calc_overlap(const GaussFunc &a, const GaussFunc &b); - - - - - - +template double calc_overlap(const GaussFunc &a, const GaussFunc &b); } // namespace function_utils } // namespace mrcpp \ No newline at end of file From de6bd6be23f054f260ebf334ea0bed22a6258f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Wed, 3 Dec 2025 10:36:33 +0100 Subject: [PATCH 48/51] Documented GaussPoly --- src/functions/GaussFunc.h | 4 +- src/functions/GaussPoly.cpp | 15 --- src/functions/GaussPoly.h | 247 ++++++++++++++++-------------------- 3 files changed, 113 insertions(+), 153 deletions(-) diff --git a/src/functions/GaussFunc.h b/src/functions/GaussFunc.h index 94d371400..42b1be70a 100644 --- a/src/functions/GaussFunc.h +++ b/src/functions/GaussFunc.h @@ -47,7 +47,7 @@ namespace mrcpp { template class GaussFunc : public Gaussian { public: /** - * @brief Constructor which forwads to the Gaussian constructor + * @brief Constructor which forwards to the Gaussian constructor * @param beta Exponent, \f$ e^{-\beta r^2} \f$ * @param alpha Coefficient, \f$ \alpha e^{-r^2} \f$ * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... 
\f$ @@ -57,7 +57,7 @@ template class GaussFunc : public Gaussian { : Gaussian(beta, alpha, pos, pow) {} /** - * @brief Constructor which forwads to the Gaussian constructor + * @brief Constructor which forwards to the Gaussian constructor * @param[in] beta List of exponents, \f$ e^{-\beta r^2} \f$ * @param alpha Coefficient, \f$ \alpha e^{-r^2} \f$ * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp index 0dfeaf2cd..da37171ba 100644 --- a/src/functions/GaussPoly.cpp +++ b/src/functions/GaussPoly.cpp @@ -37,12 +37,6 @@ using namespace Eigen; namespace mrcpp { -/** @returns New GaussPoly object - * @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$ - * @param[in] alpha: Coefficient, \f$ \alpha e^{-r^2} \f$ - * @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ - * @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$ - */ template GaussPoly::GaussPoly(double beta, double alpha, const Coord &pos, const std::array &power) : Gaussian(beta, alpha, pos, power) { @@ -261,10 +255,6 @@ template GaussPoly GaussPoly::mult(const GaussPoly &rhs) { */ } -/** @brief Multiply GaussPoly by scalar - * @param[in] c: Scalar to multiply - * @returns New GaussPoly - */ template GaussPoly GaussPoly::mult(double c) { GaussPoly g = *this; g.coef *= c; @@ -283,11 +273,6 @@ template void GaussPoly::setPow(const std::array &pow) { } } -/** @brief Set polynomial in given dimension - * - * @param[in] d: Cartesian direction - * @param[in] poly: Polynomial to set - */ template void GaussPoly::setPoly(int d, Polynomial &poly) { if (this->poly[d] != nullptr) { delete this->poly[d]; } this->poly[d] = new Polynomial(poly); diff --git a/src/functions/GaussPoly.h b/src/functions/GaussPoly.h index 94f9850de..052d7e307 100644 --- a/src/functions/GaussPoly.h +++ b/src/functions/GaussPoly.h @@ -35,225 +35,200 @@ namespace mrcpp { -/** @class GaussPoly +/** + * @class GaussPoly + * @tparam D Spatial 
dimension (1, 2, or 3) + * + * @brief Gaussian function in D dimensions with a general polynomial in front * - * @brief Polynomial–Gaussian in D dimensions (separable form). + * - Monodimensional Gaussian (GaussPoly<1>): * - * Concept - * ------- - * GaussPoly represents functions of the form - * \f[ - * f(\mathbf x) \;=\; c \;\prod_{d=1}^{D}\; P_d(x_d - x^{(0)}_d)\, - * \exp\!\big(-\alpha_d\,(x_d - x^{(0)}_d)^2\big), - * \f] - * i.e. a per–dimension polynomial factor times an anisotropic Gaussian. - * The per–axis polynomials \f$P_d\f$ are stored as owned pointers - * `Polynomial* poly[d]`. This class derives from @ref Gaussian to reuse - * storage for coefficient `coef`, exponents `alpha[d]`, and center `pos[d]`. + * \f$ g(x) = \alpha P(x-x_0) e^{-\beta (x-x_0)^2} \f$ * - * Key features - * ------------ - * - Exact evaluation in 1D/ND (see evalf / evalf1D). - * - Exact L2–norm via expansion into a sum of monomial–Gaussians - * (@ref asGaussExp + analytic overlaps). - * - Algebraic utilities (scalar and poly products; some are intentionally - * left unimplemented to avoid accidental heavy symbolic work). + * - Multidimensional Gaussian (GaussPoly<D>): * - * Ownership - * --------- - * The `poly[d]` pointers are owned by the GaussPoly instance (deep-copied - * in copy-operations and destroyed in the destructor). - * - * Relation to GaussFunc - * --------------------- - * A @ref GaussFunc corresponds to the special case where each `P_d(t)=t^{k_d}` - * is a monomial. A GaussPoly can be expanded to a sum of GaussFunc terms - * (tensor product of monomials) with @ref asGaussExp. + * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$ */ template class GaussPoly : public Gaussian { public: - /** @name Constructors & Lifetime - * @{ - */ - - /** @brief Construct an isotropic GaussPoly with optional per-axis degrees. + /** + * @brief Constructor * - * @param[in] alpha Exponent parameter (isotropic): \f$ \alpha_d \equiv \alpha \f$. - * @param[in] coef Global amplitude \f$ c \f$. 
- * @param[in] pos Center \f$ x^{(0)} \f$ per dimension. - * @param[in] power Maximum polynomial degree per dimension (order of @ref Polynomial). - * - * Initializes each `poly[d]` as a Polynomial of degree `power[d]` - * (if any non-zero degree is requested), otherwise keeps it nullptr. - * The Gaussian base class stores `(coef, alpha, pos, power)`. + * @param alpha Exponent \f$ \beta \f$, \f$ e^{-\beta r^2} \f$ + * @param coef Coefficient \f$ \alpha \f$, \f$ \alpha e^{-r^2} \f$ + * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ + * @param[in] power Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$ */ GaussPoly(double alpha = 0.0, double coef = 1.0, const Coord &pos = {}, const std::array &power = {}); - /** @brief Construct an anisotropic GaussPoly (per-axis exponents). - * - * @param[in] alpha Per-axis exponents \f$ \{\alpha_d\}_{d=1}^D \f$. - * @param[in] coef Global amplitude. - * @param[in] pos Center per dimension. - * @param[in] power Maximum polynomial degree per dimension. + /** + * @brief Constructor * - * Same allocation policy for `poly[d]` as in the isotropic constructor. + * @param[in] alpha List of exponents \f$ \beta \f$, \f$ e^{-\beta r^2} \f$ + * @param coef Coefficient \f$ \alpha \f$, \f$ \alpha e^{-r^2} \f$ + * @param[in] pos Position \f$ (x - pos[0]), (y - pos[1]), ... \f$ + * @param[in] power Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$ */ GaussPoly(const std::array &alpha, double coef, const Coord &pos = {}, const std::array &power = {}); - /** @brief Deep-copy ctor (also clones per-axis polynomials). */ + /// @brief Copy constructor. GaussPoly(const GaussPoly &gp); - /** @brief Build GaussPoly from a @ref GaussFunc (monomial×Gaussian). - * - * Creates per-axis polynomials equal to the corresponding monomials, - * i.e. `P_d(t) = t^{power[d]}`. + /** + * @brief Construct from a GaussFunc + * @param[in] gf: GaussFunc to convert */ GaussPoly(const GaussFunc &gf); - /** @brief Disable copy-assignment (explicit semantic/ownership choice). 
*/ GaussPoly &operator=(const GaussPoly &gp) = delete; - /** @brief Polymorphic clone (deep copy). */ + /** + * @brief Performs a deep copy + * @return Pointer to a new GaussPoly copy of this instance + */ Gaussian *copy() const override; - /** @brief Destructor; releases owned Polynomial pointers. */ ~GaussPoly(); - /** @} */ - - /** @name Math & Evaluation - * @{ - */ - - /** @brief Exact L2-norm squared \f$ \|f\|_2^2 \f$. - * - * Implementation: - * 1) Expand to a sum of monomial Gaussians (@ref asGaussExp). - * 2) Sum analytic overlaps of all pairs (Obara–Saika), see - * `function_utils::calc_overlap`. + /** + * @brief Calculates the squared norm of this GaussPoly + * @return The squared norm */ double calcSquareNorm() const override; - /** @brief Evaluate \f$ f(\mathbf x) \f$ at a point (D-D). */ + /** + * @brief Evaluate the gaussian f(r) at a D-dimensional coordinate + * @param r Point (Coord) in physical space in the MRA box + * @return Function value f(r). + */ double evalf(const Coord &r) const override; - /** @brief Evaluate the 1D factor in dimension `dim` at coordinate `r`. - * - * The convention (consistent with other classes): the global amplitude - * `coef` is applied only in `dim==0` so that a tensor product across - * dimensions yields the correct global amplitude once. + /** + * @brief Evaluate the *1D* separable factor along axis @p dim + * @param r Coordinate along axis @p dim + * @param dim Axis index in [0, D-1]. + * + * @return The value of the 1D Gaussian factor g_dim(r), dim = {0, .., D-1} -> x,y,z... */ double evalf1D(double r, int dim) const override; - /** @brief Expand into a sum of @ref GaussFunc terms (tensor of monomials). - * - * Produces \f$ \prod_d P_d \f$ as a sum of monomials and attaches the same - * Gaussian envelope. This is used both for integration and algebra. 
+ /** + * @brief Convert this GaussPoly to a GaussExp object + * @return A GaussExp representing this GaussPoly */ GaussExp asGaussExp() const override; - /** @brief Analytic derivative in Cartesian direction `dir`. - * - * @note The implementation may throw/abort if not provided for GaussPoly. - * (The .cpp currently marks this as NOT_IMPLEMENTED.) - */ + /// @warning This method is currently not implemented. GaussPoly differentiate(int dir) const override; - /** @} */ - - /** @name Algebra - * @{ - */ - - /** @brief In-place product with another GaussPoly (same center/envelope). - * - * @warning Not implemented in the current source (will abort if called). - */ + /// @warning This method is currently not implemented. void multInPlace(const GaussPoly &rhs); /** @brief In-place product operator (delegates to @ref multInPlace). */ void operator*=(const GaussPoly &rhs) { multInPlace(rhs); } - /** @brief Symbolic product, returns a new GaussPoly. - * - * @warning Not implemented in the current source (will abort if called). - */ + /// @warning This method is currently not implemented. GaussPoly mult(const GaussPoly &rhs); - /** @brief Multiply by scalar (returns a copy). */ + /** + * @brief Multiply this GaussPoly with a scalar + * @param c Scalar to multiply + * @returns Resulting GaussPoly + */ GaussPoly mult(double c); - /** @brief Operator sugar for @ref mult(const GaussPoly&). */ + /** + * @brief Operator overload forwarding to mult + * @param rhs The GaussPoly to multiply with + * @return Resulting GaussPoly + * @warning @ref mult is currently not implemented. + */ GaussPoly operator*(const GaussPoly &rhs) { return mult(rhs); } - /** @brief Operator sugar for @ref mult(double). 
*/ + /** + * @brief Operator overload forwarding to mult + * @param c Scalar to multiply with + * @return Resulting GaussPoly + */ GaussPoly operator*(double c) { return mult(c); } - /** @} */ - - /** @name Accessors (per-axis polynomials) - * @{ + /** + * @brief Returns the polynomial coefficients in a specified dimension + * @param i Dimension index + * @return The Eigen vector of coefficients */ - - /** @brief Read-only access to coefficient vector of polynomial in dim `i`. */ const Eigen::VectorXd &getPolyCoefs(int i) const { return poly[i]->getCoefs(); } - /** @brief Mutable access to coefficient vector of polynomial in dim `i`. */ + /** + * @brief Returns the polynomial coefficients in a specified dimension + * @param i Dimension index + * @return The Eigen vector of coefficients + */ Eigen::VectorXd &getPolyCoefs(int i) { return poly[i]->getCoefs(); } - /** @brief Read-only access to polynomial object in dim `i`. */ + /** + * @brief Returns the Polynomial in a specified dimension + * @param i Dimension index + * @return The Polynomial reference + */ const Polynomial &getPoly(int i) const { return *poly[i]; } - /** @brief Mutable access to polynomial object in dim `i`. */ + /** + * @brief Returns the Polynomial in a specified dimension + * @param i Dimension index + * @return The Polynomial reference + */ Polynomial &getPoly(int i) { return *poly[i]; } - /** @} */ - - /** @name Mutators (structure/shape) - * @{ + /** + * @brief Set the power in dimension d + * @param d Dimension index + * @param pow Power to set */ - - /** @brief Set polynomial degree in one dimension (reallocates @ref Polynomial). */ void setPow(int d, int pow) override; - /** @brief Set polynomial degrees in all dimensions (reallocates). */ + /** + * @brief Set the powers in all dimensions + * @param pow Array of powers to set + */ void setPow(const std::array &pow) override; - /** @brief Replace polynomial in dimension `d` with a copy of `poly`. 
- * - * Updates the stored per-axis degree to `poly.getOrder()`. - * Ownership remains with this GaussPoly (deep copy). + /** + * @brief Set polynomial in given dimension + * @param d Cartesian direction + * @param[in] poly Polynomial to set */ void setPoly(int d, Polynomial &poly); - /** @} */ - private: - /** @brief Owned per-axis polynomials \f$P_d\f$ (nullptr if unused). */ - Polynomial *poly[D]; + Polynomial *poly[D]; ///< Per-axis polynomial factors - /** @brief Helper (recursive): enumerate all monomial power combinations - * and collect combined coefficients (raw C-array version). - * - * Used by @ref asGaussExp to create the full tensor expansion. On the - * recursion leaf it pushes: - * - a newly allocated `int[D]` with the current powers, and - * - the corresponding scalar coefficient (product of axis coefficients, - * times the global amplitude). + /** + * @brief Recursive helper function to fill coefficient and power vectors for all terms + * @param[out] coefs Vector to fill with coefficients + * @param[out] power Vector to fill with power arrays + * @param pow Current power array being built + * @param dir Current dimension being processed */ void fillCoefPowVector(std::vector &coefs, std::vector &power, int pow[D], int dir) const; - /** @brief Helper (recursive): same as above, with std::array accumulator. */ + /** + * @brief Recursive helper function to fill coefficient and power vectors for all terms + * @param[out] coefs Vector to fill with coefficients + * @param[out] power Vector to fill with power arrays + * @param pow Current power array being built + * @param dir Current dimension being processed + */ void fillCoefPowVector(std::vector &coefs, std::vector &power, std::array &pow, int dir) const; - /** @brief Pretty-print (polynomial degrees, coefficients, envelope). 
*/ + /// @brief Print GaussPoly to output stream std::ostream &print(std::ostream &o) const override; }; From 0da95e279bd11f8860c987f787ca2b2244fa71f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Wed, 3 Dec 2025 10:57:22 +0100 Subject: [PATCH 49/51] Documented LegendrePoly --- src/functions/LegendrePoly.cpp | 11 +--- src/functions/LegendrePoly.h | 93 ++++++++-------------------------- 2 files changed, 21 insertions(+), 83 deletions(-) diff --git a/src/functions/LegendrePoly.cpp b/src/functions/LegendrePoly.cpp index 4dce049b0..e785449f3 100644 --- a/src/functions/LegendrePoly.cpp +++ b/src/functions/LegendrePoly.cpp @@ -42,12 +42,10 @@ namespace mrcpp { using LegendreCache = ObjectCache; -/** Legendre polynomial constructed on [-1,1] and - * scaled by n and translated by l */ LegendrePoly::LegendrePoly(int k, double n, double l) : Polynomial(k) { // Since we create Legendre polynomials recursively on [-1,1] - // we cache all lower order polynomilas for future use. + // we cache all lower order polynomials for future use. LegendreCache &Cache = LegendreCache::getInstance(); if (k >= 1) { if (not Cache.hasId(k - 1)) { @@ -63,7 +61,6 @@ LegendrePoly::LegendrePoly(int k, double n, double l) dilate(n); } -/** Compute Legendre polynomial coefs on interval [-1,1] */ void LegendrePoly::computeLegendrePolynomial(int k) { assert(this->size() >= k); if (k == 0) { @@ -91,9 +88,6 @@ void LegendrePoly::computeLegendrePolynomial(int k) { } } -/** Calculate the value of an n:th order Legendre polynominal in x, including - * the first derivative. - */ Vector2d LegendrePoly::firstDerivative(double x) const { double c1, c2, c4, ym, yp, y; double dy, dyp, dym; @@ -139,9 +133,6 @@ Vector2d LegendrePoly::firstDerivative(double x) const { return val; } -/** Calculate the value of an n:th order Legendre polynominal in x, including - * first and second derivatives. 
- */ Vector3d LegendrePoly::secondDerivative(double x) const { NOT_IMPLEMENTED_ABORT; double c1, c2, c4, ym, yp, y, d2y; diff --git a/src/functions/LegendrePoly.h b/src/functions/LegendrePoly.h index a51074310..c719bbe8f 100644 --- a/src/functions/LegendrePoly.h +++ b/src/functions/LegendrePoly.h @@ -29,91 +29,38 @@ namespace mrcpp { -/** @class LegendrePoly - * @brief Polynomial subclass representing a (possibly shifted/scaled) Legendre polynomial. - * - * Purpose - * ------- - * Encapsulates the Legendre polynomial \(P_k\) of degree @p k, constructed on the - * canonical interval \([-1,1]\) and then affinely mapped to an external coordinate - * via the Polynomial base class’ internal transform: - * - * \f[ - * q = N\,x + L, - * \f] - * - * so that evaluations are effectively \(P_k(q(x))\). - * - * Construction details - * -------------------- - * - The raw coefficients of \(P_k(q)\) on \([-1,1]\) are computed using the - * standard three–term recurrence in @ref computeLegendrePolynomial. - * - After coefficients are set, the base class is instructed to translate by @p l - * and dilate by @p n, which stores the affine map \((N,L)\) used at evaluation time. - * - * Notes - * ----- - * - The actual caching of lower-order polynomials and the affine setup are handled - * in the corresponding .cpp file (see constructor and implementation comments). - * - Derivative helpers return values with respect to the *external* variable @p x, - * taking the internal affine map into account. - */ + /** + * @class LegendrePoly + * @brief Class defining a Legendre polynomial of degree k + */ class LegendrePoly final : public Polynomial { public: - /** @brief Construct degree-@p k Legendre polynomial with optional affine transform. - * - * @param k Degree (order) of the Legendre polynomial \(P_k\). - * @param n Dilation factor (applied after translation). Conceptually produces \(P_k(Nx+L)\) with \(N=n\). - * @param l Translation (applied before dilation). 
Conceptually produces \(P_k(Nx+L)\) with \(L=l\). - * - * Semantics - * --------- - * - First builds \(P_k\) on \([-1,1]\) in the internal variable \(q\). - * - Records canonical bounds \([-1,1]\) for error checking. - * - Applies the affine map encoded by @p n and @p l through the base class. + /** + * @brief Construct and compute a Legendre polynomial of degree k + * @param k Degree (order) of the Legendre polynomial + * @param n Dilation factor (applied after translation) + * @param l Translation (applied before dilation) */ LegendrePoly(int k, double n = 1.0, double l = 0.0); - /** @brief Evaluate \(P_k(x)\) and its first derivative w.r.t. the external variable. - * - * @param x External evaluation point. - * @return \f$[\,P_k(x),\,\tfrac{d}{dx}P_k(x)\,]\f$ as an Eigen::Vector2d. - * - * Details - * ------- - * - Internally maps @p x to the polynomial’s canonical coordinate \(q = N x + L\). - * - Uses a recurrence that simultaneously advances value and derivative. - * - Performs a bounds check consistent with the base-class domain bookkeeping. + /** + * @brief Evaluate value and first derivative of this Legendre polynomial in x + * @param x External evaluation point + * @return Value and first derivative as an Eigen::Vector2d */ Eigen::Vector2d firstDerivative(double x) const; - /** @brief Evaluate value, first and second derivatives (declared interface). - * - * @param x External evaluation point. - * @return \f$[\,P_k(x),\,P'_k(x),\,P''_k(x)\,]\f$ as an Eigen::Vector3d. - * - * @note The current implementation in the .cpp intentionally aborts - * (NOT_IMPLEMENTED) to document that second-derivative support - * is not provided yet. 
+ /** + * @brief Evaluate value, first and second derivative of this Legendre polynomial in x (currently not implemented; the .cpp aborts) + * @param x External evaluation point + * @return Value, first and second derivative as an Eigen::Vector3d */ Eigen::Vector3d secondDerivative(double x) const; private: - /** @brief Fill coefficient vector with the canonical \([-1,1]\) Legendre polynomial \(P_k\). - * - * @param k Degree (order). - * - * Implementation sketch - * --------------------- - * - Base cases: - * - \(P_0(q) = 1\) - * - \(P_1(q) = q\) - * - Recurrence for \(k \ge 2\): - * \f[ - * P_k(q) = \frac{(2k-1)\,q\,P_{k-1}(q) - (k-1)\,P_{k-2}(q)}{k}. - * \f] - * - Operates directly in coefficient space (ascending powers of \(q\)). - * - Lower orders are retrieved from an ObjectCache to avoid recomputation (in .cpp). + /** + * @brief Recursively compute the Legendre polynomial of order k on interval [-1,1] + * @param k Order of the Legendre polynomial */ void computeLegendrePolynomial(int k); }; From 988a882629cb8bd92c924340a93cb2f43b92cd49 Mon Sep 17 00:00:00 2001 From: Bin Gao Date: Fri, 12 Dec 2025 23:28:35 +0100 Subject: [PATCH 50/51] add documentation for GaussExp --- src/functions/GaussExp.cpp | 5 - src/functions/GaussExp.h | 303 ++++++++++++++++++++++--------------- 2 files changed, 177 insertions(+), 131 deletions(-) diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp index a57fe6708..521a94d41 100644 --- a/src/functions/GaussExp.cpp +++ b/src/functions/GaussExp.cpp @@ -333,11 +333,6 @@ template std::ostream &GaussExp::print(std::ostream &o) const { return o; } -/** @returns Coulomb repulsion energy between all pairs in GaussExp, including self-interaction - * - * @note Each Gaussian must be normalized to unit charge - * \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct! 
- */ template double GaussExp::calcCoulombEnergy() const { NOT_IMPLEMENTED_ABORT } diff --git a/src/functions/GaussExp.h b/src/functions/GaussExp.h index 6bbed072b..79d87309c 100644 --- a/src/functions/GaussExp.h +++ b/src/functions/GaussExp.h @@ -37,267 +37,318 @@ namespace mrcpp { #define GAUSS_EXP_PREC 1.e-10 -/** @class GaussExp - * @tparam D Spatial dimension (1, 2, 3, …). +/** + * @class GaussExp + * @tparam D Spatial dimension (1, 2, or 3) * - * @brief Container for a finite linear combination (“expansion”) of - * Cartesian Gaussian-type primitives in D dimensions. + * @brief Gaussian expansion in D dimensions * - * Mathematical model - * ------------------ - * - 1D: - * \f[ - * g(x) = \sum_{m=1}^M g_m(x) - * = \sum_{m=1}^M \alpha_m \exp\!\big(-\beta_m (x - x_m)^2\big). - * \f] - * - D dimensions (separable Cartesian form): - * \f[ - * G(\mathbf{x}) = \sum_{m=1}^M G_m(\mathbf{x}) - * = \sum_{m=1}^M \prod_{d=1}^D g^{(d)}_m(x_d), - * \f] - * where each term is represented by a @ref Gaussian (base class) and is - * concretely either a pure Gaussian @ref GaussFunc or a Gaussian times a - * Cartesian polynomial @ref GaussPoly. + * @details + * - Monodimensional Gaussian expansion: * - * Ownership & invariants - * ---------------------- - * - The expansion OWNS its terms via raw pointers (@c Gaussian*). It - * performs deep copies on copy construction/assignment and deletes terms - * in the destructor. - * - @c funcs[i] is either non-null (a valid Gaussian term) or null for - * “empty” slots when constructed with a fixed number of terms. + * \f$ g(x) = \sum_{m=1}^M g_m(x) = \sum_{m=1}^M \alpha_m e^{-\beta_m (x-x_m)^2} \f$ * - * Typical uses - * ------------ - * - Build analytic functions as sums of Gaussians, evaluate them pointwise - * (@ref evalf). - * - Combine expansions algebraically: @ref add, @ref mult (by expansion, - * single term, polynomial term, or scalar). - * - Compute norms and normalize: @ref calcSquareNorm, @ref normalize. 
- * - Manage crude support/visibility via screening: @ref calcScreening, - * @ref setScreen. + * - Multidimensional Gaussian expansion: + * + * \f$ G(x) = \sum_{m=1}^M G_m(x) = \sum_{m=1}^M \prod_{d=1}^D g_m^d(x^d) \f$ + * + * Each Gaussian-type function (GTF) is represented by a @ref Gaussian + * (base class) and is concretely either a pure Gaussian @ref GaussFunc or a + * Gaussian times a Cartesian polynomial @ref GaussPoly. */ template class GaussExp : public RepresentableFunction { public: /** - * @brief Construct an expansion with @p nTerms empty slots. + * @brief Construct a Gaussian expansion and initialize each GTF to `nullptr` * - * @param nTerms Number of entries reserved in @ref funcs (default 0). - * @param prec Historical argument (unused here); kept for API symmetry. + * @param nTerms Number of GTFs (default 0) + * @param prec Unused here * - * After construction, populate terms via @ref setFunc or @ref append. + * @note After construction, populate GTFs via @ref setFunc or @ref append. */ GaussExp(int nTerms = 0, double prec = GAUSS_EXP_PREC); - /** @brief Deep-copy constructor (clones every term via virtual copy()). */ + /// @brief Deep-copy constructor (clones every GTF via virtual copy()) GaussExp(const GaussExp &gExp); - /** @brief Deep-copy assignment (existing terms are discarded then cloned). */ + /// @brief Deep-copy assignment (existing GTFs are discarded then cloned) GaussExp &operator=(const GaussExp &gExp); - /** @brief Destructor: deletes all owned terms and clears pointers. 
*/ + /// @brief Destructor: deletes all owned GTFs and clears pointers ~GaussExp() override; - // ---- STL-style iteration over owned pointers (non-const and const) ---- - auto begin() { return funcs.begin(); } - auto end() { return funcs.end(); } - const auto begin() const { return funcs.begin(); } - const auto end() const { return funcs.end(); } - - // ---- Analysis helpers --------------------------------------------------- + auto begin() { return funcs.begin(); } ///< @return An iterator pointing to the first GTF + auto end() { return funcs.end(); } ///< @return An iterator pointing to the past-the-end GTF + const auto begin() const { return funcs.begin(); } ///< @return A const iterator pointing to the first GTF + const auto end() const { return funcs.end(); } ///< @return A const iterator pointing to the past-the-end GTF /** - * @brief Coulomb self-energy of the expansion. - * @details Implemented for D=3 (see .cpp); throws/not-implemented for others. - * @note For physical correctness each term should be charge-normalized. + * @brief Coulomb repulsion energy between all pairs in the Gaussian expansion, including self-interaction + * @note Each GTF must be normalized to unit charge + * \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct! + * Currently this function is only implemented for `D=3`. */ double calcCoulombEnergy() const; /** - * @brief Compute the squared L2 norm of the expansion: - * \f$ \| \sum_i f_i \|_2^2 = \sum_i \|f_i\|^2 + 2\sum_{i::calcScreening. */ void calcScreening(double nStdDev = defaultScreening); - // ---- RepresentableFunction interface ------------------------------------ - /** - * @brief Pointwise evaluation \f$ f(\mathbf{r}) = \sum_i f_i(\mathbf{r}) \f$. - * @param r D-dimensional coordinate. 
+ * @brief Evaluate the Gaussian expansion at a D-dimensional coordinate + * @param r Point (Coord) in physical space in the MRA box + * @return Gaussian expansion value at the point */ double evalf(const Coord &r) const override; - // ---- Other transforms/utilities ----------------------------------------- - /** - * @brief Build a periodified version of the expansion by tiling each term. - * @param period Period per axis (e.g., {Lx, Ly, Lz} in 3D). - * @param nStdDev Screening control for how many images to include. + * @brief Generates a Gaussian expansion that is semi-periodic around a unit-cell + * @param[in] period: The period of the unit cell + * @param[in] nStdDev: Number of standard deviations covered in each direction. Default 4.0 + * @return Semi-periodic version of a Gaussian expansion around a unit-cell + * + * @note See the implementation of each GTF in @ref Gaussian::periodify. */ GaussExp periodify(const std::array &period, double nStdDev = 4.0) const; /** - * @brief Component-wise derivative \f$\partial/\partial x_{\text{dir}}\f$. - * @param dir Axis index in [0, D). - * @return New expansion with each term differentiated. + * @brief Analytic derivative d/dx_dir (Cartesian direction) of the Gaussian expansion + * @param dir Axis index in [0, D-1] + * + * @return A GaussExp representing the derivative */ GaussExp differentiate(int dir) const; - // ---- Algebra: additive and multiplicative combinators ------------------- - - /** @brief Concatenate two expansions (returns all terms from both). */ + /** + * @brief Build a new Gaussian expansion that is the combination of this expansion and the other + * @param g The other Gaussian expansion + * @return The new Gaussian expansion with all GTFs from this expansion and the other + */ GaussExp add(GaussExp &g); - /** @brief Append a single Gaussian term to this expansion (returns new sum). 
*/ + /** + * @brief Build a new Gaussian expansion by appending a single GTF to this expansion + * @param g The single GTF + * @return The new Gaussian expansion with GTFs from this expansion and the single GTF + */ GaussExp add(Gaussian &g); /** - * @brief Distribute product over terms: - * (Σ f_i) * (Σ g_j) = Σ_{ij} f_i⋅g_j (resulting in GaussPoly terms). + * @brief Build a new Gaussian expansion by multiplying this expansion and the other + * @param g The other Gaussian expansion + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i})(\sum_{j} g_{j}) = \sum_{ij} f_{i} g_{j} \f$ */ GaussExp mult(GaussExp &g); - /** @brief Multiply by a single pure Gaussian (resulting in GaussPoly terms). */ + /** + * @brief Build a new Gaussian expansion by multiplying this expansion and a single GTF + * @param g The single GTF + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i}) g = \sum_{i} f_{i} g \f$ + */ GaussExp mult(GaussFunc &g); - /** @brief Multiply by a single Gaussian–polynomial term. */ + /** + * @brief Build a new Gaussian expansion by multiplying this expansion and a single @ref GaussPoly + * @param g The single @ref GaussPoly + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i}) g = \sum_{i} g f_{i} \f$ + */ GaussExp mult(GaussPoly &g); - /** @brief Return a copy scaled by scalar @p d. */ + /** + * @brief Build a new Gaussian expansion whose coefficient is scaled by a scalar + * @param d The scalar + * @return The new Gaussian expansion */ GaussExp mult(double d); - /** @brief Scale coefficients in place by scalar @p d. 
*/ + /** + * @brief Scale coefficients of this expansion in place by a scalar + * @param d The scalar */ void multInPlace(double d); - // ---- Operator sugar (forward to the methods above) ---------------------- - + /** + * @brief Overload the + operator to return a new Gaussian expansion formed by combining this expansion with the other + * @param g The other Gaussian expansion + * @return The new Gaussian expansion with all GTFs from this expansion and the other + */ GaussExp operator+(GaussExp &g) { return this->add(g); } + /** + * @brief Overload the + operator to return a new Gaussian expansion formed by appending a single GTF to this expansion + * @param g The single GTF + * @return The new Gaussian expansion with GTFs from this expansion and the single GTF + */ GaussExp operator+(Gaussian &g) { return this->add(g); } + /** + * @brief Overload the * operator to return a new Gaussian expansion formed by multiplying this expansion and the other + * @param g The other Gaussian expansion + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i})(\sum_{j} g_{j}) = \sum_{ij} f_{i} g_{j} \f$ + */ GaussExp operator*(GaussExp &g) { return this->mult(g); } + /** + * @brief Overload the * operator to return a new Gaussian expansion formed by multiplying this expansion and a single GTF + * @param g The single GTF + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i}) g = \sum_{i} f_{i} g \f$ + */ GaussExp operator*(GaussFunc &g) { return this->mult(g); } + /** + * @brief Overload the * operator to return a new Gaussian expansion formed by multiplying this expansion and a single @ref GaussPoly + * @param g The single @ref GaussPoly + * @return The new Gaussian expansion \f$ (\sum_{i} f_{i}) g = \sum_{i} g f_{i} \f$ + */ GaussExp operator*(GaussPoly &g) { return this->mult(g); } + /** + * @brief Overload the * operator to return a new Gaussian expansion whose coefficient is scaled by a scalar + * @param d The scalar + * @return The new Gaussian 
expansion */ GaussExp operator*(double d) { return this->mult(d); } + /** + * @brief Overload the * operator to scale coefficients of this expansion in place by a scalar + * @param d The scalar */ void operator*=(double d) { this->multInPlace(d); } - // ---- Accessors ---------------------------------------------------------- - - /** @brief Current screening parameter (sign may encode “enabled/disabled”). */ + /// @brief Get screening parameter double getScreening() const { return screening; } - /** @brief Exponent(s) α per axis for term i. */ + /// @brief Get Gaussian exponent(s) α per axis for the i-th GTF std::array getExp(int i) const { return this->funcs[i]->getExp(); } - /** @brief Scalar coefficient for term i. */ + /// @brief Get coefficient for the i-th GTF double getCoef(int i) const { return this->funcs[i]->getCoef(); } - /** @brief Powers (Cartesian angular momenta) per axis for term i. */ + /// @brief Get powers for the i-th GTF const std::array &getPower(int i) const { return this->funcs[i]->getPower(); } - /** @brief Center position per axis for term i. */ + /// @brief Get position for the i-th GTF const std::array &getPos(int i) const { return this->funcs[i]->getPos(); } - /** @brief Number of (owned) terms in the expansion. */ + /// @brief Get number of GTFs in the expansion int size() const { return this->funcs.size(); } - /** @brief Mutable access to term i (reference). */ + /// @brief Get mutable access to the i-th GTF Gaussian &getFunc(int i) { return *this->funcs[i]; } - /** @brief Const access to term i (reference). */ + /// @brief Get const access to the i-th GTF const Gaussian &getFunc(int i) const { return *this->funcs[i]; } - /** @brief Mutable pointer access (may be null if slot is empty). */ + /// @brief Get mutable pointer access to the i-th GTF Gaussian *operator[](int i) { return this->funcs[i]; } - /** @brief Const pointer access (may be null if slot is empty). 
*/ + /// @brief Get const pointer access to the i-th GTF const Gaussian *operator[](int i) const { return this->funcs[i]; } - // ---- Mutators ----------------------------------------------------------- - /** - * @brief Install a Gaussian–polynomial term at slot i, scaling its coef by c. - * @details Replaces any existing object at slot i (deletes old). + * @brief Set a @ref GaussPoly for the i-th GTF in the expansion and scale its coefficient by a scalar + * @param i The i-th GTF + * @param g The @ref GaussPoly + * @param c The scalar + * @note Existing i-th GTF will be deleted */ void setFunc(int i, const GaussPoly &g, double c = 1.0); /** - * @brief Install a pure Gaussian term at slot i, scaling its coef by c. - * @details Replaces any existing object at slot i (deletes old). + * @brief Set a single GTF for the i-th GTF in the expansion and scale its coefficient by a scalar + * @param i The i-th GTF + * @param g The @ref GaussFunc + * @param c The scalar + * @note Existing i-th GTF will be deleted */ void setFunc(int i, const GaussFunc &g, double c = 1.0); - /** - * @brief Set global default screening for newly created instances. - * @throws If @p screen is negative. - */ + /// @brief Set global default screening for the Gaussian expansion void setDefaultScreening(double screen); /** - * @brief Enable/disable screening for this expansion and forward to terms. - * @details Conventionally, a positive @ref screening means “enabled” and - * a negative value means “disabled”. + * @brief Enable/disable screening for this expansion and forward to all GTFs + * @details Conventionally, a positive @ref screening means "enabled" and + * a negative value means "disabled". */ void setScreen(bool screen); - /** @brief Set (isotropic) exponent(s) α for term i. 
*/ + /** + * @brief Set (isotropic) exponent for the i-th GTF + * @param i The i-th GTF + * @param a The (isotropic) exponent + */ void setExp(int i, double a) { this->funcs[i]->setExp(a); } - /** @brief Set scalar coefficient for term i. */ + /** + * @brief Set coefficient for the i-th GTF + * @param i The i-th GTF + * @param b The coefficient + */ void setCoef(int i, double b) { this->funcs[i]->setCoef(b); } - /** @brief Set Cartesian powers for term i. */ + /** + * @brief Set powers for the i-th GTF + * @param i The i-th GTF + * @param power The powers + */ void setPow(int i, const std::array &power) { this->funcs[i]->setPow(power); } - /** @brief Set center position for term i. */ + /** + * @brief Set center coordinates for the i-th GTF + * @param i The i-th GTF + * @param pos The center coordinates + */ void setPos(int i, const std::array &pos) { this->funcs[i]->setPos(pos); } - /** @brief Append a single (cloned) Gaussian to the end of the expansion. */ + /** + * @brief Append a single GTF to the end of the expansion + * @param g The single GTF + */ void append(const Gaussian &g); - /** @brief Append all terms (cloned) from another expansion. */ + /** + * @brief Append all GTFs from the other expansion + * @param g The other expansion + */ void append(const GaussExp &g); - /** @brief Stream pretty-printer: prints a summary and the terms. */ + /** @brief Stream pretty-printer (delegates to protected function @ref GaussExp::print) */ friend std::ostream &operator<<(std::ostream &o, const GaussExp &gExp) { return gExp.print(o); } - /** @brief Grant @ref Gaussian access to internals where necessary. */ friend class Gaussian; protected: - /** @brief Owned list of Gaussian terms (raw-pointer ownership). */ std::vector *> funcs; - /** @brief Default screening parameter for new instances of this @c D. */ static double defaultScreening; - /** @brief Instance screening parameter (sign may encode enabled/disabled). 
*/ double screening{0.0}; - /** @brief Implementation of stream printing (called by operator<<). */ + /** + * @brief Implementation of stream printing (called by operator<<) + * @param o The output stream + * @return The output stream + */ std::ostream &print(std::ostream &o) const; /** - * @brief Coarse visibility test for adaptive algorithms. - * @details Returns false if any term declares itself not visible - * at the given scale/sample count; true otherwise. + * @brief Heuristic visibility vs. resolution scale and quadrature sampling + * @param scale Dyadic scale (tile size ~ 2^{-scale}) + * @param nPts Number of quadrature points per tile edge + * @return false if any GTF declares itself not visible, true otherwise */ bool isVisibleAtScale(int scale, int nPts) const override; /** - * @brief Quick zero check on a box \f$[lb,ub]^D\f$: - * returns true only if every term is zero on the box. + * @brief Quick check whether the expansion is essentially zero on [lb,ub] per axis + * @param lb Lower bounds array of length D + * @param ub Upper bounds array of length D + * @return true only if each GTF is effectively zero on [lb,ub] */ bool isZeroOnInterval(const double *lb, const double *ub) const override; }; -} // namespace mrcpp \ No newline at end of file +} // namespace mrcpp From 4b2945966736f1b2ade258c1892bdfb61b7ad110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20G=C3=B6llmann?= Date: Wed, 25 Feb 2026 14:50:04 +0100 Subject: [PATCH 51/51] documented Polynomial.h --- src/functions/Polynomial.cpp | 32 ---- src/functions/Polynomial.h | 327 ++++++++++++++++++----------------- 2 files changed, 169 insertions(+), 190 deletions(-) diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp index c54acc148..08d8cdfaf 100644 --- a/src/functions/Polynomial.cpp +++ b/src/functions/Polynomial.cpp @@ -24,12 +24,9 @@ */ /** - * * \date Jun 7, 2009 * \author Jonas Juselius \n * CTCC, University of Tromsø - * - * */ #include @@ -42,8 +39,6 @@ using namespace 
Eigen; namespace mrcpp { -/** Construct polynomial of order zero with given size and bounds. - * Includes default constructor. */ Polynomial::Polynomial(int k, const double *a, const double *b) : RepresentableFunction<1, double>(a, b) { assert(k >= 0); @@ -60,7 +55,6 @@ Polynomial::Polynomial(double c, int k, const double *a, const double *b) for (int i = 0; i <= k; i++) { this->coefs[i] *= std::pow(c, k - i); } } -/** Construct polynomial with given coefficient vector and bounds. */ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) : RepresentableFunction<1>(a, b) { this->N = 1.0; @@ -68,7 +62,6 @@ Polynomial::Polynomial(const VectorXd &c, const double *a, const double *b) setCoefs(c); } -/** Makes a complete copy of the polynomial */ Polynomial::Polynomial(const Polynomial &poly) : RepresentableFunction<1>(poly) { this->N = poly.N; @@ -76,7 +69,6 @@ Polynomial::Polynomial(const Polynomial &poly) this->coefs = poly.coefs; } -/** Copies only the function, not its bounds */ Polynomial &Polynomial::operator=(const Polynomial &poly) { RepresentableFunction<1>::operator=(poly); this->N = poly.N; @@ -85,7 +77,6 @@ Polynomial &Polynomial::operator=(const Polynomial &poly) { return *this; } -/** Evaluate scaled and translated polynomial */ double Polynomial::evalf(double x) const { if (isBounded()) { if (x < this->getScaledLowerBound()) return 0.0; @@ -100,35 +91,28 @@ double Polynomial::evalf(double x) const { return y; } -/** This returns the actual scaled lower bound */ double Polynomial::getScaledLowerBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->A[0] + this->L)); } -/** This returns the actual scaled upper bound */ double Polynomial::getScaledUpperBound() const { if (not isBounded()) MSG_ERROR("Unbounded polynomial"); return (1.0 / this->N * (this->B[0] + this->L)); } -/** Divide by norm of (bounded) polynomial. 
*/ void Polynomial::normalize() { double sqNorm = calcSquareNorm(); if (sqNorm < 0.0) MSG_ABORT("Cannot normalize polynomial"); (*this) *= 1.0 / std::sqrt(sqNorm); } -/** Compute the squared L2-norm of the (bounded) polynomial. - * Unbounded polynomials return -1.0. */ double Polynomial::calcSquareNorm() { double sqNorm = -1.0; if (isBounded()) { sqNorm = this->innerProduct(*this); } return sqNorm; } -/** Returns the order of the highest non-zero coef. - * NB: Not the length of the coefs vector. */ int Polynomial::getOrder() const { int n = 0; for (int i = 0; i < this->coefs.size(); i++) { @@ -137,13 +121,11 @@ int Polynomial::getOrder() const { return n; } -/** Calculate P = c*P */ Polynomial &Polynomial::operator*=(double c) { this->coefs = c * this->coefs; return *this; } -/** Calculate P = P*Q */ Polynomial &Polynomial::operator*=(const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -160,7 +142,6 @@ Polynomial &Polynomial::operator*=(const Polynomial &Q) { return P; } -/** Calculate Q = c*P */ Polynomial Polynomial::operator*(double c) const { const Polynomial &P = *this; Polynomial Q(P); @@ -168,8 +149,6 @@ Polynomial Polynomial::operator*(double c) const { return Q; } -/** Calculate R = P*Q. - * Returns unbounded polynomial. */ Polynomial Polynomial::operator*(const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -178,19 +157,16 @@ Polynomial Polynomial::operator*(const Polynomial &Q) const { return R; } -/** Calculate P = P + Q. */ Polynomial &Polynomial::operator+=(const Polynomial &Q) { this->addInPlace(1.0, Q); return *this; } -/** Calculate P = P - Q. */ Polynomial &Polynomial::operator-=(const Polynomial &Q) { this->addInPlace(-1.0, Q); return *this; } -/** Calculate P = P + c*Q. 
*/ void Polynomial::addInPlace(double c, const Polynomial &Q) { Polynomial &P = *this; if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); } @@ -208,8 +184,6 @@ void Polynomial::addInPlace(double c, const Polynomial &Q) { P.setCoefs(newCoefs); } -/** Calculate R = P + c*Q, with a default c = 1.0. - * Returns unbounded polynomial. */ Polynomial Polynomial::add(double c, const Polynomial &Q) const { const Polynomial &P = *this; Polynomial R; @@ -218,7 +192,6 @@ Polynomial Polynomial::add(double c, const Polynomial &Q) const { return R; } -/** Calculate Q = dP/dx */ Polynomial Polynomial::calcDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -226,7 +199,6 @@ Polynomial Polynomial::calcDerivative() const { return Q; } -/** Calculate P = dP/dx */ void Polynomial::calcDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -236,7 +208,6 @@ void Polynomial::calcDerivativeInPlace() { P.setCoefs(newCoefs); } -/** Calculate indefinite integral Q = \int dP dx, integration constant set to zero */ Polynomial Polynomial::calcAntiDerivative() const { const Polynomial &P = *this; Polynomial Q(P); @@ -244,7 +215,6 @@ Polynomial Polynomial::calcAntiDerivative() const { return Q; } -/** Calculate indefinite integral P = \int dP dx, integration constant set to zero */ void Polynomial::calcAntiDerivativeInPlace() { Polynomial &P = *this; int P_order = P.getOrder(); @@ -256,7 +226,6 @@ void Polynomial::calcAntiDerivativeInPlace() { P.setCoefs(newCoefs); } -/** Integrate the polynomial P on [a,b] analytically */ double Polynomial::integrate(const double *a, const double *b) const { double lb = -DBL_MAX, ub = DBL_MAX; if (this->isBounded()) { @@ -275,7 +244,6 @@ double Polynomial::integrate(const double *a, const double *b) const { return sfac * (antidiff.evalf(ub) - antidiff.evalf(lb)); } -/** Compute analytically on interval defined by the calling polynomial. 
*/ double Polynomial::innerProduct(const Polynomial &Q) const { const Polynomial &P = *this; if (not P.isBounded()) MSG_ERROR("Unbounded polynomial"); diff --git a/src/functions/Polynomial.h b/src/functions/Polynomial.h index ce72828b4..ecdcfdab3 100644 --- a/src/functions/Polynomial.h +++ b/src/functions/Polynomial.h @@ -23,37 +23,6 @@ * */ -/** - * # Polynomial (interface) - * - * A light-weight, *affine-mapped* univariate polynomial used throughout MRCPP. - * Internally, a polynomial is represented in the auxiliary variable - * - * \f$ q = N\,x - L \f$ - * - * where `N` is a dilation and `L` a translation. Coefficients are stored - * in **ascending** powers of `q`, i.e. `coefs[k]` multiplies \f$q^k\f$. - * - * The class supports: - * - optional finite **bounds** (via the @ref RepresentableFunction base); - * values outside the bounds evaluate to 0. - * - algebra (sum, product, scalar scale) **within the same affine map** - * (same `N` and `L`). - * - analytical **derivatives**, **antiderivatives**, **inner products** - * and **definite integrals**. - * - * ## Affine operations (N, L) - * - `setDilation`, `setTranslation` overwrite the affine map. - * - `dilate(n)` changes the current map as `N ← N*n`. - * - `translate(l)` applies an external x-translation by `l`, which in the - * internal map becomes `L ← L + N*l` so that the *external* shift is by `l`. - * - * ## Order vs. size - * - `size()` returns the raw length of the coefficient vector. - * - `getOrder()` returns the highest index whose coefficient is numerically - * non-zero (trims trailing ~0 entries defined by `MachineZero`). 
- */ - #pragma once #include @@ -64,195 +33,237 @@ namespace mrcpp { +/** + * @class Polynomial + * + * @brief Base class for general polynomials + * + * @details The Polynomial class(es) are not implemented in the + * most efficient manner, because they are only evaluated a fixed + * number of times in a few predefined points, and all other + * evaluations are done by linear transformations. PolynomialCache + * implements the fast, and static const versions of the various + * Polynomials. + */ class Polynomial : public RepresentableFunction<1, double> { public: - /** @name Constructors - * @{ + /** + * @brief Construct zero polynomial of order k with given bounds + * @param k Order of the polynomial + * @param a Lower bound in x as raw pointer + * @param b Upper bound in x as raw pointer */ - /** @brief Zero polynomial of order @p k on optional bounds [a,b]. */ Polynomial(int k = 0, const double *a = nullptr, const double *b = nullptr); - /** @overload */ + + /** + * @brief Construct polynomial of order k with given bounds + * @param k Order of the polynomial + * @param a Lower bound in x as vector + * @param b Upper bound in x as vector + */ Polynomial(int k, const std::vector &a, const std::vector &b) : Polynomial(k, a.data(), b.data()) {} - /** @brief From coefficient vector (ascending powers in q) and optional bounds. */ - Polynomial(const Eigen::VectorXd &c, const double *a = nullptr, const double *b = nullptr); - /** @overload */ - Polynomial(const Eigen::VectorXd &c, const std::vector &a, const std::vector &b) - : Polynomial(c, a.data(), b.data()) {} + /** - * @brief Constructs the binomial expansion of \f$(x-c)^k\f$ with optional bounds. - * - * Coefficients are filled using the binomial theorem; the internal map is - * initialized to the identity (`N=1, L=0`). 
+ * @brief Construct polynomial with given coefficient, order and bounds + * @param c Coefficient of the polynomial + * @param k Order of the polynomial + * @param a Lower bound in x as raw pointer + * @param b Upper bound in x as raw pointer */ Polynomial(double c, int k = 0, const double *a = nullptr, const double *b = nullptr); - /** @overload */ + + /** + * @brief Construct polynomial with given coefficient, order and bounds + * @param c Coefficient of the polynomial + * @param k Order of the polynomial + * @param a Lower bound in x as vector + * @param b Upper bound in x as vector + */ Polynomial(double c, int k, const std::vector &a, const std::vector &b) : Polynomial(c, k, a.data(), b.data()) {} - /** @brief Deep copy (including bounds and affine map). */ + + /** + * @brief Construct polynomial with given coefficient vector and bounds + * @param c Coefficient vector + * @param a Lower bound in x as raw pointer + * @param b Upper bound in x as raw pointer + */ + Polynomial(const Eigen::VectorXd &c, const double *a = nullptr, const double *b = nullptr); + + /** + * @brief Construct polynomial with given coefficient vector and bounds + * @param c Coefficient vector + * @param a Lower bound in x as vector + * @param b Upper bound in x as vector + */ + Polynomial(const Eigen::VectorXd &c, const std::vector &a, const std::vector &b) + : Polynomial(c, a.data(), b.data()) {} + + /** @brief Copy constructor */ Polynomial(const Polynomial &poly); - /** @brief Deep copy assignment (including bounds and affine map). */ + /** @brief Assignment operator, copies only the function, not its bounds */ Polynomial &operator=(const Polynomial &poly); + /** @brief Virtual destructor */ virtual ~Polynomial() = default; - /** @} */ - /** @name Evaluation - * @{ - */ /** - * @brief Evaluate at external coordinate \f$x\f$. - * - * If the polynomial has active bounds, returns `0` outside the bounded - * interval (in x). Internally evaluates the q-series with - * \f$q = N x - L\f$. 
+ * @brief Evaluate scaled and translated polynomial + * @param x External evaluation point + * @return The polynomial value at x */ double evalf(double x) const; - /** @brief Convenience overload using a @ref Coord wrapper. */ + + /** + * @brief Evaluate scaled and translated polynomial at a given point + * @param r 1D-Cartesian coordinate + * @return The polynomial value at r + */ double evalf(const Coord<1> &r) const { return evalf(r[0]); } - /** @} */ - /** @name Bounds mapped to x - * @{ - */ - /** @brief Lower bound in x corresponding to the internal bound in q. */ - double getScaledLowerBound() const; - /** @brief Upper bound in x corresponding to the internal bound in q. */ - double getScaledUpperBound() const; - /** @} */ - - /** @name Norms - * @{ - */ - /** @brief L2-normalize on current (finite) bounds; no-op if unbounded. */ - void normalize(); + double getScaledLowerBound() const; ///< @return The actual scaled lower bound + double getScaledUpperBound() const; ///< @return The actual scaled upper bound + + void normalize(); ///< @brief Divide by norm of (bounded) polynomial + /** - * @brief Squared L2 norm on current bounds. - * @return \f$\|P\|^2\f$ if bounded; `-1` if unbounded. + * @brief Calculate the squared L2 norm of the (bounded) polynomial + * @return Squared L2 norm, -1 if unbounded */ double calcSquareNorm(); - /** @} */ - /** @name Affine map (q = N x - L) - * @{ - */ - double getTranslation() const { return this->L; } ///< Current L (translation in q-map). - double getDilation() const { return this->N; } ///< Current N (dilation in q-map). 
+ double getTranslation() const { return this->L; } ///< @return Current translation + double getDilation() const { return this->N; } ///< @return Current dilation + + void setDilation(double n) { this->N = n; } ///< @brief Set dilation factor N + void setTranslation(double l) { this->L = l; } ///< @brief Set translation L + void dilate(double n) { this->N *= n; } ///< @brief Dilate by factor n + void translate(double l) { this->L += this->N * l; } ///< @brief Translate by l + + int size() const { return this->coefs.size(); } ///< @return The size of the coefficient vector + int getOrder() const; ///< @return The order of the highest non-zero coefficient + + void clearCoefs() { this->coefs = Eigen::VectorXd::Zero(1); } ///< @brief Clear all coefficients + void setZero() { this->coefs = Eigen::VectorXd::Zero(this->coefs.size()); } ///< @brief Set all coefficients to zero + void setCoefs(const Eigen::VectorXd &c) { this->coefs = c; } ///< @brief Replace the coefficient vector with a new one + + Eigen::VectorXd &getCoefs() { return this->coefs; } ///< @return The coefficient vector + const Eigen::VectorXd &getCoefs() const { return this->coefs; } ///< @return The coefficient vector (const version) - void setDilation(double n) { this->N = n; } ///< Overwrite N. - void setTranslation(double l) { this->L = l; } ///< Overwrite L. - void dilate(double n) { this->N *= n; } ///< Scale N in place. /** - * @brief External x-translation by @p l. - * - * Adjusts the internal map as \f$L \leftarrow L + N\,l\f$ so that - * \f$q = N(x+l) - L_\text{old} = N x - (L_\text{old}-N l)\f$. + * @brief Calculates the derivative \f$ Q = dP/dx \f$ of this polynomial + * @return The derivative polynomial Q */ - void translate(double l) { this->L += this->N * l; } - /** @} */ + Polynomial calcDerivative() const; - /** @name Coefficients and order - * @{ - */ - int size() const { return this->coefs.size(); } ///< Raw length of the coefficient vector (q-powers). 
/** - * @brief Highest non-negligible power (polynomial degree). - * - * Scans from low to high and returns the largest index whose coefficient - * magnitude exceeds `MachineZero`. May be smaller than `size()-1`. - */ - int getOrder() const; - /** @brief Replace coefficients with a single zero (reset to degree 0). */ - void clearCoefs() { this->coefs = Eigen::VectorXd::Zero(1); } - /** @brief Zero all current coefficients (preserve vector length). */ - void setZero() { this->coefs = Eigen::VectorXd::Zero(this->coefs.size()); } - /** @brief Overwrite the coefficient vector (ascending powers in q). */ - void setCoefs(const Eigen::VectorXd &c) { this->coefs = c; } - - /** @brief Mutable access to the coefficient vector. */ - Eigen::VectorXd &getCoefs() { return this->coefs; } - /** @brief Const access to the coefficient vector. */ - const Eigen::VectorXd &getCoefs() const { return this->coefs; } - /** @} */ - - /** @name Calculus - * @{ + * @brief Calculates the indefinite integral \f$ Q = \int P\,dx \f$ of this polynomial, with constant = 0 + * @return The indefinite integral polynomial Q */ - /** @brief Returns \f$ P' \f$ (derivative w.r.t. x). */ - Polynomial calcDerivative() const; - /** @brief Returns an antiderivative \f$ Q \f$ with \f$Q(0)=0\f$. */ Polynomial calcAntiDerivative() const; - /** @brief In-place derivative \f$ P \leftarrow P' \f$. */ + /** + * @brief Calculates the derivative \f$ P \leftarrow dP/dx \f$ of this polynomial in-place + * @details Replaces the current polynomial with its derivative, i.e. \f$ P \leftarrow dP/dx \f$. + */ void calcDerivativeInPlace(); - /** @brief In-place antiderivative \f$ P \leftarrow \int P\,dx \f$, constant = 0. */ + + /** + * @brief Calculates the indefinite integral \f$ P \leftarrow \int P\,dx \f$ of this polynomial in-place, with constant = 0 + * @details Replaces the current polynomial with its indefinite integral, i.e. \f$ P \leftarrow \int P\,dx \f$, with integration constant set to zero. 
+ */ void calcAntiDerivativeInPlace(); - /** @} */ - /** @name Integration & inner product - * @{ - */ /** - * @brief Analytic definite integral \f$\int_a^b P(x)\,dx\f$. - * - * - If the polynomial has internal bounds, integrates over the - * intersection with \f$[a,b]\f$ (if `a`/`b` are provided). - * - If unbounded, both `a` and `b` must be provided. + * @brief Calculates the analytical integral of P on [a, b] + * @param a Lower bound of the integration interval, defaults to the polynomial's lower bound + * @param b Upper bound of the integration interval, defaults to the polynomial's upper bound + * @return The integral of P on [a, b] */ double integrate(const double *a = 0, const double *b = 0) const; + /** - * @brief Inner product \f$\langle P,Q\rangle = \int P(x)Q(x)\,dx\f$ over P's bounds. - * - * Requires `*this` to be bounded. The product is formed algebraically and - * integrated over the same interval. + * @brief Analytically calculates the inner product of this polynomial with another one + * @param Q The other polynomial + * @return The inner product of the two polynomials */ - double innerProduct(const Polynomial &p) const; - /** @} */ + double innerProduct(const Polynomial &Q) const; - /** @name Algebra (same affine map required) - * @{ - */ /** - * @brief Fused add: \f$ P \leftarrow P + c\,Q \f$. - * - * @note Both operands must have the same `(N,L)`; this is enforced in the - * implementation and will error out if violated. + * @brief In-place sum \f$ P \leftarrow P + c\,Q \f$. + * @param c Scalar multiplier for Q + * @param Q The polynomial to be added to P */ void addInPlace(double c, const Polynomial &Q); - /** @brief Returns \f$ R = P + c\,Q \f$ (operands unchanged). */ + + /** + * @brief Sum \f$ R = P + c\,Q \f$. + * @param c Scalar multiplier for Q + * @param Q The polynomial to be added to P + * @return The resulting polynomial + */ Polynomial add(double c, const Polynomial &Q) const; - /** @brief Scalar product \f$ Q = c\,P \f$. 
*/ + /** + * @brief Scalar product of Polynomial with c + * @param c The scalar multiplier + * @return The resulting polynomial + */ Polynomial operator*(double c) const; + /** - * @brief Polynomial product \f$ R = P\cdot Q \f$. - * - * @note Requires same `(N,L)` affine map in the implementation. + * @brief Product of two Polynomials + * @param Q The other polynomial + * @return The resulting (unbounded) polynomial */ Polynomial operator*(const Polynomial &Q) const; - /** @brief Sum \f$ P+Q \f$ (convenience). */ + /** + * @brief Sum two Polynomials + * @param Q The other polynomial + * @return The resulting polynomial + */ Polynomial operator+(const Polynomial &Q) const { return add(1.0, Q); } - /** @brief Difference \f$ P-Q \f$ (convenience). */ + + /** + * @brief Difference of two Polynomials + * @param Q The other polynomial + * @return The resulting polynomial + */ Polynomial operator-(const Polynomial &Q) const { return add(-1.0, Q); } - /** @brief In-place scalar scale: \f$ P \leftarrow c\,P \f$. */ + /** + * @brief In-place scalar product. + * @param c The scalar multiplier + * @return Reference to the modified polynomial + */ Polynomial &operator*=(double c); + /** - * @brief In-place product: \f$ P \leftarrow P\cdot Q \f$. - * - * @note Requires same `(N,L)` affine map in the implementation. + * @brief In-place product of two Polynomials + * @param Q The other polynomial + * @return Reference to the modified polynomial */ Polynomial &operator*=(const Polynomial &Q); - /** @brief In-place sum: \f$ P \leftarrow P+Q \f$. */ + + /** + * @brief In-place sum of two Polynomials + * @param Q The other polynomial + * @return Reference to the modified polynomial + */ Polynomial &operator+=(const Polynomial &Q); - /** @brief In-place difference: \f$ P \leftarrow P-Q \f$. 
*/ + + /** + * @brief In-place difference of two Polynomials + * @param Q The other polynomial + * @return Reference to the modified polynomial + */ Polynomial &operator-=(const Polynomial &Q); - /** @} */ protected: - double N; ///< Dilation in the internal map \f$q = N x - L\f$. - double L; ///< Translation in the internal map \f$q = N x - L\f$. - Eigen::VectorXd coefs; ///< Coefficients for ascending powers of \f$q\f$. + double N; ///< Dilation coefficient + double L; ///< Translation coefficient + Eigen::VectorXd coefs; ///< Expansion coefficients }; } // namespace mrcpp \ No newline at end of file