From 56c96dc3d787641039fb07f6500f5ec19ac62f14 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peterw@met.no>
Date: Thu, 25 Apr 2024 19:21:36 +0200
Subject: [PATCH 01/38] template for native complex trees

---
 api/mrcpp_declarations.h                      |  61 ++-
 src/functions/AnalyticFunction.h              |  16 +-
 src/functions/BoysFunction.cpp                |   6 +-
 src/functions/BoysFunction.h                  |   2 +-
 src/functions/GaussExp.cpp                    | 142 +++----
 src/functions/GaussExp.h                      |  58 +--
 src/functions/GaussFunc.cpp                   |  47 +--
 src/functions/GaussFunc.h                     |  38 +-
 src/functions/GaussPoly.cpp                   |  88 ++---
 src/functions/GaussPoly.h                     |  30 +-
 src/functions/Gaussian.cpp                    |  42 ++-
 src/functions/Gaussian.h                      |  28 +-
 src/functions/Polynomial.cpp                  |   2 +-
 src/functions/Polynomial.h                    |   2 +-
 src/functions/RepresentableFunction.cpp       |  21 +-
 src/functions/RepresentableFunction.h         |  11 +-
 src/functions/function_utils.cpp              |  14 +-
 src/functions/function_utils.h                |   2 +-
 src/operators/OperatorState.h                 |  27 +-
 src/operators/OperatorStatistics.cpp          |  24 +-
 src/operators/OperatorStatistics.h            |   6 +-
 src/treebuilders/AdditionCalculator.h         |  16 +-
 src/treebuilders/AnalyticAdaptor.h            |  10 +-
 src/treebuilders/ConvolutionCalculator.cpp    |  82 ++--
 src/treebuilders/ConvolutionCalculator.h      |  26 +-
 src/treebuilders/CopyAdaptor.cpp              |  30 +-
 src/treebuilders/CopyAdaptor.h                |  10 +-
 .../CrossCorrelationCalculator.cpp            |   2 +-
 src/treebuilders/DefaultCalculator.h          |   6 +-
 src/treebuilders/DerivativeCalculator.cpp     |  74 ++--
 src/treebuilders/DerivativeCalculator.h       |  22 +-
 src/treebuilders/MapCalculator.h              |  16 +-
 src/treebuilders/MultiplicationAdaptor.h      |  10 +-
 src/treebuilders/MultiplicationCalculator.h   |  16 +-
 src/treebuilders/PowerCalculator.h            |  14 +-
 src/treebuilders/ProjectionCalculator.cpp     |  21 +-
 src/treebuilders/ProjectionCalculator.h       |   8 +-
 src/treebuilders/SplitAdaptor.h               |   6 +-
 src/treebuilders/SquareCalculator.h           |  14 +-
 src/treebuilders/TreeAdaptor.h                |   8 +-
 src/treebuilders/TreeBuilder.cpp              |  45 ++-
 src/treebuilders/TreeBuilder.h                |  14 +-
 src/treebuilders/TreeCalculator.h             |  10 +-
 src/treebuilders/WaveletAdaptor.h             |   6 +-
 src/treebuilders/add.cpp                      | 144 ++++---
 src/treebuilders/add.h                        |  20 +-
 src/treebuilders/apply.cpp                    | 135 ++++---
 src/treebuilders/apply.h                      |  18 +-
 src/treebuilders/complex_apply.cpp            |   3 -
 src/treebuilders/grid.cpp                     | 199 ++++++----
 src/treebuilders/grid.h                       |  26 +-
 src/treebuilders/map.cpp                      |  20 +-
 src/treebuilders/map.h                        |   8 +-
 src/treebuilders/multiply.cpp                 | 357 ++++++++++++------
 src/treebuilders/multiply.h                   |  52 +--
 src/treebuilders/project.cpp                  |  46 ++-
 src/treebuilders/project.h                    |   6 +-
 src/trees/FunctionNode.cpp                    | 115 +++---
 src/trees/FunctionNode.h                      |  58 +--
 src/trees/FunctionTree.cpp                    | 180 ++++-----
 src/trees/FunctionTree.h                      |  50 +--
 src/trees/FunctionTreeVector.h                |  16 +-
 src/trees/MWNode.cpp                          | 223 +++++------
 src/trees/MWNode.h                            |  94 ++---
 src/trees/MWTree.cpp                          | 140 +++----
 src/trees/MWTree.h                            |  58 +--
 src/trees/NodeAllocator.cpp                   |  68 ++--
 src/trees/NodeAllocator.h                     |  38 +-
 src/trees/NodeBox.cpp                         |  44 ++-
 src/trees/NodeBox.h                           |  24 +-
 src/trees/TreeIterator.cpp                    |  74 ++--
 src/trees/TreeIterator.h                      |  26 +-
 src/utils/ComplexFunction.h                   |  10 +-
 src/utils/Plotter.cpp                         |  92 ++---
 src/utils/Plotter.h                           |  20 +-
 src/utils/Printer.cpp                         |   2 +-
 src/utils/Printer.h                           |   4 +-
 src/utils/math_utils.cpp                      |  14 +
 src/utils/math_utils.h                        |   1 +
 src/utils/mpi_utils.cpp                       |  18 +-
 src/utils/mpi_utils.h                         |  18 +-
 src/utils/tree_utils.cpp                      | 118 +++---
 src/utils/tree_utils.h                        |  13 +-
 tests/operators/derivative_operator.cpp       |  22 +-
 tests/operators/helmholtz_operator.cpp        |   9 +-
 tests/operators/poisson_operator.cpp          |   4 +-
 .../schrodinger_evolution_operator.cpp        |  12 +-
 tests/treebuilders/map.cpp                    |   2 +-
 tests/treebuilders/multiplication.cpp         |  12 +-
 89 files changed, 2078 insertions(+), 1668 deletions(-)

diff --git a/api/mrcpp_declarations.h b/api/mrcpp_declarations.h
index f6501b726..a96880da3 100644
--- a/api/mrcpp_declarations.h
+++ b/api/mrcpp_declarations.h
@@ -34,34 +34,34 @@ namespace mrcpp {
 
 class Timer;
 class Printer;
-template <int D> class Plotter;
+template <int D, typename T = double> class Plotter;
 
-template <int D> class Gaussian;
-template <int D> class GaussFunc;
-template <int D> class GaussPoly;
-template <int D> class GaussExp;
+template <int D, typename T = double> class Gaussian;
+template <int D, typename T = double> class GaussFunc;
+template <int D, typename T = double> class GaussPoly;
+template <int D, typename T = double> class GaussExp;
 
 template <int D> class BoundingBox;
-template <int D> class NodeBox;
+template <int D, typename T = double> class NodeBox;
 template <int D> class NodeIndex;
 template <int D> class NodeIndexComp;
 
-class SharedMemory;
+template <typename T = double> class SharedMemory;
 class ScalingBasis;
 class LegendreBasis;
 class InterpolatingBasis;
 
-template <int D> class RepresentableFunction;
+template <int D, typename T = double> class RepresentableFunction;
 template <int D> class MultiResolutionAnalysis;
 
-template <int D> class MWTree;
-template <int D> class FunctionTree;
+template <int D, typename T = double> class MWTree;
+template <int D, typename T = double> class FunctionTree;
 class OperatorTree;
 
-template <int D> class NodeAllocator;
+template <int D, typename T = double> class NodeAllocator;
 
-template <int D> class MWNode;
-template <int D> class FunctionNode;
+template <int D, typename T = double> class MWNode;
+template <int D, typename T = double> class FunctionNode;
 class OperatorNode;
 
 template <int D> class IdentityConvolution;
@@ -79,31 +79,30 @@ template <int D> class DerivativeKernel;
 class PoissonKernel;
 class HelmholtzKernel;
 
-template <int D> class TreeBuilder;
-template <int D> class TreeCalculator;
-template <int D> class DefaultCalculator;
-template <int D> class ProjectionCalculator;
-template <int D> class AdditionCalculator;
-template <int D> class MultiplicationCalculator;
-template <int D> class ConvolutionCalculator;
-template <int D> class DerivativeCalculator;
+template <int D, typename T = double> class TreeBuilder;
+template <int D, typename T = double> class TreeCalculator;
+template <int D, typename T = double> class DefaultCalculator;
+template <int D, typename T = double> class ProjectionCalculator;
+template <int D, typename T = double> class AdditionCalculator;
+template <int D, typename T = double> class MultiplicationCalculator;
+template <int D, typename T = double> class ConvolutionCalculator;
+template <int D, typename T = double> class DerivativeCalculator;
 class CrossCorrelationCalculator;
 
-template <int D> class TreeAdaptor;
-template <int D> class AnalyticAdaptor;
-template <int D> class WaveletAdaptor;
-template <int D> class CopyAdaptor;
+template <int D, typename T = double> class TreeAdaptor;
+template <int D, typename T = double> class AnalyticAdaptor;
+template <int D, typename T = double> class WaveletAdaptor;
+template <int D, typename T = double> class CopyAdaptor;
 
-template <int D> class TreeIterator;
-template <int D> class IteratorNode;
+template <int D, typename T = double> class TreeIterator;
+template <int D, typename T = double> class IteratorNode;
 
 class BandWidth;
-template <int D> class OperatorState;
+template <int D, typename T = double> class OperatorState;
 
 template <int D> using Coord = std::array<double, D>;
-template <int D> using MWNodeVector = std::vector<MWNode<D> *>;
+template <int D, typename T = double> using MWNodeVector = std::vector<MWNode<D, T> *>;
 
-template <typename T, typename U> using FMap_ = std::function<T(U)>;
-typedef FMap_<double, double> FMap;
+template <typename T = double, typename U = double> using FMap = std::function<T(U)>;
 
 } // namespace mrcpp
diff --git a/src/functions/AnalyticFunction.h b/src/functions/AnalyticFunction.h
index abf0fcbd6..adb0a307c 100644
--- a/src/functions/AnalyticFunction.h
+++ b/src/functions/AnalyticFunction.h
@@ -32,29 +32,29 @@
 
 namespace mrcpp {
 
-template <int D> class AnalyticFunction : public RepresentableFunction<D> {
+template <int D, typename T = double> class AnalyticFunction : public RepresentableFunction<D, T> {
 public:
     AnalyticFunction() = default;
     ~AnalyticFunction() override = default;
 
-    AnalyticFunction(std::function<double(const Coord<D> &r)> f, const double *a = nullptr, const double *b = nullptr)
-            : RepresentableFunction<D>(a, b)
+    AnalyticFunction(std::function<T(const Coord<D> &r)> f, const double *a = nullptr, const double *b = nullptr)
+            : RepresentableFunction<D, T>(a, b)
             , func(f) {}
-    AnalyticFunction(std::function<double(const Coord<D> &r)> f,
+    AnalyticFunction(std::function<T(const Coord<D> &r)> f,
                      const std::vector<double> &a,
                      const std::vector<double> &b)
             : AnalyticFunction(f, a.data(), b.data()) {}
 
-    void set(std::function<double(const Coord<D> &r)> f) { this->func = f; }
+    void set(std::function<T(const Coord<D> &r)> f) { this->func = f; }
 
-    double evalf(const Coord<D> &r) const override {
-        double val = 0.0;
+    T evalf(const Coord<D> &r) const override {
+        T val = 0.0;
         if (not this->outOfBounds(r)) val = this->func(r);
         return val;
     }
 
 protected:
-    std::function<double(const Coord<D> &r)> func;
+    std::function<T(const Coord<D> &r)> func;
 };
 
 } // namespace mrcpp
diff --git a/src/functions/BoysFunction.cpp b/src/functions/BoysFunction.cpp
index 71b705139..0a3364845 100644
--- a/src/functions/BoysFunction.cpp
+++ b/src/functions/BoysFunction.cpp
@@ -32,7 +32,7 @@
 namespace mrcpp {
 
 BoysFunction::BoysFunction(int n, double p)
-        : RepresentableFunction<1>()
+        : RepresentableFunction<1, double>()
         , order(n)
         , prec(p)
         , MRA(BoundingBox<1>(), InterpolatingBasis(13)) {}
@@ -50,8 +50,8 @@ double BoysFunction::evalf(const Coord<1> &r) const {
         return std::exp(-xt_2) * t_2n;
     };
 
-    FunctionTree<1> tree(this->MRA);
-    mrcpp::project<1>(this->prec, tree, f);
+    FunctionTree<1, double> tree(this->MRA);
+    mrcpp::project<1, double>(this->prec, tree, f);
     double result = tree.integrate();
 
     Printer::setPrintLevel(oldlevel);
diff --git a/src/functions/BoysFunction.h b/src/functions/BoysFunction.h
index 4dc76bd72..cc5cc1916 100644
--- a/src/functions/BoysFunction.h
+++ b/src/functions/BoysFunction.h
@@ -30,7 +30,7 @@
 
 namespace mrcpp {
 
-class BoysFunction final : public RepresentableFunction<1> {
+class BoysFunction final : public RepresentableFunction<1, double> {
 public:
     BoysFunction(int n, double prec = 1.0e-10);
 
diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp
index a57fe6708..a51372801 100644
--- a/src/functions/GaussExp.cpp
+++ b/src/functions/GaussExp.cpp
@@ -39,21 +39,21 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D> double GaussExp<D>::defaultScreening = 10.0;
+template <int D, typename T> double GaussExp<D, T>::defaultScreening = 10.0;
 
-template <int D> GaussExp<D>::GaussExp(int nTerms, double prec) {
+template <int D, typename T> GaussExp<D, T>::GaussExp(int nTerms, double prec) {
     for (int i = 0; i < nTerms; i++) { this->funcs.push_back(nullptr); }
 }
 
-template <int D> GaussExp<D>::GaussExp(const GaussExp<D> &gexp) {
+template <int D, typename T> GaussExp<D, T>::GaussExp(const GaussExp<D, T> &gexp) {
     screening = gexp.screening;
     for (unsigned int i = 0; i < gexp.size(); i++) {
-        Gaussian<D> *gauss = gexp.funcs[i]->copy();
+        Gaussian<D, T> *gauss = gexp.funcs[i]->copy();
         this->funcs.push_back(gauss);
     }
 }
 
-template <int D> GaussExp<D>::~GaussExp() {
+template <int D, typename T> GaussExp<D, T>::~GaussExp() {
     for (int i = 0; i < size(); i++) {
         if (this->funcs[i] != nullptr) {
             delete this->funcs[i];
@@ -62,7 +62,7 @@ template <int D> GaussExp<D>::~GaussExp() {
     }
 }
 
-template <int D> GaussExp<D> &GaussExp<D>::operator=(const GaussExp<D> &gexp) {
+template <int D, typename T> GaussExp<D, T> &GaussExp<D, T>::operator=(const GaussExp<D, T> &gexp) {
     if (&gexp == this) return *this;
     // screening = gexp.screening;
     this->funcs.clear();
@@ -70,77 +70,77 @@ template <int D> GaussExp<D> &GaussExp<D>::operator=(const GaussExp<D> &gexp) {
         if (gexp.funcs[i] == nullptr) {
             this->funcs.push_back(nullptr);
         } else {
-            Gaussian<D> *gauss = gexp.getFunc(i).copy();
+            Gaussian<D, T> *gauss = gexp.getFunc(i).copy();
             this->funcs.push_back(gauss);
         }
     }
     return *this;
 }
 
-template <int D> double GaussExp<D>::evalf(const Coord<D> &r) const {
-    double val = 0.0;
+template <int D, typename T> T GaussExp<D, T>::evalf(const Coord<D> &r) const {
+    T val = 0.0;
     for (int i = 0; i < this->size(); i++) { val += this->getFunc(i).evalf(r); }
     return val;
 }
 
-template <int D> bool GaussExp<D>::isVisibleAtScale(int scale, int nPts) const {
+template <int D, typename T> bool GaussExp<D, T>::isVisibleAtScale(int scale, int nPts) const {
     for (unsigned int i = 0; i < this->size(); i++) {
         if (not this->getFunc(i).isVisibleAtScale(scale, nPts)) { return false; }
     }
     return true;
 }
 
-template <int D> bool GaussExp<D>::isZeroOnInterval(const double *lb, const double *ub) const {
+template <int D, typename T> bool GaussExp<D, T>::isZeroOnInterval(const double *lb, const double *ub) const {
     for (unsigned int i = 0; i < this->size(); i++) {
         if (not this->getFunc(i).isZeroOnInterval(lb, ub)) { return false; }
     }
     return true;
 }
 
-template <int D> void GaussExp<D>::setFunc(int i, const GaussPoly<D> &g, double c) {
+template <int D, typename T> void GaussExp<D, T>::setFunc(int i, const GaussPoly<D, T> &g, double c) {
     if (i < 0 or i > (this->size() - 1)) {
         MSG_ERROR("Index out of bounds!");
         return;
     }
     if (this->funcs[i] != nullptr) { delete this->funcs[i]; }
-    this->funcs[i] = new GaussPoly<D>(g);
+    this->funcs[i] = new GaussPoly<D, T>(g);
     double coef = this->funcs[i]->getCoef();
     this->funcs[i]->setCoef(c * coef);
 }
 
-template <int D> void GaussExp<D>::setFunc(int i, const GaussFunc<D> &g, double c) {
+template <int D, typename T> void GaussExp<D, T>::setFunc(int i, const GaussFunc<D, T> &g, double c) {
     if (i < 0 or i > (this->size() - 1)) {
         MSG_ERROR("Index out of bounds!");
         return;
     }
     if (this->funcs[i] != nullptr) { delete this->funcs[i]; }
-    this->funcs[i] = new GaussFunc<D>(g);
+    this->funcs[i] = new GaussFunc<D, T>(g);
     double coef = this->funcs[i]->getCoef();
     this->funcs[i]->setCoef(c * coef);
 }
 
-template <int D> void GaussExp<D>::append(const Gaussian<D> &g) {
-    Gaussian<D> *gp = g.copy();
+template <int D, typename T> void GaussExp<D, T>::append(const Gaussian<D, T> &g) {
+    Gaussian<D, T> *gp = g.copy();
     this->funcs.push_back(gp);
 }
 
-template <int D> void GaussExp<D>::append(const GaussExp<D> &g) {
+template <int D, typename T> void GaussExp<D, T>::append(const GaussExp<D, T> &g) {
     for (int i = 0; i < g.size(); i++) {
-        Gaussian<D> *gp = g.getFunc(i).copy();
+        Gaussian<D, T> *gp = g.getFunc(i).copy();
         this->funcs.push_back(gp);
     }
 }
 
-template <int D> GaussExp<D> GaussExp<D>::differentiate(int dir) const {
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::differentiate(int dir) const {
     assert(dir >= 0 and dir < D);
-    GaussExp<D> result;
+    GaussExp<D, T> result;
     for (int i = 0; i < this->size(); i++) result.append(this->getFunc(i).differentiate(dir));
     return result;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::add(GaussExp<D> &g) {
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::add(GaussExp<D, T> &g) {
     int nsum = this->size() + g.size();
-    GaussExp<D> sum = GaussExp<D>(nsum);
+    GaussExp<D, T> sum = GaussExp<D, T>(nsum);
 
     int n = 0;
     for (int i = 0; i < this->size(); i++) {
@@ -155,34 +155,34 @@ template <int D> GaussExp<D> GaussExp<D>::add(GaussExp<D> &g) {
     return sum;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::add(Gaussian<D> &g) {
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::add(Gaussian<D, T> &g) {
     int nsum = this->size() + 1;
-    GaussExp<D> sum = GaussExp<D>(nsum);
+    GaussExp<D, T> sum = GaussExp<D, T>(nsum);
     for (int n = 0; n < this->size(); n++) { sum.funcs[n] = this->getFunc(n).copy(); }
     sum.funcs[this->size()] = g.copy();
     return sum;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::mult(GaussExp<D> &gexp) {
-    GaussExp<D> result;
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussExp<D, T> &gexp) {
+    GaussExp<D, T> result;
     for (int i = 0; i < this->size(); i++) {
         for (int j = 0; j < gexp.size(); j++) {
-            if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[i])) {
-                if (auto *g = dynamic_cast<GaussFunc<D> *>(gexp.funcs[j])) {
-                    GaussPoly<D> newTerm = (*g) * (*f);
+            if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[i])) {
+                if (auto *g = dynamic_cast<GaussFunc<D, T> *>(gexp.funcs[j])) {
+                    GaussPoly<D, T> newTerm = (*g) * (*f);
                     result.append(newTerm);
-                } else if (auto *g = dynamic_cast<GaussPoly<D> *>(gexp.funcs[j])) {
-                    GaussPoly<D> newTerm = (*g) * (*f);
+                } else if (auto *g = dynamic_cast<GaussPoly<D, T> *>(gexp.funcs[j])) {
+                    GaussPoly<D, T> newTerm = (*g) * (*f);
                     result.append(newTerm);
                 } else {
                     MSG_ABORT("Invalid Gaussian type!");
                 }
-            } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[i])) {
-                if (auto *g = dynamic_cast<GaussFunc<D> *>(gexp.funcs[j])) {
-                    GaussPoly<D> newTerm = (*f) * (*g);
+            } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[i])) {
+                if (auto *g = dynamic_cast<GaussFunc<D, T> *>(gexp.funcs[j])) {
+                    GaussPoly<D, T> newTerm = (*f) * (*g);
                     result.append(newTerm);
-                } else if (auto *g = dynamic_cast<GaussPoly<D> *>(gexp.funcs[j])) {
-                    GaussPoly<D> newTerm = (*f) * (*g);
+                } else if (auto *g = dynamic_cast<GaussPoly<D, T> *>(gexp.funcs[j])) {
+                    GaussPoly<D, T> newTerm = (*f) * (*g);
                     result.append(newTerm);
                 } else {
                     MSG_ABORT("Invalid Gaussian type!");
@@ -195,15 +195,15 @@ template <int D> GaussExp<D> GaussExp<D>::mult(GaussExp<D> &gexp) {
     return result;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::mult(GaussFunc<D> &g) {
-    GaussExp<D> result;
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussFunc<D, T> &g) {
+    GaussExp<D, T> result;
     int nTerms = this->size();
     for (int n = 0; n < nTerms; n++) {
-        if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[n])) {
-            GaussPoly<D> newTerm = *f * g;
+        if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[n])) {
+            GaussPoly<D, T> newTerm = *f * g;
             result.append(newTerm);
-        } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[n])) {
-            GaussPoly<D> newTerm = *f * g;
+        } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[n])) {
+            GaussPoly<D, T> newTerm = *f * g;
             result.append(newTerm);
         } else {
             MSG_ABORT("Invalid Gaussian type!");
@@ -211,15 +211,15 @@ template <int D> GaussExp<D> GaussExp<D>::mult(GaussFunc<D> &g) {
     }
     return result;
 }
-template <int D> GaussExp<D> GaussExp<D>::mult(GaussPoly<D> &g) {
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussPoly<D, T> &g) {
     int nTerms = this->size();
-    GaussExp<D> result(nTerms);
+    GaussExp<D, T> result; // start empty: append() grows it; pre-sizing would leave nTerms null terms in front
     for (int n = 0; n < nTerms; n++) {
-        if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[n])) {
-            GaussPoly<D> newTerm(g * *f);
+        if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[n])) {
+            GaussPoly<D, T> newTerm(g * *f);
             result.append(newTerm);
-        } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[n])) {
-            GaussPoly<D> newTerm(g * *f);
+        } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[n])) {
+            GaussPoly<D, T> newTerm(g * *f);
             result.append(newTerm);
         } else {
             MSG_ABORT("Invalid Gaussian type!");
@@ -228,17 +228,17 @@ template <int D> GaussExp<D> GaussExp<D>::mult(GaussPoly<D> &g) {
     return result;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::mult(double d) {
-    GaussExp<D> prod = *this;
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(double d) {
+    GaussExp<D, T> prod = *this;
     for (int i = 0; i < this->size(); i++) prod.funcs[i]->multConstInPlace(d);
     return prod;
 }
 
-template <int D> void GaussExp<D>::multInPlace(double d) {
+template <int D, typename T> void GaussExp<D, T>::multInPlace(double d) {
     for (int i = 0; i < this->size(); i++) this->funcs[i]->multConstInPlace(d);
 }
 
-template <int D> double GaussExp<D>::calcSquareNorm() const {
+template <int D, typename T> double GaussExp<D, T>::calcSquareNorm() const {
     /* computing the squares */
     double norm = 0.0;
     for (int i = 0; i < this->size(); i++) {
@@ -247,13 +247,13 @@ template <int D> double GaussExp<D>::calcSquareNorm() const {
     }
     /* computing the double products */
     for (int i = 0; i < this->size(); i++) {
-        GaussExp<D> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc
+        GaussExp<D, T> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc
         for (int fi = 0; fi < funcs_i.size(); fi++) {
-            GaussFunc<D> &func_i = static_cast<GaussFunc<D> &>(funcs_i.getFunc(fi));
+            GaussFunc<D, T> &func_i = static_cast<GaussFunc<D, T> &>(funcs_i.getFunc(fi));
             for (int j = i + 1; j < this->size(); j++) {
-                GaussExp<D> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc
+                GaussExp<D, T> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc
                 for (int fj = 0; fj < funcs_j.size(); fj++) {
-                    GaussFunc<D> &func_j = static_cast<GaussFunc<D> &>(funcs_j.getFunc(fj));
+                    GaussFunc<D, T> &func_j = static_cast<GaussFunc<D, T> &>(funcs_j.getFunc(fj));
                     double overlap = func_i.calcOverlap(func_j);
                     norm += 2.0 * overlap;
                 }
@@ -263,7 +263,7 @@ template <int D> double GaussExp<D>::calcSquareNorm() const {
     return norm;
 }
 
-template <int D> void GaussExp<D>::normalize() {
+template <int D, typename T> void GaussExp<D, T>::normalize() {
     double norm = std::sqrt(this->calcSquareNorm());
     for (int i = 0; i < this->size(); i++) {
         double coef = this->funcs[i]->getCoef();
@@ -271,12 +271,12 @@ template <int D> void GaussExp<D>::normalize() {
     }
 }
 
-template <int D> void GaussExp<D>::calcScreening(double nStdDev) {
+template <int D, typename T> void GaussExp<D, T>::calcScreening(double nStdDev) {
     screening = nStdDev;
     for (int i = 0; i < this->size(); i++) { this->funcs[i]->calcScreening(nStdDev); }
 }
 
-template <int D> void GaussExp<D>::setScreen(bool screen) {
+template <int D, typename T> void GaussExp<D, T>::setScreen(bool screen) {
     if (screen) {
         this->screening = std::abs(this->screening);
     } else {
@@ -290,7 +290,7 @@ template <int D> void GaussExp<D>::setScreen(bool screen) {
 // is not separable, we have to do the projection term by term.
 /*
 template<int D>
-void GaussExp<D>::calcWaveletCoefs(MWNode<D> &node) {
+void GaussExp<D, T>::calcWaveletCoefs(MWNode<D, T> &node) {
     static const int tDim = 1 << D;
     const ScalingBasis &sf = node.getMWTree().getScalingFunctions();
     MatrixXd &scaling = node.getMWTree().getTmpScalingCoefs();
@@ -319,12 +319,12 @@ void GaussExp<D>::calcWaveletCoefs(MWNode<D> &node) {
 }
 */
 
-template <int D> void GaussExp<D>::setDefaultScreening(double screen) {
+template <int D, typename T> void GaussExp<D, T>::setDefaultScreening(double screen) {
     if (screen < 0) { MSG_ERROR("Screening constant cannot be negative!"); }
     defaultScreening = screen;
 }
 
-template <int D> std::ostream &GaussExp<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &GaussExp<D, T>::print(std::ostream &o) const {
     o << "Gaussian expansion: " << size() << " terms" << std::endl;
     for (int i = 0; i < size(); i++) {
         o << "Term" << std::setw(3) << i << " :" << std::endl;
@@ -338,7 +338,7 @@ template <int D> std::ostream &GaussExp<D>::print(std::ostream &o) const {
  *  @note Each Gaussian must be normalized to unit charge
  *  \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct!
  */
-template <int D> double GaussExp<D>::calcCoulombEnergy() const {
+template <int D, typename T> double GaussExp<D, T>::calcCoulombEnergy() const {
     NOT_IMPLEMENTED_ABORT
 }
 
@@ -362,8 +362,8 @@ template <> double GaussExp<3>::calcCoulombEnergy() const {
     return energy;
 }
 
-template <int D> GaussExp<D> GaussExp<D>::periodify(const std::array<double, D> &period, double nStdDev) const {
-    GaussExp<D> out_exp;
+template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::periodify(const std::array<double, D> &period, double nStdDev) const {
+    GaussExp<D, T> out_exp;
     for (const auto &gauss : *this) {
         auto periodic_gauss = gauss->periodify(period, nStdDev);
         out_exp.append(periodic_gauss);
@@ -371,8 +371,12 @@ template <int D> GaussExp<D> GaussExp<D>::periodify(const std::array<double, D>
     return out_exp;
 }
 
-template class GaussExp<1>;
-template class GaussExp<2>;
-template class GaussExp<3>;
+template class GaussExp<1, double>;
+template class GaussExp<2, double>;
+template class GaussExp<3, double>;
+
+template class GaussExp<1, ComplexDouble>;
+template class GaussExp<2, ComplexDouble>;
+template class GaussExp<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/functions/GaussExp.h b/src/functions/GaussExp.h
index aa6ad4da3..58f5d7dd2 100644
--- a/src/functions/GaussExp.h
+++ b/src/functions/GaussExp.h
@@ -51,11 +51,11 @@ namespace mrcpp {
  *
  */
 
-template <int D> class GaussExp : public RepresentableFunction<D> {
+template <int D, typename T> class GaussExp : public RepresentableFunction<D, T> {
 public:
     GaussExp(int nTerms = 0, double prec = GAUSS_EXP_PREC);
-    GaussExp(const GaussExp<D> &gExp);
-    GaussExp &operator=(const GaussExp<D> &gExp);
+    GaussExp(const GaussExp<D, T> &gExp);
+    GaussExp &operator=(const GaussExp<D, T> &gExp);
     ~GaussExp() override;
 
     auto begin() { return funcs.begin(); }
@@ -70,25 +70,25 @@ template <int D> class GaussExp : public RepresentableFunction<D> {
 
     void calcScreening(double nStdDev = defaultScreening);
 
-    double evalf(const Coord<D> &r) const override;
+    T evalf(const Coord<D> &r) const override;
 
-    GaussExp<D> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
-    GaussExp<D> differentiate(int dir) const;
+    GaussExp<D, T> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
+    GaussExp<D, T> differentiate(int dir) const;
 
-    GaussExp<D> add(GaussExp<D> &g);
-    GaussExp<D> add(Gaussian<D> &g);
-    GaussExp<D> mult(GaussExp<D> &g);
-    GaussExp<D> mult(GaussFunc<D> &g);
-    GaussExp<D> mult(GaussPoly<D> &g);
-    GaussExp<D> mult(double d);
+    GaussExp<D, T> add(GaussExp<D, T> &g);
+    GaussExp<D, T> add(Gaussian<D, T> &g);
+    GaussExp<D, T> mult(GaussExp<D, T> &g);
+    GaussExp<D, T> mult(GaussFunc<D, T> &g);
+    GaussExp<D, T> mult(GaussPoly<D, T> &g);
+    GaussExp<D, T> mult(double d);
     void multInPlace(double d);
 
-    GaussExp<D> operator+(GaussExp<D> &g) { return this->add(g); }
-    GaussExp<D> operator+(Gaussian<D> &g) { return this->add(g); }
-    GaussExp<D> operator*(GaussExp<D> &g) { return this->mult(g); }
-    GaussExp<D> operator*(GaussFunc<D> &g) { return this->mult(g); }
-    GaussExp<D> operator*(GaussPoly<D> &g) { return this->mult(g); }
-    GaussExp<D> operator*(double d) { return this->mult(d); }
+    GaussExp<D, T> operator+(GaussExp<D, T> &g) { return this->add(g); }
+    GaussExp<D, T> operator+(Gaussian<D, T> &g) { return this->add(g); }
+    GaussExp<D, T> operator*(GaussExp<D, T> &g) { return this->mult(g); }
+    GaussExp<D, T> operator*(GaussFunc<D, T> &g) { return this->mult(g); }
+    GaussExp<D, T> operator*(GaussPoly<D, T> &g) { return this->mult(g); }
+    GaussExp<D, T> operator*(double d) { return this->mult(d); }
     void operator*=(double d) { this->multInPlace(d); }
 
     double getScreening() const { return screening; }
@@ -98,14 +98,14 @@ template <int D> class GaussExp : public RepresentableFunction<D> {
     const std::array<double, D> &getPos(int i) const { return this->funcs[i]->getPos(); }
 
     int size() const { return this->funcs.size(); }
-    Gaussian<D> &getFunc(int i) { return *this->funcs[i]; }
-    const Gaussian<D> &getFunc(int i) const { return *this->funcs[i]; }
+    Gaussian<D, T> &getFunc(int i) { return *this->funcs[i]; }
+    const Gaussian<D, T> &getFunc(int i) const { return *this->funcs[i]; }
 
-    Gaussian<D> *operator[](int i) { return this->funcs[i]; }
-    const Gaussian<D> *operator[](int i) const { return this->funcs[i]; }
+    Gaussian<D, T> *operator[](int i) { return this->funcs[i]; }
+    const Gaussian<D, T> *operator[](int i) const { return this->funcs[i]; }
 
-    void setFunc(int i, const GaussPoly<D> &g, double c = 1.0);
-    void setFunc(int i, const GaussFunc<D> &g, double c = 1.0);
+    void setFunc(int i, const GaussPoly<D, T> &g, double c = 1.0);
+    void setFunc(int i, const GaussFunc<D, T> &g, double c = 1.0);
 
     void setDefaultScreening(double screen);
     void setScreen(bool screen);
@@ -115,15 +115,15 @@ template <int D> class GaussExp : public RepresentableFunction<D> {
     void setPos(int i, const std::array<double, D> &pos) { this->funcs[i]->setPos(pos); }
 
     /** @brief Append Gaussian to expansion */
-    void append(const Gaussian<D> &g);
+    void append(const Gaussian<D, T> &g);
     /** @brief Append GaussExp to expansion */
-    void append(const GaussExp<D> &g);
+    void append(const GaussExp<D, T> &g);
 
-    friend std::ostream &operator<<(std::ostream &o, const GaussExp<D> &gExp) { return gExp.print(o); }
-    friend class Gaussian<D>;
+    friend std::ostream &operator<<(std::ostream &o, const GaussExp<D, T> &gExp) { return gExp.print(o); }
+    friend class Gaussian<D, T>;
 
 protected:
-    std::vector<Gaussian<D> *> funcs;
+    std::vector<Gaussian<D, T> *> funcs;
     static double defaultScreening;
     double screening{0.0};
 
diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp
index 28736be58..dedf563c3 100644
--- a/src/functions/GaussFunc.cpp
+++ b/src/functions/GaussFunc.cpp
@@ -39,12 +39,12 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D> Gaussian<D> *GaussFunc<D>::copy() const {
-    auto *gauss = new GaussFunc<D>(*this);
+template <int D, typename T> Gaussian<D, T> *GaussFunc<D, T>::copy() const {
+    auto *gauss = new GaussFunc<D, T>(*this);
     return gauss;
 }
 
-template <int D> double GaussFunc<D>::evalf(const Coord<D> &r) const {
+template <int D, typename T> T GaussFunc<D, T>::evalf(const Coord<D> &r) const {
     if (this->getScreen()) {
         for (int d = 0; d < D; d++) {
             if (r[d] < this->A[d] or r[d] > this->B[d]) { return 0.0; }
@@ -65,7 +65,7 @@ template <int D> double GaussFunc<D>::evalf(const Coord<D> &r) const {
     return this->coef * p2 * std::exp(-q2);
 }
 
-template <int D> double GaussFunc<D>::evalf1D(double r, int d) const {
+template <int D, typename T> T GaussFunc<D, T>::evalf1D(double r, int d) const {
     if (this->getScreen()) {
         if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; }
     }
@@ -85,7 +85,7 @@ template <int D> double GaussFunc<D>::evalf1D(double r, int d) const {
     return result;
 }
 
-template <int D> double GaussFunc<D>::calcSquareNorm() const {
+template <int D, typename T> double GaussFunc<D, T>::calcSquareNorm() const {
     double norm = 1.0;
     for (int d = 0; d < D; d++) {
         double a = 2.0 * this->alpha[d];
@@ -105,14 +105,14 @@ template <int D> double GaussFunc<D>::calcSquareNorm() const {
     return norm * this->coef * this->coef;
 }
 
-template<int D> GaussExp<D> GaussFunc<D>::asGaussExp() const {
-    GaussExp<D> gexp;
+template<int D, typename T> GaussExp<D, T> GaussFunc<D, T>::asGaussExp() const {
+    GaussExp<D, T> gexp;
     gexp.append(*this);
     return gexp;
 }
 
-template <int D> GaussPoly<D> GaussFunc<D>::differentiate(int dir) const {
-    GaussPoly<D> result(*this);
+template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::differentiate(int dir) const {
+    GaussPoly<D, T> result(*this);
     int oldPow = this->getPower(dir);
 
     Polynomial newPoly(oldPow + 1);
@@ -123,8 +123,8 @@ template <int D> GaussPoly<D> GaussFunc<D>::differentiate(int dir) const {
     return result;
 }
 
-template <int D> void GaussFunc<D>::multInPlace(const GaussFunc<D> &rhs) {
-    GaussFunc<D> &lhs = *this;
+template <int D, typename T> void GaussFunc<D, T>::multInPlace(const GaussFunc<D, T> &rhs) {
+    GaussFunc<D, T> &lhs = *this;
     for (int d = 0; d < D; d++) {
         if (lhs.getPos()[d] != rhs.getPos()[d]) {
             MSG_ABORT("Cannot multiply GaussFuncs of different center in-place");
@@ -148,9 +148,9 @@ template <int D> void GaussFunc<D>::multInPlace(const GaussFunc<D> &rhs) {
  *  @param[in] rhs: Right hand side of multiply
  *  @returns New GaussPoly
  */
-template <int D> GaussPoly<D> GaussFunc<D>::mult(const GaussFunc<D> &rhs) {
-    GaussFunc<D> &lhs = *this;
-    GaussPoly<D> result;
+template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::mult(const GaussFunc<D, T> &rhs) {
+    GaussFunc<D, T> &lhs = *this;
+    GaussPoly<D, T> result;
     result.multPureGauss(lhs, rhs);
     for (int d = 0; d < D; d++) {
         double newPos = result.getPos()[d];
@@ -167,13 +167,13 @@ template <int D> GaussPoly<D> GaussFunc<D>::mult(const GaussFunc<D> &rhs) {
  *  @param[in] c: Scalar to multiply
  *  @returns New GaussFunc
  */
-template <int D> GaussFunc<D> GaussFunc<D>::mult(double c) {
-    GaussFunc<D> g = *this;
+template <int D, typename T> GaussFunc<D, T> GaussFunc<D, T>::mult(double c) {
+    GaussFunc<D, T> g = *this;
     g.coef *= c;
     return g;
 }
 
-template <int D> std::ostream &GaussFunc<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &GaussFunc<D, T>::print(std::ostream &o) const {
     auto is_array = details::are_all_equal<D>(this->getExp());
 
     // If all of the values in the exponential are the same only
@@ -203,7 +203,7 @@ template <int D> std::ostream &GaussFunc<D>::print(std::ostream &o) const {
  *  @note Both Gaussians must be normalized to unit charge
  *  \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct!
  */
-template <int D> double GaussFunc<D>::calcCoulombEnergy(const GaussFunc<D> &gf) const {
+template <int D, typename T> double GaussFunc<D, T>::calcCoulombEnergy(const GaussFunc<D, T> &gf) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
@@ -236,7 +236,12 @@ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const
     return std::sqrt(4.0 * alpha / pi) * boysFac;
 }
 
-template class GaussFunc<1>;
-template class GaussFunc<2>;
-template class GaussFunc<3>;
+template class GaussFunc<1, double>;
+template class GaussFunc<2, double>;
+template class GaussFunc<3, double>;
+
+template class GaussFunc<1, ComplexDouble>;
+template class GaussFunc<2, ComplexDouble>;
+template class GaussFunc<3, ComplexDouble>;
+
 } // namespace mrcpp
diff --git a/src/functions/GaussFunc.h b/src/functions/GaussFunc.h
index 874bb3850..fce4f6fd2 100644
--- a/src/functions/GaussFunc.h
+++ b/src/functions/GaussFunc.h
@@ -40,12 +40,12 @@ namespace mrcpp {
  *
  * \f$ g(x) = \alpha (x-x_0)^a e^{-\beta (x-x_0)^2} \f$
  *
- * - Multidimensional Gaussian (GaussFunc<D>):
+ * - Multidimensional Gaussian (GaussFunc<D, T>):
  *
  * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$
  */
 
-template <int D> class GaussFunc : public Gaussian<D> {
+template <int D, typename T> class GaussFunc : public Gaussian<D, T> {
 public:
     /** @returns New GaussFunc object
      *  @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$
@@ -54,32 +54,32 @@ template <int D> class GaussFunc : public Gaussian<D> {
      *  @param[in] pow: Monomial power, \f$ x^{pow[0]}, y^{pow[1]}, ... \f$
      */
     GaussFunc(double beta, double alpha, const Coord<D> &pos = {}, const std::array<int, D> &pow = {})
-            : Gaussian<D>(beta, alpha, pos, pow) {}
+            : Gaussian<D, T>(beta, alpha, pos, pow) {}
     GaussFunc(const std::array<double, D> &beta,
               double alpha,
               const Coord<D> &pos = {},
               const std::array<int, D> &pow = {})
-            : Gaussian<D>(beta, alpha, pos, pow) {}
-    GaussFunc(const GaussFunc<D> &gf)
-            : Gaussian<D>(gf) {}
-    GaussFunc<D> &operator=(const GaussFunc<D> &rhs) = delete;
-    Gaussian<D> *copy() const override;
+            : Gaussian<D, T>(beta, alpha, pos, pow) {}
+    GaussFunc(const GaussFunc<D, T> &gf)
+            : Gaussian<D, T>(gf) {}
+    GaussFunc<D, T> &operator=(const GaussFunc<D, T> &rhs) = delete;
+    Gaussian<D, T> *copy() const override;
 
-    double calcCoulombEnergy(const GaussFunc<D> &rhs) const;
+    double calcCoulombEnergy(const GaussFunc<D, T> &rhs) const;
     double calcSquareNorm() const override;
 
-    double evalf(const Coord<D> &r) const override;
-    double evalf1D(double r, int dir) const override;
+    T evalf(const Coord<D> &r) const override;
+    T evalf1D(double r, int dir) const override;
 
-    GaussExp<D> asGaussExp() const override;
-    GaussPoly<D> differentiate(int dir) const override;
+    GaussExp<D, T> asGaussExp() const override;
+    GaussPoly<D, T> differentiate(int dir) const override;
 
-    void multInPlace(const GaussFunc<D> &rhs);
-    void operator*=(const GaussFunc<D> &rhs) { multInPlace(rhs); }
-    GaussPoly<D> mult(const GaussFunc<D> &rhs);
-    GaussFunc<D> mult(double c);
-    GaussPoly<D> operator*(const GaussFunc<D> &rhs) { return this->mult(rhs); }
-    GaussFunc<D> operator*(double c) { return this->mult(c); }
+    void multInPlace(const GaussFunc<D, T> &rhs);
+    void operator*=(const GaussFunc<D, T> &rhs) { multInPlace(rhs); }
+    GaussPoly<D, T> mult(const GaussFunc<D, T> &rhs);
+    GaussFunc<D, T> mult(double c);
+    GaussPoly<D, T> operator*(const GaussFunc<D, T> &rhs) { return this->mult(rhs); }
+    GaussFunc<D, T> operator*(double c) { return this->mult(c); }
 
     void setPow(int d, int power) override { this->power[d] = power; }
     void setPow(const std::array<int, D> &power) override { this->power = power; }
diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp
index 0dfeaf2cd..3e780ba3b 100644
--- a/src/functions/GaussPoly.cpp
+++ b/src/functions/GaussPoly.cpp
@@ -43,9 +43,9 @@ namespace mrcpp {
  *  @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$
  *  @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$
  */
-template <int D>
-GaussPoly<D>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const std::array<int, D> &power)
-        : Gaussian<D>(beta, alpha, pos, power) {
+template <int D, typename T>
+GaussPoly<D, T>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const std::array<int, D> &power)
+        : Gaussian<D, T>(beta, alpha, pos, power) {
     for (auto d = 0; d < D; d++) {
         if (power != std::array<int, D>{}) {
             this->poly[d] = new Polynomial(this->power[d]);
@@ -55,12 +55,12 @@ GaussPoly<D>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const st
     }
 }
 
-template <int D>
-GaussPoly<D>::GaussPoly(const std::array<double, D> &beta,
+template <int D, typename T>
+GaussPoly<D, T>::GaussPoly(const std::array<double, D> &beta,
                         double alpha,
                         const Coord<D> &pos,
                         const std::array<int, D> &pow)
-        : Gaussian<D>(beta, alpha, pos, pow) {
+        : Gaussian<D, T>(beta, alpha, pos, pow) {
     for (auto d = 0; d < D; d++) {
         if (pow != std::array<int, D>{}) {
             this->poly[d] = new Polynomial(this->power[d]);
@@ -70,15 +70,15 @@ GaussPoly<D>::GaussPoly(const std::array<double, D> &beta,
     }
 }
 
-template <int D>
-GaussPoly<D>::GaussPoly(const GaussPoly<D> &gp)
-        : Gaussian<D>(gp) {
+template <int D, typename T>
+GaussPoly<D, T>::GaussPoly(const GaussPoly<D, T> &gp)
+        : Gaussian<D, T>(gp) {
     for (int d = 0; d < D; d++) { poly[d] = new Polynomial(gp.getPoly(d)); }
 }
 
-template <int D>
-GaussPoly<D>::GaussPoly(const GaussFunc<D> &gf)
-        : Gaussian<D>(gf) {
+template <int D, typename T>
+GaussPoly<D, T>::GaussPoly(const GaussFunc<D, T> &gf)
+        : Gaussian<D, T>(gf) {
     for (int d = 0; d < D; d++) {
         int order = this->getPower(d);
         poly[d] = new Polynomial(order);
@@ -89,29 +89,29 @@ GaussPoly<D>::GaussPoly(const GaussFunc<D> &gf)
     }
 }
 
-template <int D> GaussPoly<D>::~GaussPoly() {
+template <int D, typename T> GaussPoly<D, T>::~GaussPoly() {
     for (int i = 0; i < D; i++) { delete poly[i]; }
 }
 
-template <int D> Gaussian<D> *GaussPoly<D>::copy() const {
-    auto *gauss = new GaussPoly<D>(*this);
+template <int D, typename T> Gaussian<D, T> *GaussPoly<D, T>::copy() const {
+    auto *gauss = new GaussPoly<D, T>(*this);
     return gauss;
 }
 
-template<int D> double GaussPoly<D>::calcSquareNorm() const {
-    GaussExp<D> this_exp = this->asGaussExp();
+template <int D, typename T> double GaussPoly<D, T>::calcSquareNorm() const {
+    GaussExp<D, T> this_exp = this->asGaussExp();
     double norm = 0.0;
     for (int i = 0; i < this_exp.size(); i++) {
-        auto func_i = static_cast<GaussFunc<D> &>(this_exp.getFunc(i));
+        auto func_i = static_cast<GaussFunc<D, T> &>(this_exp.getFunc(i));
         for (int j = 0; j < this_exp.size(); j++) {
-            auto func_j = static_cast<GaussFunc<D> &>(this_exp.getFunc(j));
+            auto func_j = static_cast<GaussFunc<D, T> &>(this_exp.getFunc(j));
             norm += function_utils::calc_overlap(func_i, func_j);
         }
     }
     return norm;
 }
 
-template <int D> double GaussPoly<D>::evalf(const Coord<D> &r) const {
+template <int D, typename T> T GaussPoly<D, T>::evalf(const Coord<D> &r) const {
     if (this->getScreen()) {
         for (int d = 0; d < D; d++) {
             if (r[d] < this->A[d] or r[d] > this->B[d]) { return 0.0; }
@@ -127,7 +127,7 @@ template <int D> double GaussPoly<D>::evalf(const Coord<D> &r) const {
     return this->coef * p2 * std::exp(-q2);
 }
 
-template <int D> double GaussPoly<D>::evalf1D(const double r, int d) const {
+template <int D, typename T> T GaussPoly<D, T>::evalf1D(const double r, int d) const {
     // NOTE!
     //     This function evaluation will give the first dimension the full coef
     //     amplitude, leaving all other directions with amplitude 1.0. This is to
@@ -146,7 +146,7 @@ template <int D> double GaussPoly<D>::evalf1D(const double r, int d) const {
     return p2 * std::exp(-this->alpha[d] * q2);
 }
 
-template <int D> GaussExp<D> GaussPoly<D>::asGaussExp() const {
+template <int D, typename T> GaussExp<D, T> GaussPoly<D, T>::asGaussExp() const {
     std::array<int, D> pow;
     std::array<double, D> pos;
     auto alpha = this->getExp();
@@ -162,12 +162,12 @@ template <int D> GaussExp<D> GaussPoly<D>::asGaussExp() const {
 
     fillCoefPowVector(coefs, power, pow, D);
 
-    GaussExp<D> gexp;
+    GaussExp<D, T> gexp;
     for (int i = 0; i < nTerms; i++) {
         double coef = coefs[i];
         for (int d = 0; d < D; d++) pow[d] = power[i][d];
         if (coef != 0.0) {
-            GaussFunc<D> gFunc(alpha, coef, pos, pow);
+            GaussFunc<D, T> gFunc(alpha, coef, pos, pow);
             gexp.append(gFunc);
         }
     }
@@ -175,16 +175,16 @@ template <int D> GaussExp<D> GaussPoly<D>::asGaussExp() const {
     return gexp;
 }
 
-template <int D> GaussPoly<D> GaussPoly<D>::differentiate(int dir) const {
+template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::differentiate(int dir) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
-template <int D> void GaussPoly<D>::multInPlace(const GaussPoly<D> &rhs) {
+template <int D, typename T> void GaussPoly<D, T>::multInPlace(const GaussPoly<D, T> &rhs) {
     NOT_IMPLEMENTED_ABORT;
 }
 
-template <int D>
-void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs, std::vector<int *> &power, int pow[D], int dir) const {
+template <int D, typename T>
+void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs, std::vector<int *> &power, int pow[D], int dir) const {
     dir--;
     for (int i = 0; i < this->getPower(dir) + 1; i++) {
         pow[dir] = i;
@@ -204,8 +204,8 @@ void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs, std::vector<int
     }
 }
 
-template <int D>
-void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs,
+template <int D, typename T>
+void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs,
                                      std::vector<int *> &power,
                                      std::array<int, D> &pow,
                                      int dir) const {
@@ -228,11 +228,11 @@ void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs,
     }
 }
 
-template <int D> GaussPoly<D> GaussPoly<D>::mult(const GaussPoly<D> &rhs) {
+template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::mult(const GaussPoly<D, T> &rhs) {
     NOT_IMPLEMENTED_ABORT;
     /*
-    GaussPoly<D> &lhs = *this;
-    GaussPoly<D> result;
+    GaussPoly<D, T> &lhs = *this;
+    GaussPoly<D, T> result;
     result.multPureGauss(lhs, rhs);
     for (int d = 0; d < D; d++) {
         double newPos = result.getPos()[d];
@@ -265,18 +265,18 @@ template <int D> GaussPoly<D> GaussPoly<D>::mult(const GaussPoly<D> &rhs) {
  *  @param[in] c: Scalar to multiply
  *  @returns New GaussPoly
  */
-template <int D> GaussPoly<D> GaussPoly<D>::mult(double c) {
-    GaussPoly<D> g = *this;
+template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::mult(double c) {
+    GaussPoly<D, T> g = *this;
     g.coef *= c;
     return g;
 }
 
-template <int D> void GaussPoly<D>::setPow(int d, int pow) {
+template <int D, typename T> void GaussPoly<D, T>::setPow(int d, int pow) {
     if (poly[d] != nullptr) { delete poly[d]; }
     poly[d] = new Polynomial(pow);
 }
 
-template <int D> void GaussPoly<D>::setPow(const std::array<int, D> &pow) {
+template <int D, typename T> void GaussPoly<D, T>::setPow(const std::array<int, D> &pow) {
     for (int d = 0; d < D; d++) {
         if (poly[d] != nullptr) { delete poly[d]; }
         poly[d] = new Polynomial(pow[d]);
@@ -288,13 +288,13 @@ template <int D> void GaussPoly<D>::setPow(const std::array<int, D> &pow) {
  *  @param[in] d: Cartesian direction
  *  @param[in] poly: Polynomial to set
  */
-template <int D> void GaussPoly<D>::setPoly(int d, Polynomial &poly) {
+template <int D, typename T> void GaussPoly<D, T>::setPoly(int d, Polynomial &poly) {
     if (this->poly[d] != nullptr) { delete this->poly[d]; }
     this->poly[d] = new Polynomial(poly);
     this->power[d] = poly.getOrder();
 }
 
-template <int D> std::ostream &GaussPoly<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &GaussPoly<D, T>::print(std::ostream &o) const {
     auto is_array = details::are_all_equal<D>(this->getExp());
 
     // If all of the values in the exponential are the same only
@@ -316,8 +316,12 @@ template <int D> std::ostream &GaussPoly<D>::print(std::ostream &o) const {
     return o;
 }
 
-template class GaussPoly<1>;
-template class GaussPoly<2>;
-template class GaussPoly<3>;
+template class GaussPoly<1, double>;
+template class GaussPoly<2, double>;
+template class GaussPoly<3, double>;
+
+template class GaussPoly<1, ComplexDouble>;
+template class GaussPoly<2, ComplexDouble>;
+template class GaussPoly<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/functions/GaussPoly.h b/src/functions/GaussPoly.h
index 97ed6f47d..d70b93474 100644
--- a/src/functions/GaussPoly.h
+++ b/src/functions/GaussPoly.h
@@ -43,38 +43,38 @@ namespace mrcpp {
  *
  * \f$ g(x) = \alpha P(x-x_0) e^{-\beta (x-x_0)^2} \f$
  *
- * - Multidimensional Gaussian (GaussFunc<D>):
+ * - Multidimensional Gaussian (GaussPoly<D, T>):
  *
  * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$
  */
 
-template <int D> class GaussPoly : public Gaussian<D> {
+template <int D, typename T> class GaussPoly : public Gaussian<D, T> {
 public:
     GaussPoly(double alpha = 0.0, double coef = 1.0, const Coord<D> &pos = {}, const std::array<int, D> &power = {});
     GaussPoly(const std::array<double, D> &alpha,
               double coef,
               const Coord<D> &pos = {},
               const std::array<int, D> &power = {});
-    GaussPoly(const GaussPoly<D> &gp);
-    GaussPoly(const GaussFunc<D> &gf);
-    GaussPoly<D> &operator=(const GaussPoly<D> &gp) = delete;
-    Gaussian<D> *copy() const override;
+    GaussPoly(const GaussPoly<D, T> &gp);
+    GaussPoly(const GaussFunc<D, T> &gf);
+    GaussPoly<D, T> &operator=(const GaussPoly<D, T> &gp) = delete;
+    Gaussian<D, T> *copy() const override;
     ~GaussPoly();
 
     double calcSquareNorm() const override;
 
-    double evalf(const Coord<D> &r) const override;
-    double evalf1D(double r, int dim) const override;
+    T evalf(const Coord<D> &r) const override;
+    T evalf1D(double r, int dim) const override;
 
-    GaussExp<D> asGaussExp() const override;
+    GaussExp<D, T> asGaussExp() const override;
     GaussPoly differentiate(int dir) const override;
 
-    void multInPlace(const GaussPoly<D> &rhs);
-    void operator*=(const GaussPoly<D> &rhs) { multInPlace(rhs); }
-    GaussPoly<D> mult(const GaussPoly<D> &rhs);
-    GaussPoly<D> mult(double c);
-    GaussPoly<D> operator*(const GaussPoly<D> &rhs) { return mult(rhs); }
-    GaussPoly<D> operator*(double c) { return mult(c); }
+    void multInPlace(const GaussPoly<D, T> &rhs);
+    void operator*=(const GaussPoly<D, T> &rhs) { multInPlace(rhs); }
+    GaussPoly<D, T> mult(const GaussPoly<D, T> &rhs);
+    GaussPoly<D, T> mult(double c);
+    GaussPoly<D, T> operator*(const GaussPoly<D, T> &rhs) { return mult(rhs); }
+    GaussPoly<D, T> operator*(double c) { return mult(c); }
 
     const Eigen::VectorXd &getPolyCoefs(int i) const { return poly[i]->getCoefs(); }
     Eigen::VectorXd &getPolyCoefs(int i) { return poly[i]->getCoefs(); }
diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp
index 6dbfa7c5b..2583f7c56 100644
--- a/src/functions/Gaussian.cpp
+++ b/src/functions/Gaussian.cpp
@@ -46,8 +46,8 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D>
-Gaussian<D>::Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p)
+template <int D, typename T>
+Gaussian<D, T>::Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p)
         : screen(false)
         , coef(c)
         , power(p)
@@ -55,15 +55,15 @@ Gaussian<D>::Gaussian(double a, double c, const Coord<D> &r, const std::array<in
     this->alpha.fill(a);
 }
 
-template <int D>
-Gaussian<D>::Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p)
+template <int D, typename T>
+Gaussian<D, T>::Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p)
         : screen(false)
         , coef(c)
         , power(p)
         , alpha(a)
         , pos(r) {}
 
-template <int D> void Gaussian<D>::multPureGauss(const Gaussian<D> &lhs, const Gaussian<D> &rhs) {
+template <int D, typename T> void Gaussian<D, T>::multPureGauss(const Gaussian<D, T> &lhs, const Gaussian<D, T> &rhs) {
 
     auto newAlpha = std::array<double, D>{};
     auto mju = std::array<double, D>{};
@@ -85,7 +85,7 @@ template <int D> void Gaussian<D>::multPureGauss(const Gaussian<D> &lhs, const G
     setCoef(newCoef);
 }
 
-template <int D> void Gaussian<D>::calcScreening(double nStdDev) {
+template <int D, typename T> void Gaussian<D, T>::calcScreening(double nStdDev) {
     assert(nStdDev > 0);
     if (not this->isBounded()) {
         this->bounded = true;
@@ -100,7 +100,7 @@ template <int D> void Gaussian<D>::calcScreening(double nStdDev) {
     screen = true;
 }
 
-template <int D> bool Gaussian<D>::checkScreen(int n, const int *l) const {
+template <int D, typename T> bool Gaussian<D, T>::checkScreen(int n, const int *l) const {
     if (not getScreen()) { return false; }
     double length = std::pow(2.0, -n);
     const double *A = this->getLowerBounds();
@@ -113,7 +113,7 @@ template <int D> bool Gaussian<D>::checkScreen(int n, const int *l) const {
     return false;
 }
 
-template <int D> bool Gaussian<D>::isVisibleAtScale(int scale, int nQuadPts) const {
+template <int D, typename T> bool Gaussian<D, T>::isVisibleAtScale(int scale, int nQuadPts) const {
     for (auto &alp : this->alpha) {
         double stdDeviation = std::pow(2.0 * alp, -0.5);
         auto visibleScale = static_cast<int>(-std::floor(std::log2(nQuadPts * 0.5 * stdDeviation)));
@@ -124,7 +124,7 @@ template <int D> bool Gaussian<D>::isVisibleAtScale(int scale, int nQuadPts) con
     return true;
 }
 
-template <int D> bool Gaussian<D>::isZeroOnInterval(const double *a, const double *b) const {
+template <int D, typename T> bool Gaussian<D, T>::isZeroOnInterval(const double *a, const double *b) const {
     for (int i = 0; i < D; i++) {
         double stdDeviation = std::pow(2.0 * this->alpha[i], -0.5);
         double gaussBoxMin = this->pos[i] - 5.0 * stdDeviation;
@@ -134,7 +134,7 @@ template <int D> bool Gaussian<D>::isZeroOnInterval(const double *a, const doubl
     return false;
 }
 
-template <int D> void Gaussian<D>::evalf(const MatrixXd &points, MatrixXd &values) const {
+template <int D, typename T> void Gaussian<D, T>::evalf(const MatrixXd &points, Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &values) const {
     assert(points.cols() == D);
     assert(points.cols() == values.cols());
     assert(points.rows() == values.rows());
@@ -143,7 +143,7 @@ template <int D> void Gaussian<D>::evalf(const MatrixXd &points, MatrixXd &value
     }
 }
 
-template <int D> double Gaussian<D>::getMaximumStandardDiviation() const {
+template <int D, typename T> double Gaussian<D, T>::getMaximumStandardDiviation() const {
 
     if (details::are_all_equal<D>(this->getExp())) {
         auto exponent = this->getExp()[0];
@@ -156,15 +156,15 @@ template <int D> double Gaussian<D>::getMaximumStandardDiviation() const {
     }
 }
 
-template <int D> double Gaussian<D>::calcOverlap(const Gaussian<D> &inp) const {
+template <int D, typename T> double Gaussian<D, T>::calcOverlap(const Gaussian<D, T> &inp) const {
     const auto &bra_exp = this->asGaussExp(); // Make sure all entries are GaussFunc
     const auto &ket_exp = inp.asGaussExp();   // Make sure all entries are GaussFunc
 
     double S = 0.0;
     for (int i = 0; i < bra_exp.size(); i++) {
-        const auto &bra_i = static_cast<const GaussFunc<D> &>(bra_exp.getFunc(i));
+        const auto &bra_i = static_cast<const GaussFunc<D, T> &>(bra_exp.getFunc(i));
         for (int j = 0; j < ket_exp.size(); j++) {
-            const auto &ket_j = static_cast<const GaussFunc<D> &>(ket_exp.getFunc(j));
+            const auto &ket_j = static_cast<const GaussFunc<D, T> &>(ket_exp.getFunc(j));
             S += function_utils::calc_overlap(bra_i, ket_j);
         }
     }
@@ -181,8 +181,8 @@ template <int D> double Gaussian<D>::calcOverlap(const Gaussian<D> &inp) const {
  * integral is conserved with respect to the integration limits.
  *
  */
-template <int D> GaussExp<D> Gaussian<D>::periodify(const std::array<double, D> &period, double nStdDev) const {
-    GaussExp<D> gauss_exp;
+template <int D, typename T> GaussExp<D, T> Gaussian<D, T>::periodify(const std::array<double, D> &period, double nStdDev) const {
+    GaussExp<D, T> gauss_exp;
     auto pos_vec = std::vector<Coord<D>>();
 
     auto x_std = nStdDev * this->getMaximumStandardDiviation();
@@ -239,8 +239,12 @@ template <int D> GaussExp<D> Gaussian<D>::periodify(const std::array<double, D>
     return gauss_exp;
 }
 
-template class Gaussian<1>;
-template class Gaussian<2>;
-template class Gaussian<3>;
+template class Gaussian<1, double>;
+template class Gaussian<2, double>;
+template class Gaussian<3, double>;
+
+template class Gaussian<1, ComplexDouble>;
+template class Gaussian<2, ComplexDouble>;
+template class Gaussian<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/functions/Gaussian.h b/src/functions/Gaussian.h
index d02cc43b1..7d5bf7dca 100644
--- a/src/functions/Gaussian.h
+++ b/src/functions/Gaussian.h
@@ -40,28 +40,28 @@
 
 namespace mrcpp {
 
-template <int D> class Gaussian : public RepresentableFunction<D> {
+template <int D, typename T> class Gaussian : public RepresentableFunction<D, T> {
 public:
     Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p);
     Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p);
-    Gaussian<D> &operator=(const Gaussian<D> &gp) = delete;
-    virtual Gaussian<D> *copy() const = 0;
+    Gaussian<D, T> &operator=(const Gaussian<D, T> &gp) = delete;
+    virtual Gaussian<D, T> *copy() const = 0;
     virtual ~Gaussian() = default;
 
-    virtual double evalf(const Coord<D> &r) const = 0;
-    virtual double evalf1D(double r, int dim) const = 0;
-    void evalf(const Eigen::MatrixXd &points, Eigen::MatrixXd &values) const;
-
-    double calcOverlap(const Gaussian<D> &inp) const;
+    virtual T evalf(const Coord<D> &r) const = 0;
+    virtual T evalf1D(double r, int dim) const = 0;
+    void evalf(const Eigen::MatrixXd &points, Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &values) const;
+ 
+    double calcOverlap(const Gaussian<D, T> &inp) const;
     virtual double calcSquareNorm() const = 0;
-    virtual GaussExp<D> asGaussExp() const = 0;
-    GaussExp<D> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
+    virtual GaussExp<D, T> asGaussExp() const = 0;
+    GaussExp<D, T> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
 
     /** @brief Compute analytic derivative of Gaussian
      *  @param[in] dir: Cartesian direction of derivative
      *  @returns New GaussPoly
      */
-    virtual GaussPoly<D> differentiate(int dir) const = 0;
+    virtual GaussPoly<D, T> differentiate(int dir) const = 0;
 
     void calcScreening(double stdDeviations);
 
@@ -70,7 +70,7 @@ template <int D> class Gaussian : public RepresentableFunction<D> {
         double norm = std::sqrt(calcSquareNorm());
         multConstInPlace(1.0 / norm);
     }
-    void multPureGauss(const Gaussian<D> &lhs, const Gaussian<D> &rhs);
+    void multPureGauss(const Gaussian<D, T> &lhs, const Gaussian<D, T> &rhs);
     void multConstInPlace(double c) { this->coef *= c; }
     void operator*=(double c) { multConstInPlace(c); }
 
@@ -92,9 +92,9 @@ template <int D> class Gaussian : public RepresentableFunction<D> {
     void setExp(const std::array<double, D> &_alpha) { this->alpha = _alpha; }
     void setPos(const std::array<double, D> &r) { this->pos = r; }
 
-    friend std::ostream &operator<<(std::ostream &o, const Gaussian<D> &gauss) { return gauss.print(o); }
+    friend std::ostream &operator<<(std::ostream &o, const Gaussian<D, T> &gauss) { return gauss.print(o); }
 
-    friend class GaussExp<D>;
+    friend class GaussExp<D, T>;
 
 protected:
     bool screen;
diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp
index 397b4e268..964fe687b 100644
--- a/src/functions/Polynomial.cpp
+++ b/src/functions/Polynomial.cpp
@@ -45,7 +45,7 @@ namespace mrcpp {
 /** Construct polynomial of order zero with given size and bounds.
  * Includes default constructor. */
 Polynomial::Polynomial(int k, const double *a, const double *b)
-        : RepresentableFunction<1>(a, b) {
+  : RepresentableFunction<1, double>(a, b) {
     assert(k >= 0);
     this->N = 1.0;
     this->L = 0.0;
diff --git a/src/functions/Polynomial.h b/src/functions/Polynomial.h
index e1c23e4a6..fadc2c988 100644
--- a/src/functions/Polynomial.h
+++ b/src/functions/Polynomial.h
@@ -44,7 +44,7 @@
 
 namespace mrcpp {
 
-class Polynomial : public RepresentableFunction<1> {
+  class Polynomial : public RepresentableFunction<1, double> {
 public:
     Polynomial(int k = 0, const double *a = nullptr, const double *b = nullptr);
     Polynomial(int k, const std::vector<double> &a, const std::vector<double> &b)
diff --git a/src/functions/RepresentableFunction.cpp b/src/functions/RepresentableFunction.cpp
index 8687297c7..3c55ac92b 100644
--- a/src/functions/RepresentableFunction.cpp
+++ b/src/functions/RepresentableFunction.cpp
@@ -38,7 +38,7 @@
 
 namespace mrcpp {
 
-template <int D> RepresentableFunction<D>::RepresentableFunction(const double *a, const double *b) {
+template <int D, typename T> RepresentableFunction<D, T>::RepresentableFunction(const double *a, const double *b) {
     if (a == nullptr or b == nullptr) {
         this->bounded = false;
         this->A = nullptr;
@@ -56,7 +56,7 @@ template <int D> RepresentableFunction<D>::RepresentableFunction(const double *a
 }
 
 /** Constructs a new function with same bounds as the input function */
-template <int D> RepresentableFunction<D>::RepresentableFunction(const RepresentableFunction<D> &func) {
+template <int D, typename T> RepresentableFunction<D, T>::RepresentableFunction(const RepresentableFunction<D, T> &func) {
     if (func.isBounded()) {
         this->bounded = true;
         this->A = new double[D];
@@ -74,11 +74,11 @@ template <int D> RepresentableFunction<D>::RepresentableFunction(const Represent
 
 /** Copies function, not bounds. Use copy constructor if you want an
  * identical function. */
-template <int D> RepresentableFunction<D> &RepresentableFunction<D>::operator=(const RepresentableFunction<D> &func) {
+template <int D, typename T> RepresentableFunction<D, T> &RepresentableFunction<D, T>::operator=(const RepresentableFunction<D, T> &func) {
     return *this;
 }
 
-template <int D> RepresentableFunction<D>::~RepresentableFunction() {
+template <int D, typename T> RepresentableFunction<D, T>::~RepresentableFunction() {
     if (this->isBounded()) {
         delete[] this->A;
         delete[] this->B;
@@ -87,7 +87,7 @@ template <int D> RepresentableFunction<D>::~RepresentableFunction() {
     this->B = nullptr;
 }
 
-template <int D> void RepresentableFunction<D>::setBounds(const double *a, const double *b) {
+template <int D, typename T> void RepresentableFunction<D, T>::setBounds(const double *a, const double *b) {
     if (a == nullptr or b == nullptr) { MSG_ERROR("Invalid arguments"); }
     if (not isBounded()) {
         this->bounded = true;
@@ -101,7 +101,7 @@ template <int D> void RepresentableFunction<D>::setBounds(const double *a, const
     }
 }
 
-template <int D> bool RepresentableFunction<D>::outOfBounds(const Coord<D> &r) const {
+template <int D, typename T> bool RepresentableFunction<D, T>::outOfBounds(const Coord<D> &r) const {
     if (not isBounded()) { return false; }
     for (int d = 0; d < D; d++) {
         if (r[d] < getLowerBound(d)) return true;
@@ -110,8 +110,11 @@ template <int D> bool RepresentableFunction<D>::outOfBounds(const Coord<D> &r) c
     return false;
 }
 
-template class RepresentableFunction<1>;
-template class RepresentableFunction<2>;
-template class RepresentableFunction<3>;
+template class RepresentableFunction<1, double>;
+template class RepresentableFunction<2, double>;
+template class RepresentableFunction<3, double>;
+template class RepresentableFunction<1, ComplexDouble>;
+template class RepresentableFunction<2, ComplexDouble>;
+template class RepresentableFunction<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/functions/RepresentableFunction.h b/src/functions/RepresentableFunction.h
index 2d6998812..c22d86292 100644
--- a/src/functions/RepresentableFunction.h
+++ b/src/functions/RepresentableFunction.h
@@ -38,20 +38,21 @@
 #include "MRCPP/constants.h"
 #include "MRCPP/mrcpp_declarations.h"
 #include "trees/NodeIndex.h"
+#include "utils/math_utils.h"
 
 namespace mrcpp {
 
-template <int D> class RepresentableFunction {
+template <int D, typename T> class RepresentableFunction {
 public:
     RepresentableFunction(const double *a = nullptr, const double *b = nullptr);
     RepresentableFunction(const std::vector<double> &a, const std::vector<double> &b)
             : RepresentableFunction(a.data(), b.data()) {}
-    RepresentableFunction(const RepresentableFunction<D> &func);
-    RepresentableFunction<D> &operator=(const RepresentableFunction<D> &func);
+    RepresentableFunction(const RepresentableFunction<D, T> &func);
+    RepresentableFunction<D, T> &operator=(const RepresentableFunction<D, T> &func);
     virtual ~RepresentableFunction();
 
     /** @returns Function value in a point @param[in] r: Cartesian coordinate */
-    virtual double evalf(const Coord<D> &r) const = 0;
+    virtual T evalf(const Coord<D> &r) const = 0;
 
     void setBounds(const double *a, const double *b);
     void clearBounds();
@@ -65,7 +66,7 @@ template <int D> class RepresentableFunction {
     const double *getLowerBounds() const { return this->A; }
     const double *getUpperBounds() const { return this->B; }
 
-    friend class AnalyticAdaptor<D>;
+    friend class AnalyticAdaptor<D, T>;
 
 protected:
     bool bounded;
diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp
index 60b287e1c..641216915 100644
--- a/src/functions/function_utils.cpp
+++ b/src/functions/function_utils.cpp
@@ -31,7 +31,7 @@ namespace function_utils {
 double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b);
 } // namespace function_utils
 
-template <int D> double function_utils::calc_overlap(const GaussFunc<D> &a, const GaussFunc<D> &b) {
+template <int D, typename T> double function_utils::calc_overlap(const GaussFunc<D, T> &a, const GaussFunc<D, T> &b) {
     double S = 1.0;
     for (int d = 0; d < D; d++) {
         S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]);
@@ -114,7 +114,13 @@ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, dou
     return s_coeff[power_b + 2 * power_a];
 }
 
-template double function_utils::calc_overlap<1>(const GaussFunc<1> &a, const GaussFunc<1> &b);
-template double function_utils::calc_overlap<2>(const GaussFunc<2> &a, const GaussFunc<2> &b);
-template double function_utils::calc_overlap<3>(const GaussFunc<3> &a, const GaussFunc<3> &b);
+template double function_utils::calc_overlap<1, double>(const GaussFunc<1, double> &a, const GaussFunc<1, double> &b);
+template double function_utils::calc_overlap<2, double>(const GaussFunc<2, double> &a, const GaussFunc<2, double> &b);
+template double function_utils::calc_overlap<3, double>(const GaussFunc<3, double> &a, const GaussFunc<3, double> &b);
+
+template double function_utils::calc_overlap<1, ComplexDouble>(const GaussFunc<1, ComplexDouble> &a, const GaussFunc<1, ComplexDouble> &b);
+template double function_utils::calc_overlap<2, ComplexDouble>(const GaussFunc<2, ComplexDouble> &a, const GaussFunc<2, ComplexDouble> &b);
+template double function_utils::calc_overlap<3, ComplexDouble>(const GaussFunc<3, ComplexDouble> &a, const GaussFunc<3, ComplexDouble> &b);
+
+
 } // namespace mrcpp
diff --git a/src/functions/function_utils.h b/src/functions/function_utils.h
index 896c06257..38955af9b 100644
--- a/src/functions/function_utils.h
+++ b/src/functions/function_utils.h
@@ -28,6 +28,6 @@
 
 namespace mrcpp {
 namespace function_utils {
-template <int D> double calc_overlap(const GaussFunc<D> &a, const GaussFunc<D> &b);
+template <int D, typename T> double calc_overlap(const GaussFunc<D, T> &a, const GaussFunc<D, T> &b);
 } // namespace function_utils
 } // namespace mrcpp
diff --git a/src/operators/OperatorState.h b/src/operators/OperatorState.h
index 245d9f70f..855f53060 100644
--- a/src/operators/OperatorState.h
+++ b/src/operators/OperatorState.h
@@ -42,9 +42,9 @@ namespace mrcpp {
 
 #define GET_OP_IDX(FT, GT, ID) (2 * ((GT >> ID) & 1) + ((FT >> ID) & 1))
 
-template <int D> class OperatorState final {
+template <int D, typename T> class OperatorState final {
 public:
-    OperatorState(MWNode<D> &gn, double *scr1)
+    OperatorState(MWNode<D, T> &gn, T *scr1)
             : gNode(&gn) {
         this->kp1 = this->gNode->getKp1();
         this->kp1_d = this->gNode->getKp1_d();
@@ -53,7 +53,7 @@ template <int D> class OperatorState final {
         this->gData = this->gNode->getCoefs();
         this->maxDeltaL = -1;
 
-        double *scr2 = scr1 + this->kp1_d;
+        T *scr2 = scr1 + this->kp1_d;
 
         for (int i = 1; i < D; i++) {
             if (IS_ODD(i)) {
@@ -64,9 +64,9 @@ template <int D> class OperatorState final {
         }
     }
 
-    OperatorState(MWNode<D> &gn, std::vector<double> scr1)
+    OperatorState(MWNode<D, T> &gn, std::vector<T> &scr1) // by reference: the scratch buffer must outlive this constructor
             : OperatorState(gn, scr1.data()) {}
-    void setFNode(MWNode<D> &fn) {
+    void setFNode(MWNode<D, T> &fn) {
         this->fNode = &fn;
         this->fData = this->fNode->getCoefs();
     }
@@ -86,15 +86,16 @@ template <int D> class OperatorState final {
     int getMaxDeltaL() const { return this->maxDeltaL; }
     int getOperIndex(int i) const { return GET_OP_IDX(this->ft, this->gt, i); }
 
-    double **getAuxData() { return this->aux; }
+    T **getAuxData() { return this->aux; }
     double **getOperData() { return this->oData; }
 
-    friend class ConvolutionCalculator<D>;
-    friend class DerivativeCalculator<D>;
+    friend class ConvolutionCalculator<D, T>;
+    friend class DerivativeCalculator<D, T>;
 
 private:
     int ft;
     int gt;
+
     int maxDeltaL;
     double fThreshold;
     double gThreshold;
@@ -104,13 +105,13 @@ template <int D> class OperatorState final {
     int kp1_d;
     int kp1_dm1;
 
-    MWNode<D> *gNode;
-    MWNode<D> *fNode;
+    MWNode<D, T> *gNode;
+    MWNode<D, T> *fNode;
     NodeIndex<D> *fIdx;
 
-    double *aux[D + 1];
-    double *gData;
-    double *fData;
+    T *aux[D + 1];
+    T *gData;
+    T *fData;
     double *oData[D];
 
     void calcMaxDeltaL() {
diff --git a/src/operators/OperatorStatistics.cpp b/src/operators/OperatorStatistics.cpp
index d542e88f5..4ed0263cc 100644
--- a/src/operators/OperatorStatistics.cpp
+++ b/src/operators/OperatorStatistics.cpp
@@ -30,8 +30,8 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D>
-OperatorStatistics<D>::OperatorStatistics()
+template <int D, typename T>
+OperatorStatistics<D, T>::OperatorStatistics()
         : nThreads(mrcpp_get_max_threads())
         , totFCount(0)
         , totGCount(0)
@@ -58,7 +58,7 @@ OperatorStatistics<D>::OperatorStatistics()
     }
 }
 
-template <int D> OperatorStatistics<D>::~OperatorStatistics() {
+template <int D, typename T> OperatorStatistics<D, T>::~OperatorStatistics() {
     for (int i = 0; i < this->nThreads; i++) { delete this->compCount[i]; }
     delete[] this->compCount;
     delete[] this->fCount;
@@ -68,7 +68,7 @@ template <int D> OperatorStatistics<D>::~OperatorStatistics() {
 }
 
 /** Sum all node counters from all threads. */
-template <int D> void OperatorStatistics<D>::flushNodeCounters() {
+template <int D, typename T> void OperatorStatistics<D, T>::flushNodeCounters() {
     for (int i = 0; i < this->nThreads; i++) {
         this->totFCount += this->fCount[i];
         this->totGCount += this->gCount[i];
@@ -82,20 +82,20 @@ template <int D> void OperatorStatistics<D>::flushNodeCounters() {
 }
 
 /** Increment g-node usage counter. Needed for load balancing. */
-template <int D> void OperatorStatistics<D>::incrementGNodeCounters(const MWNode<D> &gNode) {
+template <int D, typename T> void OperatorStatistics<D, T>::incrementGNodeCounters(const MWNode<D, T> &gNode) {
     int thread = mrcpp_get_thread_num();
     this->gCount[thread]++;
 }
 
 /** Increment operator application counter. */
-template <int D> void OperatorStatistics<D>::incrementFNodeCounters(const MWNode<D> &fNode, int ft, int gt) {
+template <int D, typename T> void OperatorStatistics<D, T>::incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt) {
     int thread = mrcpp_get_thread_num();
     this->fCount[thread]++;
     (*this->compCount[thread])(ft, gt) += 1;
     if (fNode.isGenNode()) { this->genCount[thread]++; }
 }
 
-template <int D> std::ostream &OperatorStatistics<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &OperatorStatistics<D, T>::print(std::ostream &o) const {
     o << std::setw(8);
     o << "*OperatorFunc statistics: " << std::endl << std::endl;
     o << "  Total calculated gNodes      : " << this->totGCount << std::endl;
@@ -105,8 +105,12 @@ template <int D> std::ostream &OperatorStatistics<D>::print(std::ostream &o) con
     return o;
 }
 
-template class OperatorStatistics<1>;
-template class OperatorStatistics<2>;
-template class OperatorStatistics<3>;
+template class OperatorStatistics<1, double>;
+template class OperatorStatistics<2, double>;
+template class OperatorStatistics<3, double>;
+
+template class OperatorStatistics<1, ComplexDouble>;
+template class OperatorStatistics<2, ComplexDouble>;
+template class OperatorStatistics<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/operators/OperatorStatistics.h b/src/operators/OperatorStatistics.h
index 395a5d62a..9de97f8e0 100644
--- a/src/operators/OperatorStatistics.h
+++ b/src/operators/OperatorStatistics.h
@@ -32,14 +32,14 @@
 
 namespace mrcpp {
 
-template <int D> class OperatorStatistics final {
+template <int D, typename T> class OperatorStatistics final {
 public:
     OperatorStatistics();
     ~OperatorStatistics();
 
     void flushNodeCounters();
-    void incrementFNodeCounters(const MWNode<D> &fNode, int ft, int gt);
-    void incrementGNodeCounters(const MWNode<D> &gNode);
+    void incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt);
+    void incrementGNodeCounters(const MWNode<D, T> &gNode);
 
     friend std::ostream &operator<<(std::ostream &o, const OperatorStatistics &os) { return os.print(o); }
 
diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h
index 431600192..eb0322947 100644
--- a/src/treebuilders/AdditionCalculator.h
+++ b/src/treebuilders/AdditionCalculator.h
@@ -30,24 +30,24 @@
 
 namespace mrcpp {
 
-template <int D> class AdditionCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class AdditionCalculator final : public TreeCalculator<D, T> {
 public:
-    AdditionCalculator(const FunctionTreeVector<D> &inp)
+    AdditionCalculator(const FunctionTreeVector<D, T> &inp)
             : sum_vec(inp) {}
 
 private:
-    FunctionTreeVector<D> sum_vec;
+    FunctionTreeVector<D, T> sum_vec;
 
-    void calcNode(MWNode<D> &node_o) override {
+    void calcNode(MWNode<D, T> &node_o) override {
         node_o.zeroCoefs();
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         for (int i = 0; i < this->sum_vec.size(); i++) {
             double c_i = get_coef(this->sum_vec, i);
-            FunctionTree<D> &func_i = get_func(this->sum_vec, i);
+            FunctionTree<D, T> &func_i = get_func(this->sum_vec, i);
             // This generates missing nodes
-            const MWNode<D> &node_i = func_i.getNode(idx);
-            const double *coefs_i = node_i.getCoefs();
+            const MWNode<D, T> &node_i = func_i.getNode(idx);
+            const T *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
         }
diff --git a/src/treebuilders/AnalyticAdaptor.h b/src/treebuilders/AnalyticAdaptor.h
index 45f73b4cd..d735933ec 100644
--- a/src/treebuilders/AnalyticAdaptor.h
+++ b/src/treebuilders/AnalyticAdaptor.h
@@ -30,16 +30,16 @@
 
 namespace mrcpp {
 
-template <int D> class AnalyticAdaptor final : public TreeAdaptor<D> {
+template <int D, typename T> class AnalyticAdaptor final : public TreeAdaptor<D, T> {
 public:
-    AnalyticAdaptor(const RepresentableFunction<D> &f, int ms)
-            : TreeAdaptor<D>(ms)
+    AnalyticAdaptor(const RepresentableFunction<D, T> &f, int ms)
+            : TreeAdaptor<D, T>(ms)
             , func(&f) {}
 
 private:
-    const RepresentableFunction<D> *func;
+    const RepresentableFunction<D, T> *func;
 
-    bool splitNode(const MWNode<D> &node) const override {
+    bool splitNode(const MWNode<D, T> &node) const override {
         int scale = node.getScale();
         int nQuadPts = node.getKp1();
         if (this->func->isVisibleAtScale(scale, nQuadPts)) return false;
diff --git a/src/treebuilders/ConvolutionCalculator.cpp b/src/treebuilders/ConvolutionCalculator.cpp
index 5cb1b8f0c..d43fdca05 100644
--- a/src/treebuilders/ConvolutionCalculator.cpp
+++ b/src/treebuilders/ConvolutionCalculator.cpp
@@ -46,8 +46,8 @@ using Eigen::MatrixXi;
 
 namespace mrcpp {
 
-template <int D>
-ConvolutionCalculator<D>::ConvolutionCalculator(double p, ConvolutionOperator<D> &o, FunctionTree<D> &f, int depth)
+template <int D, typename T>
+ConvolutionCalculator<D, T>::ConvolutionCalculator(double p, ConvolutionOperator<D> &o, FunctionTree<D, T> &f, int depth)
         : maxDepth(depth)
         , prec(p)
         , oper(&o)
@@ -57,14 +57,14 @@ ConvolutionCalculator<D>::ConvolutionCalculator(double p, ConvolutionOperator<D>
     initTimers();
 }
 
-template <int D> ConvolutionCalculator<D>::~ConvolutionCalculator() {
+template <int D, typename T> ConvolutionCalculator<D, T>::~ConvolutionCalculator() {
     clearTimers();
     this->operStat.flushNodeCounters();
     println(10, this->operStat);
     for (int i = 0; i < this->bandSizes.size(); i++) { delete this->bandSizes[i]; }
 }
 
-template <int D> void ConvolutionCalculator<D>::initTimers() {
+template <int D, typename T> void ConvolutionCalculator<D, T>::initTimers() {
     int nThreads = mrcpp_get_max_threads();
     for (int i = 0; i < nThreads; i++) {
         this->band_t.push_back(new Timer(false));
@@ -73,7 +73,7 @@ template <int D> void ConvolutionCalculator<D>::initTimers() {
     }
 }
 
-template <int D> void ConvolutionCalculator<D>::clearTimers() {
+template <int D, typename T> void ConvolutionCalculator<D, T>::clearTimers() {
     int nThreads = mrcpp_get_max_threads();
     for (int i = 0; i < nThreads; i++) {
         delete this->band_t[i];
@@ -85,7 +85,7 @@ template <int D> void ConvolutionCalculator<D>::clearTimers() {
     this->norm_t.clear();
 }
 
-template <int D> void ConvolutionCalculator<D>::printTimers() const {
+template <int D, typename T> void ConvolutionCalculator<D, T>::printTimers() const {
     int oldprec = Printer::setPrecision(1);
     int nThreads = mrcpp_get_max_threads();
     printout(20, "\n\nthread ");
@@ -102,7 +102,7 @@ template <int D> void ConvolutionCalculator<D>::printTimers() const {
 
 /** Initialize the number of nodes formally within the bandwidth of an
  operator. The band size is used for thresholding. */
-template <int D> void ConvolutionCalculator<D>::initBandSizes() {
+template <int D, typename T> void ConvolutionCalculator<D, T>::initBandSizes() {
     for (int i = 0; i < this->oper->size(); i++) {
         // IMPORTANT: only 0-th dimension!
         const OperatorTree &oTree = this->oper->getComponent(i, 0);
@@ -118,7 +118,7 @@ template <int D> void ConvolutionCalculator<D>::initBandSizes() {
  * of an operator. Currently this routine ignores the fact that
  * there are edges on the world box, and thus over estimates
  * the number of nodes. This is different from the previous version. */
-template <int D> void ConvolutionCalculator<D>::calcBandSizeFactor(MatrixXi &bs, int depth, const BandWidth &bw) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::calcBandSizeFactor(MatrixXi &bs, int depth, const BandWidth &bw) {
     for (int gt = 0; gt < this->nComp; gt++) {
         for (int ft = 0; ft < this->nComp; ft++) {
             int k = gt * this->nComp + ft;
@@ -139,8 +139,8 @@ template <int D> void ConvolutionCalculator<D>::calcBandSizeFactor(MatrixXi &bs,
 }
 
 /** Return a vector of nodes in F affected by O, given a node in G */
-template <int D> MWNodeVector<D> *ConvolutionCalculator<D>::makeOperBand(const MWNode<D> &gNode, std::vector<NodeIndex<D>> &idx_band) {
-    auto *band = new MWNodeVector<D>;
+template <int D, typename T> MWNodeVector<D, T> *ConvolutionCalculator<D, T>::makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band) {
+    auto *band = new MWNodeVector<D, T>;
 
     int o_depth = gNode.getScale() - this->oper->getOperatorRoot();
     int g_depth = gNode.getDepth();
@@ -150,7 +150,7 @@ template <int D> MWNodeVector<D> *ConvolutionCalculator<D>::makeOperBand(const M
     int reach = this->oper->getOperatorReach();
 
     if (width >= 0) {
-        const NodeBox<D> &fWorld = this->fTree->getRootBox();
+        const NodeBox<D, T> &fWorld = this->fTree->getRootBox();
         const NodeIndex<D> &cIdx = fWorld.getCornerIndex();
         const NodeIndex<D> &gIdx = gNode.getNodeIndex();
 
@@ -180,7 +180,7 @@ template <int D> MWNodeVector<D> *ConvolutionCalculator<D>::makeOperBand(const M
 }
 
 /** Recursively retrieve all reachable f-nodes within the bandwidth. */
-template <int D> void ConvolutionCalculator<D>::fillOperBand(MWNodeVector<D> *band, std::vector<NodeIndex<D>> &idx_band, NodeIndex<D> &idx, const int *nbox, int dim) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::fillOperBand(MWNodeVector<D, T> *band, std::vector<NodeIndex<D>> &idx_band, NodeIndex<D> &idx, const int *nbox, int dim) {
     int l_start = idx[dim];
     for (int j = 0; j < nbox[dim]; j++) {
         // Recurse until dim == 0
@@ -190,7 +190,7 @@ template <int D> void ConvolutionCalculator<D>::fillOperBand(MWNodeVector<D> *ba
             continue;
         }
         if (not manipulateOperator) {
-            MWNode<D> &fNode = this->fTree->getNode(idx);
+            MWNode<D, T> &fNode = this->fTree->getNode(idx);
             idx_band.push_back(idx);
             band->push_back(&fNode);
 
@@ -198,18 +198,18 @@ template <int D> void ConvolutionCalculator<D>::fillOperBand(MWNodeVector<D> *ba
             const auto oper_scale = this->oper->getOperatorRoot();
             if (oper_scale == 0) {
                 if (periodic::in_unit_cell<D>(idx) and onUnitcell) {
-                    MWNode<D> &fNode = this->fTree->getNode(idx);
+                    MWNode<D, T> &fNode = this->fTree->getNode(idx);
                     idx_band.push_back(idx);
                     band->push_back(&fNode);
                 }
                 if (not periodic::in_unit_cell<D>(idx) and not onUnitcell) {
-                    MWNode<D> &fNode = this->fTree->getNode(idx);
+                    MWNode<D, T> &fNode = this->fTree->getNode(idx);
                     idx_band.push_back(idx);
                     band->push_back(&fNode);
                 }
             } else if (oper_scale < 0) {
                 if (periodic::in_unit_cell<D>(idx) and onUnitcell) {
-                    MWNode<D> &fNode = this->fTree->getNode(idx);
+                    MWNode<D, T> &fNode = this->fTree->getNode(idx);
                     idx_band.push_back(idx);
                     band->push_back(&fNode);
                 }
@@ -222,23 +222,23 @@ template <int D> void ConvolutionCalculator<D>::fillOperBand(MWNodeVector<D> *ba
     idx[dim] = l_start;
 }
 
-template <int D> void ConvolutionCalculator<D>::calcNode(MWNode<D> &node) {
-    auto &gNode = static_cast<FunctionNode<D> &>(node);
+template <int D, typename T> void ConvolutionCalculator<D, T>::calcNode(MWNode<D, T> &node) {
+    auto &gNode = static_cast<FunctionNode<D, T> &>(node);
     gNode.zeroCoefs();
 
     int o_depth = gNode.getScale() - this->oper->getOperatorRoot();
     if (manipulateOperator and this->oper->getOperatorRoot() < 0) o_depth = gNode.getDepth();
-    double tmpCoefs[gNode.getNCoefs()];
-    OperatorState<D> os(gNode, tmpCoefs);
+    std::vector<T> tmpCoefs(gNode.getNCoefs()); // heap scratch: variable-length arrays are a non-standard extension
+    OperatorState<D, T> os(gNode, tmpCoefs.data());
     this->operStat.incrementGNodeCounters(gNode);
 
     // Get all nodes in f within the bandwith of O in g
     this->band_t[mrcpp_get_thread_num()]->resume();
     std::vector<NodeIndex<D>> idx_band;
-    MWNodeVector<D> *fBand = makeOperBand(gNode, idx_band);
+    MWNodeVector<D, T> *fBand = makeOperBand(gNode, idx_band);
     this->band_t[mrcpp_get_thread_num()]->stop();
 
-    MWTree<D> &gTree = gNode.getMWTree();
+    MWTree<D, T> &gTree = gNode.getMWTree();
     double gThrs = gTree.getSquareNorm();
     if (gThrs > 0.0) {
         auto nTerms = static_cast<double>(this->oper->size());
@@ -250,7 +250,7 @@ template <int D> void ConvolutionCalculator<D>::calcNode(MWNode<D> &node) {
 
     this->calc_t[mrcpp_get_thread_num()]->resume();
     for (int n = 0; n < fBand->size(); n++) {
-        MWNode<D> &fNode = *(*fBand)[n];
+        MWNode<D, T> &fNode = *(*fBand)[n];
         NodeIndex<D> &fIdx = idx_band[n];
         os.setFNode(fNode);
         os.setFIndex(fIdx);
@@ -275,7 +275,7 @@ template <int D> void ConvolutionCalculator<D>::calcNode(MWNode<D> &node) {
 }
 
 /** Apply each component (term) of the operator expansion to a node in f */
-template <int D> void ConvolutionCalculator<D>::applyOperComp(OperatorState<D> &os) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperComp(OperatorState<D, T> &os) {
     double fNorm = os.fNode->getComponentNorm(os.ft);
     int o_depth = os.fNode->getScale() - this->oper->getOperatorRoot();
     for (int i = 0; i < this->oper->size(); i++) {
@@ -290,9 +290,9 @@ template <int D> void ConvolutionCalculator<D>::applyOperComp(OperatorState<D> &
 
 /** Apply a single operator component (term) to a single f-node. Whether the
 operator actualy is applied is determined by a screening threshold. */
-template <int D> void ConvolutionCalculator<D>::applyOperator(int i, OperatorState<D> &os) {
-    MWNode<D> &gNode = *os.gNode;
-    MWNode<D> &fNode = *os.fNode;
+template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperator(int i, OperatorState<D, T> &os) {
+    MWNode<D, T> &gNode = *os.gNode;
+    MWNode<D, T> &fNode = *os.fNode;
     const NodeIndex<D> &fIdx = *os.fIdx;
     const NodeIndex<D> &gIdx = gNode.getNodeIndex();
     int o_depth = gNode.getScale() - this->oper->getOperatorRoot();
@@ -326,9 +326,10 @@ template <int D> void ConvolutionCalculator<D>::applyOperator(int i, OperatorSta
 
 /** Perorm the required linear algebra operations in order to apply an
 operator component to a f-node in a n-dimensional tesor space. */
-template <int D> void ConvolutionCalculator<D>::tensorApplyOperComp(OperatorState<D> &os) {
-    double **aux = os.getAuxData();
+template <int D, typename T> void ConvolutionCalculator<D, T>::tensorApplyOperComp(OperatorState<D, T> &os) {
+    T **aux = os.getAuxData();
     double **oData = os.getOperData();
+    /* NOTE(review): the HAVE_BLAS branch below is commented out by this change (presumably because it is double-only) -- TODO re-enable it for T = double
 #ifdef HAVE_BLAS
     double mult = 0.0;
     for (int i = 0; i < D; i++) {
@@ -353,9 +354,10 @@ template <int D> void ConvolutionCalculator<D>::tensorApplyOperComp(OperatorStat
         }
     }
 #else
+    */
     for (int i = 0; i < D; i++) {
-        Eigen::Map<MatrixXd> f(aux[i], os.kp1, os.kp1_dm1);
-        Eigen::Map<MatrixXd> g(aux[i + 1], os.kp1_dm1, os.kp1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> f(aux[i], os.kp1, os.kp1_dm1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> g(aux[i + 1], os.kp1_dm1, os.kp1);
         if (oData[i] != nullptr) {
             Eigen::Map<MatrixXd> op(oData[i], os.kp1, os.kp1);
             if (i == D - 1) { // Last dir: Add up into g
@@ -372,10 +374,10 @@ template <int D> void ConvolutionCalculator<D>::tensorApplyOperComp(OperatorStat
             }
         }
     }
-#endif
+    //#endif
 }
 
-template <int D> void ConvolutionCalculator<D>::touchParentNodes(MWTree<D> &tree) const {
+template <int D, typename T> void ConvolutionCalculator<D, T>::touchParentNodes(MWTree<D, T> &tree) const {
     if (not manipulateOperator) {
         const auto oper_scale = this->oper->getOperatorRoot();
         auto car_prod = math_utils::cartesian_product(std::vector<int>{-1, 0}, D);
@@ -391,15 +393,19 @@ template <int D> void ConvolutionCalculator<D>::touchParentNodes(MWTree<D> &tree
     }
 }
 
-template <int D> MWNodeVector<D> *ConvolutionCalculator<D>::getInitialWorkVector(MWTree<D> &tree) const {
-    auto *nodeVec = new MWNodeVector<D>;
+template <int D, typename T> MWNodeVector<D, T> *ConvolutionCalculator<D, T>::getInitialWorkVector(MWTree<D, T> &tree) const {
+    auto *nodeVec = new MWNodeVector<D, T>;
     if (tree.isPeriodic()) touchParentNodes(tree);
     tree_utils::make_node_table(tree, *nodeVec);
     return nodeVec;
 }
 
-template class ConvolutionCalculator<1>;
-template class ConvolutionCalculator<2>;
-template class ConvolutionCalculator<3>;
+template class ConvolutionCalculator<1, double>;
+template class ConvolutionCalculator<2, double>;
+template class ConvolutionCalculator<3, double>;
+
+template class ConvolutionCalculator<1, ComplexDouble>;
+template class ConvolutionCalculator<2, ComplexDouble>;
+template class ConvolutionCalculator<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/treebuilders/ConvolutionCalculator.h b/src/treebuilders/ConvolutionCalculator.h
index 3b88cb9b1..f114ba976 100644
--- a/src/treebuilders/ConvolutionCalculator.h
+++ b/src/treebuilders/ConvolutionCalculator.h
@@ -33,12 +33,12 @@
 
 namespace mrcpp {
 
-template <int D> class ConvolutionCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class ConvolutionCalculator final : public TreeCalculator<D, T> {
 public:
-    ConvolutionCalculator(double p, ConvolutionOperator<D> &o, FunctionTree<D> &f, int depth = MaxDepth);
+    ConvolutionCalculator(double p, ConvolutionOperator<D> &o, FunctionTree<D, T> &f, int depth = MaxDepth);
     ~ConvolutionCalculator() override;
 
-    MWNodeVector<D> *getInitialWorkVector(MWTree<D> &tree) const override;
+    MWNodeVector<D, T> *getInitialWorkVector(MWTree<D, T> &tree) const override;
 
     void setPrecFunction(const std::function<double(const NodeIndex<D> &idx)> &prec_func) { this->precFunc = prec_func; }
     void startManipulateOperator(bool excUnit) {
@@ -52,45 +52,45 @@ template <int D> class ConvolutionCalculator final : public TreeCalculator<D> {
     bool manipulateOperator{false};
     bool onUnitcell{false};
     ConvolutionOperator<D> *oper;
-    FunctionTree<D> *fTree;
+    FunctionTree<D, T> *fTree;
     std::vector<Timer *> band_t;
     std::vector<Timer *> calc_t;
     std::vector<Timer *> norm_t;
 
-    OperatorStatistics<D> operStat;
+    OperatorStatistics<D, T> operStat;
     std::vector<Eigen::MatrixXi *> bandSizes;
     std::function<double(const NodeIndex<D> &idx)> precFunc = [](const NodeIndex<D> &idx) { return 1.0; };
 
     static const int nComp = (1 << D);
     static const int nComp2 = (1 << D) * (1 << D);
 
-    MWNodeVector<D> *makeOperBand(const MWNode<D> &gNode, std::vector<NodeIndex<D>> &idx_band);
-    void fillOperBand(MWNodeVector<D> *band, std::vector<NodeIndex<D>> &idx_band, NodeIndex<D> &idx, const int *nbox, int dim);
+    MWNodeVector<D, T> *makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band);
+    void fillOperBand(MWNodeVector<D, T> *band, std::vector<NodeIndex<D>> &idx_band, NodeIndex<D> &idx, const int *nbox, int dim);
 
     void initTimers();
     void clearTimers();
     void printTimers() const;
 
     void initBandSizes();
-    int getBandSizeFactor(int i, int depth, const OperatorState<D> &os) const {
+    int getBandSizeFactor(int i, int depth, const OperatorState<D, T> &os) const {
         int k = os.gt * this->nComp + os.ft;
         return (*this->bandSizes[i])(depth, k);
     }
 
     void calcBandSizeFactor(Eigen::MatrixXi &bs, int depth, const BandWidth &bw);
 
-    void calcNode(MWNode<D> &node) override;
+    void calcNode(MWNode<D, T> &node) override;
     void postProcess() override {
         printTimers();
         clearTimers();
         initTimers();
     }
 
-    void applyOperComp(OperatorState<D> &os);
-    void applyOperator(int i, OperatorState<D> &os);
-    void tensorApplyOperComp(OperatorState<D> &os);
+    void applyOperComp(OperatorState<D, T> &os);
+    void applyOperator(int i, OperatorState<D, T> &os);
+    void tensorApplyOperComp(OperatorState<D, T> &os);
 
-    void touchParentNodes(MWTree<D> &tree) const;
+    void touchParentNodes(MWTree<D, T> &tree) const;
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/CopyAdaptor.cpp b/src/treebuilders/CopyAdaptor.cpp
index 4017c6e5e..8312ebb0f 100644
--- a/src/treebuilders/CopyAdaptor.cpp
+++ b/src/treebuilders/CopyAdaptor.cpp
@@ -29,21 +29,21 @@
 
 namespace mrcpp {
 
-template <int D>
-CopyAdaptor<D>::CopyAdaptor(FunctionTree<D> &t, int ms, int *bw)
-        : TreeAdaptor<D>(ms) {
+template <int D, typename T>
+CopyAdaptor<D, T>::CopyAdaptor(FunctionTree<D, T> &t, int ms, int *bw)
+        : TreeAdaptor<D, T>(ms) {
     setBandWidth(bw);
     tree_vec.push_back(std::make_tuple(1.0, &t));
 }
 
-template <int D>
-CopyAdaptor<D>::CopyAdaptor(FunctionTreeVector<D> &t, int ms, int *bw)
-        : TreeAdaptor<D>(ms)
+template <int D, typename T>
+CopyAdaptor<D, T>::CopyAdaptor(FunctionTreeVector<D, T> &t, int ms, int *bw)
+        : TreeAdaptor<D, T>(ms)
         , tree_vec(t) {
     setBandWidth(bw);
 }
 
-template <int D> void CopyAdaptor<D>::setBandWidth(int *bw) {
+template <int D, typename T> void CopyAdaptor<D, T>::setBandWidth(int *bw) {
     for (int d = 0; d < D; d++) {
         if (bw != nullptr) {
             this->bandWidth[d] = bw[d];
@@ -53,7 +53,7 @@ template <int D> void CopyAdaptor<D>::setBandWidth(int *bw) {
     }
 }
 
-template <int D> bool CopyAdaptor<D>::splitNode(const MWNode<D> &node) const {
+template <int D, typename T> bool CopyAdaptor<D, T>::splitNode(const MWNode<D, T> &node) const {
     const NodeIndex<D> &idx = node.getNodeIndex();
     for (int c = 0; c < node.getTDim(); c++) {
         for (int d = 0; d < D; d++) {
@@ -61,8 +61,8 @@ template <int D> bool CopyAdaptor<D>::splitNode(const MWNode<D> &node) const {
                 NodeIndex<D> bwIdx = idx.child(c);
                 bwIdx[d] += bw;
                 for (int i = 0; i < this->tree_vec.size(); i++) {
-                    const FunctionTree<D> &func_i = get_func(tree_vec, i);
-                    const MWNode<D> *node_i = func_i.findNode(bwIdx);
+                    const FunctionTree<D, T> &func_i = get_func(tree_vec, i);
+                    const MWNode<D, T> *node_i = func_i.findNode(bwIdx);
                     if (node_i != nullptr) return true;
                 }
             }
@@ -71,8 +71,12 @@ template <int D> bool CopyAdaptor<D>::splitNode(const MWNode<D> &node) const {
     return false;
 }
 
-template class CopyAdaptor<1>;
-template class CopyAdaptor<2>;
-template class CopyAdaptor<3>;
+template class CopyAdaptor<1, double>;
+template class CopyAdaptor<2, double>;
+template class CopyAdaptor<3, double>;
+
+template class CopyAdaptor<1, ComplexDouble>;
+template class CopyAdaptor<2, ComplexDouble>;
+template class CopyAdaptor<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/treebuilders/CopyAdaptor.h b/src/treebuilders/CopyAdaptor.h
index c9451e599..adeeb6766 100644
--- a/src/treebuilders/CopyAdaptor.h
+++ b/src/treebuilders/CopyAdaptor.h
@@ -30,17 +30,17 @@
 
 namespace mrcpp {
 
-template <int D> class CopyAdaptor final : public TreeAdaptor<D> {
+  template <int D, typename T> class CopyAdaptor final : public TreeAdaptor<D, T> {
 public:
-    CopyAdaptor(FunctionTree<D> &t, int ms, int *bw);
-    CopyAdaptor(FunctionTreeVector<D> &t, int ms, int *bw);
+    CopyAdaptor(FunctionTree<D, T> &t, int ms, int *bw);
+    CopyAdaptor(FunctionTreeVector<D, T> &t, int ms, int *bw);
 
 private:
     int bandWidth[D];
-    FunctionTreeVector<D> tree_vec;
+    FunctionTreeVector<D, T> tree_vec;
 
     void setBandWidth(int *bw);
-    bool splitNode(const MWNode<D> &node) const override;
+    bool splitNode(const MWNode<D, T> &node) const override;
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/CrossCorrelationCalculator.cpp b/src/treebuilders/CrossCorrelationCalculator.cpp
index efe9a3390..b4c2fc3ad 100644
--- a/src/treebuilders/CrossCorrelationCalculator.cpp
+++ b/src/treebuilders/CrossCorrelationCalculator.cpp
@@ -77,7 +77,7 @@ template <int T> void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros
         const MWNode<1> &node_a = this->kernel->getNode(idx_a);
         const MWNode<1> &node_b = this->kernel->getNode(idx_b);
 
-        VectorXd vec_a;
+        Eigen::Matrix<double, Eigen::Dynamic, 1> vec_a;
         VectorXd vec_b;
         node_a.getCoefs(vec_a);
         node_b.getCoefs(vec_b);
diff --git a/src/treebuilders/DefaultCalculator.h b/src/treebuilders/DefaultCalculator.h
index 13f698162..4a1a4ce54 100644
--- a/src/treebuilders/DefaultCalculator.h
+++ b/src/treebuilders/DefaultCalculator.h
@@ -29,16 +29,16 @@
 
 namespace mrcpp {
 
-template <int D> class DefaultCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class DefaultCalculator final : public TreeCalculator<D, T> {
 public:
     // Reimplementation without OpenMP, the default is faster this way
-    void calcNodeVector(MWNodeVector<D> &nodeVec) override {
+    void calcNodeVector(MWNodeVector<D, T> &nodeVec) override {
         int nNodes = nodeVec.size();
         for (int n = 0; n < nNodes; n++) { calcNode(*nodeVec[n]); }
     }
 
 private:
-    void calcNode(MWNode<D> &node) override {
+    void calcNode(MWNode<D, T> &node) override {
         node.clearHasCoefs();
         node.clearNorms();
     }
diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp
index a5acdc297..9f384013e 100644
--- a/src/treebuilders/DerivativeCalculator.cpp
+++ b/src/treebuilders/DerivativeCalculator.cpp
@@ -42,8 +42,8 @@ using Eigen::MatrixXd;
 
 namespace mrcpp {
 
-template <int D>
-DerivativeCalculator<D>::DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D> &f)
+template <int D, typename T>
+DerivativeCalculator<D, T>::DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D, T> &f)
         : applyDir(dir)
         , fTree(&f)
         , oper(&o) {
@@ -51,12 +51,12 @@ DerivativeCalculator<D>::DerivativeCalculator(int dir, DerivativeOperator<D> &o,
     initTimers();
 }
 
-template <int D> DerivativeCalculator<D>::~DerivativeCalculator() {
+template <int D, typename T> DerivativeCalculator<D, T>::~DerivativeCalculator() {
     this->operStat.flushNodeCounters();
     println(10, this->operStat);
 }
 
-template <int D> void DerivativeCalculator<D>::initTimers() {
+template <int D, typename T> void DerivativeCalculator<D, T>::initTimers() {
     int nThreads = mrcpp_get_max_threads();
     for (int i = 0; i < nThreads; i++) {
         this->band_t.push_back(Timer(false));
@@ -65,13 +65,13 @@ template <int D> void DerivativeCalculator<D>::initTimers() {
     }
 }
 
-template <int D> void DerivativeCalculator<D>::clearTimers() {
+template <int D, typename T> void DerivativeCalculator<D, T>::clearTimers() {
     this->band_t.clear();
     this->calc_t.clear();
     this->norm_t.clear();
 }
 
-template <int D> void DerivativeCalculator<D>::printTimers() const {
+template <int D, typename T> void DerivativeCalculator<D, T>::printTimers() const {
     int oldprec = Printer::setPrecision(1);
     int nThreads = mrcpp_get_max_threads();
     printout(20, "\n\nthread ");
@@ -86,12 +86,12 @@ template <int D> void DerivativeCalculator<D>::printTimers() const {
     Printer::setPrecision(oldprec);
 }
 
-template <int D> void DerivativeCalculator<D>::calcNode(MWNode<D> &inpNode, MWNode<D> &outNode) {
+    template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D, T> &inpNode, MWNode<D, T> &outNode) {
     //if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw");
     outNode.zeroCoefs();
     int nComp = (1 << D);
-    double tmpCoefs[outNode.getNCoefs()];
-    OperatorState<D> os(outNode, tmpCoefs);
+    T tmpCoefs[outNode.getNCoefs()];
+    OperatorState<D, T> os(outNode, tmpCoefs);
 
     os.setFNode(inpNode);
     os.setFIndex(inpNode.nodeIndex);
@@ -114,24 +114,24 @@ template <int D> void DerivativeCalculator<D>::calcNode(MWNode<D> &inpNode, MWNo
 }
 
 
-template <int D> void DerivativeCalculator<D>::calcNode(MWNode<D> &gNode) {
+template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D, T> &gNode) {
     gNode.zeroCoefs();
 
     int nComp = (1 << D);
-    double tmpCoefs[gNode.getNCoefs()];
-    OperatorState<D> os(gNode, tmpCoefs);
+    T tmpCoefs[gNode.getNCoefs()];
+    OperatorState<D, T> os(gNode, tmpCoefs);
     this->operStat.incrementGNodeCounters(gNode);
 
     // Get all nodes in f within the bandwith of O in g
     this->band_t[mrcpp_get_thread_num()].resume();
     std::vector<NodeIndex<D>> idx_band;
-    MWNodeVector<D> fBand = makeOperBand(gNode, idx_band);
+    MWNodeVector<D, T> fBand = makeOperBand(gNode, idx_band);
     this->band_t[mrcpp_get_thread_num()].stop();
 
     this->calc_t[mrcpp_get_thread_num()].resume();
 
     for (int n = 0; n < fBand.size(); n++) {
-        MWNode<D> &fNode = *fBand[n];
+        MWNode<D, T> &fNode = *fBand[n];
         NodeIndex<D> &fIdx = idx_band[n];
         os.setFNode(fNode);
         os.setFIndex(fIdx);
@@ -157,12 +157,12 @@ template <int D> void DerivativeCalculator<D>::calcNode(MWNode<D> &gNode) {
 }
 
 /** Return a vector of nodes in F affected by O, given a node in G */
-template <int D>
-MWNodeVector<D> DerivativeCalculator<D>::makeOperBand(const MWNode<D> &gNode, std::vector<NodeIndex<D>> &idx_band) {
+template <int D, typename T>
+MWNodeVector<D, T> DerivativeCalculator<D, T>::makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band) {
     assert(this->applyDir >= 0);
     assert(this->applyDir < D);
 
-    MWNodeVector<D> band;
+    MWNodeVector<D, T> band;
     const NodeIndex<D> &idx_0 = gNode.getNodeIndex();
 
     // Assumes given width only in applyDir, otherwise width = 0
@@ -182,10 +182,10 @@ MWNodeVector<D> DerivativeCalculator<D>::makeOperBand(const MWNode<D> &gNode, st
 }
 
 /** Apply a single operator component (term) to a single f-node assuming zero bandwidth */
-template <int D> void DerivativeCalculator<D>::applyOperator_bw0(OperatorState<D> &os) {
+template <int D, typename T> void DerivativeCalculator<D, T>::applyOperator_bw0(OperatorState<D, T> &os) {
     //cout<<" applyOperator "<<endl;
-    MWNode<D> &gNode = *os.gNode;
-    MWNode<D> &fNode = *os.fNode;
+    MWNode<D, T> &gNode = *os.gNode;
+    MWNode<D, T> &fNode = *os.fNode;
     const NodeIndex<D> &fIdx = *os.fIdx;
     const NodeIndex<D> &gIdx = gNode.getNodeIndex();
     int depth = gNode.getDepth();
@@ -216,9 +216,9 @@ template <int D> void DerivativeCalculator<D>::applyOperator_bw0(OperatorState<D
 
 /** Apply a single operator component (term) to a single f-node. Whether the
 operator actualy is applied is determined by a screening threshold. */
-template <int D> void DerivativeCalculator<D>::applyOperator(OperatorState<D> &os) {
-    MWNode<D> &gNode = *os.gNode;
-    MWNode<D> &fNode = *os.fNode;
+template <int D, typename T> void DerivativeCalculator<D, T>::applyOperator(OperatorState<D, T> &os) {
+    MWNode<D, T> &gNode = *os.gNode;
+    MWNode<D, T> &fNode = *os.fNode;
     const NodeIndex<D> &fIdx = *os.fIdx;
     const NodeIndex<D> &gIdx = gNode.getNodeIndex();
     int depth = gNode.getDepth();
@@ -261,9 +261,10 @@ template <int D> void DerivativeCalculator<D>::applyOperator(OperatorState<D> &o
 
 /** Perform the required linear algebra operations in order to apply an
 operator component to a f-node in a n-dimensional tensor space. */
-template <int D> void DerivativeCalculator<D>::tensorApplyOperComp(OperatorState<D> &os) {
-    double **aux = os.getAuxData();
+template <int D, typename T> void DerivativeCalculator<D, T>::tensorApplyOperComp(OperatorState<D, T> &os) {
+    T **aux = os.getAuxData();
     double **oData = os.getOperData();
+    /*
 #ifdef HAVE_BLAS
     double mult = 0.0;
     for (int i = 0; i < D; i++) {
@@ -271,8 +272,8 @@ template <int D> void DerivativeCalculator<D>::tensorApplyOperComp(OperatorState
             if (i == D - 1) { // Last dir: Add up into g
                 mult = 1.0;
             }
-            const double *f = aux[i];
-            double *g = const_cast<double *>(aux[i + 1]);
+            const T *f = aux[i];
+            T *g = const_cast<T *>(aux[i + 1]);
             cblas_dgemm(CblasColMajor,
                         CblasTrans,
                         CblasNoTrans,
@@ -301,9 +302,10 @@ template <int D> void DerivativeCalculator<D>::tensorApplyOperComp(OperatorState
         }
     }
 #else
+    */
     for (int i = 0; i < D; i++) {
-        Eigen::Map<MatrixXd> f(aux[i], os.kp1, os.kp1_dm1);
-        Eigen::Map<MatrixXd> g(aux[i + 1], os.kp1_dm1, os.kp1);
+        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> f(aux[i], os.kp1, os.kp1_dm1);
+        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> g(aux[i + 1], os.kp1_dm1, os.kp1);
         if (oData[i] != nullptr) {
             Eigen::Map<MatrixXd> op(oData[i], os.kp1, os.kp1);
             if (i == D - 1) { // Last dir: Add up into g
@@ -320,15 +322,19 @@ template <int D> void DerivativeCalculator<D>::tensorApplyOperComp(OperatorState
             }
         }
     }
-#endif
+    //#endif
 }
 
-template <int D> MWNodeVector<D> *DerivativeCalculator<D>::getInitialWorkVector(MWTree<D> &tree) const {
+template <int D, typename T> MWNodeVector<D, T> *DerivativeCalculator<D, T>::getInitialWorkVector(MWTree<D, T> &tree) const {
     return tree.copyEndNodeTable();
 }
 
-template class DerivativeCalculator<1>;
-template class DerivativeCalculator<2>;
-template class DerivativeCalculator<3>;
+template class DerivativeCalculator<1, double>;
+template class DerivativeCalculator<2, double>;
+template class DerivativeCalculator<3, double>;
+
+template class DerivativeCalculator<1, ComplexDouble>;
+template class DerivativeCalculator<2, ComplexDouble>;
+template class DerivativeCalculator<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/treebuilders/DerivativeCalculator.h b/src/treebuilders/DerivativeCalculator.h
index 5d4d28716..9adc48046 100644
--- a/src/treebuilders/DerivativeCalculator.h
+++ b/src/treebuilders/DerivativeCalculator.h
@@ -30,40 +30,40 @@
 
 namespace mrcpp {
 
-template <int D> class DerivativeCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class DerivativeCalculator final : public TreeCalculator<D, T> {
 public:
-    DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D> &f);
+  DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D, T> &f);
     ~DerivativeCalculator() override;
 
-    MWNodeVector<D> *getInitialWorkVector(MWTree<D> &tree) const override;
-    void calcNode(MWNode<D> &fNode, MWNode<D> &gNode);
+    MWNodeVector<D, T> *getInitialWorkVector(MWTree<D, T> &tree) const override;
+    void calcNode(MWNode<D, T> &fNode, MWNode<D, T> &gNode);
 
 private:
     int applyDir;
-    FunctionTree<D> *fTree;
+    FunctionTree<D, T> *fTree;
     DerivativeOperator<D> *oper;
 
     std::vector<Timer> band_t;
     std::vector<Timer> calc_t;
     std::vector<Timer> norm_t;
-    OperatorStatistics<D> operStat;
+    OperatorStatistics<D, T> operStat;
 
-    MWNodeVector<D> makeOperBand(const MWNode<D> &gNode, std::vector<NodeIndex<D>> &idx_band);
+    MWNodeVector<D, T> makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band);
 
     void initTimers();
     void clearTimers();
     void printTimers() const;
 
-    void calcNode(MWNode<D> &node) override;
+    void calcNode(MWNode<D, T> &node) override;
     void postProcess() override {
         printTimers();
         clearTimers();
         initTimers();
     }
 
-    void applyOperator(OperatorState<D> &os);
-    void applyOperator_bw0(OperatorState<D> &os);
-    void tensorApplyOperComp(OperatorState<D> &os);
+    void applyOperator(OperatorState<D, T> &os);
+    void applyOperator_bw0(OperatorState<D, T> &os);
+    void tensorApplyOperComp(OperatorState<D, T> &os);
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/MapCalculator.h b/src/treebuilders/MapCalculator.h
index 492c1f440..33f799ee9 100644
--- a/src/treebuilders/MapCalculator.h
+++ b/src/treebuilders/MapCalculator.h
@@ -29,24 +29,24 @@
 
 namespace mrcpp {
 
-template <int D> class MapCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class MapCalculator final : public TreeCalculator<D, T> {
 public:
-    MapCalculator(FMap fm, FunctionTree<D> &inp)
+    MapCalculator(FMap<T, T> fm, FunctionTree<D, T> &inp)
             : func(&inp)
             , fmap(std::move(fm)) {}
 
 private:
-    FunctionTree<D> *func;
-    FMap fmap;
-    void calcNode(MWNode<D> &node_o) override {
+    FunctionTree<D, T> *func;
+    FMap<T, T> fmap;
+    void calcNode(MWNode<D, T> &node_o) override {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         int n_coefs = node_o.getNCoefs();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         // This generates missing nodes
-        MWNode<D> node_i = func->getNode(idx); // Copy node
+        MWNode<D, T> node_i = func->getNode(idx); // Copy node
         node_i.mwTransform(Reconstruction);
         node_i.cvTransform(Forward);
-        const double *coefs_i = node_i.getCoefs();
+        const T *coefs_i = node_i.getCoefs();
         for (int j = 0; j < n_coefs; j++) { coefs_o[j] = fmap(coefs_i[j]); }
         node_o.cvTransform(Backward);
         node_o.mwTransform(Compression);
diff --git a/src/treebuilders/MultiplicationAdaptor.h b/src/treebuilders/MultiplicationAdaptor.h
index ff0fe992d..9637ac055 100644
--- a/src/treebuilders/MultiplicationAdaptor.h
+++ b/src/treebuilders/MultiplicationAdaptor.h
@@ -31,19 +31,19 @@
 
 namespace mrcpp {
 
-template <int D> class MultiplicationAdaptor : public TreeAdaptor<D> {
+template <int D, typename T> class MultiplicationAdaptor : public TreeAdaptor<D, T> {
 public:
-    MultiplicationAdaptor(double pr, int ms, FunctionTreeVector<D> &t)
-            : TreeAdaptor<D>(ms)
+    MultiplicationAdaptor(double pr, int ms, FunctionTreeVector<D, T> &t)
+            : TreeAdaptor<D, T>(ms)
             , prec(pr)
             , trees(t) {}
     ~MultiplicationAdaptor() override = default;
 
 protected:
     double prec;
-    mutable FunctionTreeVector<D> trees;
+    mutable FunctionTreeVector<D, T> trees;
 
-    bool splitNode(const MWNode<D> &node) const override {
+    bool splitNode(const MWNode<D, T> &node) const override {
         if (this->trees.size() != 2) MSG_ERROR("Invalid tree vec size: " << this->trees.size());
         auto &pNode0 = get_func(trees, 0).getNode(node.getNodeIndex());
         auto &pNode1 = get_func(trees, 1).getNode(node.getNodeIndex());
diff --git a/src/treebuilders/MultiplicationCalculator.h b/src/treebuilders/MultiplicationCalculator.h
index ba5669f4d..dac957822 100644
--- a/src/treebuilders/MultiplicationCalculator.h
+++ b/src/treebuilders/MultiplicationCalculator.h
@@ -30,26 +30,26 @@
 
 namespace mrcpp {
 
-template <int D> class MultiplicationCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class MultiplicationCalculator final : public TreeCalculator<D, T> {
 public:
-    MultiplicationCalculator(const FunctionTreeVector<D> &inp)
+    MultiplicationCalculator(const FunctionTreeVector<D, T> &inp)
             : prod_vec(inp) {}
 
 private:
-    FunctionTreeVector<D> prod_vec;
+    FunctionTreeVector<D, T> prod_vec;
 
-    void calcNode(MWNode<D> &node_o) override {
+    void calcNode(MWNode<D, T> &node_o) override {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
         for (int i = 0; i < this->prod_vec.size(); i++) {
             double c_i = get_coef(this->prod_vec, i);
-            FunctionTree<D> &func_i = get_func(this->prod_vec, i);
+            FunctionTree<D, T> &func_i = get_func(this->prod_vec, i);
             // This generates missing nodes
-            MWNode<D> node_i = func_i.getNode(idx); // Copy node
+            MWNode<D, T> node_i = func_i.getNode(idx); // Copy node
             node_i.mwTransform(Reconstruction);
             node_i.cvTransform(Forward);
-            const double *coefs_i = node_i.getCoefs();
+            const T *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
         }
diff --git a/src/treebuilders/PowerCalculator.h b/src/treebuilders/PowerCalculator.h
index bb2124b73..79147fc4b 100644
--- a/src/treebuilders/PowerCalculator.h
+++ b/src/treebuilders/PowerCalculator.h
@@ -29,25 +29,25 @@
 
 namespace mrcpp {
 
-template <int D> class PowerCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class PowerCalculator final : public TreeCalculator<D, T> {
 public:
-    PowerCalculator(FunctionTree<D> &inp, double pow)
+    PowerCalculator(FunctionTree<D, T> &inp, double pow)
             : power(pow)
             , func(&inp) {}
 
 private:
     double power;
-    FunctionTree<D> *func;
+    FunctionTree<D, T> *func;
 
-    void calcNode(MWNode<D> &node_o) override {
+    void calcNode(MWNode<D, T> &node_o) override {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         int n_coefs = node_o.getNCoefs();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         // This generates missing nodes
-        MWNode<D> node_i = func->getNode(idx); // Copy node
+        MWNode<D, T> node_i = func->getNode(idx); // Copy node
         node_i.mwTransform(Reconstruction);
         node_i.cvTransform(Forward);
-        const double *coefs_i = node_i.getCoefs();
+        const T *coefs_i = node_i.getCoefs();
         for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::pow(coefs_i[j], this->power); }
         node_o.cvTransform(Backward);
         node_o.mwTransform(Compression);
diff --git a/src/treebuilders/ProjectionCalculator.cpp b/src/treebuilders/ProjectionCalculator.cpp
index 46335d092..e451ea69e 100644
--- a/src/treebuilders/ProjectionCalculator.cpp
+++ b/src/treebuilders/ProjectionCalculator.cpp
@@ -30,18 +30,19 @@ using Eigen::MatrixXd;
 
 namespace mrcpp {
 
-template <int D> void ProjectionCalculator<D>::calcNode(MWNode<D> &node) {
+template <int D, typename T> void ProjectionCalculator<D, T>::calcNode(MWNode<D, T> &node) {
     MatrixXd exp_pts;
     node.getExpandedChildPts(exp_pts);
 
     assert(exp_pts.cols() == node.getNCoefs());
 
     Coord<D> r;
-    double *coefs = node.getCoefs();
+    T *coefs = node.getCoefs();
     for (int i = 0; i < node.getNCoefs(); i++) {
-        for (int d = 0; d < D; d++) { r[d] = scaling_factor[d] * exp_pts(d, i); }
-        coefs[i] = this->func->evalf(r);
+      for (int d = 0; d < D; d++) { r[d] = scaling_factor[d] * exp_pts(d, i); }
+      coefs[i] = this->func->evalf(r);
     }
+
     node.cvTransform(Backward);
     node.mwTransform(Compression);
     node.setHasCoefs();
@@ -50,7 +51,7 @@ template <int D> void ProjectionCalculator<D>::calcNode(MWNode<D> &node) {
 
 /* Old interpolating version, somewhat faster
 template<int D>
-void ProjectionCalculator<D>::calcNode(MWNode<D> &node) {
+void ProjectionCalculator<D, T>::calcNode(MWNode<D, T> &node) {
     const ScalingBasis &sf = node.getMWTree().getMRA().getScalingBasis();
     if (sf.getScalingType() != Interpol) {
         NOT_IMPLEMENTED_ABORT;
@@ -104,8 +105,12 @@ void ProjectionCalculator<D>::calcNode(MWNode<D> &node) {
 }
 */
 
-template class ProjectionCalculator<1>;
-template class ProjectionCalculator<2>;
-template class ProjectionCalculator<3>;
+template class ProjectionCalculator<1, double>;
+template class ProjectionCalculator<2, double>;
+template class ProjectionCalculator<3, double>;
+
+template class ProjectionCalculator<1, ComplexDouble>;
+template class ProjectionCalculator<2, ComplexDouble>;
+template class ProjectionCalculator<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/treebuilders/ProjectionCalculator.h b/src/treebuilders/ProjectionCalculator.h
index 2fbbb09fe..067c41422 100644
--- a/src/treebuilders/ProjectionCalculator.h
+++ b/src/treebuilders/ProjectionCalculator.h
@@ -29,16 +29,16 @@
 
 namespace mrcpp {
 
-template <int D> class ProjectionCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class ProjectionCalculator final : public TreeCalculator<D, T> {
 public:
-    ProjectionCalculator(const RepresentableFunction<D> &inp_func, const std::array<double, D> &sf)
+    ProjectionCalculator(const RepresentableFunction<D, T> &inp_func, const std::array<double, D> &sf)
             : func(&inp_func)
             , scaling_factor(sf) {}
 
 private:
-    const RepresentableFunction<D> *func;
+    const RepresentableFunction<D, T> *func;
     const std::array<double, D> scaling_factor;
-    void calcNode(MWNode<D> &node) override;
+    void calcNode(MWNode<D, T> &node) override;
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/SplitAdaptor.h b/src/treebuilders/SplitAdaptor.h
index b9d50fe8b..7e81bbe8b 100644
--- a/src/treebuilders/SplitAdaptor.h
+++ b/src/treebuilders/SplitAdaptor.h
@@ -29,16 +29,16 @@
 
 namespace mrcpp {
 
-template <int D> class SplitAdaptor final : public TreeAdaptor<D> {
+template <int D, typename T = double> class SplitAdaptor final : public TreeAdaptor<D, T> {
 public:
     SplitAdaptor(int ms, bool sp)
-            : TreeAdaptor<D>(ms)
+            : TreeAdaptor<D, T>(ms)
             , split(sp) {}
 
 private:
     bool split;
 
-    bool splitNode(const MWNode<D> &node) const override { return this->split; }
+    bool splitNode(const MWNode<D, T> &node) const override { return this->split; }
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/SquareCalculator.h b/src/treebuilders/SquareCalculator.h
index e9bb0f8d3..e56b41cf0 100644
--- a/src/treebuilders/SquareCalculator.h
+++ b/src/treebuilders/SquareCalculator.h
@@ -29,23 +29,23 @@
 
 namespace mrcpp {
 
-template <int D> class SquareCalculator final : public TreeCalculator<D> {
+template <int D, typename T> class SquareCalculator final : public TreeCalculator<D, T> {
 public:
-    SquareCalculator(FunctionTree<D> &inp)
+    SquareCalculator(FunctionTree<D, T> &inp)
             : func(&inp) {}
 
 private:
-    FunctionTree<D> *func;
+    FunctionTree<D, T> *func;
 
-    void calcNode(MWNode<D> &node_o) override {
+    void calcNode(MWNode<D, T> &node_o) override {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         int n_coefs = node_o.getNCoefs();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         // This generates missing nodes
-        MWNode<D> node_i = func->getNode(idx); // Copy node
+        MWNode<D, T> node_i = func->getNode(idx); // Copy node
         node_i.mwTransform(Reconstruction);
         node_i.cvTransform(Forward);
-        const double *coefs_i = node_i.getCoefs();
+        const T *coefs_i = node_i.getCoefs();
         for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
         node_o.cvTransform(Backward);
         node_o.mwTransform(Compression);
diff --git a/src/treebuilders/TreeAdaptor.h b/src/treebuilders/TreeAdaptor.h
index a46bab648..80cecb09e 100644
--- a/src/treebuilders/TreeAdaptor.h
+++ b/src/treebuilders/TreeAdaptor.h
@@ -30,7 +30,7 @@
 
 namespace mrcpp {
 
-template <int D> class TreeAdaptor {
+template <int D, typename T> class TreeAdaptor {
 public:
     TreeAdaptor(int ms)
             : maxScale(ms) {}
@@ -38,9 +38,9 @@ template <int D> class TreeAdaptor {
 
     void setMaxScale(int ms) { this->maxScale = ms; }
 
-    void splitNodeVector(MWNodeVector<D> &out, MWNodeVector<D> &inp) const {
+    void splitNodeVector(MWNodeVector<D, T> &out, MWNodeVector<D, T> &inp) const {
         for (int n = 0; n < inp.size(); n++) {
-            MWNode<D> &node = *inp[n];
+            MWNode<D, T> &node = *inp[n];
             // Can be BranchNode in operator application
             if (node.isBranchNode()) continue;
             if (node.getScale() + 2 > this->maxScale) continue;
@@ -54,7 +54,7 @@ template <int D> class TreeAdaptor {
 protected:
     int maxScale;
 
-    virtual bool splitNode(const MWNode<D> &node) const = 0;
+    virtual bool splitNode(const MWNode<D, T> &node) const = 0;
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/TreeBuilder.cpp b/src/treebuilders/TreeBuilder.cpp
index 223d94794..225b55cb5 100644
--- a/src/treebuilders/TreeBuilder.cpp
+++ b/src/treebuilders/TreeBuilder.cpp
@@ -35,13 +35,13 @@
 
 namespace mrcpp {
 
-template <int D>
-void TreeBuilder<D>::build(MWTree<D> &tree, TreeCalculator<D> &calculator, TreeAdaptor<D> &adaptor, int maxIter) const {
+template <int D, typename T>
+void TreeBuilder<D, T>::build(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator, TreeAdaptor<D, T> &adaptor, int maxIter) const {
     Timer calc_t(false), split_t(false), norm_t(false);
     println(10, " == Building tree");
 
-    MWNodeVector<D> *newVec = nullptr;
-    MWNodeVector<D> *workVec = calculator.getInitialWorkVector(tree);
+    MWNodeVector<D, T> *newVec = nullptr;
+    MWNodeVector<D, T> *workVec = calculator.getInitialWorkVector(tree);
 
     double sNorm = 0.0;
     double wNorm = 0.0;
@@ -69,7 +69,7 @@ void TreeBuilder<D>::build(MWTree<D> &tree, TreeCalculator<D> &calculator, TreeA
         norm_t.stop();
 
         split_t.resume();
-        newVec = new MWNodeVector<D>;
+        newVec = new MWNodeVector<D, T>;
         if (iter >= maxIter and maxIter >= 0) workVec->clear();
         adaptor.splitNodeVector(*newVec, *workVec);
         split_t.stop();
@@ -87,11 +87,11 @@ void TreeBuilder<D>::build(MWTree<D> &tree, TreeCalculator<D> &calculator, TreeA
     print::time(10, "Time split", split_t);
 }
 
-template <int D> void TreeBuilder<D>::clear(MWTree<D> &tree, TreeCalculator<D> &calculator) const {
+template <int D, typename T> void TreeBuilder<D, T>::clear(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator) const {
     println(10, " == Clearing tree");
 
     Timer clean_t;
-    MWNodeVector<D> nodeVec;
+    MWNodeVector<D, T> nodeVec;
     tree_utils::make_node_table(tree, nodeVec);
     calculator.calcNodeVector(nodeVec); // clear all coefficients
     clean_t.stop();
@@ -104,16 +104,16 @@ template <int D> void TreeBuilder<D>::clear(MWTree<D> &tree, TreeCalculator<D> &
     print::separator(10, ' ');
 }
 
-template <int D> int TreeBuilder<D>::split(MWTree<D> &tree, TreeAdaptor<D> &adaptor, bool passCoefs) const {
+template <int D, typename T> int TreeBuilder<D, T>::split(MWTree<D, T> &tree, TreeAdaptor<D, T> &adaptor, bool passCoefs) const {
     println(10, " == Refining tree");
 
     Timer split_t;
-    MWNodeVector<D> newVec;
-    MWNodeVector<D> *workVec = tree.copyEndNodeTable();
+    MWNodeVector<D, T> newVec;
+    MWNodeVector<D, T> *workVec = tree.copyEndNodeTable();
     adaptor.splitNodeVector(newVec, *workVec);
     if (passCoefs) {
         for (int i = 0; i < workVec->size(); i++) {
-            MWNode<D> &node = *(*workVec)[i];
+            MWNode<D, T> &node = *(*workVec)[i];
             if (node.isBranchNode()) { node.giveChildrenCoefs(true); }
         }
     }
@@ -131,11 +131,11 @@ template <int D> int TreeBuilder<D>::split(MWTree<D> &tree, TreeAdaptor<D> &adap
     return newVec.size();
 }
 
-template <int D> void TreeBuilder<D>::calc(MWTree<D> &tree, TreeCalculator<D> &calculator) const {
+template <int D, typename T> void TreeBuilder<D, T>::calc(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator) const {
     println(10, " == Calculating tree");
 
     Timer calc_t;
-    MWNodeVector<D> *workVec = calculator.getInitialWorkVector(tree);
+    MWNodeVector<D, T> *workVec = calculator.getInitialWorkVector(tree);
     calculator.calcNodeVector(*workVec);
     printout(10, "  -- #" << std::setw(3) << 0 << ": Calculated ");
     printout(10, std::setw(6) << workVec->size() << " nodes ");
@@ -148,26 +148,31 @@ template <int D> void TreeBuilder<D>::calc(MWTree<D> &tree, TreeCalculator<D> &c
     print::time(10, "Time calc", calc_t);
 }
 
-template <int D> double TreeBuilder<D>::calcScalingNorm(const MWNodeVector<D> &vec) const {
+template <int D, typename T> double TreeBuilder<D, T>::calcScalingNorm(const MWNodeVector<D, T> &vec) const {
     double sNorm = 0.0;
     for (int i = 0; i < vec.size(); i++) {
-        const MWNode<D> &node = *vec[i];
+        const MWNode<D, T> &node = *vec[i];
         if (node.getDepth() >= 0) sNorm += node.getScalingNorm();
     }
     return sNorm;
 }
 
-template <int D> double TreeBuilder<D>::calcWaveletNorm(const MWNodeVector<D> &vec) const {
+template <int D, typename T> double TreeBuilder<D, T>::calcWaveletNorm(const MWNodeVector<D, T> &vec) const {
     double wNorm = 0.0;
     for (int i = 0; i < vec.size(); i++) {
-        const MWNode<D> &node = *vec[i];
+        const MWNode<D, T> &node = *vec[i];
         if (node.getDepth() >= 0) wNorm += node.getWaveletNorm();
     }
     return wNorm;
 }
 
-template class TreeBuilder<1>;
-template class TreeBuilder<2>;
-template class TreeBuilder<3>;
+template class TreeBuilder<1, double>;
+template class TreeBuilder<2, double>;
+template class TreeBuilder<3, double>;
+
+
+template class TreeBuilder<1, ComplexDouble>;
+template class TreeBuilder<2, ComplexDouble>;
+template class TreeBuilder<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/treebuilders/TreeBuilder.h b/src/treebuilders/TreeBuilder.h
index 313e9f4f4..81c32afe6 100644
--- a/src/treebuilders/TreeBuilder.h
+++ b/src/treebuilders/TreeBuilder.h
@@ -29,16 +29,16 @@
 
 namespace mrcpp {
 
-template <int D> class TreeBuilder final {
+template <int D, typename T> class TreeBuilder final {
 public:
-    void build(MWTree<D> &tree, TreeCalculator<D> &calculator, TreeAdaptor<D> &adaptor, int maxIter) const;
-    void clear(MWTree<D> &tree, TreeCalculator<D> &calculator) const;
-    void calc(MWTree<D> &tree, TreeCalculator<D> &calculator) const;
-    int split(MWTree<D> &tree, TreeAdaptor<D> &adaptor, bool passCoefs) const;
+    void build(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator, TreeAdaptor<D, T> &adaptor, int maxIter) const;
+    void clear(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator) const;
+    void calc(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator) const;
+    int split(MWTree<D, T> &tree, TreeAdaptor<D, T> &adaptor, bool passCoefs) const;
 
 private:
-    double calcScalingNorm(const MWNodeVector<D> &vec) const;
-    double calcWaveletNorm(const MWNodeVector<D> &vec) const;
+    double calcScalingNorm(const MWNodeVector<D, T> &vec) const;
+    double calcWaveletNorm(const MWNodeVector<D, T> &vec) const;
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/TreeCalculator.h b/src/treebuilders/TreeCalculator.h
index acd9f00f8..4e171d91c 100644
--- a/src/treebuilders/TreeCalculator.h
+++ b/src/treebuilders/TreeCalculator.h
@@ -29,20 +29,20 @@
 
 namespace mrcpp {
 
-template <int D> class TreeCalculator {
+template <int D, typename T> class TreeCalculator {
 public:
     TreeCalculator() = default;
     virtual ~TreeCalculator() = default;
 
-    virtual MWNodeVector<D> *getInitialWorkVector(MWTree<D> &tree) const { return tree.copyEndNodeTable(); }
+    virtual MWNodeVector<D, T> *getInitialWorkVector(MWTree<D, T> &tree) const { return tree.copyEndNodeTable(); }
 
-    virtual void calcNodeVector(MWNodeVector<D> &nodeVec) {
+    virtual void calcNodeVector(MWNodeVector<D, T> &nodeVec) {
 #pragma omp parallel shared(nodeVec) num_threads(mrcpp_get_num_threads())
         {
             int nNodes = nodeVec.size();
 #pragma omp for schedule(guided)
             for (int n = 0; n < nNodes; n++) {
-                MWNode<D> &node = *nodeVec[n];
+                MWNode<D, T> &node = *nodeVec[n];
                 calcNode(node);
             }
         }
@@ -50,7 +50,7 @@ template <int D> class TreeCalculator {
     }
 
 protected:
-    virtual void calcNode(MWNode<D> &node) = 0;
+    virtual void calcNode(MWNode<D, T> &node) = 0;
     virtual void postProcess() {}
 };
 
diff --git a/src/treebuilders/WaveletAdaptor.h b/src/treebuilders/WaveletAdaptor.h
index 15da130e1..759f6b7ee 100644
--- a/src/treebuilders/WaveletAdaptor.h
+++ b/src/treebuilders/WaveletAdaptor.h
@@ -31,10 +31,10 @@
 
 namespace mrcpp {
 
-template <int D> class WaveletAdaptor : public TreeAdaptor<D> {
+template <int D, typename T> class WaveletAdaptor : public TreeAdaptor<D, T> {
 public:
     WaveletAdaptor(double pr, int ms, bool ap = false, double sf = 1.0)
-            : TreeAdaptor<D>(ms)
+            : TreeAdaptor<D, T>(ms)
             , absPrec(ap)
             , prec(pr)
             , splitFac(sf) {}
@@ -50,7 +50,7 @@ template <int D> class WaveletAdaptor : public TreeAdaptor<D> {
     double splitFac;
     std::function<double(const NodeIndex<D> &idx)> precFunc = [](const NodeIndex<D> &idx) { return 1.0; };
 
-    bool splitNode(const MWNode<D> &node) const override {
+    bool splitNode(const MWNode<D, T> &node) const override {
         auto precFac = this->precFunc(node.getNodeIndex()); // returns 1.0 by default
         return tree_utils::split_check(node, this->prec * precFac, this->splitFac, this->absPrec);
     }
diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp
index 86b7f30a7..4278b46ca 100644
--- a/src/treebuilders/add.cpp
+++ b/src/treebuilders/add.cpp
@@ -61,16 +61,16 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D>
+template <int D, typename T>
 void add(double prec,
-         FunctionTree<D> &out,
+         FunctionTree<D, T> &out,
          double a,
-         FunctionTree<D> &inp_a,
+         FunctionTree<D, T> &inp_a,
          double b,
-         FunctionTree<D> &inp_b,
+         FunctionTree<D, T> &inp_b,
          int maxIter,
          bool absPrec) {
-    FunctionTreeVector<D> tmp_vec;
+    FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back(std::make_tuple(a, &inp_a));
     tmp_vec.push_back(std::make_tuple(b, &inp_b));
     add(prec, out, tmp_vec, maxIter, absPrec);
@@ -98,14 +98,14 @@ void add(double prec,
  * no coefs).
  *
  */
-template <int D> void add(double prec, FunctionTree<D> &out, FunctionTreeVector<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    AdditionCalculator<D> calculator(inp);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    AdditionCalculator<D, T> calculator(inp);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -116,7 +116,7 @@ template <int D> void add(double prec, FunctionTree<D> &out, FunctionTreeVector<
 
     Timer clean_t;
     for (int i = 0; i < inp.size(); i++) {
-        FunctionTree<D> &tree = get_func(inp, i);
+        FunctionTree<D, T> &tree = get_func(inp, i);
         tree.deleteGenerated();
     }
     clean_t.stop();
@@ -126,66 +126,126 @@ template <int D> void add(double prec, FunctionTree<D> &out, FunctionTreeVector<
     print::separator(10, ' ');
 }
 
-template <int D> void add(double prec, FunctionTree<D> &out, std::vector<FunctionTree<D> *> &inp, int maxIter, bool absPrec) {
-    FunctionTreeVector<D> inp_vec;
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter, bool absPrec) {
+    FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     add(prec, out, inp_vec, maxIter, absPrec);
 }
 
-template void add<1>(double prec,
-                     FunctionTree<1> &out,
+template void add<1, double>(double prec,
+                     FunctionTree<1, double> &out,
                      double a,
-                     FunctionTree<1> &tree_a,
+                     FunctionTree<1, double> &tree_a,
                      double b,
-                     FunctionTree<1> &tree_b,
+                     FunctionTree<1, double> &tree_b,
                      int maxIter,
                      bool absPrec);
-template void add<2>(double prec,
-                     FunctionTree<2> &out,
+template void add<2, double>(double prec,
+                     FunctionTree<2, double> &out,
                      double a,
-                     FunctionTree<2> &tree_a,
+                     FunctionTree<2, double> &tree_a,
                      double b,
-                     FunctionTree<2> &tree_b,
+                     FunctionTree<2, double> &tree_b,
                      int maxIter,
                      bool absPrec);
-template void add<3>(double prec,
-                     FunctionTree<3> &out,
+template void add<3, double>(double prec,
+                     FunctionTree<3, double> &out,
                      double a,
-                     FunctionTree<3> &tree_a,
+                     FunctionTree<3, double> &tree_a,
                      double b,
-                     FunctionTree<3> &tree_b,
+                     FunctionTree<3, double> &tree_b,
                      int maxIter,
                      bool absPrec);
 
-template void add<1>(double prec,
-                     FunctionTree<1> &out,
-                     FunctionTreeVector<1> &inp,
+template void add<1, double>(double prec,
+                     FunctionTree<1, double> &out,
+                     FunctionTreeVector<1, double> &inp,
                      int maxIter,
                      bool absPrec);
-template void add<2>(double prec,
-                     FunctionTree<2> &out,
-                     FunctionTreeVector<2> &inp,
+template void add<2, double>(double prec,
+                     FunctionTree<2, double> &out,
+                     FunctionTreeVector<2, double> &inp,
                      int maxIter,
                      bool absPrec);
-template void add<3>(double prec,
-                     FunctionTree<3> &out,
-                     FunctionTreeVector<3> &inp,
+template void add<3, double>(double prec,
+                     FunctionTree<3, double> &out,
+                     FunctionTreeVector<3, double> &inp,
                      int maxIter,
                      bool absPrec);
 
-template void add<1>(double prec,
-                     FunctionTree<1> &out,
-                     std::vector<FunctionTree<1> *> &inp,
+template void add<1, double>(double prec,
+                     FunctionTree<1, double> &out,
+                     std::vector<FunctionTree<1, double> *> &inp,
                      int maxIter,
                      bool absPrec);
-template void add<2>(double prec,
-                     FunctionTree<2> &out,
-                     std::vector<FunctionTree<2> *> &inp,
+template void add<2, double>(double prec,
+                     FunctionTree<2, double> &out,
+                     std::vector<FunctionTree<2, double> *> &inp,
                      int maxIter,
                      bool absPrec);
-template void add<3>(double prec,
-                     FunctionTree<3> &out,
-                     std::vector<FunctionTree<3> *> &inp,
+template void add<3, double>(double prec,
+                     FunctionTree<3, double> &out,
+                     std::vector<FunctionTree<3, double> *> &inp,
+                     int maxIter,
+                     bool absPrec);
+
+
+
+
+template void add<1, ComplexDouble>(double prec,
+                     FunctionTree<1, ComplexDouble> &out,
+                     double a,
+                     FunctionTree<1, ComplexDouble> &tree_a,
+                     double b,
+                     FunctionTree<1, ComplexDouble> &tree_b,
+                     int maxIter,
+                     bool absPrec);
+template void add<2, ComplexDouble>(double prec,
+                     FunctionTree<2, ComplexDouble> &out,
+                     double a,
+                     FunctionTree<2, ComplexDouble> &tree_a,
+                     double b,
+                     FunctionTree<2, ComplexDouble> &tree_b,
+                     int maxIter,
+                     bool absPrec);
+template void add<3, ComplexDouble>(double prec,
+                     FunctionTree<3, ComplexDouble> &out,
+                     double a,
+                     FunctionTree<3, ComplexDouble> &tree_a,
+                     double b,
+                     FunctionTree<3, ComplexDouble> &tree_b,
+                     int maxIter,
+                     bool absPrec);
+
+template void add<1, ComplexDouble>(double prec,
+                     FunctionTree<1, ComplexDouble> &out,
+                     FunctionTreeVector<1, ComplexDouble> &inp,
+                     int maxIter,
+                     bool absPrec);
+template void add<2, ComplexDouble>(double prec,
+                     FunctionTree<2, ComplexDouble> &out,
+                     FunctionTreeVector<2, ComplexDouble> &inp,
+                     int maxIter,
+                     bool absPrec);
+template void add<3, ComplexDouble>(double prec,
+                     FunctionTree<3, ComplexDouble> &out,
+                     FunctionTreeVector<3, ComplexDouble> &inp,
+                     int maxIter,
+                     bool absPrec);
+
+template void add<1, ComplexDouble>(double prec,
+                     FunctionTree<1, ComplexDouble> &out,
+                     std::vector<FunctionTree<1, ComplexDouble> *> &inp,
+                     int maxIter,
+                     bool absPrec);
+template void add<2, ComplexDouble>(double prec,
+                     FunctionTree<2, ComplexDouble> &out,
+                     std::vector<FunctionTree<2, ComplexDouble> *> &inp,
+                     int maxIter,
+                     bool absPrec);
+template void add<3, ComplexDouble>(double prec,
+                     FunctionTree<3, ComplexDouble> &out,
+                     std::vector<FunctionTree<3, ComplexDouble> *> &inp,
                      int maxIter,
                      bool absPrec);
 
diff --git a/src/treebuilders/add.h b/src/treebuilders/add.h
index 105245829..68a3b3560 100644
--- a/src/treebuilders/add.h
+++ b/src/treebuilders/add.h
@@ -28,22 +28,22 @@
 
 namespace mrcpp {
 
-template <int D> void add(double prec,
-                          FunctionTree<D> &out,
+template <int D, typename T> void add(double prec,
+                          FunctionTree<D, T> &out,
                           double a,
-                          FunctionTree<D> &tree_a,
+                          FunctionTree<D, T> &tree_a,
                           double b,
-                          FunctionTree<D> &tree_b,
+                          FunctionTree<D, T> &tree_b,
                           int maxIter = -1,
                           bool absPrec = false);
-template <int D> void add(double prec,
-                          FunctionTree<D> &out,
-                          FunctionTreeVector<D> &inp,
+template <int D, typename T> void add(double prec,
+                          FunctionTree<D, T> &out,
+                          FunctionTreeVector<D, T> &inp,
                           int maxIter = -1,
                           bool absPrec = false);
-template <int D> void add(double prec,
-                          FunctionTree<D> &out,
-                          std::vector<FunctionTree<D> *> &inp,
+template <int D, typename T> void add(double prec,
+                          FunctionTree<D, T> &out,
+                          std::vector<FunctionTree<D, T> *> &inp,
                           int maxIter = -1,
                           bool absPrec = false);
 
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 3dc49de3c..04075d021 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -41,7 +41,7 @@
 
 namespace mrcpp {
 
-template <int D> void apply_on_unit_cell(bool inside, double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter, bool absPrec);
+template <int D, typename T> void apply_on_unit_cell(bool inside, double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter, bool absPrec);
 
 /** @brief Application of MW integral convolution operator
  *
@@ -64,16 +64,16 @@ template <int D> void apply_on_unit_cell(bool inside, double prec, FunctionTree<
  * no coefs).
  *
  */
-template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
 
     Timer pre_t;
     oper.calcBandWidths(prec);
     int maxScale = out.getMRA().getMaxScale();
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    ConvolutionCalculator<D> calculator(prec, oper, inp);
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    ConvolutionCalculator<D, T> calculator(prec, oper, inp);
     pre_t.stop();
-    TreeBuilder<D> builder;
+    TreeBuilder<D, T> builder;
     builder.build(out, calculator, adaptor, maxIter);
 
     Timer post_t;
@@ -113,18 +113,18 @@ template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperat
  * no coefs).
  *
  */
-template <int D> void apply_on_unit_cell(bool inside, double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void apply_on_unit_cell(bool inside, double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
 
     Timer pre_t;
     oper.calcBandWidths(prec);
     int maxScale = out.getMRA().getMaxScale();
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    ConvolutionCalculator<D> calculator(prec, oper, inp);
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    ConvolutionCalculator<D, T> calculator(prec, oper, inp);
     calculator.startManipulateOperator(inside);
     pre_t.stop();
 
-    TreeBuilder<D> builder;
+    TreeBuilder<D, T> builder;
     builder.build(out, calculator, adaptor, maxIter);
 
     Timer post_t;
@@ -166,7 +166,7 @@ template <int D> void apply_on_unit_cell(bool inside, double prec, FunctionTree<
  * no coefs).
  *
  */
-template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, FunctionTreeVector<D> &precTrees, int maxIter, bool absPrec) {
+template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter, bool absPrec) {
     Timer pre_t;
     oper.calcBandWidths(prec);
     int maxScale = out.getMRA().getMaxScale();
@@ -183,13 +183,13 @@ template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperat
         return 1.0 / maxNorm;
     };
 
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
     adaptor.setPrecFunction(precFunc);
-    ConvolutionCalculator<D> calculator(prec, oper, inp);
+    ConvolutionCalculator<D, T> calculator(prec, oper, inp);
     calculator.setPrecFunction(precFunc);
     pre_t.stop();
 
-    TreeBuilder<D> builder;
+    TreeBuilder<D, T> builder;
     builder.build(out, calculator, adaptor, maxIter);
 
     Timer post_t;
@@ -227,7 +227,7 @@ template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperat
  * no coefs).
  *
  */
-template <int D> void apply_far_field(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
     apply_on_unit_cell<D>(false, prec, out, oper, inp, maxIter, absPrec);
 }
 
@@ -253,7 +253,7 @@ template <int D> void apply_far_field(double prec, FunctionTree<D> &out, Convolu
  * no coefs).
  *
  */
-template <int D> void apply_near_field(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void apply_near_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
     apply_on_unit_cell<D>(true, prec, out, oper, inp, maxIter, absPrec);
 }
 
@@ -273,9 +273,9 @@ template <int D> void apply_near_field(double prec, FunctionTree<D> &out, Convol
  * @note The output function should contain only empty root nodes at entry.
  *
  */
-template <int D> void apply(FunctionTree<D> &out, DerivativeOperator<D> &oper, FunctionTree<D> &inp, int dir) {
+template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTree<D, T> &inp, int dir) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
-    TreeBuilder<D> builder;
+    TreeBuilder<D, T> builder;
     int maxScale = out.getMRA().getMaxScale();
 
     int bw[D]; // Operator bandwidth in [x,y,z]
@@ -285,14 +285,14 @@ template <int D> void apply(FunctionTree<D> &out, DerivativeOperator<D> &oper, F
     Timer pre_t;
     oper.calcBandWidths(1.0); // Fixed 0 or 1 for derivatives
     bw[dir] = oper.getMaxBandWidth();
-    CopyAdaptor<D> pre_adaptor(inp, maxScale, bw);
-    DefaultCalculator<D> pre_calculator;
+    CopyAdaptor<D, T> pre_adaptor(inp, maxScale, bw);
+    DefaultCalculator<D, T> pre_calculator;
     builder.build(out, pre_calculator, pre_adaptor, -1);
     pre_t.stop();
 
     // Apply operator on fixed expanded grid
-    SplitAdaptor<D> apply_adaptor(maxScale, false); // Splits no nodes
-    DerivativeCalculator<D> apply_calculator(dir, oper, inp);
+    SplitAdaptor<D, T> apply_adaptor(maxScale, false); // Splits no nodes
+    DerivativeCalculator<D, T> apply_calculator(dir, oper, inp);
     builder.build(out, apply_calculator, apply_adaptor, 0);
     if (out.isPeriodic()) out.rescale(std::pow(2.0, -oper.getOperatorRoot()));
 
@@ -320,10 +320,10 @@ template <int D> void apply(FunctionTree<D> &out, DerivativeOperator<D> &oper, F
  * @note The length of the output vector will be the template dimension D.
  *
  */
-template <int D> FunctionTreeVector<D> gradient(DerivativeOperator<D> &oper, FunctionTree<D> &inp) {
-    FunctionTreeVector<D> out;
+template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp) {
+    FunctionTreeVector<D, T> out;
     for (int d = 0; d < D; d++) {
-        auto *grad_d = new FunctionTree<D>(inp.getMRA());
+        auto *grad_d = new FunctionTree<D, T>(inp.getMRA());
         apply(*grad_d, oper, inp, d);
         out.push_back({1.0, grad_d});
     }
@@ -346,16 +346,16 @@ template <int D> FunctionTreeVector<D> gradient(DerivativeOperator<D> &oper, Fun
  * - The output function should contain only empty root nodes at entry.
  *
  */
-template <int D> void divergence(FunctionTree<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D> &inp) {
+template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> &inp) {
     if (inp.size() != D) MSG_ABORT("Dimension mismatch");
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
-    FunctionTreeVector<D> tmp_vec;
+    FunctionTreeVector<D, T> tmp_vec;
     for (int d = 0; d < D; d++) {
         double coef_d = get_coef(inp, d);
-        FunctionTree<D> &func_d = get_func(inp, d);
-        auto *out_d = new FunctionTree<D>(func_d.getMRA());
+        FunctionTree<D, T> &func_d = get_func(inp, d);
+        auto *out_d = new FunctionTree<D, T>(func_d.getMRA());
         apply(*out_d, oper, func_d, d);
         tmp_vec.push_back(std::make_tuple(coef_d, out_d));
     }
@@ -364,35 +364,62 @@ template <int D> void divergence(FunctionTree<D> &out, DerivativeOperator<D> &op
     clear(tmp_vec, true);
 }
 
-template <int D> void divergence(FunctionTree<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D> *> &inp) {
-    FunctionTreeVector<D> inp_vec;
+template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp) {
+    FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     divergence(out, oper, inp_vec);
 }
 
-template void apply<1>(double prec, FunctionTree<1> &out, ConvolutionOperator<1> &oper, FunctionTree<1> &inp, int maxIter, bool absPrec);
-template void apply<2>(double prec, FunctionTree<2> &out, ConvolutionOperator<2> &oper, FunctionTree<2> &inp, int maxIter, bool absPrec);
-template void apply<3>(double prec, FunctionTree<3> &out, ConvolutionOperator<3> &oper, FunctionTree<3> &inp, int maxIter, bool absPrec);
-template void apply<1>(double prec, FunctionTree<1> &out, ConvolutionOperator<1> &oper, FunctionTree<1> &inp, FunctionTreeVector<1> &precTrees, int maxIter, bool absPrec);
-template void apply<2>(double prec, FunctionTree<2> &out, ConvolutionOperator<2> &oper, FunctionTree<2> &inp, FunctionTreeVector<2> &precTrees, int maxIter, bool absPrec);
-template void apply<3>(double prec, FunctionTree<3> &out, ConvolutionOperator<3> &oper, FunctionTree<3> &inp, FunctionTreeVector<3> &precTrees, int maxIter, bool absPrec);
-template void apply_far_field<1>(double prec, FunctionTree<1> &out, ConvolutionOperator<1> &oper, FunctionTree<1> &inp, int maxIter, bool absPrec);
-template void apply_far_field<2>(double prec, FunctionTree<2> &out, ConvolutionOperator<2> &oper, FunctionTree<2> &inp, int maxIter, bool absPrec);
-template void apply_far_field<3>(double prec, FunctionTree<3> &out, ConvolutionOperator<3> &oper, FunctionTree<3> &inp, int maxIter, bool absPrec);
-template void apply_near_field<1>(double prec, FunctionTree<1> &out, ConvolutionOperator<1> &oper, FunctionTree<1> &inp, int maxIter, bool absPrec);
-template void apply_near_field<2>(double prec, FunctionTree<2> &out, ConvolutionOperator<2> &oper, FunctionTree<2> &inp, int maxIter, bool absPrec);
-template void apply_near_field<3>(double prec, FunctionTree<3> &out, ConvolutionOperator<3> &oper, FunctionTree<3> &inp, int maxIter, bool absPrec);
-template void apply<1>(FunctionTree<1> &out, DerivativeOperator<1> &oper, FunctionTree<1> &inp, int dir);
-template void apply<2>(FunctionTree<2> &out, DerivativeOperator<2> &oper, FunctionTree<2> &inp, int dir);
-template void apply<3>(FunctionTree<3> &out, DerivativeOperator<3> &oper, FunctionTree<3> &inp, int dir);
-template void divergence<1>(FunctionTree<1> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1> &inp);
-template void divergence<2>(FunctionTree<2> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2> &inp);
-template void divergence<3>(FunctionTree<3> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3> &inp);
-template void divergence<1>(FunctionTree<1> &out, DerivativeOperator<1> &oper, std::vector<FunctionTree<1> *> &inp);
-template void divergence<2>(FunctionTree<2> &out, DerivativeOperator<2> &oper, std::vector<FunctionTree<2> *> &inp);
-template void divergence<3>(FunctionTree<3> &out, DerivativeOperator<3> &oper, std::vector<FunctionTree<3> *> &inp);
-template FunctionTreeVector<1> gradient<1>(DerivativeOperator<1> &oper, FunctionTree<1> &inp);
-template FunctionTreeVector<2> gradient<2>(DerivativeOperator<2> &oper, FunctionTree<2> &inp);
-template FunctionTreeVector<3> gradient<3>(DerivativeOperator<3> &oper, FunctionTree<3> &inp);
+template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
+template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
+template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
+template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
+template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
+template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
+template void apply_far_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
+template void apply_far_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
+template void apply_far_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
+template void apply_near_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
+template void apply_near_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
+template void apply_near_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
+template void apply<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTree<1, double> &inp, int dir);
+template void apply<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, FunctionTree<2, double> &inp, int dir);
+template void apply<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTree<3, double> &inp, int dir);
+template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, double> &inp);
+template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, double> &inp);
+template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, double> &inp);
+template void divergence<1, double>(FunctionTree<1, double> &out, DerivativeOperator<1> &oper, std::vector<FunctionTree<1, double> *> &inp);
+template void divergence<2, double>(FunctionTree<2, double> &out, DerivativeOperator<2> &oper, std::vector<FunctionTree<2, double> *> &inp);
+template void divergence<3, double>(FunctionTree<3, double> &out, DerivativeOperator<3> &oper, std::vector<FunctionTree<3, double> *> &inp);
+template FunctionTreeVector<1, double> gradient<1, double>(DerivativeOperator<1> &oper, FunctionTree<1, double> &inp);
+template FunctionTreeVector<2, double> gradient<2, double>(DerivativeOperator<2> &oper, FunctionTree<2, double> &inp);
+template FunctionTreeVector<3, double> gradient<3, double>(DerivativeOperator<3> &oper, FunctionTree<3, double> &inp);
+
+
+
+template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, FunctionTreeVector<1, ComplexDouble> &precTrees, int maxIter, bool absPrec);
+template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, FunctionTreeVector<2, ComplexDouble> &precTrees, int maxIter, bool absPrec);
+template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec);
+template void apply_far_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply_far_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply_far_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply_near_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply_near_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply_near_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int dir);
+template void apply<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int dir);
+template void apply<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int dir);
+template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, FunctionTreeVector<1, ComplexDouble> &inp);
+template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, FunctionTreeVector<2, ComplexDouble> &inp);
+template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, FunctionTreeVector<3, ComplexDouble> &inp);
+template void divergence<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, DerivativeOperator<1> &oper, std::vector<FunctionTree<1, ComplexDouble> *> &inp);
+template void divergence<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, DerivativeOperator<2> &oper, std::vector<FunctionTree<2, ComplexDouble> *> &inp);
+template void divergence<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, DerivativeOperator<3> &oper, std::vector<FunctionTree<3, ComplexDouble> *> &inp);
+template FunctionTreeVector<1, ComplexDouble> gradient<1, ComplexDouble>(DerivativeOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp);
+template FunctionTreeVector<2, ComplexDouble> gradient<2, ComplexDouble>(DerivativeOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp);
+template FunctionTreeVector<3, ComplexDouble> gradient<3, ComplexDouble>(DerivativeOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index ae38e96ad..f6217e381 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -30,18 +30,18 @@
 namespace mrcpp {
 
 // clang-format off
-template <int D> class FunctionTree;
+template <int D, typename T> class FunctionTree;
 template <int D> class DerivativeOperator;
 template <int D> class ConvolutionOperator;
 
-template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, FunctionTreeVector<D> &precTrees, int maxIter = -1, bool absPrec = false);
-template <int D> void apply_far_field(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply_near_field(double prec, FunctionTree<D> &out, ConvolutionOperator<D> &oper, FunctionTree<D> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply(FunctionTree<D> &out, DerivativeOperator<D> &oper, FunctionTree<D> &inp, int dir = -1);
-template <int D> void divergence(FunctionTree<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D> &inp);
-template <int D> void divergence(FunctionTree<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D> *> &inp);
-template <int D> FunctionTreeVector<D> gradient(DerivativeOperator<D> &oper, FunctionTree<D> &inp);
+template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply_near_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTree<D, T> &inp, int dir = -1);
+template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> &inp);
+template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp);
+template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp);
 // clang-format on
 
 } // namespace mrcpp
diff --git a/src/treebuilders/complex_apply.cpp b/src/treebuilders/complex_apply.cpp
index ab410244e..8b22d19f6 100644
--- a/src/treebuilders/complex_apply.cpp
+++ b/src/treebuilders/complex_apply.cpp
@@ -101,9 +101,6 @@ void apply
 }
 
 
-
-
-
 template
 void apply <1>
 (
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index fb9e65a91..f37730701 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -48,11 +48,11 @@ namespace mrcpp {
  * @note This algorithm will start at whatever grid is present in the `out`
  * tree when the function is called.
  */
-template <int D> void build_grid(FunctionTree<D> &out, int scales) {
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, int scales) {
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    DefaultCalculator<D> calculator;
-    SplitAdaptor<D> adaptor(maxScale, true); // Splits all nodes
+    TreeBuilder<D, T> builder;
+    DefaultCalculator<D, T> calculator;
+    SplitAdaptor<D, T> adaptor(maxScale, true); // Splits all nodes
     for (auto n = 0; n < scales; n++) builder.build(out, calculator, adaptor, 1);
 }
 
@@ -75,11 +75,11 @@ template <int D> void build_grid(FunctionTree<D> &out, int scales) {
  * particular `RepresentableFunction`.
  *
  */
-template <int D> void build_grid(FunctionTree<D> &out, const RepresentableFunction<D> &inp, int maxIter) {
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const RepresentableFunction<D, T> &inp, int maxIter) {
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    AnalyticAdaptor<D> adaptor(inp, maxScale);
-    DefaultCalculator<D> calculator;
+    TreeBuilder<D, T> builder;
+    AnalyticAdaptor<D, T> adaptor(inp, maxScale);
+    DefaultCalculator<D, T> calculator;
     builder.build(out, calculator, adaptor, maxIter);
     print::separator(10, ' ');
 }
@@ -103,13 +103,13 @@ template <int D> void build_grid(FunctionTree<D> &out, const RepresentableFuncti
  * term. Higher exponent means finer resolution.
  *
  */
-template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, int maxIter) {
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const GaussExp<D, T> &inp, int maxIter) {
     if (!out.getMRA().getWorldBox().isPeriodic()) {
         auto maxScale = out.getMRA().getMaxScale();
-        TreeBuilder<D> builder;
-        DefaultCalculator<D> calculator;
+        TreeBuilder<D, T> builder;
+        DefaultCalculator<D, T> calculator;
         for (auto i = 0; i < inp.size(); i++) {
-            AnalyticAdaptor<D> adaptor(inp.getFunc(i), maxScale);
+	  AnalyticAdaptor<D, T> adaptor(inp.getFunc(i), maxScale);
             builder.build(out, calculator, adaptor, maxIter);
         }
     } else {
@@ -142,12 +142,12 @@ template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, i
  * but NOT vice versa.
  *
  */
-template <int D> void build_grid(FunctionTree<D> &out, FunctionTree<D> &inp, int maxIter) {
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    CopyAdaptor<D> adaptor(inp, maxScale, nullptr);
-    DefaultCalculator<D> calculator;
+    TreeBuilder<D, T> builder;
+    CopyAdaptor<D, T> adaptor(inp, maxScale, nullptr);
+    DefaultCalculator<D, T> calculator;
     builder.build(out, calculator, adaptor, maxIter);
     print::separator(10, ' ');
 }
@@ -171,20 +171,20 @@ template <int D> void build_grid(FunctionTree<D> &out, FunctionTree<D> &inp, int
  * `maxIter` is reached).
  *
  */
-template <int D> void build_grid(FunctionTree<D> &out, FunctionTreeVector<D> &inp, int maxIter) {
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    CopyAdaptor<D> adaptor(inp, maxScale, nullptr);
-    DefaultCalculator<D> calculator;
+    TreeBuilder<D, T> builder;
+    CopyAdaptor<D, T> adaptor(inp, maxScale, nullptr);
+    DefaultCalculator<D, T> calculator;
     builder.build(out, calculator, adaptor, maxIter);
     print::separator(10, ' ');
 }
 
-template <int D> void build_grid(FunctionTree<D> &out, std::vector<FunctionTree<D> *> &inp, int maxIter) {
-    FunctionTreeVector<D> inp_vec;
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter) {
+    FunctionTreeVector<D, T> inp_vec;
     for (auto *t : inp) inp_vec.push_back({1.0, t});
     build_grid(out, inp_vec, maxIter);
 }
@@ -202,8 +202,8 @@ template <int D> void build_grid(FunctionTree<D> &out, std::vector<FunctionTree<
  * tree when the function is called and will overwrite any existing coefs.
  *
  */
-template <int D> void copy_func(FunctionTree<D> &out, FunctionTree<D> &inp) {
-    FunctionTreeVector<D> tmp_vec;
+template <int D, typename T> void copy_func(FunctionTree<D, T> &out, FunctionTree<D, T> &inp) {
+    FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back(std::make_tuple(1.0, &inp));
     add(-1.0, out, tmp_vec);
 }
@@ -218,7 +218,7 @@ template <int D> void copy_func(FunctionTree<D> &out, FunctionTree<D> &inp) {
  * will _extend_ the existing grid.
  *
  */
-template <int D> void copy_grid(FunctionTree<D> &out, FunctionTree<D> &inp) {
+template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA")
     out.clear();
     build_grid(out, inp);
@@ -233,9 +233,9 @@ template <int D> void copy_grid(FunctionTree<D> &out, FunctionTree<D> &inp) {
  * grid refinement as well.
  *
  */
-template <int D> void clear_grid(FunctionTree<D> &out) {
-    TreeBuilder<D> builder;
-    DefaultCalculator<D> calculator;
+template <int D, typename T> void clear_grid(FunctionTree<D, T> &out) {
+    TreeBuilder<D, T> builder;
+    DefaultCalculator<D, T> calculator;
     builder.clear(out, calculator);
 }
 
@@ -250,11 +250,11 @@ template <int D> void clear_grid(FunctionTree<D> &out) {
  * the function representation unchanged, but on a larger grid.
  *
  */
-template <int D> int refine_grid(FunctionTree<D> &out, int scales) {
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, int scales) {
     auto nSplit = 0;
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    SplitAdaptor<D> adaptor(maxScale, true); // Splits all nodes
+    TreeBuilder<D, T> builder;
+    SplitAdaptor<D, T> adaptor(maxScale, true); // Splits all nodes
     for (auto n = 0; n < scales; n++) {
         nSplit += builder.split(out, adaptor, true); // Transfers coefs to children
     }
@@ -274,10 +274,10 @@ template <int D> int refine_grid(FunctionTree<D> &out, int scales) {
  * unchanged, but (possibly) on a larger grid.
  *
  */
-template <int D> int refine_grid(FunctionTree<D> &out, double prec, bool absPrec) {
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, double prec, bool absPrec) {
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
     int nSplit = builder.split(out, adaptor, true);
     return nSplit;
 }
@@ -294,11 +294,11 @@ template <int D> int refine_grid(FunctionTree<D> &out, double prec, bool absPrec
  * leaving the function representation unchanged, but on a larger grid.
  *
  */
-template <int D> int refine_grid(FunctionTree<D> &out, FunctionTree<D> &inp) {
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA")
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    CopyAdaptor<D> adaptor(inp, maxScale, nullptr);
+    TreeBuilder<D, T> builder;
+    CopyAdaptor<D, T> adaptor(inp, maxScale, nullptr);
     auto nSplit = builder.split(out, adaptor, true);
     return nSplit;
 }
@@ -316,52 +316,93 @@ template <int D> int refine_grid(FunctionTree<D> &out, FunctionTree<D> &inp) {
  * is implemented in the particular `RepresentableFunction`.
  *
  */
-template <int D> int refine_grid(FunctionTree<D> &out, const RepresentableFunction<D> &inp) {
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, const RepresentableFunction<D, T> &inp) {
     auto maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    AnalyticAdaptor<D> adaptor(inp, maxScale);
+    TreeBuilder<D, T> builder;
+    AnalyticAdaptor<D, T> adaptor(inp, maxScale);
     int nSplit = builder.split(out, adaptor, true);
     return nSplit;
 }
 
-template void build_grid<1>(FunctionTree<1> &out, int scales);
-template void build_grid<2>(FunctionTree<2> &out, int scales);
-template void build_grid<3>(FunctionTree<3> &out, int scales);
-template void build_grid<1>(FunctionTree<1> &out, const GaussExp<1> &inp, int maxIter);
-template void build_grid<2>(FunctionTree<2> &out, const GaussExp<2> &inp, int maxIter);
-template void build_grid<3>(FunctionTree<3> &out, const GaussExp<3> &inp, int maxIter);
-template void build_grid<1>(FunctionTree<1> &out, const RepresentableFunction<1> &inp, int maxIter);
-template void build_grid<2>(FunctionTree<2> &out, const RepresentableFunction<2> &inp, int maxIter);
-template void build_grid<3>(FunctionTree<3> &out, const RepresentableFunction<3> &inp, int maxIter);
-template void build_grid<1>(FunctionTree<1> &out, FunctionTree<1> &inp, int maxIter);
-template void build_grid<2>(FunctionTree<2> &out, FunctionTree<2> &inp, int maxIter);
-template void build_grid<3>(FunctionTree<3> &out, FunctionTree<3> &inp, int maxIter);
-template void build_grid<1>(FunctionTree<1> &out, FunctionTreeVector<1> &inp, int maxIter);
-template void build_grid<2>(FunctionTree<2> &out, FunctionTreeVector<2> &inp, int maxIter);
-template void build_grid<3>(FunctionTree<3> &out, FunctionTreeVector<3> &inp, int maxIter);
-template void build_grid<1>(FunctionTree<1> &out, std::vector<FunctionTree<1> *> &inp, int maxIter);
-template void build_grid<2>(FunctionTree<2> &out, std::vector<FunctionTree<2> *> &inp, int maxIter);
-template void build_grid<3>(FunctionTree<3> &out, std::vector<FunctionTree<3> *> &inp, int maxIter);
-template void copy_func<1>(FunctionTree<1> &out, FunctionTree<1> &inp);
-template void copy_func<2>(FunctionTree<2> &out, FunctionTree<2> &inp);
-template void copy_func<3>(FunctionTree<3> &out, FunctionTree<3> &inp);
-template void copy_grid<1>(FunctionTree<1> &out, FunctionTree<1> &inp);
-template void copy_grid<2>(FunctionTree<2> &out, FunctionTree<2> &inp);
-template void copy_grid<3>(FunctionTree<3> &out, FunctionTree<3> &inp);
-template void clear_grid<1>(FunctionTree<1> &out);
-template void clear_grid<2>(FunctionTree<2> &out);
-template void clear_grid<3>(FunctionTree<3> &out);
-template int refine_grid<1>(FunctionTree<1> &out, int scales);
-template int refine_grid<2>(FunctionTree<2> &out, int scales);
-template int refine_grid<3>(FunctionTree<3> &out, int scales);
-template int refine_grid<1>(FunctionTree<1> &out, double prec, bool absPrec);
-template int refine_grid<2>(FunctionTree<2> &out, double prec, bool absPrec);
-template int refine_grid<3>(FunctionTree<3> &out, double prec, bool absPrec);
-template int refine_grid<1>(FunctionTree<1> &out, FunctionTree<1> &inp);
-template int refine_grid<2>(FunctionTree<2> &out, FunctionTree<2> &inp);
-template int refine_grid<3>(FunctionTree<3> &out, FunctionTree<3> &inp);
-template int refine_grid<1>(FunctionTree<1> &out, const RepresentableFunction<1> &inp);
-template int refine_grid<2>(FunctionTree<2> &out, const RepresentableFunction<2> &inp);
-template int refine_grid<3>(FunctionTree<3> &out, const RepresentableFunction<3> &inp);
+template void build_grid<1, double>(FunctionTree<1, double> &out, int scales);
+template void build_grid<2, double>(FunctionTree<2, double> &out, int scales);
+template void build_grid<3, double>(FunctionTree<3, double> &out, int scales);
+template void build_grid<1, double>(FunctionTree<1, double> &out, const GaussExp<1, double> &inp, int maxIter);
+template void build_grid<2, double>(FunctionTree<2, double> &out, const GaussExp<2, double> &inp, int maxIter);
+template void build_grid<3, double>(FunctionTree<3, double> &out, const GaussExp<3, double> &inp, int maxIter);
+template void build_grid<1, double>(FunctionTree<1, double> &out, const RepresentableFunction<1, double> &inp, int maxIter);
+template void build_grid<2, double>(FunctionTree<2, double> &out, const RepresentableFunction<2, double> &inp, int maxIter);
+template void build_grid<3, double>(FunctionTree<3, double> &out, const RepresentableFunction<3, double> &inp, int maxIter);
+template void build_grid<1, double>(FunctionTree<1, double> &out, FunctionTree<1, double> &inp, int maxIter);
+template void build_grid<2, double>(FunctionTree<2, double> &out, FunctionTree<2, double> &inp, int maxIter);
+template void build_grid<3, double>(FunctionTree<3, double> &out, FunctionTree<3, double> &inp, int maxIter);
+template void build_grid<1, double>(FunctionTree<1, double> &out, FunctionTreeVector<1, double> &inp, int maxIter);
+template void build_grid<2, double>(FunctionTree<2, double> &out, FunctionTreeVector<2, double> &inp, int maxIter);
+template void build_grid<3, double>(FunctionTree<3, double> &out, FunctionTreeVector<3, double> &inp, int maxIter);
+template void build_grid<1, double>(FunctionTree<1, double> &out, std::vector<FunctionTree<1, double> *> &inp, int maxIter);
+template void build_grid<2, double>(FunctionTree<2, double> &out, std::vector<FunctionTree<2, double> *> &inp, int maxIter);
+template void build_grid<3, double>(FunctionTree<3, double> &out, std::vector<FunctionTree<3, double> *> &inp, int maxIter);
+template void copy_func<1, double>(FunctionTree<1, double> &out, FunctionTree<1, double> &inp);
+template void copy_func<2, double>(FunctionTree<2, double> &out, FunctionTree<2, double> &inp);
+template void copy_func<3, double>(FunctionTree<3, double> &out, FunctionTree<3, double> &inp);
+template void copy_grid<1, double>(FunctionTree<1, double> &out, FunctionTree<1, double> &inp);
+template void copy_grid<2, double>(FunctionTree<2, double> &out, FunctionTree<2, double> &inp);
+template void copy_grid<3, double>(FunctionTree<3, double> &out, FunctionTree<3, double> &inp);
+template void clear_grid<1, double>(FunctionTree<1, double> &out);
+template void clear_grid<2, double>(FunctionTree<2, double> &out);
+template void clear_grid<3, double>(FunctionTree<3, double> &out);
+template int refine_grid<1, double>(FunctionTree<1, double> &out, int scales);
+template int refine_grid<2, double>(FunctionTree<2, double> &out, int scales);
+template int refine_grid<3, double>(FunctionTree<3, double> &out, int scales);
+template int refine_grid<1, double>(FunctionTree<1, double> &out, double prec, bool absPrec);
+template int refine_grid<2, double>(FunctionTree<2, double> &out, double prec, bool absPrec);
+template int refine_grid<3, double>(FunctionTree<3, double> &out, double prec, bool absPrec);
+template int refine_grid<1, double>(FunctionTree<1, double> &out, FunctionTree<1, double> &inp);
+template int refine_grid<2, double>(FunctionTree<2, double> &out, FunctionTree<2, double> &inp);
+template int refine_grid<3, double>(FunctionTree<3, double> &out, FunctionTree<3, double> &inp);
+template int refine_grid<1, double>(FunctionTree<1, double> &out, const RepresentableFunction<1, double> &inp);
+template int refine_grid<2, double>(FunctionTree<2, double> &out, const RepresentableFunction<2, double> &inp);
+template int refine_grid<3, double>(FunctionTree<3, double> &out, const RepresentableFunction<3, double> &inp);
 
+
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, int scales);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, int scales);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, int scales);
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const GaussExp<1, ComplexDouble> &inp, int maxIter);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const GaussExp<2, ComplexDouble> &inp, int maxIter);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const GaussExp<3, ComplexDouble> &inp, int maxIter);
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const RepresentableFunction<1, ComplexDouble> &inp, int maxIter);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp, int maxIter);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp, int maxIter);
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp, int maxIter);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp, int maxIter);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp, int maxIter);
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, FunctionTreeVector<1, ComplexDouble> &inp, int maxIter);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, FunctionTreeVector<2, ComplexDouble> &inp, int maxIter);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, FunctionTreeVector<3, ComplexDouble> &inp, int maxIter);
+template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, std::vector<FunctionTree<1, ComplexDouble> *> &inp, int maxIter);
+template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, std::vector<FunctionTree<2, ComplexDouble> *> &inp, int maxIter);
+template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, std::vector<FunctionTree<3, ComplexDouble> *> &inp, int maxIter);
+template void copy_func<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp);
+template void copy_func<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp);
+template void copy_func<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp);
+template void copy_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp);
+template void copy_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp);
+template void copy_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp);
+template void clear_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out);
+template void clear_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out);
+template void clear_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out);
+template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, int scales);
+template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, int scales);
+template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, int scales);
+template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, double prec, bool absPrec);
+template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, double prec, bool absPrec);
+template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, double prec, bool absPrec);
+template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp);
+template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp);
+template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp);
+template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const RepresentableFunction<1, ComplexDouble> &inp);
+template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp);
+template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp);
+  
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.h b/src/treebuilders/grid.h
index 1f4c3e4f5..8bb683fd2 100644
--- a/src/treebuilders/grid.h
+++ b/src/treebuilders/grid.h
@@ -30,17 +30,17 @@
 #include "trees/FunctionTreeVector.h"
 
 namespace mrcpp {
-template <int D> void build_grid(FunctionTree<D> &out, int scales);
-template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, int maxIter = -1);
-template <int D> void build_grid(FunctionTree<D> &out, const RepresentableFunction<D> &inp, int maxIter = -1);
-template <int D> void build_grid(FunctionTree<D> &out, FunctionTree<D> &inp, int maxIter = -1);
-template <int D> void build_grid(FunctionTree<D> &out, FunctionTreeVector<D> &inp, int maxIter = -1);
-template <int D> void build_grid(FunctionTree<D> &out, std::vector<FunctionTree<D> *> &inp, int maxIter = -1);
-template <int D> void copy_func(FunctionTree<D> &out, FunctionTree<D> &inp);
-template <int D> void copy_grid(FunctionTree<D> &out, FunctionTree<D> &inp);
-template <int D> void clear_grid(FunctionTree<D> &out);
-template <int D> int refine_grid(FunctionTree<D> &out, int scales);
-template <int D> int refine_grid(FunctionTree<D> &out, double prec, bool absPrec = false);
-template <int D> int refine_grid(FunctionTree<D> &out, FunctionTree<D> &inp);
-template <int D> int refine_grid(FunctionTree<D> &out, const RepresentableFunction<D> &inp);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, int scales);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const GaussExp<D, T> &inp, int maxIter = -1);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const RepresentableFunction<D, T> &inp, int maxIter = -1);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter = -1);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter = -1);
+template <int D, typename T> void build_grid(FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter = -1);
+template <int D, typename T> void copy_func(FunctionTree<D, T> &out, FunctionTree<D, T> &inp);
+template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp);
+template <int D, typename T> void clear_grid(FunctionTree<D, T> &out);
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, int scales);
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, double prec, bool absPrec = false);
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp);
+template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, const RepresentableFunction<D, T> &inp);
 } // namespace mrcpp
diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp
index 98824d002..d4cb0b900 100644
--- a/src/treebuilders/map.cpp
+++ b/src/treebuilders/map.cpp
@@ -65,13 +65,13 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D>
-void map(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, FMap fmap, int maxIter, bool absPrec) {
+template <int D, typename T>
+void map(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, FMap<T, T> fmap, int maxIter, bool absPrec) {
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    MapCalculator<D> calculator(fmap, inp);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    MapCalculator<D, T> calculator(fmap, inp);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -89,8 +89,12 @@ void map(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, FMap fmap, int
     print::separator(10, ' ');
 }
 
-template void map<1>(double prec, FunctionTree<1> &out, FunctionTree<1> &inp, FMap fmap, int maxIter, bool absPrec);
-template void map<2>(double prec, FunctionTree<2> &out, FunctionTree<2> &inp, FMap fmap, int maxIter, bool absPrec);
-template void map<3>(double prec, FunctionTree<3> &out, FunctionTree<3> &inp, FMap fmap, int maxIter, bool absPrec);
+template void map<1, double>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
+template void map<2, double>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
+template void map<3, double>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
+
+template void map<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
+template void map<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
+template void map<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
 
 } // Namespace mrcpp
diff --git a/src/treebuilders/map.h b/src/treebuilders/map.h
index 1c54dac32..db2788c27 100644
--- a/src/treebuilders/map.h
+++ b/src/treebuilders/map.h
@@ -28,10 +28,10 @@
 #include "trees/FunctionTreeVector.h"
 
 namespace mrcpp {
-template <int D> class RepresentableFunction;
-template <int D> class FunctionTree;
+template <int D, typename T> class RepresentableFunction;
+template <int D, typename T> class FunctionTree;
 
-template <int D>
-void map(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, FMap fmap, int maxIter = -1, bool absPrec = false);
+template <int D, typename T>
+void map(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, FMap<T, T> fmap, int maxIter = -1, bool absPrec = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index a21e539ab..3ab96cd64 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -68,16 +68,16 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D>
+template <int D, typename T>
 void multiply(double prec,
-              FunctionTree<D> &out,
+              FunctionTree<D, T> &out,
               double c,
-              FunctionTree<D> &inp_a,
-              FunctionTree<D> &inp_b,
+              FunctionTree<D, T> &inp_a,
+              FunctionTree<D, T> &inp_b,
               int maxIter,
               bool absPrec,
               bool useMaxNorms) {
-    FunctionTreeVector<D> tmp_vec;
+    FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back({c, &inp_a});
     tmp_vec.push_back({1.0, &inp_b});
     multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms);
@@ -106,10 +106,10 @@ void multiply(double prec,
  * no coefs).
  *
  */
-template <int D>
+template <int D, typename T>
 void multiply(double prec,
-              FunctionTree<D> &out,
-              FunctionTreeVector<D> &inp,
+              FunctionTree<D, T> &out,
+              FunctionTreeVector<D, T> &inp,
               int maxIter,
               bool absPrec,
               bool useMaxNorms) {
@@ -117,15 +117,15 @@ void multiply(double prec,
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    MultiplicationCalculator<D> calculator(inp);
+    TreeBuilder<D, T> builder;
+    MultiplicationCalculator<D, T> calculator(inp);
 
     if (useMaxNorms) {
         for (int i = 0; i < inp.size(); i++) get_func(inp, i).makeMaxSquareNorms();
-        MultiplicationAdaptor<D> adaptor(prec, maxScale, inp);
+        MultiplicationAdaptor<D, T> adaptor(prec, maxScale, inp);
         builder.build(out, calculator, adaptor, maxIter);
     } else {
-        WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
+        WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
         builder.build(out, calculator, adaptor, maxIter);
     }
 
@@ -136,7 +136,7 @@ void multiply(double prec,
 
     Timer clean_t;
     for (int i = 0; i < inp.size(); i++) {
-        FunctionTree<D> &tree = get_func(inp, i);
+        FunctionTree<D, T> &tree = get_func(inp, i);
         tree.deleteGenerated();
     }
     clean_t.stop();
@@ -146,14 +146,14 @@ void multiply(double prec,
     print::separator(10, ' ');
 }
 
-template <int D>
+template <int D, typename T>
 void multiply(double prec,
-              FunctionTree<D> &out,
-              std::vector<FunctionTree<D> *> &inp,
+              FunctionTree<D, T> &out,
+              std::vector<FunctionTree<D, T> *> &inp,
               int maxIter,
               bool absPrec,
               bool useMaxNorms) {
-    FunctionTreeVector<D> inp_vec;
+    FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms);
 }
@@ -179,13 +179,13 @@ void multiply(double prec,
  * no coefs).
  *
  */
-template <int D> void square(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    SquareCalculator<D> calculator(inp);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    SquareCalculator<D, T> calculator(inp);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -225,14 +225,14 @@ template <int D> void square(double prec, FunctionTree<D> &out, FunctionTree<D>
  * no coefs).
  *
  */
-template <int D>
-void power(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, double p, int maxIter, bool absPrec) {
+template <int D, typename T>
+void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double p, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
-    PowerCalculator<D> calculator(inp, p);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
+    PowerCalculator<D, T> calculator(inp, p);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -267,22 +267,22 @@ void power(double prec, FunctionTree<D> &out, FunctionTree<D> &inp, double p, in
  * @note The length of the input vectors must be the same.
  *
  */
-template <int D>
+template <int D, typename T>
 void dot(double prec,
-         FunctionTree<D> &out,
-         FunctionTreeVector<D> &inp_a,
-         FunctionTreeVector<D> &inp_b,
+         FunctionTree<D, T> &out,
+         FunctionTreeVector<D, T> &inp_a,
+         FunctionTreeVector<D, T> &inp_b,
          int maxIter,
          bool absPrec) {
     if (inp_a.size() != inp_b.size()) MSG_ABORT("Input length mismatch");
 
-    FunctionTreeVector<D> tmp_vec;
+    FunctionTreeVector<D, T> tmp_vec;
     for (int d = 0; d < inp_a.size(); d++) {
         double coef_a = get_coef(inp_a, d);
         double coef_b = get_coef(inp_b, d);
-        FunctionTree<D> &tree_a = get_func(inp_a, d);
-        FunctionTree<D> &tree_b = get_func(inp_b, d);
-        auto *out_d = new FunctionTree<D>(out.getMRA());
+        FunctionTree<D, T> &tree_a = get_func(inp_a, d);
+        FunctionTree<D, T> &tree_b = get_func(inp_b, d);
+        auto *out_d = new FunctionTree<D, T>(out.getMRA());
         build_grid(*out_d, out);
         multiply(prec, *out_d, 1.0, tree_a, tree_b, maxIter, absPrec);
         tmp_vec.push_back({coef_a * coef_b, out_d});
@@ -305,19 +305,19 @@ void dot(double prec,
  * grids overlap.
  *
  */
-template <int D> double dot(FunctionTree<D> &bra, FunctionTree<D> &ket) {
+template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket) {
     if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible");
 
-    MWNodeVector<D> nodeTable;
-    TreeIterator<D> it(bra);
+    MWNodeVector<D, T> nodeTable;
+    TreeIterator<D, T> it(bra);
     it.setReturnGenNodes(false);
     while (it.next()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         nodeTable.push_back(&node);
     }
     int nNodes = nodeTable.size();
-    double result = 0.0;
-    double locResult = 0.0;
+    T result = 0.0;
+    T locResult = 0.0;
     // OMP is disabled in order to get EXACT results (to the very last digit), the
     // order of summation makes the result different beyond the 14th digit or so.
     // OMP does improve the performace, but its not worth it for the time being.
@@ -326,11 +326,11 @@ template <int D> double dot(FunctionTree<D> &bra, FunctionTree<D> &ket) {
     //    {
     //#pragma omp for schedule(guided)
     for (int n = 0; n < nNodes; n++) {
-        const auto &braNode = static_cast<const FunctionNode<D> &>(*nodeTable[n]);
-        const MWNode<D> *mwNode = ket.findNode(braNode.getNodeIndex());
+        const auto &braNode = static_cast<const FunctionNode<D, T> &>(*nodeTable[n]);
+        const MWNode<D, T> *mwNode = ket.findNode(braNode.getNodeIndex());
         if (mwNode == nullptr) continue;
 
-        const auto &ketNode = static_cast<const FunctionNode<D> &>(*mwNode);
+        const auto &ketNode = static_cast<const FunctionNode<D, T> &>(*mwNode);
         if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode);
         locResult += dot_wavelet(braNode, ketNode);
     }
@@ -352,30 +352,30 @@ template <int D> double dot(FunctionTree<D> &bra, FunctionTree<D> &ket) {
  * distribution within the node.
  * If the product is zero, the functions are disjoints.
  */
-template <int D> double node_norm_dot(FunctionTree<D> &bra, FunctionTree<D> &ket, bool exact) {
+template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket, bool exact) {
     if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Incompatible MRA");
 
     double result = 0.0;
     int ncoef = bra.getKp1_d() * bra.getTDim();
-    double valA[ncoef];
-    double valB[ncoef];
+    T valA[ncoef];
+    T valB[ncoef];
     int nNodes = bra.getNEndNodes();
 
     for (int n = 0; n < nNodes; n++) {
-        FunctionNode<D> &node = bra.getEndFuncNode(n);
+        FunctionNode<D, T> &node = bra.getEndFuncNode(n);
         const NodeIndex<D> idx = node.getNodeIndex();
         if (exact) {
             // convert to interpolating coef, take abs, convert back
-            FunctionNode<D> *mwNode = static_cast<FunctionNode<D> *>(ket.findNode(idx));
+            FunctionNode<D, T> *mwNode = static_cast<FunctionNode<D, T> *>(ket.findNode(idx));
             if (mwNode == nullptr) MSG_ABORT("Trees must have same grid");
             node.getAbsCoefs(valA);
             mwNode->getAbsCoefs(valB);
-            for (int i = 0; i < ncoef; i++) result += valA[i] * valB[i];
+            for (int i = 0; i < ncoef; i++) result += std::abs(valA[i] * valB[i]);
         } else {
             // approximate by product of node norms
             int rIdx = ket.getRootBox().getBoxIndex(idx);
             assert(rIdx >= 0);
-            const MWNode<D> &root = ket.getRootBox().getNode(rIdx);
+            const MWNode<D, T> &root = ket.getRootBox().getNode(rIdx);
             result += std::sqrt(node.getSquareNorm()) * root.getNodeNorm(idx);
         }
     }
@@ -383,124 +383,249 @@ template <int D> double node_norm_dot(FunctionTree<D> &bra, FunctionTree<D> &ket
     return result;
 }
 
-template void multiply<1>(double prec,
-                          FunctionTree<1> &out,
+template void multiply<1, double>(double prec,
+                          FunctionTree<1, double> &out,
+                          double c,
+                          FunctionTree<1, double> &tree_a,
+                          FunctionTree<1, double> &tree_b,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<2, double>(double prec,
+                          FunctionTree<2, double> &out,
+                          double c,
+                          FunctionTree<2, double> &tree_a,
+                          FunctionTree<2, double> &tree_b,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<3, double>(double prec,
+                          FunctionTree<3, double> &out,
+                          double c,
+                          FunctionTree<3, double> &tree_a,
+                          FunctionTree<3, double> &tree_b,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<1, double>(double prec,
+                          FunctionTree<1, double> &out,
+                          FunctionTreeVector<1, double> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<2, double>(double prec,
+                          FunctionTree<2, double> &out,
+                          FunctionTreeVector<2, double> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<3, double>(double prec,
+                          FunctionTree<3, double> &out,
+                          FunctionTreeVector<3, double> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<1, double>(double prec,
+                          FunctionTree<1, double> &out,
+                          std::vector<FunctionTree<1, double> *> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<2, double>(double prec,
+                          FunctionTree<2, double> &out,
+                          std::vector<FunctionTree<2, double> *> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void multiply<3, double>(double prec,
+                          FunctionTree<3, double> &out,
+                          std::vector<FunctionTree<3, double> *> &inp,
+                          int maxIter,
+                          bool absPrec,
+                          bool useMaxNorms);
+template void power<1, double>(double prec,
+                       FunctionTree<1, double> &out,
+                       FunctionTree<1, double> &tree,
+                       double pow,
+                       int maxIter,
+                       bool absPrec);
+template void power<2, double>(double prec,
+                       FunctionTree<2, double> &out,
+                       FunctionTree<2, double> &tree,
+                       double pow,
+                       int maxIter,
+                       bool absPrec);
+template void power<3, double>(double prec,
+                       FunctionTree<3, double> &out,
+                       FunctionTree<3, double> &tree,
+                       double pow,
+                       int maxIter,
+                       bool absPrec);
+template void square<1, double>(double prec,
+                        FunctionTree<1, double> &out,
+                        FunctionTree<1, double> &tree,
+                        int maxIter,
+                        bool absPrec);
+template void square<2, double>(double prec,
+                        FunctionTree<2, double> &out,
+                        FunctionTree<2, double> &tree,
+                        int maxIter,
+                        bool absPrec);
+template void square<3, double>(double prec,
+                        FunctionTree<3, double> &out,
+                        FunctionTree<3, double> &tree,
+                        int maxIter,
+                        bool absPrec);
+template void dot<1, double>(double prec,
+                     FunctionTree<1, double> &out,
+                     FunctionTreeVector<1, double> &inp_a,
+                     FunctionTreeVector<1, double> &inp_b,
+                     int maxIter,
+                     bool absPrec);
+template void dot<2, double>(double prec,
+                     FunctionTree<2, double> &out,
+                     FunctionTreeVector<2, double> &inp_a,
+                     FunctionTreeVector<2, double> &inp_b,
+                     int maxIter,
+                     bool absPrec);
+template void dot<3, double>(double prec,
+                     FunctionTree<3, double> &out,
+                     FunctionTreeVector<3, double> &inp_a,
+                     FunctionTreeVector<3, double> &inp_b,
+                     int maxIter,
+                     bool absPrec);
+
+template double dot<1, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket);
+template double dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket);
+template double dot<3, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket);
+
+template double node_norm_dot<1, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket, bool exact);
+template double node_norm_dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket, bool exact);
+template double node_norm_dot<3, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket, bool exact);
+
+
+
+
+
+template void multiply<1, ComplexDouble>(double prec,
+                          FunctionTree<1, ComplexDouble> &out,
                           double c,
-                          FunctionTree<1> &tree_a,
-                          FunctionTree<1> &tree_b,
+                          FunctionTree<1, ComplexDouble> &tree_a,
+                          FunctionTree<1, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<2>(double prec,
-                          FunctionTree<2> &out,
+template void multiply<2, ComplexDouble>(double prec,
+                          FunctionTree<2, ComplexDouble> &out,
                           double c,
-                          FunctionTree<2> &tree_a,
-                          FunctionTree<2> &tree_b,
+                          FunctionTree<2, ComplexDouble> &tree_a,
+                          FunctionTree<2, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<3>(double prec,
-                          FunctionTree<3> &out,
+template void multiply<3, ComplexDouble>(double prec,
+                          FunctionTree<3, ComplexDouble> &out,
                           double c,
-                          FunctionTree<3> &tree_a,
-                          FunctionTree<3> &tree_b,
+                          FunctionTree<3, ComplexDouble> &tree_a,
+                          FunctionTree<3, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<1>(double prec,
-                          FunctionTree<1> &out,
-                          FunctionTreeVector<1> &inp,
+template void multiply<1, ComplexDouble>(double prec,
+                          FunctionTree<1, ComplexDouble> &out,
+                          FunctionTreeVector<1, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<2>(double prec,
-                          FunctionTree<2> &out,
-                          FunctionTreeVector<2> &inp,
+template void multiply<2, ComplexDouble>(double prec,
+                          FunctionTree<2, ComplexDouble> &out,
+                          FunctionTreeVector<2, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<3>(double prec,
-                          FunctionTree<3> &out,
-                          FunctionTreeVector<3> &inp,
+template void multiply<3, ComplexDouble>(double prec,
+                          FunctionTree<3, ComplexDouble> &out,
+                          FunctionTreeVector<3, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<1>(double prec,
-                          FunctionTree<1> &out,
-                          std::vector<FunctionTree<1> *> &inp,
+template void multiply<1, ComplexDouble>(double prec,
+                          FunctionTree<1, ComplexDouble> &out,
+                          std::vector<FunctionTree<1, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<2>(double prec,
-                          FunctionTree<2> &out,
-                          std::vector<FunctionTree<2> *> &inp,
+template void multiply<2, ComplexDouble>(double prec,
+                          FunctionTree<2, ComplexDouble> &out,
+                          std::vector<FunctionTree<2, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void multiply<3>(double prec,
-                          FunctionTree<3> &out,
-                          std::vector<FunctionTree<3> *> &inp,
+template void multiply<3, ComplexDouble>(double prec,
+                          FunctionTree<3, ComplexDouble> &out,
+                          std::vector<FunctionTree<3, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
                           bool useMaxNorms);
-template void power<1>(double prec,
-                       FunctionTree<1> &out,
-                       FunctionTree<1> &tree,
+template void power<1, ComplexDouble>(double prec,
+                       FunctionTree<1, ComplexDouble> &out,
+                       FunctionTree<1, ComplexDouble> &tree,
                        double pow,
                        int maxIter,
                        bool absPrec);
-template void power<2>(double prec,
-                       FunctionTree<2> &out,
-                       FunctionTree<2> &tree,
+template void power<2, ComplexDouble>(double prec,
+                       FunctionTree<2, ComplexDouble> &out,
+                       FunctionTree<2, ComplexDouble> &tree,
                        double pow,
                        int maxIter,
                        bool absPrec);
-template void power<3>(double prec,
-                       FunctionTree<3> &out,
-                       FunctionTree<3> &tree,
+template void power<3, ComplexDouble>(double prec,
+                       FunctionTree<3, ComplexDouble> &out,
+                       FunctionTree<3, ComplexDouble> &tree,
                        double pow,
                        int maxIter,
                        bool absPrec);
-template void square<1>(double prec,
-                        FunctionTree<1> &out,
-                        FunctionTree<1> &tree,
+template void square<1, ComplexDouble>(double prec,
+                        FunctionTree<1, ComplexDouble> &out,
+                        FunctionTree<1, ComplexDouble> &tree,
                         int maxIter,
                         bool absPrec);
-template void square<2>(double prec,
-                        FunctionTree<2> &out,
-                        FunctionTree<2> &tree,
+template void square<2, ComplexDouble>(double prec,
+                        FunctionTree<2, ComplexDouble> &out,
+                        FunctionTree<2, ComplexDouble> &tree,
                         int maxIter,
                         bool absPrec);
-template void square<3>(double prec,
-                        FunctionTree<3> &out,
-                        FunctionTree<3> &tree,
+template void square<3, ComplexDouble>(double prec,
+                        FunctionTree<3, ComplexDouble> &out,
+                        FunctionTree<3, ComplexDouble> &tree,
                         int maxIter,
                         bool absPrec);
-template void dot<1>(double prec,
-                     FunctionTree<1> &out,
-                     FunctionTreeVector<1> &inp_a,
-                     FunctionTreeVector<1> &inp_b,
+template void dot<1, ComplexDouble>(double prec,
+                     FunctionTree<1, ComplexDouble> &out,
+                     FunctionTreeVector<1, ComplexDouble> &inp_a,
+                     FunctionTreeVector<1, ComplexDouble> &inp_b,
                      int maxIter,
                      bool absPrec);
-template void dot<2>(double prec,
-                     FunctionTree<2> &out,
-                     FunctionTreeVector<2> &inp_a,
-                     FunctionTreeVector<2> &inp_b,
+template void dot<2, ComplexDouble>(double prec,
+                     FunctionTree<2, ComplexDouble> &out,
+                     FunctionTreeVector<2, ComplexDouble> &inp_a,
+                     FunctionTreeVector<2, ComplexDouble> &inp_b,
                      int maxIter,
                      bool absPrec);
-template void dot<3>(double prec,
-                     FunctionTree<3> &out,
-                     FunctionTreeVector<3> &inp_a,
-                     FunctionTreeVector<3> &inp_b,
+template void dot<3, ComplexDouble>(double prec,
+                     FunctionTree<3, ComplexDouble> &out,
+                     FunctionTreeVector<3, ComplexDouble> &inp_a,
+                     FunctionTreeVector<3, ComplexDouble> &inp_b,
                      int maxIter,
                      bool absPrec);
 
-template double dot<1>(FunctionTree<1> &bra, FunctionTree<1> &ket);
-template double dot<2>(FunctionTree<2> &bra, FunctionTree<2> &ket);
-template double dot<3>(FunctionTree<3> &bra, FunctionTree<3> &ket);
+template ComplexDouble dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket);
+template ComplexDouble dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket);
+template ComplexDouble dot<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket);
+
+template double node_norm_dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket, bool exact);
+template double node_norm_dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket, bool exact);
+template double node_norm_dot<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket, bool exact);
 
-template double node_norm_dot<1>(FunctionTree<1> &bra, FunctionTree<1> &ket, bool exact);
-template double node_norm_dot<2>(FunctionTree<2> &bra, FunctionTree<2> &ket, bool exact);
-template double node_norm_dot<3>(FunctionTree<3> &bra, FunctionTree<3> &ket, bool exact);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index 54947bf78..3f66cd3ad 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -28,56 +28,56 @@
 #include "trees/FunctionTreeVector.h"
 
 namespace mrcpp {
-template <int D> class RepresentableFunction;
-template <int D> class FunctionTree;
+template <int D, typename T> class RepresentableFunction;
+template <int D, typename T> class FunctionTree;
 
-template <int D> void dot(double prec,
-                          FunctionTree<D> &out,
-                          FunctionTreeVector<D> &inp_a,
-                          FunctionTreeVector<D> &inp_b,
+template <int D, typename T> void dot(double prec,
+                          FunctionTree<D, T> &out,
+                          FunctionTreeVector<D, T> &inp_a,
+                          FunctionTreeVector<D, T> &inp_b,
                           int maxIter = -1,
                           bool absPrec = false);
 
-template <int D> double dot(FunctionTree<D> &bra,
-                            FunctionTree<D> &ket);
+template <int D, typename T> T dot(FunctionTree<D, T> &bra,
+                            FunctionTree<D, T> &ket);
 
-template <int D> double node_norm_dot(FunctionTree<D> &bra,
-                                      FunctionTree<D> &ket,
+template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra,
+                                      FunctionTree<D, T> &ket,
                                       bool exact = false);
 
-template <int D> void multiply(double prec,
-                               FunctionTree<D> &out,
+template <int D, typename T> void multiply(double prec,
+                               FunctionTree<D, T> &out,
                                double c,
-                               FunctionTree<D> &inp_a,
-                               FunctionTree<D> &inp_b,
+                               FunctionTree<D, T> &inp_a,
+                               FunctionTree<D, T> &inp_b,
                                int maxIter = -1,
                                bool absPrec = false,
                                bool useMaxNorms = false);
 
-template <int D> void multiply(double prec,
-                               FunctionTree<D> &out,
-                               std::vector<FunctionTree<D> *> &inp,
+template <int D, typename T> void multiply(double prec,
+                               FunctionTree<D, T> &out,
+                               std::vector<FunctionTree<D, T> *> &inp,
                                int maxIter = -1,
                                bool absPrec = false,
                                bool useMaxNorms = false);
 
-template <int D> void multiply(double prec,
-                               FunctionTree<D> &out,
-                               FunctionTreeVector<D> &inp,
+template <int D, typename T> void multiply(double prec,
+                               FunctionTree<D, T> &out,
+                               FunctionTreeVector<D, T> &inp,
                                int maxIter = -1,
                                bool absPrec = false,
                                bool useMaxNorms = false);
 
-template <int D> void power(double prec,
-                            FunctionTree<D> &out,
-                            FunctionTree<D> &inp,
+template <int D, typename T> void power(double prec,
+                            FunctionTree<D, T> &out,
+                            FunctionTree<D, T> &inp,
                             double p,
                             int maxIter = -1,
                             bool absPrec = false);
 
-template <int D> void square(double prec,
-                             FunctionTree<D> &out,
-                             FunctionTree<D> &inp,
+template <int D, typename T> void square(double prec,
+                             FunctionTree<D, T> &out,
+                             FunctionTree<D, T> &inp,
                              int maxIter = -1,
                              bool absPrec = false);
 
diff --git a/src/treebuilders/project.cpp b/src/treebuilders/project.cpp
index c22f22ec8..65d17fd16 100644
--- a/src/treebuilders/project.cpp
+++ b/src/treebuilders/project.cpp
@@ -56,8 +56,8 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D> void project(double prec, FunctionTree<D> &out, std::function<double(const Coord<D> &r)> func, int maxIter, bool absPrec) {
-    AnalyticFunction<D> inp(func);
+template <int D, typename T> void project(double prec, FunctionTree<D, T> &out, std::function<T(const Coord<D> &r)> func, int maxIter, bool absPrec) {
+    AnalyticFunction<D, T> inp(func);
     mrcpp::project(prec, out, inp, maxIter, absPrec);
 }
 
@@ -81,14 +81,14 @@ template <int D> void project(double prec, FunctionTree<D> &out, std::function<d
  * no coefs).
  *
  */
-template <int D> void project(double prec, FunctionTree<D> &out, RepresentableFunction<D> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void project(double prec, FunctionTree<D, T> &out, RepresentableFunction<D, T> &inp, int maxIter, bool absPrec) {
 
     int maxScale = out.getMRA().getMaxScale();
     const auto scaling_factor = out.getMRA().getWorldBox().getScalingFactors();
-    TreeBuilder<D> builder;
-    WaveletAdaptor<D> adaptor(prec, maxScale, absPrec);
+    TreeBuilder<D, T> builder;
+    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
 
-    ProjectionCalculator<D> calculator(inp, scaling_factor);
+    ProjectionCalculator<D, T> calculator(inp, scaling_factor);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -121,19 +121,33 @@ template <int D> void project(double prec, FunctionTree<D> &out, RepresentableFu
  * no coefs).
  *
  */
-template <int D> void project(double prec, FunctionTreeVector<D> &out, std::vector<std::function<double(const Coord<D> &r)>> func, int maxIter, bool absPrec) {
+template <int D, typename T> void project(double prec, FunctionTreeVector<D, T> &out, std::vector<std::function<T(const Coord<D> &r)>> func, int maxIter, bool absPrec) {
     if (out.size() != func.size()) MSG_ABORT("Size mismatch");
     for (auto j = 0; j < D; j++) mrcpp::project<D>(prec, get_func(out, j), func[j], maxIter, absPrec);
 }
 
-template void project<1>(double prec, FunctionTree<1> &out, RepresentableFunction<1> &inp, int maxIter, bool absPrec);
-template void project<2>(double prec, FunctionTree<2> &out, RepresentableFunction<2> &inp, int maxIter, bool absPrec);
-template void project<3>(double prec, FunctionTree<3> &out, RepresentableFunction<3> &inp, int maxIter, bool absPrec);
+template void project<1, double>(double prec, FunctionTree<1, double> &out, RepresentableFunction<1, double> &inp, int maxIter, bool absPrec);
+template void project<2, double>(double prec, FunctionTree<2, double> &out, RepresentableFunction<2, double> &inp, int maxIter, bool absPrec);
+template void project<3, double>(double prec, FunctionTree<3, double> &out, RepresentableFunction<3, double> &inp, int maxIter, bool absPrec);
+
+template void project<1, double>(double prec, FunctionTree<1, double> &out, std::function<double(const Coord<1> &r)> func, int maxIter, bool absPrec);
+template void project<2, double>(double prec, FunctionTree<2, double> &out, std::function<double(const Coord<2> &r)> func, int maxIter, bool absPrec);
+template void project<3, double>(double prec, FunctionTree<3, double> &out, std::function<double(const Coord<3> &r)> func, int maxIter, bool absPrec);
+template void project<1, double>(double prec, FunctionTreeVector<1, double> &out, std::vector<std::function<double(const Coord<1> &r)>> inp, int maxIter, bool absPrec);
+template void project<2, double>(double prec, FunctionTreeVector<2, double> &out, std::vector<std::function<double(const Coord<2> &r)>> inp, int maxIter, bool absPrec);
+template void project<3, double>(double prec, FunctionTreeVector<3, double> &out, std::vector<std::function<double(const Coord<3> &r)>> inp, int maxIter, bool absPrec);
+
+
+template void project<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, RepresentableFunction<1, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void project<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, RepresentableFunction<2, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void project<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, RepresentableFunction<3, ComplexDouble> &inp, int maxIter, bool absPrec);
+
+template void project<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, std::function<ComplexDouble(const Coord<1> &r)> func, int maxIter, bool absPrec);
+template void project<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, std::function<ComplexDouble(const Coord<2> &r)> func, int maxIter, bool absPrec);
+template void project<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, std::function<ComplexDouble(const Coord<3> &r)> func, int maxIter, bool absPrec);
+template void project<1, ComplexDouble>(double prec, FunctionTreeVector<1, ComplexDouble> &out, std::vector<std::function<ComplexDouble(const Coord<1> &r)>> inp, int maxIter, bool absPrec);
+template void project<2, ComplexDouble>(double prec, FunctionTreeVector<2, ComplexDouble> &out, std::vector<std::function<ComplexDouble(const Coord<2> &r)>> inp, int maxIter, bool absPrec);
+template void project<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &out, std::vector<std::function<ComplexDouble(const Coord<3> &r)>> inp, int maxIter, bool absPrec);
+
 
-template void project<1>(double prec, FunctionTree<1> &out, std::function<double(const Coord<1> &r)> func, int maxIter, bool absPrec);
-template void project<2>(double prec, FunctionTree<2> &out, std::function<double(const Coord<2> &r)> func, int maxIter, bool absPrec);
-template void project<3>(double prec, FunctionTree<3> &out, std::function<double(const Coord<3> &r)> func, int maxIter, bool absPrec);
-template void project<1>(double prec, FunctionTreeVector<1> &out, std::vector<std::function<double(const Coord<1> &r)>> inp, int maxIter, bool absPrec);
-template void project<2>(double prec, FunctionTreeVector<2> &out, std::vector<std::function<double(const Coord<2> &r)>> inp, int maxIter, bool absPrec);
-template void project<3>(double prec, FunctionTreeVector<3> &out, std::vector<std::function<double(const Coord<3> &r)>> inp, int maxIter, bool absPrec);
 } // namespace mrcpp
diff --git a/src/treebuilders/project.h b/src/treebuilders/project.h
index 790914a4b..f9e070ef2 100644
--- a/src/treebuilders/project.h
+++ b/src/treebuilders/project.h
@@ -30,7 +30,7 @@
 #include <functional>
 
 namespace mrcpp {
-template <int D> void project(double prec, FunctionTree<D> &out, RepresentableFunction<D> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void project(double prec, FunctionTree<D> &out, std::function<double(const Coord<D> &r)> func, int maxIter = -1, bool absPrec = false);
-template <int D> void project(double prec, FunctionTreeVector<D> &out, std::vector<std::function<double(const Coord<D> &r)>> func, int maxIter = -1, bool absPrec = false);
+template <int D, typename T = double> void project(double prec, FunctionTree<D, T> &out, RepresentableFunction<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T = double> void project(double prec, FunctionTree<D, T> &out, std::function<T(const Coord<D> &r)> func, int maxIter = -1, bool absPrec = false);
+template <int D, typename T = double> void project(double prec, FunctionTreeVector<D, T> &out, std::vector<std::function<T(const Coord<D> &r)>> func, int maxIter = -1, bool absPrec = false);
 } // namespace mrcpp
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index c839e2b57..98858e503 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -44,7 +44,7 @@ namespace mrcpp {
 
 /** Function evaluation.
  * Evaluate all polynomials defined on the node. */
-template <int D> double FunctionNode<D>::evalf(Coord<D> r) {
+template <int D, typename T> T FunctionNode<D, T>::evalf(Coord<D> r) {
     if (not this->hasCoefs()) MSG_ERROR("Evaluating node without coefs");
 
     // The 1.0 appearing in the if tests comes from the period is always 1.0
@@ -57,7 +57,7 @@ template <int D> double FunctionNode<D>::evalf(Coord<D> r) {
     return getFuncChild(cIdx).evalScaling(r);
 }
 
-template <int D> double FunctionNode<D>::evalScaling(const Coord<D> &r) const {
+template <int D, typename T> T FunctionNode<D, T>::evalScaling(const Coord<D> &r) const {
     if (not this->hasCoefs()) MSG_ERROR("Evaluating node without coefs");
 
     double arg[D];
@@ -72,10 +72,10 @@ template <int D> double FunctionNode<D>::evalScaling(const Coord<D> &r) const {
     const ScalingBasis &basis = this->getMWTree().getMRA().getScalingBasis();
     basis.evalf(arg, val);
 
-    double result = 0.0;
+    T result = 0.0;
     //#pragma omp parallel for shared(fact) reduction(+:result) num_threads(mrcpp_get_num_threads())
     for (int i = 0; i < this->getKp1_d(); i++) {
-        double temp = this->coefs[i];
+        T temp = this->coefs[i];
         for (int j = 0; j < D; j++) {
             int k = (i % fact[j + 1]) / fact[j];
             temp *= val(k, j);
@@ -92,7 +92,7 @@ template <int D> double FunctionNode<D>::evalScaling(const Coord<D> &r) const {
  * Wrapper for function integration, that requires different methods depending
  * on scaling type. Integrates the function represented on the node on the
  * full support of the node. */
-template <int D> double FunctionNode<D>::integrate() const {
+template <int D, typename T> T FunctionNode<D, T>::integrate() const {
     if (not this->hasCoefs()) { return 0.0; }
     switch (this->getScalingType()) {
         case Legendre:
@@ -115,7 +115,7 @@ template <int D> double FunctionNode<D>::integrate() const {
  *          s_i = int f(x)phi_i(x)dx
  * and since the first Legendre function is the constant 1, the first
  * coefficient is simply the integral of f(x). */
-template <int D> double FunctionNode<D>::integrateLegendre() const {
+template <int D, typename T> T FunctionNode<D, T>::integrateLegendre() const {
     double n = (D * this->getScale()) / 2.0;
     double two_n = std::pow(2.0, -n);
     return two_n * this->getCoefs()[0];
@@ -126,7 +126,7 @@ template <int D> double FunctionNode<D>::integrateLegendre() const {
  * Integrates the function represented on the node on the full support of the
  * node. A bit more involved than in the Legendre basis, as is requires some
  * coupling of quadrature weights. */
-template <int D> double FunctionNode<D>::integrateInterpolating() const {
+template <int D, typename T> T FunctionNode<D, T>::integrateInterpolating() const {
     int qOrder = this->getKp1();
     getQuadratureCache(qc);
     const VectorXd &weights = qc.getWeights(qOrder);
@@ -136,7 +136,7 @@ template <int D> double FunctionNode<D>::integrateInterpolating() const {
     int kp1_p[D];
     for (int i = 0; i < D; i++) kp1_p[i] = math_utils::ipow(qOrder, i);
 
-    VectorXd coefs;
+    Eigen::Matrix<T, Eigen::Dynamic, 1> coefs;
     this->getCoefs(coefs);
     for (int p = 0; p < D; p++) {
 
@@ -152,7 +152,7 @@ template <int D> double FunctionNode<D>::integrateInterpolating() const {
     }
     double n = (D * this->getScale()) / 2.0;
     double two_n = std::pow(2.0, -n);
-    double sum = coefs.segment(0, this->getKp1_d()).sum();
+    T sum = coefs.segment(0, this->getKp1_d()).sum();
 
     return two_n * sum;
 }
@@ -162,27 +162,27 @@ template <int D> double FunctionNode<D>::integrateInterpolating() const {
  * Integrates the function represented on the node on the full support of the
  * node. A bit more involved than in the Legendre basis, as is requires some
  * coupling of quadrature weights. */
-template <int D> double FunctionNode<D>::integrateValues() const {
+template <int D, typename T> T FunctionNode<D, T>::integrateValues() const {
     int qOrder = this->getKp1();
     getQuadratureCache(qc);
     const VectorXd &weights = qc.getWeights(qOrder);
-    VectorXd coefs;
+    Eigen::Matrix<T, Eigen::Dynamic, 1> coefs;
     this->getCoefs(coefs);
     int ncoefs = coefs.size();
     int ncoefChild = ncoefs/(1<<D);
-    double cc[ncoefChild];
+    Eigen::Matrix<T, Eigen::Dynamic, 1> cc(ncoefChild); // heap vector, not a VLA (non-standard C++; T may be complex)
     // factorize out the children
     for (int i = 0; i < ncoefChild; i++)cc[i]=coefs[i];
     for (int j = 1; j < (1<<D); j++) for (int i = 0; i < ncoefChild; i++)cc[i]+=coefs[j*ncoefChild+i];
 
     int nc = 0;
-    double sum = 0.0;
+    T sum = 0.0;
     if (D > 3) MSG_ABORT("Not Implemented")
     else if (D == 3) {
         for (int i = 0; i < qOrder; i++) {
-            double sumj = 0.0;
+            T sumj = 0.0;
             for (int j = 0; j < qOrder; j++) {
-                double sumk = 0.0;
+                T sumk = 0.0;
                 for (int k = 0; k < qOrder; k++) sumk += cc[nc++] * weights[k];
                 sumj += sumk * weights[j];
             }
@@ -190,7 +190,7 @@ template <int D> double FunctionNode<D>::integrateValues() const {
         }
     } else if (D==2) {
         for (int j = 0; j < qOrder; j++) {
-                double sumk = 0.0;
+                T sumk = 0.0;
                 for (int k = 0; k < qOrder; k++) sumk += cc[nc++] * weights[k];
                 sum += sumk * weights[j];
         }
@@ -203,7 +203,7 @@ template <int D> double FunctionNode<D>::integrateValues() const {
     return sum;
 }
 
-template <int D> void FunctionNode<D>::setValues(const VectorXd &vec) {
+template <int D, typename T> void FunctionNode<D, T>::setValues(const Matrix<T, Eigen::Dynamic, 1> &vec) {
     this->zeroCoefs();
     this->setCoefBlock(0, vec.size(), vec.data());
     this->cvTransform(Backward);
@@ -212,15 +212,15 @@ template <int D> void FunctionNode<D>::setValues(const VectorXd &vec) {
     this->calcNorms();
 }
 
-template <int D> void FunctionNode<D>::getValues(VectorXd &vec) {
+template <int D, typename T> void FunctionNode<D, T>::getValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &vec) {
     if (this->isGenNode()) {
-        MWNode<D> copy(*this);
-        vec = Eigen::VectorXd::Zero(copy.getNCoefs());
+        MWNode<D, T> copy(*this);
+        vec = Eigen::Matrix<T, Eigen::Dynamic, 1>::Zero(copy.getNCoefs());
         copy.mwTransform(Reconstruction);
         copy.cvTransform(Forward);
         for (int i = 0; i < this->n_coefs; i++) vec(i) = copy.getCoefs()[i];
     } else {
-        vec = VectorXd::Zero(this->n_coefs);
+        vec = Eigen::Matrix<T, Eigen::Dynamic, 1>::Zero(this->n_coefs);
         this->mwTransform(Reconstruction);
         this->cvTransform(Forward);
         for (int i = 0; i < this->n_coefs; i++) vec(i) = this->coefs[i];
@@ -232,8 +232,8 @@ template <int D> void FunctionNode<D>::getValues(VectorXd &vec) {
 /** get coefficients corresponding to absolute value of function
  *
  * Leaves the original coefficients unchanged. */
-template <int D> void FunctionNode<D>::getAbsCoefs(double *absCoefs) {
-    double *coefsTmp = this->coefs;
+template <int D, typename T> void FunctionNode<D, T>::getAbsCoefs(T *absCoefs) {
+    T *coefsTmp = this->coefs;
     for (int i = 0; i < this->n_coefs; i++) absCoefs[i] = coefsTmp[i]; // copy
     this->coefs = absCoefs;                                            // swap coefs
     this->mwTransform(Reconstruction);
@@ -244,7 +244,7 @@ template <int D> void FunctionNode<D>::getAbsCoefs(double *absCoefs) {
     this->coefs = coefsTmp; // restore original array (same address)
 }
 
-template <int D> void FunctionNode<D>::createChildren(bool coefs) {
+template <int D, typename T> void FunctionNode<D, T>::createChildren(bool coefs) {
     if (this->isBranchNode()) MSG_ABORT("Node already has children");
     auto &allocator = this->getFuncTree().getNodeAllocator();
 
@@ -258,7 +258,7 @@ template <int D> void FunctionNode<D>::createChildren(bool coefs) {
     this->childSerialIx = sIdx;
     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
         // construct into allocator memory
-        new (child_p) FunctionNode<D>(this, cIdx);
+        new (child_p) FunctionNode<D, T>(this, cIdx);
         this->children[cIdx] = child_p;
 
         child_p->serialIx = sIdx;
@@ -282,7 +282,7 @@ template <int D> void FunctionNode<D>::createChildren(bool coefs) {
     this->clearIsEndNode();
 }
 
-template <int D> void FunctionNode<D>::genChildren() {
+template <int D, typename T> void FunctionNode<D, T>::genChildren() {
     if (this->isBranchNode()) MSG_ABORT("Node already has children");
     auto &allocator = this->getFuncTree().getGenNodeAllocator();
 
@@ -296,7 +296,7 @@ template <int D> void FunctionNode<D>::genChildren() {
     this->childSerialIx = sIdx;
     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
         // construct into allocator memory
-        new (child_p) FunctionNode<D>(this, cIdx);
+        new (child_p) FunctionNode<D, T>(this, cIdx);
         this->children[cIdx] = child_p;
 
         child_p->serialIx = sIdx;
@@ -319,7 +319,7 @@ template <int D> void FunctionNode<D>::genChildren() {
     this->setIsBranchNode();
 }
 
-template <int D> void FunctionNode<D>::genParent() {
+template <int D, typename T> void FunctionNode<D, T>::genParent() {
     if (this->parent != nullptr) MSG_ABORT("Node is not an orphan");
 
     auto &allocator = this->getFuncTree().getNodeAllocator();
@@ -332,7 +332,7 @@ template <int D> void FunctionNode<D>::genParent() {
     this->parentSerialIx = sIdx;
 
     // construct into allocator memory
-    new (parent_p) FunctionNode<D>(this->tree, this->getNodeIndex().parent());
+    new (parent_p) FunctionNode<D, T>(this->tree, this->getNodeIndex().parent());
 
     this->parent = parent_p;
 
@@ -351,12 +351,12 @@ template <int D> void FunctionNode<D>::genParent() {
     this->getMWTree().incrementNodeCount(parent_p->getScale());
 }
 
-template <int D> void FunctionNode<D>::deleteChildren() {
-    MWNode<D>::deleteChildren();
+template <int D, typename T> void FunctionNode<D, T>::deleteChildren() {
+    MWNode<D, T>::deleteChildren();
     this->setIsEndNode();
 }
 
-template <int D> void FunctionNode<D>::dealloc() {
+template <int D, typename T> void FunctionNode<D, T>::dealloc() {
     int sIdx = this->serialIx;
     this->serialIx = -1;
     this->parentSerialIx = -1;
@@ -376,8 +376,8 @@ template <int D> void FunctionNode<D>::dealloc() {
 /** Update the coefficients of the node by a mw transform of the scaling
  * coefficients of the children. Option to overwrite or add up existing
  * coefficients. Specialized for D=3 below. */
-template <int D> void FunctionNode<D>::reCompress() {
-    MWNode<D>::reCompress();
+template <int D, typename T> void FunctionNode<D, T>::reCompress() {
+    MWNode<D, T>::reCompress();
 }
 
 template <> void FunctionNode<3>::reCompress() {
@@ -406,18 +406,28 @@ template <> void FunctionNode<3>::reCompress() {
  * the node on the full support of the nodes. The scaling basis is fully
  * orthonormal, and the inner product is simply the dot product of the
  * coefficient vectors. Assumes the nodes have identical support. */
-template <int D> double dot_scaling(const FunctionNode<D> &bra, const FunctionNode<D> &ket) {
+template <int D, typename T> T dot_scaling(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket) {
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
-    const double *a = bra.getCoefs();
-    const double *b = ket.getCoefs();
+    const T *a = bra.getCoefs();
+    const T *b = ket.getCoefs();
 
     int size = bra.getKp1_d();
 #ifdef HAVE_BLAS
-    return cblas_ddot(size, a, 1, b, 1);
+    // cblas_ddot only accepts real double arrays, so overload resolution on the
+    // coefficient type routes complex coefficients to a plain unconjugated sum.
+    struct Dot {
+        static double eval(int n, const double *x, const double *y) { return cblas_ddot(n, x, 1, y, 1); }
+        static ComplexDouble eval(int n, const ComplexDouble *x, const ComplexDouble *y) {
+            ComplexDouble sum = 0.0;
+            for (int i = 0; i < n; i++) sum += x[i] * y[i];
+            return sum;
+        }
+    };
+    return Dot::eval(size, a, b);
 #else
-    double result = 0.0;
+    T result = 0.0;
     for (int i = 0; i < size; i++) result += a[i] * b[i];
     return result;
 #endif
@@ -429,35 +429,56 @@ template <int D> double dot_scaling(const FunctionNode<D> &bra, const FunctionNo
  * the node on the full support of the nodes. The wavelet basis is fully
  * orthonormal, and the inner product is simply the dot product of the
  * coefficient vectors. Assumes the nodes have identical support. */
-template <int D> double dot_wavelet(const FunctionNode<D> &bra, const FunctionNode<D> &ket) {
+template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket) {
     if (bra.isGenNode() or ket.isGenNode()) return 0.0;
 
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
-    const double *a = bra.getCoefs();
-    const double *b = ket.getCoefs();
+    const T *a = bra.getCoefs();
+    const T *b = ket.getCoefs();
 
     int start = bra.getKp1_d();
     int size = (bra.getTDim() - 1) * start;
 #ifdef HAVE_BLAS
-    return cblas_ddot(size, &a[start], 1, &b[start], 1);
+    // cblas_ddot only accepts real double arrays, so overload resolution on the
+    // coefficient type routes complex coefficients to a plain unconjugated sum.
+    struct Dot {
+        static double eval(int n, const double *x, const double *y) { return cblas_ddot(n, x, 1, y, 1); }
+        static ComplexDouble eval(int n, const ComplexDouble *x, const ComplexDouble *y) {
+            ComplexDouble sum = 0.0;
+            for (int i = 0; i < n; i++) sum += x[i] * y[i];
+            return sum;
+        }
+    };
+    return Dot::eval(size, &a[start], &b[start]);
 #else
-    double result = 0.0;
+    T result = 0.0;
     for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
     return result;
 #endif
 }
 
-template double dot_scaling(const FunctionNode<1> &bra, const FunctionNode<1> &ket);
-template double dot_scaling(const FunctionNode<2> &bra, const FunctionNode<2> &ket);
-template double dot_scaling(const FunctionNode<3> &bra, const FunctionNode<3> &ket);
-template double dot_wavelet(const FunctionNode<1> &bra, const FunctionNode<1> &ket);
-template double dot_wavelet(const FunctionNode<2> &bra, const FunctionNode<2> &ket);
-template double dot_wavelet(const FunctionNode<3> &bra, const FunctionNode<3> &ket);
-
-template class FunctionNode<1>;
-template class FunctionNode<2>;
-template class FunctionNode<3>;
+template double dot_scaling(const FunctionNode<1, double> &bra, const FunctionNode<1, double> &ket);
+template double dot_scaling(const FunctionNode<2, double> &bra, const FunctionNode<2, double> &ket);
+template double dot_scaling(const FunctionNode<3, double> &bra, const FunctionNode<3, double> &ket);
+template double dot_wavelet(const FunctionNode<1, double> &bra, const FunctionNode<1, double> &ket);
+template double dot_wavelet(const FunctionNode<2, double> &bra, const FunctionNode<2, double> &ket);
+template double dot_wavelet(const FunctionNode<3, double> &bra, const FunctionNode<3, double> &ket);
+
+template class FunctionNode<1, double>;
+template class FunctionNode<2, double>;
+template class FunctionNode<3, double>;
+
+template class FunctionNode<1, ComplexDouble>;
+template class FunctionNode<2, ComplexDouble>;
+template class FunctionNode<3, ComplexDouble>;
+
+template ComplexDouble dot_scaling(const FunctionNode<1, ComplexDouble> &bra, const FunctionNode<1, ComplexDouble> &ket);
+template ComplexDouble dot_scaling(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, ComplexDouble> &ket);
+template ComplexDouble dot_scaling(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<1, ComplexDouble> &bra, const FunctionNode<1, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, ComplexDouble> &ket);
 
 } // namespace mrcpp
diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h
index 97a3d74d3..14c44fb7e 100644
--- a/src/trees/FunctionNode.h
+++ b/src/trees/FunctionNode.h
@@ -32,55 +32,55 @@
 
 namespace mrcpp {
 
-template <int D> class FunctionNode final : public MWNode<D> {
+template <int D, typename T> class FunctionNode final : public MWNode<D, T> {
 public:
-    FunctionTree<D> &getFuncTree() { return static_cast<FunctionTree<D> &>(*this->tree); }
-    FunctionNode<D> &getFuncParent() { return static_cast<FunctionNode<D> &>(*this->parent); }
-    FunctionNode<D> &getFuncChild(int i) { return static_cast<FunctionNode<D> &>(*this->children[i]); }
+    FunctionTree<D, T> &getFuncTree() { return static_cast<FunctionTree<D, T> &>(*this->tree); }
+    FunctionNode<D, T> &getFuncParent() { return static_cast<FunctionNode<D, T> &>(*this->parent); }
+    FunctionNode<D, T> &getFuncChild(int i) { return static_cast<FunctionNode<D, T> &>(*this->children[i]); }
 
-    const FunctionTree<D> &getFuncTree() const { return static_cast<const FunctionTree<D> &>(*this->tree); }
-    const FunctionNode<D> &getFuncParent() const { return static_cast<const FunctionNode<D> &>(*this->parent); }
-    const FunctionNode<D> &getFuncChild(int i) const { return static_cast<const FunctionNode<D> &>(*this->children[i]); }
+    const FunctionTree<D, T> &getFuncTree() const { return static_cast<const FunctionTree<D, T> &>(*this->tree); }
+    const FunctionNode<D, T> &getFuncParent() const { return static_cast<const FunctionNode<D, T> &>(*this->parent); }
+    const FunctionNode<D, T> &getFuncChild(int i) const { return static_cast<const FunctionNode<D, T> &>(*this->children[i]); }
 
     void createChildren(bool coefs) override;
     void genChildren() override;
     void genParent() override;
     void deleteChildren() override;
 
-    double integrate() const;
+    T integrate() const;
 
-    void setValues(const Eigen::VectorXd &vec);
-    void getValues(Eigen::VectorXd &vec);
-    void getAbsCoefs(double *absCoefs);
+    void setValues(const Eigen::Matrix<T , Eigen::Dynamic, 1> &vec);
+    void getValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &vec);
+    void getAbsCoefs(T *absCoefs);
 
-    friend class FunctionTree<D>;
-    friend class NodeAllocator<D>;
+    friend class FunctionTree<D, T>;
+    friend class NodeAllocator<D, T>;
 
 protected:
     FunctionNode()
-            : MWNode<D>() {}
-    FunctionNode(MWTree<D> *tree, int rIdx)
-            : MWNode<D>(tree, rIdx) {}
-    FunctionNode(MWNode<D> *parent, int cIdx)
-            : MWNode<D>(parent, cIdx) {}
-    FunctionNode(MWTree<D> *tree, const NodeIndex<D> &idx)
-            : MWNode<D>(tree, idx) {}
-    FunctionNode(const FunctionNode<D> &node) = delete;
-    FunctionNode<D> &operator=(const FunctionNode<D> &node) = delete;
+            : MWNode<D, T>() {}
+    FunctionNode(MWTree<D, T> *tree, int rIdx)
+            : MWNode<D, T>(tree, rIdx) {}
+    FunctionNode(MWNode<D, T> *parent, int cIdx)
+            : MWNode<D, T>(parent, cIdx) {}
+    FunctionNode(MWTree<D, T> *tree, const NodeIndex<D> &idx)
+            : MWNode<D, T>(tree, idx) {}
+    FunctionNode(const FunctionNode<D, T> &node) = delete;
+    FunctionNode<D, T> &operator=(const FunctionNode<D, T> &node) = delete;
     ~FunctionNode() = default;
 
-    double evalf(Coord<D> r);
-    double evalScaling(const Coord<D> &r) const;
+    T evalf(Coord<D> r);
+    T evalScaling(const Coord<D> &r) const;
 
     void dealloc() override;
     void reCompress() override;
 
-    double integrateLegendre() const;
-    double integrateInterpolating() const;
-    double integrateValues() const;
+    T integrateLegendre() const;
+    T integrateInterpolating() const;
+    T integrateValues() const;
 };
 
-template <int D> double dot_scaling(const FunctionNode<D> &bra, const FunctionNode<D> &ket);
-template <int D> double dot_wavelet(const FunctionNode<D> &bra, const FunctionNode<D> &ket);
+template <int D, typename T> T dot_scaling(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);
+template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);
 
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 47614a933..42adc76fa 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -50,20 +50,20 @@ namespace mrcpp {
  *  If a shared memory pointer is provided the tree will be allocated in this
  *  shared memory window, otherwise it will be local to each MPI process.
  */
-template <int D>
-FunctionTree<D>::FunctionTree(const MultiResolutionAnalysis<D> &mra, SharedMemory *sh_mem, const std::string &name)
-        : MWTree<D>(mra, name)
-        , RepresentableFunction<D>(mra.getWorldBox().getLowerBounds().data(), mra.getWorldBox().getUpperBounds().data()) {
+template <int D, typename T>
+FunctionTree<D, T>::FunctionTree(const MultiResolutionAnalysis<D> &mra, SharedMemory<T> *sh_mem, const std::string &name)
+        : MWTree<D, T>(mra, name)
+        , RepresentableFunction<D, T>(mra.getWorldBox().getLowerBounds().data(), mra.getWorldBox().getUpperBounds().data()) {
     int nodesPerChunk = 2048; // Large chunks are required for not leading to memory fragmentation (32 MB on "Betzy" 2023)
     int coefsGenNodes = this->getKp1_d();
     int coefsRegNodes = this->getTDim() * this->getKp1_d();
-    this->nodeAllocator_p = std::make_unique<NodeAllocator<D>>(this, sh_mem, coefsRegNodes, nodesPerChunk);
-    this->genNodeAllocator_p = std::make_unique<NodeAllocator<D>>(this, nullptr, coefsGenNodes, nodesPerChunk);
+    this->nodeAllocator_p = std::make_unique<NodeAllocator<D, T>>(this, sh_mem, coefsRegNodes, nodesPerChunk);
+    this->genNodeAllocator_p = std::make_unique<NodeAllocator<D, T>>(this, nullptr, coefsGenNodes, nodesPerChunk);
     this->allocRootNodes();
     this->resetEndNodeTable();
 }
 
-template <int D> void FunctionTree<D>::allocRootNodes() {
+template <int D, typename T> void FunctionTree<D, T>::allocRootNodes() {
     auto &allocator = this->getNodeAllocator();
     auto &rootbox = this->getRootBox();
 
@@ -74,10 +74,10 @@ template <int D> void FunctionTree<D>::allocRootNodes() {
     auto *coef_p = allocator.getCoef_p(sIdx);
     auto *root_p = allocator.getNode_p(sIdx);
 
-    MWNode<D> **roots = rootbox.getNodes();
+    MWNode<D, T> **roots = rootbox.getNodes();
     for (int rIdx = 0; rIdx < nRoots; rIdx++) {
         // construct into allocator memory
-        new (root_p) FunctionNode<D>(this, rIdx);
+        new (root_p) FunctionNode<D, T>(this, rIdx);
         roots[rIdx] = root_p;
 
         root_p->serialIx = sIdx;
@@ -101,14 +101,14 @@ template <int D> void FunctionTree<D>::allocRootNodes() {
 }
 
 // FunctionTree destructor
-template <int D> FunctionTree<D>::~FunctionTree() {
+template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
     this->deleteRootNodes();
 }
 
 /** @brief Write the tree structure to disk, for later use
  * @param[in] file: File name, will get ".tree" extension
  */
-template <int D> void FunctionTree<D>::saveTree(const std::string &file) {
+template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string &file) {
     Timer t1;
     this->deleteGenerated();
     auto &allocator = this->getNodeAllocator();
@@ -137,7 +137,7 @@ template <int D> void FunctionTree<D>::saveTree(const std::string &file) {
  * @param[in] file: File name, will get ".tree" extension
  * @note This tree must have the exact same MRA the one that was saved
  */
-template <int D> void FunctionTree<D>::loadTree(const std::string &file) {
+template <int D, typename T> void FunctionTree<D, T>::loadTree(const std::string &file) {
     Timer t1;
     std::stringstream fname;
     fname << file << ".tree";
@@ -168,11 +168,11 @@ template <int D> void FunctionTree<D>::loadTree(const std::string &file) {
 }
 
 /** @returns Integral of the function over the entire computational domain */
-template <int D> double FunctionTree<D>::integrate() const {
+template <int D, typename T> T FunctionTree<D, T>::integrate() const {
 
-    double result = 0.0;
+    T result = 0.0;
     for (int i = 0; i < this->rootBox.size(); i++) {
-        const FunctionNode<D> &fNode = getRootFuncNode(i);
+        const FunctionNode<D, T> &fNode = getRootFuncNode(i);
         result += fNode.integrate();
     }
 
@@ -188,7 +188,7 @@ template <int D> double FunctionTree<D>::integrate() const {
 
 
 /** @returns Integral of a representable function over the grid given by the tree */
-template <> double FunctionTree<3>::integrateEndNodes(RepresentableFunction_M &f) {
+template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M &f) {
     //traverse tree, and treat end nodes only
     std::vector<FunctionNode<3> *> stack;   // node from this
     for (int i = 0; i < this->getRootBox().size(); i++) stack.push_back(&(this->getRootFuncNode(i)));
@@ -236,7 +236,7 @@ template <> double FunctionTree<3>::integrateEndNodes(RepresentableFunction_M &f
  *       the MW grid by one level before evaluating, using
  *       `mrcpp::refine_grid(tree, 1)`
  */
-template <int D> double FunctionTree<D>::evalf(const Coord<D> &r) const {
+template <int D, typename T> T FunctionTree<D, T>::evalf(const Coord<D> &r) const {
     // Handle potential scaling
     const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors();
     auto arg = r;
@@ -249,8 +249,8 @@ template <int D> double FunctionTree<D>::evalf(const Coord<D> &r) const {
     // Function is zero outside the domain for non-periodic functions
     if (this->outOfBounds(arg) and not this->getRootBox().isPeriodic()) return 0.0;
 
-    const MWNode<D> &mw_node = this->getNodeOrEndNode(arg);
-    auto &f_node = static_cast<const FunctionNode<D> &>(mw_node);
+    const MWNode<D, T> &mw_node = this->getNodeOrEndNode(arg);
+    auto &f_node = static_cast<const FunctionNode<D, T> &>(mw_node);
     auto result = f_node.evalScaling(arg);
 
     // Adjust for scaling factor included in basis
@@ -270,7 +270,7 @@ template <int D> double FunctionTree<D>::evalf(const Coord<D> &r) const {
  *       need fast evaluation, use refine_grid(tree, 1) first, and then
  *       evalf.
  */
-template <int D> double FunctionTree<D>::evalf_precise(const Coord<D> &r) {
+template <int D, typename T> T FunctionTree<D, T>::evalf_precise(const Coord<D> &r) {
     // Handle potential scaling
     const auto scaling_factor = this->getMRA().getWorldBox().getScalingFactors();
     auto arg = r;
@@ -283,8 +283,8 @@ template <int D> double FunctionTree<D>::evalf_precise(const Coord<D> &r) {
     // Function is zero outside the domain for non-periodic functions
     if (this->outOfBounds(arg) and not this->getRootBox().isPeriodic()) return 0.0;
 
-    MWNode<D> &mw_node = this->getNodeOrEndNode(arg);
-    auto &f_node = static_cast<FunctionNode<D> &>(mw_node);
+    MWNode<D, T> &mw_node = this->getNodeOrEndNode(arg);
+    auto &f_node = static_cast<FunctionNode<D, T> &>(mw_node);
     auto result = f_node.evalf(arg);
     this->deleteGenerated();
 
@@ -301,7 +301,7 @@ template <int D> double FunctionTree<D>::evalf_precise(const Coord<D> &r) {
  * squared, no grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::square() {
+template <int D, typename T> void FunctionTree<D, T>::square() {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 
 #pragma omp parallel num_threads(mrcpp_get_num_threads())
@@ -310,10 +310,10 @@ template <int D> void FunctionTree<D>::square() {
         int nCoefs = this->getTDim() * this->getKp1_d();
 #pragma omp for schedule(guided)
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &node = *this->endNodeTable[n];
+            MWNode<D, T> &node = *this->endNodeTable[n];
             node.mwTransform(Reconstruction);
             node.cvTransform(Forward);
-            double *coefs = node.getCoefs();
+            T *coefs = node.getCoefs();
             for (int i = 0; i < nCoefs; i++) { coefs[i] *= coefs[i]; }
             node.cvTransform(Backward);
             node.mwTransform(Compression);
@@ -332,7 +332,7 @@ template <int D> void FunctionTree<D>::square() {
  * to the given power, no grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::power(double p) {
+template <int D, typename T> void FunctionTree<D, T>::power(double p) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 
 #pragma omp parallel num_threads(mrcpp_get_num_threads())
@@ -341,10 +341,10 @@ template <int D> void FunctionTree<D>::power(double p) {
         int nCoefs = this->getTDim() * this->getKp1_d();
 #pragma omp for schedule(guided)
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &node = *this->endNodeTable[n];
+            MWNode<D, T> &node = *this->endNodeTable[n];
             node.mwTransform(Reconstruction);
             node.cvTransform(Forward);
-            double *coefs = node.getCoefs();
+            T *coefs = node.getCoefs();
             for (int i = 0; i < nCoefs; i++) { coefs[i] = std::pow(coefs[i], p); }
             node.cvTransform(Backward);
             node.mwTransform(Compression);
@@ -363,7 +363,7 @@ template <int D> void FunctionTree<D>::power(double p) {
  * in-place multiplied by the given coefficient, no grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::rescale(double c) {
+template <int D, typename T> void FunctionTree<D, T>::rescale(double c) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads())
     {
@@ -371,9 +371,9 @@ template <int D> void FunctionTree<D>::rescale(double c) {
         int nCoefs = this->getTDim() * this->getKp1_d();
 #pragma omp for schedule(guided)
         for (int i = 0; i < nNodes; i++) {
-            MWNode<D> &node = *this->endNodeTable[i];
+            MWNode<D, T> &node = *this->endNodeTable[i];
             if (not node.hasCoefs()) MSG_ABORT("No coefs");
-            double *coefs = node.getCoefs();
+            T *coefs = node.getCoefs();
             for (int j = 0; j < nCoefs; j++) { coefs[j] *= c; }
             node.calcNorms();
         }
@@ -383,7 +383,7 @@ template <int D> void FunctionTree<D>::rescale(double c) {
 }
 
 /** @brief In-place rescaling by a function norm \f$ ||f||^{-1} \f$, fixed grid */
-template <int D> void FunctionTree<D>::normalize() {
+template <int D, typename T> void FunctionTree<D, T>::normalize() {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     double sq_norm = this->getSquareNorm();
     if (sq_norm < 0.0) MSG_ERROR("Normalizing uninitialized function");
@@ -399,7 +399,7 @@ template <int D> void FunctionTree<D>::normalize() {
  * the function, i.e. no further grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::add(double c, FunctionTree<D> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::add(double c, FunctionTree<D, T> &inp) {
     if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
@@ -407,10 +407,10 @@ template <int D> void FunctionTree<D>::add(double c, FunctionTree<D> &inp) {
         int nNodes = this->getNEndNodes();
 #pragma omp for schedule(guided)
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &out_node = *this->endNodeTable[n];
-            MWNode<D> &inp_node = inp.getNode(out_node.getNodeIndex());
-            double *out_coefs = out_node.getCoefs();
-            const double *inp_coefs = inp_node.getCoefs();
+            MWNode<D, T> &out_node = *this->endNodeTable[n];
+            MWNode<D, T> &inp_node = inp.getNode(out_node.getNodeIndex());
+            T *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
             for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] += c * inp_coefs[i]; }
             out_node.calcNorms();
         }
@@ -428,21 +428,21 @@ template <int D> void FunctionTree<D>::add(double c, FunctionTree<D> &inp) {
  * function, i.e. no further grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::absadd(double c, FunctionTree<D> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::absadd(double c, FunctionTree<D, T> &inp) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
     {
         int nNodes = this->getNEndNodes();
 #pragma omp for schedule(guided)
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &out_node = *this->endNodeTable[n];
-            MWNode<D> inp_node = inp.getNode(out_node.getNodeIndex()); // Full copy
+            MWNode<D, T> &out_node = *this->endNodeTable[n];
+            MWNode<D, T> inp_node = inp.getNode(out_node.getNodeIndex()); // Full copy
             out_node.mwTransform(Reconstruction);
             out_node.cvTransform(Forward);
             inp_node.mwTransform(Reconstruction);
             inp_node.cvTransform(Forward);
-            double *out_coefs = out_node.getCoefs();
-            const double *inp_coefs = inp_node.getCoefs();
+            T *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
             for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] = abs(out_coefs[i]) + c * abs(inp_coefs[i]); }
             out_node.cvTransform(Backward);
             out_node.mwTransform(Compression);
@@ -463,7 +463,7 @@ template <int D> void FunctionTree<D>::absadd(double c, FunctionTree<D> &inp) {
  * of the function, i.e. no further grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::multiply(double c, FunctionTree<D> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::multiply(double c, FunctionTree<D, T> &inp) {
     if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
@@ -471,14 +471,14 @@ template <int D> void FunctionTree<D>::multiply(double c, FunctionTree<D> &inp)
         int nNodes = this->getNEndNodes();
 #pragma omp for schedule(guided)
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &out_node = *this->endNodeTable[n];
-            MWNode<D> inp_node = inp.getNode(out_node.getNodeIndex()); // Full copy
+            MWNode<D, T> &out_node = *this->endNodeTable[n];
+            MWNode<D, T> inp_node = inp.getNode(out_node.getNodeIndex()); // Full copy
             out_node.mwTransform(Reconstruction);
             out_node.cvTransform(Forward);
             inp_node.mwTransform(Reconstruction);
             inp_node.cvTransform(Forward);
-            double *out_coefs = out_node.getCoefs();
-            const double *inp_coefs = inp_node.getCoefs();
+            T *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
             for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] *= c * inp_coefs[i]; }
             out_node.cvTransform(Backward);
             out_node.mwTransform(Compression);
@@ -498,16 +498,16 @@ template <int D> void FunctionTree<D>::multiply(double c, FunctionTree<D> &inp)
  * of the function, i.e. no further grid refinement.
  *
  */
-template <int D> void FunctionTree<D>::map(FMap fmap) {
+template <int D, typename T> void FunctionTree<D, T>::map(FMap<T, T> fmap) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     {
         int nNodes = this->getNEndNodes();
 #pragma omp parallel for schedule(guided) num_threads(mrcpp_get_num_threads())
         for (int n = 0; n < nNodes; n++) {
-            MWNode<D> &node = *this->endNodeTable[n];
+            MWNode<D, T> &node = *this->endNodeTable[n];
             node.mwTransform(Reconstruction);
             node.cvTransform(Forward);
-            double *coefs = node.getCoefs();
+            T *coefs = node.getCoefs();
             for (int i = 0; i < node.getNCoefs(); i++) { coefs[i] = fmap(coefs[i]); }
             node.cvTransform(Backward);
             node.mwTransform(Compression);
@@ -518,29 +518,29 @@ template <int D> void FunctionTree<D>::map(FMap fmap) {
     this->calcSquareNorm();
 }
 
-template <int D> void FunctionTree<D>::getEndValues(VectorXd &data) {
+template <int D, typename T> void FunctionTree<D, T>::getEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     int nNodes = this->getNEndNodes();
     int nCoefs = this->getTDim() * this->getKp1_d();
-    data = VectorXd::Zero(nNodes * nCoefs);
+    data = Eigen::Matrix<T, Eigen::Dynamic, 1>::Zero(nNodes * nCoefs);
     for (int n = 0; n < nNodes; n++) {
-        MWNode<D> &node = getEndFuncNode(n);
+        MWNode<D, T> &node = getEndFuncNode(n);
         node.mwTransform(Reconstruction);
         node.cvTransform(Forward);
-        const double *c = node.getCoefs();
+        const T *c = node.getCoefs();
         for (int i = 0; i < nCoefs; i++) { data(n * nCoefs + i) = c[i]; }
         node.cvTransform(Backward);
         node.mwTransform(Compression);
     }
 }
 
-template <int D> void FunctionTree<D>::setEndValues(VectorXd &data) {
+template <int D, typename T> void FunctionTree<D, T>::setEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     int nNodes = this->getNEndNodes();
     int nCoefs = this->getTDim() * this->getKp1_d();
     for (int i = 0; i < nNodes; i++) {
-        MWNode<D> &node = getEndFuncNode(i);
-        const double *c = data.segment(i * nCoefs, nCoefs).data();
+        MWNode<D, T> &node = getEndFuncNode(i);
+        const T *c = data.segment(i * nCoefs, nCoefs).data();
         node.setCoefBlock(0, nCoefs, c);
         node.cvTransform(Backward);
         node.mwTransform(Compression);
@@ -551,10 +551,10 @@ template <int D> void FunctionTree<D>::setEndValues(VectorXd &data) {
     this->calcSquareNorm();
 }
 
-template <int D> std::ostream &FunctionTree<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &FunctionTree<D, T>::print(std::ostream &o) const {
     o << std::endl << "*FunctionTree: " << this->name << std::endl;
     o << "  genNodes: " << getNGenNodes() << std::endl;
-    return MWTree<D>::print(o);
+    return MWTree<D, T>::print(o);
 }
 
 /** @brief Reduce the precision of the tree by deleting nodes
@@ -571,9 +571,9 @@ template <int D> std::ostream &FunctionTree<D>::print(std::ostream &o) const {
  * \f$ ||w|| < 2^{-sn/2} ||f|| \epsilon \f$. In principal, `s` should be equal
  * to the dimension; in practice, it is set to `s=1`.
  */
-template <int D> int FunctionTree<D>::crop(double prec, double splitFac, bool absPrec) {
+template <int D, typename T> int FunctionTree<D, T>::crop(double prec, double splitFac, bool absPrec) {
     for (int i = 0; i < this->rootBox.size(); i++) {
-        MWNode<D> &root = this->getRootMWNode(i);
+        MWNode<D, T> &root = this->getRootMWNode(i);
         root.crop(prec, splitFac, absPrec);
     }
     int nChunks = this->getNodeAllocator().compress();
@@ -586,22 +586,22 @@ template <int D> int FunctionTree<D>::crop(double prec, double splitFac, bool ab
  * Also returns an array with the corresponding indices defined as the
  * values of serialIx in refTree, and an array with the indices of the parent.
  * Set index -1 for nodes that are not present in refTree */
-template <int D>
-void FunctionTree<D>::makeCoeffVector(std::vector<double *> &coefs,
+template <int D, typename T>
+void FunctionTree<D, T>::makeCoeffVector(std::vector<T *> &coefs,
                                       std::vector<int> &indices,
                                       std::vector<int> &parent_indices,
                                       std::vector<double> &scalefac,
                                       int &max_index,
-                                      MWTree<D> &refTree,
-                                      std::vector<MWNode<D> *> *refNodes) {
+                                      MWTree<D, T> &refTree,
+                                      std::vector<MWNode<D, T> *> *refNodes) {
     coefs.clear();
     indices.clear();
     parent_indices.clear();
     max_index = 0;
     int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
     int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    std::vector<MWNode<D> *> refstack;  // nodes from refTree
-    std::vector<MWNode<D> *> thisstack; // nodes from this Tree
+    std::vector<MWNode<D, T> *> refstack;  // nodes from refTree
+    std::vector<MWNode<D, T> *> thisstack; // nodes from this Tree
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
         refstack.push_back(refTree.getRootBox().getNodes()[rIdx]);
         thisstack.push_back(this->getRootBox().getNodes()[rIdx]);
@@ -609,8 +609,8 @@ void FunctionTree<D>::makeCoeffVector(std::vector<double *> &coefs,
     int stack_p = 0;
     while (thisstack.size() > stack_p) {
         // refNode and thisNode are the same node in space, but on different trees
-        MWNode<D> *thisNode = thisstack[stack_p];
-        MWNode<D> *refNode = refstack[stack_p++];
+        MWNode<D, T> *thisNode = thisstack[stack_p];
+        MWNode<D, T> *refNode = refstack[stack_p++];
         coefs.push_back(thisNode->getCoefs());
         if (refNodes != nullptr) refNodes->push_back(refNode);
         if (refNode != nullptr) {
@@ -640,26 +640,26 @@ void FunctionTree<D>::makeCoeffVector(std::vector<double *> &coefs,
  * reference tree and a list of coefficients.
  * It is the reference tree (refTree) which is traversed, but one does not descend
  * into children if the norm of the tree is smaller than absPrec. */
-template <int D> void FunctionTree<D>::makeTreefromCoeff(MWTree<D> &refTree, std::vector<double *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode) {
-    std::vector<MWNode<D> *> stack;
-    std::map<int, MWNode<D> *> ix2node; // gives the nodes in this tree for a given ix
+template <int D, typename T> void FunctionTree<D, T>::makeTreefromCoeff(MWTree<D, T> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode) {
+    std::vector<MWNode<D, T> *> stack;
+    std::map<int, MWNode<D, T> *> ix2node; // gives the nodes in this tree for a given ix
     int sizecoef = (1 << this->getDim()) * this->getKp1_d();
     int sizecoefW = ((1 << this->getDim()) - 1) * this->getKp1_d();
     this->squareNorm = 0.0;
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < refTree.getRootBox().size(); rIdx++) {
-        MWNode<D> *refNode = refTree.getRootBox().getNodes()[rIdx];
+        MWNode<D, T> *refNode = refTree.getRootBox().getNodes()[rIdx];
         stack.push_back(refNode);
         int ix = ix2coef[refNode->getSerialIx()];
         ix2node[ix] = this->getRootBox().getNodes()[rIdx];
     }
 
     while (stack.size() > 0) {
-        MWNode<D> *refNode = stack.back(); // node in the reference tree refTree
+        MWNode<D, T> *refNode = stack.back(); // node in the reference tree refTree
         stack.pop_back();
         assert(ix2coef.count(refNode->getSerialIx()) > 0);
         int ix = ix2coef[refNode->getSerialIx()];
-        MWNode<D> *node = ix2node[ix]; // corresponding node in this tree
+        MWNode<D, T> *node = ix2node[ix]; // corresponding node in this tree
         // copy coefficients into this tree
         int size = sizecoefW;
         if (refNode->isRootNode() or mode == "copy") {
@@ -701,8 +701,8 @@ template <int D> void FunctionTree<D>::makeTreefromCoeff(MWTree<D> &refTree, std
         } else if ((absPrec < 0 or tree_utils::split_check(*node, absPrec, 1.0, true)) and refNode->getNChildren() > 0) {
             // include children in tree
             node->createChildren(true);
-            double *inp = node->getCoefs();
-            double *out = node->getMWChild(0).getCoefs();
+            T *inp = node->getCoefs();
+            T *out = node->getMWChild(0).getCoefs();
             tree_utils::mw_transform(*this, inp, out, false, sizecoef, true); // make the scaling part
             for (int i = 0; i < refNode->getNChildren(); i++) {
                 stack.push_back(refNode->children[i]); // means we continue to traverse the reference tree
@@ -717,9 +717,9 @@ template <int D> void FunctionTree<D>::makeTreefromCoeff(MWTree<D> &refTree, std
 }
 
 /** Traverse tree using DFS and append same nodes as another tree, without coefficients */
-template <int D> void FunctionTree<D>::appendTreeNoCoeff(MWTree<D> &inTree) {
-    std::vector<MWNode<D> *> instack;   // node from inTree
-    std::vector<MWNode<D> *> thisstack; // node from this Tree
+template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, T> &inTree) {
+    std::vector<MWNode<D, T> *> instack;   // node from inTree
+    std::vector<MWNode<D, T> *> thisstack; // node from this Tree
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < inTree.getRootBox().size(); rIdx++) {
         instack.push_back(inTree.getRootBox().getNodes()[rIdx]);
@@ -727,9 +727,9 @@ template <int D> void FunctionTree<D>::appendTreeNoCoeff(MWTree<D> &inTree) {
     }
     while (thisstack.size() > 0) {
         // inNode and thisNode are the same node in space, but on different trees
-        MWNode<D> *thisNode = thisstack.back();
+        MWNode<D, T> *thisNode = thisstack.back();
         thisstack.pop_back();
-        MWNode<D> *inNode = instack.back();
+        MWNode<D, T> *inNode = instack.back();
         instack.pop_back();
         if (inNode->getNChildren() > 0) {
             thisNode->clearIsEndNode();
@@ -741,10 +741,10 @@ template <int D> void FunctionTree<D>::appendTreeNoCoeff(MWTree<D> &inTree) {
         } else {
             // construct EndNodeTable for "This", starting from this branch
             // This could be done more efficiently, if it proves to be time consuming
-            std::vector<MWNode<D> *> branchstack; // local stack starting from this branch
+            std::vector<MWNode<D, T> *> branchstack; // local stack starting from this branch
             branchstack.push_back(thisNode);
             while (branchstack.size() > 0) {
-                MWNode<D> *branchNode = branchstack.back();
+                MWNode<D, T> *branchNode = branchstack.back();
                 branchstack.pop_back();
                 if (branchNode->getNChildren() > 0) {
                     for (int i = 0; i < branchNode->getNChildren(); i++) { branchstack.push_back(branchNode->children[i]); }
@@ -755,11 +755,11 @@ template <int D> void FunctionTree<D>::appendTreeNoCoeff(MWTree<D> &inTree) {
     }
 }
 
-template <int D> void FunctionTree<D>::deleteGenerated() {
+template <int D, typename T> void FunctionTree<D, T>::deleteGenerated() {
     for (int n = 0; n < this->getNEndNodes(); n++) this->getEndMWNode(n).deleteGenerated();
 }
 
-template <int D> void FunctionTree<D>::deleteGeneratedParents() {
+template <int D, typename T> void FunctionTree<D, T>::deleteGeneratedParents() {
     for (int n = 0; n < this->getRootBox().size(); n++) this->getRootMWNode(n).deleteParent();
 }
 
@@ -785,8 +785,12 @@ template <> int FunctionTree<3>::saveNodesAndRmCoeff() {
     return this->NodeIndex2serialIx.size();
 }
 
-template class FunctionTree<1>;
-template class FunctionTree<2>;
-template class FunctionTree<3>;
+template class FunctionTree<1, double>;
+template class FunctionTree<2, double>;
+template class FunctionTree<3, double>;
+
+template class FunctionTree<1, ComplexDouble>;
+template class FunctionTree<2, ComplexDouble>;
+template class FunctionTree<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 0be9563ea..0df33685c 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -52,24 +52,24 @@ namespace mrcpp {
  * uninitialized, and its square norm will be negative (minus one).
  */
 
-template <int D> class FunctionTree final : public MWTree<D>, public RepresentableFunction<D> {
+template <int D, typename T> class FunctionTree final : public MWTree<D, T>, public RepresentableFunction<D, T> {
 public:
     FunctionTree(const MultiResolutionAnalysis<D> &mra, const std::string &name)
             : FunctionTree(mra, nullptr, name) {}
-    FunctionTree(const MultiResolutionAnalysis<D> &mra, SharedMemory *sh_mem = nullptr, const std::string &name = "nn");
-    FunctionTree(const FunctionTree<D> &tree) = delete;
-    FunctionTree<D> &operator=(const FunctionTree<D> &tree) = delete;
+    FunctionTree(const MultiResolutionAnalysis<D> &mra, SharedMemory<T> *sh_mem = nullptr, const std::string &name = "nn");
+    FunctionTree(const FunctionTree<D, T> &tree) = delete;
+    FunctionTree<D, T> &operator=(const FunctionTree<D, T> &tree) = delete;
     ~FunctionTree() override;
 
-    double integrate() const;
+    T integrate() const;
     double integrateEndNodes(RepresentableFunction_M &f);
-    double evalf_precise(const Coord<D> &r);
-    double evalf(const Coord<D> &r) const override;
+    T evalf_precise(const Coord<D> &r);
+    T evalf(const Coord<D> &r) const override;
 
     int getNGenNodes() const { return getGenNodeAllocator().getNNodes(); }
 
-    void getEndValues(Eigen::VectorXd &data);
-    void setEndValues(Eigen::VectorXd &data);
+    void getEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data);
+    void setEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data);
 
     void saveTree(const std::string &file);
     void loadTree(const std::string &file);
@@ -79,42 +79,42 @@ template <int D> class FunctionTree final : public MWTree<D>, public Representab
     void power(double p);
     void rescale(double c);
     void normalize();
-    void add(double c, FunctionTree<D> &inp);
-    void absadd(double c, FunctionTree<D> &inp);
-    void multiply(double c, FunctionTree<D> &inp);
-    void map(FMap fmap);
+    void add(double c, FunctionTree<D, T> &inp);
+    void absadd(double c, FunctionTree<D, T> &inp);
+    void multiply(double c, FunctionTree<D, T> &inp);
+    void map(FMap<T, T> fmap);
 
     int getNChunks() { return this->getNodeAllocator().getNChunks(); }
     int getNChunksUsed() { return this->getNodeAllocator().getNChunksUsed(); }
 
     int crop(double prec, double splitFac = 1.0, bool absPrec = true);
 
-    FunctionNode<D> &getEndFuncNode(int i) { return static_cast<FunctionNode<D> &>(this->getEndMWNode(i)); }
-    FunctionNode<D> &getRootFuncNode(int i) { return static_cast<FunctionNode<D> &>(this->rootBox.getNode(i)); }
+    FunctionNode<D, T> &getEndFuncNode(int i) { return static_cast<FunctionNode<D, T> &>(this->getEndMWNode(i)); }
+    FunctionNode<D, T> &getRootFuncNode(int i) { return static_cast<FunctionNode<D, T> &>(this->rootBox.getNode(i)); }
 
-    NodeAllocator<D> &getGenNodeAllocator() { return *this->genNodeAllocator_p; }
-    const NodeAllocator<D> &getGenNodeAllocator() const { return *this->genNodeAllocator_p; }
+    NodeAllocator<D, T> &getGenNodeAllocator() { return *this->genNodeAllocator_p; }
+    const NodeAllocator<D, T> &getGenNodeAllocator() const { return *this->genNodeAllocator_p; }
 
-    const FunctionNode<D> &getEndFuncNode(int i) const { return static_cast<const FunctionNode<D> &>(this->getEndMWNode(i)); }
-    const FunctionNode<D> &getRootFuncNode(int i) const { return static_cast<const FunctionNode<D> &>(this->rootBox.getNode(i)); }
+    const FunctionNode<D, T> &getEndFuncNode(int i) const { return static_cast<const FunctionNode<D, T> &>(this->getEndMWNode(i)); }
+    const FunctionNode<D, T> &getRootFuncNode(int i) const { return static_cast<const FunctionNode<D, T> &>(this->rootBox.getNode(i)); }
 
     void deleteGenerated();
     void deleteGeneratedParents();
 
-    void makeCoeffVector(std::vector<double *> &coefs,
+    void makeCoeffVector(std::vector<T *> &coefs,
                          std::vector<int> &indices,
                          std::vector<int> &parent_indices,
                          std::vector<double> &scalefac,
                          int &max_index,
-                         MWTree<D> &refTree,
-                         std::vector<MWNode<D> *> *refNodes = nullptr);
-    void makeTreefromCoeff(MWTree<D> &refTree, std::vector<double *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode = "adaptive");
-    void appendTreeNoCoeff(MWTree<D> &inTree);
+                         MWTree<D, T> &refTree,
+                         std::vector<MWNode<D, T> *> *refNodes = nullptr);
+    void makeTreefromCoeff(MWTree<D, T> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode = "adaptive");
+    void appendTreeNoCoeff(MWTree<D, T> &inTree);
 
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
 protected:
-    std::unique_ptr<NodeAllocator<D>> genNodeAllocator_p{nullptr};
+    std::unique_ptr<NodeAllocator<D, T>> genNodeAllocator_p{nullptr};
     std::ostream &print(std::ostream &o) const override;
 
     void allocRootNodes();
diff --git a/src/trees/FunctionTreeVector.h b/src/trees/FunctionTreeVector.h
index d73005cd8..c0a4a3a76 100644
--- a/src/trees/FunctionTreeVector.h
+++ b/src/trees/FunctionTreeVector.h
@@ -32,14 +32,14 @@
 
 namespace mrcpp {
 
-template <int D> using CoefsFunctionTree = std::tuple<double, FunctionTree<D> *>;
-template <int D> using FunctionTreeVector = std::vector<CoefsFunctionTree<D>>;
+template <int D, typename T = double> using CoefsFunctionTree = std::tuple<double, FunctionTree<D, T> *>;
+template <int D, typename T = double> using FunctionTreeVector = std::vector<CoefsFunctionTree<D, T>>;
 
 /** @brief Remove all entries in the vector
  *  @param[in] fs: Vector to clear
  *  @param[in] dealloc: Option to free FunctionTree pointer before clearing
  */
-template <int D> void clear(FunctionTreeVector<D> &fs, bool dealloc = false) {
+template <int D, typename T> void clear(FunctionTreeVector<D, T> &fs, bool dealloc = false) {
     if (dealloc) {
         for (auto &t : fs) {
             auto f = std::get<1>(t);
@@ -52,7 +52,7 @@ template <int D> void clear(FunctionTreeVector<D> &fs, bool dealloc = false) {
 /** @returns Total number of nodes of all trees in the vector
  *  @param[in] fs: Vector to fetch from
  */
-template <int D> int get_n_nodes(const FunctionTreeVector<D> &fs) {
+template <int D, typename T> int get_n_nodes(const FunctionTreeVector<D, T> &fs) {
     int nNodes = 0;
     for (const auto &t : fs) {
         auto f = std::get<1>(t);
@@ -64,7 +64,7 @@ template <int D> int get_n_nodes(const FunctionTreeVector<D> &fs) {
 /** @returns Total size of all trees in the vector, in kB
  *  @param[in] fs: Vector to fetch from
  */
-template <int D> int get_size_nodes(const FunctionTreeVector<D> &fs) {
+template <int D, typename T> int get_size_nodes(const FunctionTreeVector<D, T> &fs) {
     int sNodes = 0;
     for (const auto &t : fs) {
         auto f = std::get<1>(t);
@@ -77,7 +77,7 @@ template <int D> int get_size_nodes(const FunctionTreeVector<D> &fs) {
  *  @param[in] fs: Vector to fetch from
  *  @param[in] i: Position in vector
  */
-template <int D> double get_coef(const FunctionTreeVector<D> &fs, int i) {
+template <int D, typename T> double get_coef(const FunctionTreeVector<D, T> &fs, int i) {
     return std::get<0>(fs[i]);
 }
 
@@ -85,7 +85,7 @@ template <int D> double get_coef(const FunctionTreeVector<D> &fs, int i) {
  *  @param[in] fs: Vector to fetch from
  *  @param[in] i: Position in vector
  */
-template <int D> FunctionTree<D> &get_func(FunctionTreeVector<D> &fs, int i) {
+template <int D, typename T> FunctionTree<D, T> &get_func(FunctionTreeVector<D, T> &fs, int i) {
     return *(std::get<1>(fs[i]));
 }
 
@@ -93,7 +93,7 @@ template <int D> FunctionTree<D> &get_func(FunctionTreeVector<D> &fs, int i) {
  *  @param[in] fs: Vector to fetch from
  *  @param[in] i: Position in vector
  */
-template <int D> const FunctionTree<D> &get_func(const FunctionTreeVector<D> &fs, int i) {
+template <int D, typename T> const FunctionTree<D, T> &get_func(const FunctionTreeVector<D, T> &fs, int i) {
     return *(std::get<1>(fs[i]));
 }
 } // namespace mrcpp
diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp
index 3297ffb3f..ca5c59550 100644
--- a/src/trees/MWNode.cpp
+++ b/src/trees/MWNode.cpp
@@ -45,8 +45,8 @@ namespace mrcpp {
  *
  * @details Should be used only by NodeAllocator to obtain
  *  virtual table pointers for the derived classes. */
-template <int D>
-MWNode<D>::MWNode()
+template <int D, typename T>
+MWNode<D, T>::MWNode()
         : tree(nullptr)
         , parent(nullptr)
         , nodeIndex()
@@ -66,8 +66,8 @@ MWNode<D>::MWNode()
  *
  * @details Constructor for an empty node, given the corresponding MWTree and NodeIndex
  */
-template <int D>
-MWNode<D>::MWNode(MWTree<D> *tree, const NodeIndex<D> &idx)
+template <int D, typename T>
+MWNode<D, T>::MWNode(MWTree<D, T> *tree, const NodeIndex<D> &idx)
         : tree(tree)
         , parent(nullptr)
         , nodeIndex(idx)
@@ -87,8 +87,8 @@ MWNode<D>::MWNode(MWTree<D> *tree, const NodeIndex<D> &idx)
  * @details Constructor for root nodes. It requires the corresponding
  * MWTree and an integer to fetch the right NodeIndex
  */
-template <int D>
-MWNode<D>::MWNode(MWTree<D> *tree, int rIdx)
+template <int D, typename T>
+MWNode<D, T>::MWNode(MWTree<D, T> *tree, int rIdx)
         : tree(tree)
         , parent(nullptr)
         , nodeIndex(tree->getRootBox().getNodeIndex(rIdx))
@@ -108,8 +108,8 @@ MWNode<D>::MWNode(MWTree<D> *tree, int rIdx)
  * @details Constructor for leaf nodes. It requires the corresponding
  * parent and an integer to identify the correct child.
  */
-template <int D>
-MWNode<D>::MWNode(MWNode<D> *parent, int cIdx)
+template <int D, typename T>
+MWNode<D, T>::MWNode(MWNode<D, T> *parent, int cIdx)
         : tree(parent->tree)
         , parent(parent)
         , nodeIndex(parent->getNodeIndex().child(cIdx))
@@ -130,8 +130,8 @@ MWNode<D>::MWNode(MWNode<D> *parent, int cIdx)
  * does not "belong" to the tree: it cannot be accessed by traversing
  * the tree.
  */
-template <int D>
-MWNode<D>::MWNode(const MWNode<D> &node, bool allocCoef, bool SetCoef)
+template <int D, typename T>
+MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
         : tree(node.tree)
         , parent(nullptr)
         , nodeIndex(node.nodeIndex)
@@ -163,7 +163,7 @@ MWNode<D>::MWNode(const MWNode<D> &node, bool allocCoef, bool SetCoef)
  *
  * @details Recursive deallocation of a node and all its decendants
  */
-template <int D> MWNode<D>::~MWNode() {
+template <int D, typename T> MWNode<D, T>::~MWNode() {
     if (this->isLooseNode()) this->freeCoefs();
     MRCPP_DESTROY_OMP_LOCK();
 }
@@ -174,7 +174,7 @@ template <int D> MWNode<D>::~MWNode() {
  * called (derived classes must implement their own version). This was
  * to avoid having pure virtual methods in the base class.
  */
-template <int D> void MWNode<D>::dealloc() {
+template <int D, typename T> void MWNode<D, T>::dealloc() {
     NOT_REACHED_ABORT;
 }
 
@@ -184,13 +184,13 @@ template <int D> void MWNode<D>::dealloc() {
  * are not treated by the NodeAllocator class.
  *
  */
-template <int D> void MWNode<D>::allocCoefs(int n_blocks, int block_size) {
+template <int D, typename T> void MWNode<D, T>::allocCoefs(int n_blocks, int block_size) {
     if (this->n_coefs != 0) MSG_ABORT("n_coefs should be zero");
     if (this->isAllocated()) MSG_ABORT("Coefs already allocated");
     if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!");
 
     this->n_coefs = n_blocks * block_size;
-    this->coefs = new double[this->n_coefs];
+    this->coefs = new T[this->n_coefs];
 
     this->clearHasCoefs();
     this->setIsAllocated();
@@ -202,7 +202,7 @@ template <int D> void MWNode<D>::allocCoefs(int n_blocks, int block_size) {
  * are not treated by the NodeAllocator class.
  *
  */
-template <int D> void MWNode<D>::freeCoefs() {
+template <int D, typename T> void MWNode<D, T>::freeCoefs() {
     if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!");
 
     if (this->coefs != nullptr) delete[] this->coefs;
@@ -216,7 +216,7 @@ template <int D> void MWNode<D>::freeCoefs() {
 
 /** @brief Printout of node coefficients
  */
-template <int D> void MWNode<D>::printCoefs() const {
+template <int D, typename T> void MWNode<D, T>::printCoefs() const {
     if (not this->isAllocated()) MSG_ABORT("Node is not allocated");
     println(0, "\nMW coefs");
     int kp1_d = this->getKp1_d();
@@ -228,18 +228,18 @@ template <int D> void MWNode<D>::printCoefs() const {
 
 /** @brief wraps the MW coefficients into an eigen vector object
  */
-template <int D> void MWNode<D>::getCoefs(Eigen::VectorXd &c) const {
+template <int D, typename T> void MWNode<D, T>::getCoefs(Eigen::Matrix<T, Eigen::Dynamic, 1> &c) const {
     if (not this->isAllocated()) MSG_ABORT("Node is not allocated");
     if (not this->hasCoefs()) MSG_ABORT("Node has no coefs");
     if (this->n_coefs == 0) MSG_ABORT("ncoefs == 0");
 
-    c = VectorXd::Map(this->coefs, this->n_coefs);
+    c = Eigen::Matrix<T, Eigen::Dynamic, 1>::Map(this->coefs, this->n_coefs);
 }
 
 /** @brief sets all MW coefficients and the norms to zero
  *
  */
-template <int D> void MWNode<D>::zeroCoefs() {
+template <int D, typename T> void MWNode<D, T>::zeroCoefs() {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated " << *this);
 
     for (int i = 0; i < this->n_coefs; i++) { this->coefs[i] = 0.0; }
@@ -249,7 +249,7 @@ template <int D> void MWNode<D>::zeroCoefs() {
 
 /** @brief Attach a set of coefs to this node. Only used locally (the tree is not aware of this).
  */
-template <int D> void MWNode<D>::attachCoefs(double *coefs) {
+template <int D, typename T> void MWNode<D, T>::attachCoefs(T *coefs) {
     this->coefs = coefs;
     this->setHasCoefs();
 }
@@ -264,7 +264,7 @@ template <int D> void MWNode<D>::attachCoefs(double *coefs) {
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-template <int D> void MWNode<D>::setCoefBlock(int block, int block_size, const double *c) {
+template <int D, typename T> void MWNode<D, T>::setCoefBlock(int block, int block_size, const T *c) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = c[i]; }
 }
@@ -279,7 +279,7 @@ template <int D> void MWNode<D>::setCoefBlock(int block, int block_size, const d
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-template <int D> void MWNode<D>::addCoefBlock(int block, int block_size, const double *c) {
+template <int D, typename T> void MWNode<D, T>::addCoefBlock(int block, int block_size, const T *c) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] += c[i]; }
 }
@@ -293,7 +293,7 @@ template <int D> void MWNode<D>::addCoefBlock(int block, int block_size, const d
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-template <int D> void MWNode<D>::zeroCoefBlock(int block, int block_size) {
+  template <int D, typename T> void MWNode<D, T>::zeroCoefBlock(int block, int block_size) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = 0.0; }
 }
@@ -309,7 +309,7 @@ template <int D> void MWNode<D>::zeroCoefBlock(int block, int block_size) {
  * already be present and its memory allocated for this to work
  * properly.
  */
-template <int D> void MWNode<D>::giveChildrenCoefs(bool overwrite) {
+  template <int D, typename T> void MWNode<D, T>::giveChildrenCoefs(bool overwrite) {
     assert(this->isBranchNode());
     if (not this->isAllocated()) MSG_ABORT("Not allocated!");
     if (not this->hasCoefs()) MSG_ABORT("No coefficients!");
@@ -320,8 +320,8 @@ template <int D> void MWNode<D>::giveChildrenCoefs(bool overwrite) {
 
     // coeff of child should be have been allocated already here
     int stride = getMWChild(0).getNCoefs();
-    double *inp = getCoefs();
-    double *out = getMWChild(0).getCoefs();
+    T *inp = getCoefs();
+    T *out = getMWChild(0).getCoefs();
     bool readOnlyScaling = false;
     if (this->isGenNode()) readOnlyScaling = true;
 
@@ -345,9 +345,9 @@ template <int D> void MWNode<D>::giveChildrenCoefs(bool overwrite) {
  * node. The scaling coefficients of the selected child are then
  * copied/summed in the correct child node.
  */
-template <int D> void MWNode<D>::giveChildCoefs(int cIdx, bool overwrite) {
+  template <int D, typename T> void MWNode<D, T>::giveChildCoefs(int cIdx, bool overwrite) {
 
-    MWNode<D> node_i = *this;
+    MWNode<D, T> node_i = *this;
 
     node_i.mwTransform(Reconstruction);
 
@@ -355,7 +355,7 @@ template <int D> void MWNode<D>::giveChildCoefs(int cIdx, bool overwrite) {
     int nChildren = this->getTDim();
 
     if (this->children[cIdx] == nullptr) MSG_ABORT("Child does not exist!");
-    MWNode<D> &child = getMWChild(cIdx);
+    MWNode<D, T> &child = getMWChild(cIdx);
     if (overwrite) {
         child.setCoefBlock(0, kp1_d, &node_i.getCoefs()[cIdx * kp1_d]);
     } else {
@@ -371,12 +371,12 @@ template <int D> void MWNode<D>::giveChildCoefs(int cIdx, bool overwrite) {
  *
  * \warning This routine is only used in connection with Periodic Boundary Conditions
  */
-template <int D> void MWNode<D>::giveParentCoefs(bool overwrite) {
-    MWNode<D> node = *this;
-    MWNode<D> &parent = getMWParent();
+  template <int D, typename T> void MWNode<D, T>::giveParentCoefs(bool overwrite) {
+    MWNode<D, T> node = *this;
+    MWNode<D, T> &parent = getMWParent();
     int kp1_d = this->getKp1_d();
     if (node.getScale() == 0) {
-        NodeBox<D> &box = this->getMWTree().getRootBox();
+        NodeBox<D, T> &box = this->getMWTree().getRootBox();
         auto reverse = getTDim() - 1;
         for (auto i = 0; i < getTDim(); i++) { parent.setCoefBlock(i, kp1_d, &box.getNode(reverse - i).getCoefs()[0]); }
     } else {
@@ -393,11 +393,11 @@ template <int D> void MWNode<D>::giveParentCoefs(bool overwrite) {
  * them consecutively in the corresponding block of the parent,
  * following the usual bitwise notation.
  */
-template <int D> void MWNode<D>::copyCoefsFromChildren() {
+  template <int D, typename T> void MWNode<D, T>::copyCoefsFromChildren() {
     int kp1_d = this->getKp1_d();
     int nChildren = this->getTDim();
     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
-        MWNode<D> &child = getMWChild(cIdx);
+      MWNode<D, T> &child = getMWChild(cIdx);
         if (not child.hasCoefs()) MSG_ABORT("Child has no coefs");
         setCoefBlock(cIdx, kp1_d, child.getCoefs());
     }
@@ -409,7 +409,7 @@ template <int D> void MWNode<D>::copyCoefsFromChildren() {
  * coefficients of the parent and it generates the scaling
  * coefficients for the children
  */
-template <int D> void MWNode<D>::threadSafeGenChildren() {
+  template <int D, typename T> void MWNode<D, T>::threadSafeGenChildren() {
     if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; }
     MRCPP_SET_OMP_LOCK();
     if (isLeafNode()) {
@@ -429,7 +429,7 @@ template <int D> void MWNode<D>::threadSafeGenChildren() {
  * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1)
  *       representation, instead of s/d (scaling and wavelet).
  */
-template <int D> void MWNode<D>::cvTransform(int operation) {
+  template <int D, typename T> void MWNode<D, T>::cvTransform(int operation) {
     int kp1 = this->getKp1();
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     int kp1_d = this->getKp1_d();
@@ -437,17 +437,17 @@ template <int D> void MWNode<D>::cvTransform(int operation) {
 
     auto sb = this->getMWTree().getMRA().getScalingBasis();
     const MatrixXd &S = sb.getCVMap(operation);
-    double o_vec[nCoefs];
-    double *out_vec = o_vec;
-    double *in_vec = this->coefs;
+    T o_vec[nCoefs];
+    T *out_vec = o_vec;
+    T *in_vec = this->coefs;
 
     for (int i = 0; i < D; i++) {
         for (int t = 0; t < this->getTDim(); t++) {
-            double *out = out_vec + t * kp1_d;
-            double *in = in_vec + t * kp1_d;
+            T *out = out_vec + t * kp1_d;
+            T *in = in_vec + t * kp1_d;
             math_utils::apply_filter(out, in, S, kp1, kp1_dm1, 0.0);
         }
-        double *tmp = in_vec;
+        T *tmp = in_vec;
         in_vec = out_vec;
         out_vec = tmp;
     }
@@ -471,8 +471,8 @@ template <int D> void MWNode<D>::cvTransform(int operation) {
     }
 }
 /* Old interpolating version, somewhat faster
-template<int D>
-void MWNode<D>::cvTransform(int operation) {
+template<int D, typename T>
+void MWNode<D, T>::cvTransform(int operation) {
     const ScalingBasis &sf = this->getMWTree().getMRA().getScalingBasis();
     if (sf.getScalingType() != Interpol) {
         NOT_IMPLEMENTED_ABORT;
@@ -536,7 +536,7 @@ void MWNode<D>::cvTransform(int operation) {
   *
   *  * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1).
   */
-template <int D> void MWNode<D>::mwTransform(int operation) {
+  template <int D, typename T> void MWNode<D, T>::mwTransform(int operation) {
     int kp1 = this->getKp1();
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     int kp1_d = this->getKp1_d();
@@ -544,20 +544,20 @@ template <int D> void MWNode<D>::mwTransform(int operation) {
     const MWFilter &filter = getMWTree().getMRA().getFilter();
     double overwrite = 0.0;
 
-    double o_vec[nCoefs];
-    double *out_vec = o_vec;
-    double *in_vec = this->coefs;
+    T o_vec[nCoefs];
+    T *out_vec = o_vec;
+    T *in_vec = this->coefs;
 
     for (int i = 0; i < D; i++) {
         int mask = 1 << i;
         for (int gt = 0; gt < this->getTDim(); gt++) {
-            double *out = out_vec + gt * kp1_d;
+            T *out = out_vec + gt * kp1_d;
             for (int ft = 0; ft < this->getTDim(); ft++) {
                 /* Operate in direction i only if the bits along other
                  * directions are identical. The bit of the direction we
                  * operate on determines the appropriate filter/operator */
                 if ((gt | mask) == (ft | mask)) {
-                    double *in = in_vec + ft * kp1_d;
+                    T *in = in_vec + ft * kp1_d;
                     int fIdx = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                     const MatrixXd &oper = filter.getSubFilter(fIdx, operation);
                     math_utils::apply_filter(out, in, oper, kp1, kp1_dm1, overwrite);
@@ -566,7 +566,7 @@ template <int D> void MWNode<D>::mwTransform(int operation) {
             }
             overwrite = 0.0;
         }
-        double *tmp = in_vec;
+        T *tmp = in_vec;
         in_vec = out_vec;
         out_vec = tmp;
     }
@@ -576,19 +576,19 @@ template <int D> void MWNode<D>::mwTransform(int operation) {
 }
 
 /** @brief Set all norms to Undefined. */
-template <int D> void MWNode<D>::clearNorms() {
+  template <int D, typename T> void MWNode<D, T>::clearNorms() {
     this->squareNorm = -1.0;
     for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = -1.0; }
 }
 
 /** @brief Set all norms to zero. */
-template <int D> void MWNode<D>::zeroNorms() {
+  template <int D, typename T> void MWNode<D, T>::zeroNorms() {
     this->squareNorm = 0.0;
     for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = 0.0; }
 }
 
 /** @brief Calculate and store square norm and component norms, if allocated. */
-template <int D> void MWNode<D>::calcNorms() {
+  template <int D, typename T> void MWNode<D, T>::calcNorms() {
     this->squareNorm = 0.0;
     for (int i = 0; i < this->getTDim(); i++) {
         double norm_i = calcComponentNorm(i);
@@ -598,7 +598,7 @@ template <int D> void MWNode<D>::calcNorms() {
 }
 
 /** @brief Calculate and return the squared scaling norm. */
-template <int D> double MWNode<D>::getScalingNorm() const {
+  template <int D, typename T> double MWNode<D, T>::getScalingNorm() const {
     double sNorm = this->getComponentNorm(0);
     if (sNorm >= 0.0) {
         return sNorm * sNorm;
@@ -608,7 +608,7 @@ template <int D> double MWNode<D>::getScalingNorm() const {
 }
 
 /** @brief Calculate and return the squared wavelet norm. */
-template <int D> double MWNode<D>::getWaveletNorm() const {
+  template <int D, typename T> double MWNode<D, T>::getWaveletNorm() const {
     double wNorm = 0.0;
     for (int i = 1; i < this->getTDim(); i++) {
         double norm_i = this->getComponentNorm(i);
@@ -622,28 +622,28 @@ template <int D> double MWNode<D>::getWaveletNorm() const {
 }
 
 /** @brief Calculate the norm of one component (NOT the squared norm!). */
-template <int D> double MWNode<D>::calcComponentNorm(int i) const {
+  template <int D, typename T> double MWNode<D, T>::calcComponentNorm(int i) const {
     if (this->isGenNode() and i != 0) return 0.0;
     assert(this->isAllocated());
     assert(this->hasCoefs());
 
-    const double *c = this->getCoefs();
+    const T *c = this->getCoefs();
     int size = this->getKp1_d();
     int start = i * size;
 
     double sq_norm = 0.0;
-#ifdef HAVE_BLAS
-    sq_norm = cblas_ddot(size, &c[start], 1, &c[start], 1);
-#else
-    for (int i = start; i < start + size; i++) { sq_norm += c[i] * c[i]; }
-#endif
+//#ifdef HAVE_BLAS  // BLAS path disabled: cblas_ddot only handles real doubles, but T may be ComplexDouble
+//    sq_norm = cblas_ddot(size, &c[start], 1, &c[start], 1);
+//#else
+    for (int i = start; i < start + size; i++) { sq_norm += std::norm(c[i]); }
+//#endif
     return std::sqrt(sq_norm);
 }
 
 /** @brief Update the coefficients of the node by a mw transform of the scaling
  * coefficients of the children.
  */
-template <int D> void MWNode<D>::reCompress() {
+  template <int D, typename T> void MWNode<D, T>::reCompress() {
     if (this->isGenNode()) NOT_IMPLEMENTED_ABORT;
     if (this->isBranchNode()) {
         if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
@@ -660,12 +660,12 @@ template <int D> void MWNode<D>::reCompress() {
  * @param[in] splitFac: factor used in the split check (larger factor means tighter threshold for finer nodes)
  * @param[in] absPrec: flag to switch from relative (false) to absolute (true) precision.
  */
-template <int D> bool MWNode<D>::crop(double prec, double splitFac, bool absPrec) {
+  template <int D, typename T> bool MWNode<D, T>::crop(double prec, double splitFac, bool absPrec) {
     if (this->isEndNode()) {
         return true;
     } else {
         for (int i = 0; i < this->getTDim(); i++) {
-            MWNode<D> &child = *this->children[i];
+	  MWNode<D, T> &child = *this->children[i];
             if (child.crop(prec, splitFac, absPrec)) {
                 if (tree_utils::split_check(*this, prec, splitFac, absPrec) == false) {
                     this->deleteChildren();
@@ -677,15 +677,15 @@ template <int D> bool MWNode<D>::crop(double prec, double splitFac, bool absPrec
     return false;
 }
 
-template <int D> void MWNode<D>::createChildren(bool coefs) {
+  template <int D, typename T> void MWNode<D, T>::createChildren(bool coefs) {
     NOT_REACHED_ABORT;
 }
 
-template <int D> void MWNode<D>::genChildren() {
+  template <int D, typename T> void MWNode<D, T>::genChildren() {
     NOT_REACHED_ABORT;
 }
 
-template <int D> void MWNode<D>::genParent() {
+  template <int D, typename T> void MWNode<D, T>::genParent() {
     NOT_REACHED_ABORT;
 }
 
@@ -694,11 +694,11 @@ template <int D> void MWNode<D>::genParent() {
  * @details
  * Leaves node as LeafNode and children[] as null pointer.
  */
-template <int D> void MWNode<D>::deleteChildren() {
+  template <int D, typename T> void MWNode<D, T>::deleteChildren() {
     if (this->isLeafNode()) return;
     for (int cIdx = 0; cIdx < getTDim(); cIdx++) {
         if (this->children[cIdx] != nullptr) {
-            MWNode<D> &child = getMWChild(cIdx);
+	  MWNode<D, T> &child = getMWChild(cIdx);
             child.deleteChildren();
             child.dealloc();
         }
@@ -709,9 +709,9 @@ template <int D> void MWNode<D>::deleteChildren() {
 }
 
 /** @brief Recursive deallocation of parent and all their forefathers. */
-template <int D> void MWNode<D>::deleteParent() {
+  template <int D, typename T> void MWNode<D, T>::deleteParent() {
     if (this->parent == nullptr) return;
-    MWNode<D> &parent = getMWParent();
+    MWNode<D, T> &parent = getMWParent();
     parent.deleteParent();
     parent.dealloc();
     this->parentSerialIx = -1;
@@ -720,7 +720,7 @@ template <int D> void MWNode<D>::deleteParent() {
 
 
 /** @brief Deallocation of all generated nodes . */
-template <int D> void MWNode<D>::deleteGenerated() {
+  template <int D, typename T> void MWNode<D, T>::deleteGenerated() {
     if (this->isBranchNode()) {
         if (this->isEndNode()) {
             this->deleteChildren();
@@ -731,7 +731,7 @@ template <int D> void MWNode<D>::deleteGenerated() {
 }
 
 /** @brief returns the coordinates of the centre of the node */
-template <int D> Coord<D> MWNode<D>::getCenter() const {
+  template <int D, typename T> Coord<D> MWNode<D, T>::getCenter() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -741,7 +741,7 @@ template <int D> Coord<D> MWNode<D>::getCenter() const {
 }
 
 /** @brief returns the upper bounds of the D-interval defining the node  */
-template <int D> Coord<D> MWNode<D>::getUpperBounds() const {
+  template <int D, typename T> Coord<D> MWNode<D, T>::getUpperBounds() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -751,7 +751,7 @@ template <int D> Coord<D> MWNode<D>::getUpperBounds() const {
 }
 
 /** @brief returns the lower bounds of the D-interval defining the node  */
-template <int D> Coord<D> MWNode<D>::getLowerBounds() const {
+  template <int D, typename T> Coord<D> MWNode<D, T>::getLowerBounds() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -768,7 +768,7 @@ template <int D> Coord<D> MWNode<D>::getLowerBounds() const {
  * to be followed at the current scale in oder to get to the requested
  * node at the final scale. The result is the index of the child needed.
  * The index is obtained by bit manipulation of of the translation indices. */
-template <int D> int MWNode<D>::getChildIndex(const NodeIndex<D> &nIdx) const {
+  template <int D, typename T> int MWNode<D, T>::getChildIndex(const NodeIndex<D> &nIdx) const {
     assert(isAncestor(nIdx));
     int cIdx = 0;
     int diffScale = nIdx.getScale() - getScale() - 1;
@@ -788,7 +788,7 @@ template <int D> int MWNode<D>::getChildIndex(const NodeIndex<D> &nIdx) const {
  *
  * @detailsGiven a point in space, determines which child should be followed
  * to get to the corresponding terminal node. */
-template <int D> int MWNode<D>::getChildIndex(const Coord<D> &r) const {
+  template <int D, typename T> int MWNode<D, T>::getChildIndex(const Coord<D> &r) const {
     assert(hasCoord(r));
     int cIdx = 0;
     double sFac = std::pow(2.0, -getScale());
@@ -813,7 +813,7 @@ template <int D> int MWNode<D>::getChildIndex(const Coord<D> &r) const {
  * grid of quadrature points.
  *
  */
-template <int D> void MWNode<D>::getPrimitiveQuadPts(MatrixXd &pts) const {
+  template <int D, typename T> void MWNode<D, T>::getPrimitiveQuadPts(MatrixXd &pts) const {
     int kp1 = this->getKp1();
     pts = MatrixXd::Zero(D, kp1);
 
@@ -838,7 +838,7 @@ template <int D> void MWNode<D>::getPrimitiveQuadPts(MatrixXd &pts) const {
  * nodes.
  *
  */
-template <int D> void MWNode<D>::getPrimitiveChildPts(MatrixXd &pts) const {
+  template <int D, typename T> void MWNode<D, T>::getPrimitiveChildPts(MatrixXd &pts) const {
     int kp1 = this->getKp1();
     pts = MatrixXd::Zero(D, 2 * kp1);
 
@@ -863,7 +863,7 @@ template <int D> void MWNode<D>::getPrimitiveChildPts(MatrixXd &pts) const {
  * vectors of quadrature points.
  *
  */
-template <int D> void MWNode<D>::getExpandedQuadPts(Eigen::MatrixXd &pts) const {
+  template <int D, typename T> void MWNode<D, T>::getExpandedQuadPts(Eigen::MatrixXd &pts) const {
     MatrixXd prim_pts;
     getPrimitiveQuadPts(prim_pts);
 
@@ -887,7 +887,7 @@ template <int D> void MWNode<D>::getExpandedQuadPts(Eigen::MatrixXd &pts) const
  * vectors of quadrature points.
  *
  */
-template <int D> void MWNode<D>::getExpandedChildPts(MatrixXd &pts) const {
+  template <int D, typename T> void MWNode<D, T>::getExpandedChildPts(MatrixXd &pts) const {
     MatrixXd prim_pts;
     getPrimitiveChildPts(prim_pts);
 
@@ -921,7 +921,7 @@ template <int D> void MWNode<D>::getExpandedChildPts(MatrixXd &pts) const {
  * the node does not exist, or if it is a GenNode. Recursion starts at at this
  * node and ASSUMES the requested node is in fact decending from this node.
  */
-template <int D> const MWNode<D> *MWNode<D>::retrieveNodeNoGen(const NodeIndex<D> &idx) const {
+  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) const {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -945,7 +945,7 @@ template <int D> const MWNode<D> *MWNode<D>::retrieveNodeNoGen(const NodeIndex<D
  * the node does not exist, or if it is a GenNode. Recursion starts at at this
  * node and ASSUMES the requested node is in fact decending from this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveNodeNoGen(const NodeIndex<D> &idx) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -971,7 +971,7 @@ template <int D> MWNode<D> *MWNode<D>::retrieveNodeNoGen(const NodeIndex<D> &idx
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-template <int D> const MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) const {
+  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) const {
     if (getDepth() == depth or this->isEndNode()) { return this; }
     int cIdx = getChildIndex(r);
     assert(this->children[cIdx] != nullptr);
@@ -990,7 +990,7 @@ template <int D> const MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const Coord<D
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) {
     if (getDepth() == depth or this->isEndNode()) { return this; }
     int cIdx = getChildIndex(r);
     assert(this->children[cIdx] != nullptr);
@@ -1008,7 +1008,7 @@ template <int D> MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const Coord<D> &r,
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-template <int D> const MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) const {
+  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) const {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -1034,7 +1034,7 @@ template <int D> const MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const NodeInd
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -1059,7 +1059,7 @@ template <int D> MWNode<D> *MWNode<D>::retrieveNodeOrEndNode(const NodeIndex<D>
  * that does not exist. Recursion starts at this node and ASSUMES the
  * requested node is in fact decending from this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveNode(const Coord<D> &r, int depth) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const Coord<D> &r, int depth) {
     if (depth < 0) MSG_ABORT("Invalid argument");
 
     if (getDepth() == depth) { return this; }
@@ -1080,13 +1080,15 @@ template <int D> MWNode<D> *MWNode<D>::retrieveNode(const Coord<D> &r, int depth
  * does not exist. Recursion starts at this node and ASSUMES the requested
  * node is in fact descending from this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveNode(const NodeIndex<D> &idx) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const NodeIndex<D> &idx) {
     if (getScale() == idx.getScale()) { // we're done
         if (tree->isLocal) {
+	   NOT_IMPLEMENTED_ABORT;
             // has to fetch coeff in Bank. NOT USED YET
-            int ncoefs = (1 << D) * this->getKp1_d();
-            coefs = new double[ncoefs]; // TODO must be cleaned at some stage
-            tree->getNodeCoeff(idx, coefs);
+            // Legacy Bank fetch, kept for reference only: it allocates double[], so it cannot serve a generic T.
+            //int ncoefs = (1 << D) * this->getKp1_d();
+            //coefs = new double[ncoefs]; // TODO must be cleaned at some stage
+            //tree->getNodeCoeff(idx, coefs);
         }
         assert(getNodeIndex() == idx);
         return this;
@@ -1111,7 +1113,7 @@ template <int D> MWNode<D> *MWNode<D>::retrieveNode(const NodeIndex<D> &idx) {
  * does not exist. Recursion starts at this node and ASSUMES the requested
  * node is in fact related to this node.
  */
-template <int D> MWNode<D> *MWNode<D>::retrieveParent(const NodeIndex<D> &idx) {
+  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveParent(const NodeIndex<D> &idx) {
     if (getScale() < idx.getScale()) MSG_ABORT("Scale error")
     if (getScale() == idx.getScale()) return this;
     if (this->parent == nullptr) {
@@ -1130,7 +1132,7 @@ template <int D> MWNode<D> *MWNode<D>::retrieveParent(const NodeIndex<D> &idx) {
  * found, do not generate any new node, but rather give the value of the norm
  * assuming the function is uniformly distributed within the node.
  */
-template <int D> double MWNode<D>::getNodeNorm(const NodeIndex<D> &idx) const {
+  template <int D, typename T> double MWNode<D, T>::getNodeNorm(const NodeIndex<D> &idx) const {
     if (this->getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return std::sqrt(this->squareNorm);
@@ -1148,7 +1150,7 @@ template <int D> double MWNode<D>::getNodeNorm(const NodeIndex<D> &idx) const {
  *
  * @param[in] r: point coordinates
  */
-template <int D> bool MWNode<D>::hasCoord(const Coord<D> &r) const {
+  template <int D, typename T> bool MWNode<D, T>::hasCoord(const Coord<D> &r) const {
     double sFac = std::pow(2.0, -getScale());
     const NodeIndex<D> &l = getNodeIndex();
     //    println(1, "[" << r[0] << "," << r[1] << "," << r[2] << "]");
@@ -1166,7 +1168,7 @@ template <int D> bool MWNode<D>::hasCoord(const Coord<D> &r) const {
 
 /** Testing if nodes are compatible wrt NodeIndex and Tree (order, rootScale,
  * relPrec, etc). */
-template <int D> bool MWNode<D>::isCompatible(const MWNode<D> &node) {
+  template <int D, typename T> bool MWNode<D, T>::isCompatible(const MWNode<D, T> &node) {
     NOT_IMPLEMENTED_ABORT;
     //    if (nodeIndex != node.nodeIndex) {
     //        println(0, "nodeIndex mismatch" << std::endl);
@@ -1184,7 +1186,7 @@ template <int D> bool MWNode<D>::isCompatible(const MWNode<D> &node) {
  *
  * @param[in] idx: the NodeIndex of the requested node
  */
-template <int D> bool MWNode<D>::isAncestor(const NodeIndex<D> &idx) const {
+  template <int D, typename T> bool MWNode<D, T>::isAncestor(const NodeIndex<D> &idx) const {
     int relScale = idx.getScale() - getScale();
     if (relScale < 0) return false;
     const NodeIndex<D> &l = getNodeIndex();
@@ -1195,7 +1197,7 @@ template <int D> bool MWNode<D>::isAncestor(const NodeIndex<D> &idx) const {
     return true;
 }
 
-template <int D> bool MWNode<D>::isDecendant(const NodeIndex<D> &idx) const {
+  template <int D, typename T> bool MWNode<D, T>::isDecendant(const NodeIndex<D> &idx) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
@@ -1203,7 +1205,7 @@ template <int D> bool MWNode<D>::isDecendant(const NodeIndex<D> &idx) const {
  *
  * @param[in] o: the output stream
  */
-template <int D> std::ostream &MWNode<D>::print(std::ostream &o) const {
+  template <int D, typename T> std::ostream &MWNode<D, T>::print(std::ostream &o) const {
     std::string flags = "       ";
     o << getNodeIndex();
     if (isRootNode()) flags[0] = 'R';
@@ -1234,14 +1236,14 @@ template <int D> std::ostream &MWNode<D>::print(std::ostream &o) const {
  * normalization is such that a constant function gives constant value,
  * i.e. *not* same normalization as a squareNorm
  */
-template <int D> void MWNode<D>::setMaxSquareNorm() {
+  template <int D, typename T> void MWNode<D, T>::setMaxSquareNorm() {
     auto n = this->getScale();
     this->maxWSquareNorm = calcScaledWSquareNorm();
     this->maxSquareNorm = calcScaledSquareNorm();
 
     if (not this->isEndNode()) {
         for (int i = 0; i < this->getTDim(); i++) {
-            MWNode<D> &child = *this->children[i];
+	  MWNode<D, T> &child = *this->children[i];
             child.setMaxSquareNorm();
             this->maxSquareNorm = std::max(this->maxSquareNorm, child.maxSquareNorm);
             this->maxWSquareNorm = std::max(this->maxWSquareNorm, child.maxWSquareNorm);
@@ -1250,20 +1252,23 @@ template <int D> void MWNode<D>::setMaxSquareNorm() {
 }
 /** @brief recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1
  */
-template <int D> void MWNode<D>::resetMaxSquareNorm() {
+  template <int D, typename T> void MWNode<D, T>::resetMaxSquareNorm() {
     auto n = this->getScale();
     this->maxSquareNorm = -1.0;
     this->maxWSquareNorm = -1.0;
     if (not this->isEndNode()) {
         for (int i = 0; i < this->getTDim(); i++) {
-            MWNode<D> &child = *this->children[i];
+	  MWNode<D, T> &child = *this->children[i];
             child.resetMaxSquareNorm();
         }
     }
 }
 
-template class MWNode<1>;
-template class MWNode<2>;
-template class MWNode<3>;
+  template class MWNode<1, double>;
+  template class MWNode<2, double>;
+  template class MWNode<3, double>;
+  template class MWNode<1, ComplexDouble>;
+  template class MWNode<2, ComplexDouble>;
+  template class MWNode<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h
index a6a2c46c4..b50dc53f7 100644
--- a/src/trees/MWNode.h
+++ b/src/trees/MWNode.h
@@ -30,6 +30,7 @@
 
 #include "MRCPP/macros.h"
 #include "utils/omp_utils.h"
+#include "utils/math_utils.h"
 
 #include "HilbertPath.h"
 #include "MWTree.h"
@@ -51,10 +52,10 @@ namespace mrcpp {
  * data descriptions for details.
  *
  */
-template <int D> class MWNode {
+  template <int D, typename T> class MWNode {
 public:
-    MWNode(const MWNode<D> &node, bool allocCoef = true, bool SetCoef = true);
-    MWNode<D> &operator=(const MWNode<D> &node) = delete;
+    MWNode(const MWNode<D, T> &node, bool allocCoef = true, bool SetCoef = true);
+    MWNode<D , T> &operator=(const MWNode<D , T> &node) = delete;
     virtual ~MWNode();
 
     int getKp1() const { return getMWTree().getKp1(); }
@@ -76,7 +77,7 @@ template <int D> class MWNode {
     Coord<D> getLowerBounds() const;
 
     bool hasCoord(const Coord<D> &r) const;
-    bool isCompatible(const MWNode<D> &node);
+    bool isCompatible(const MWNode<D, T> &node);
     bool isAncestor(const NodeIndex<D> &idx) const;
     bool isDecendant(const NodeIndex<D> &idx) const;
 
@@ -89,30 +90,30 @@ template <int D> class MWNode {
     double getComponentNorm(int i) const { return this->componentNorms[i]; }
 
     int getNCoefs() const { return this->n_coefs; }
-    void getCoefs(Eigen::VectorXd &c) const;
+    void getCoefs(Eigen::Matrix<T, Eigen::Dynamic, 1> &c) const;
     void printCoefs() const;
 
-    double *getCoefs() { return this->coefs; }
-    const double *getCoefs() const { return this->coefs; }
+    T *getCoefs() { return this->coefs; }
+    const T *getCoefs() const { return this->coefs; }
 
     void getPrimitiveQuadPts(Eigen::MatrixXd &pts) const;
     void getPrimitiveChildPts(Eigen::MatrixXd &pts) const;
     void getExpandedQuadPts(Eigen::MatrixXd &pts) const;
     void getExpandedChildPts(Eigen::MatrixXd &pts) const;
 
-    MWTree<D> &getMWTree() { return static_cast<MWTree<D> &>(*this->tree); }
-    MWNode<D> &getMWParent() { return static_cast<MWNode<D> &>(*this->parent); }
-    MWNode<D> &getMWChild(int i) { return static_cast<MWNode<D> &>(*this->children[i]); }
+    MWTree<D, T> &getMWTree() { return static_cast<MWTree<D, T> &>(*this->tree); }
+    MWNode<D , T> &getMWParent() { return static_cast<MWNode<D , T> &>(*this->parent); }
+    MWNode<D , T> &getMWChild(int i) { return static_cast<MWNode<D , T> &>(*this->children[i]); }
 
-    const MWTree<D> &getMWTree() const { return static_cast<const MWTree<D> &>(*this->tree); }
-    const MWNode<D> &getMWParent() const { return static_cast<const MWNode<D> &>(*this->parent); }
-    const MWNode<D> &getMWChild(int i) const { return static_cast<const MWNode<D> &>(*this->children[i]); }
+    const MWTree<D, T> &getMWTree() const { return static_cast<const MWTree<D, T> &>(*this->tree); }
+    const MWNode<D , T> &getMWParent() const { return static_cast<const MWNode<D , T> &>(*this->parent); }
+    const MWNode<D , T> &getMWChild(int i) const { return static_cast<const MWNode<D , T> &>(*this->children[i]); }
 
     void zeroCoefs();
-    void setCoefBlock(int block, int block_size, const double *c);
-    void addCoefBlock(int block, int block_size, const double *c);
+    void setCoefBlock(int block, int block_size, const T *c);
+    void addCoefBlock(int block, int block_size, const T *c);
     void zeroCoefBlock(int block, int block_size);
-    void attachCoefs(double *coefs);
+    void attachCoefs(T *coefs);
 
     void calcNorms();
     void zeroNorms();
@@ -154,34 +155,35 @@ template <int D> class MWNode {
     void clearIsRootNode() { CLEAR_BITS(status, FlagRootNode); }
     void clearIsAllocated() { CLEAR_BITS(status, FlagAllocated); }
 
-    friend std::ostream &operator<<(std::ostream &o, const MWNode<D> &nd) { return nd.print(o); }
+    friend std::ostream &operator<<(std::ostream &o, const MWNode<D , T> &nd) { return nd.print(o); }
 
-    friend class TreeBuilder<D>;
-    friend class MultiplicationCalculator<D>;
-    friend class NodeAllocator<D>;
-    friend class MWTree<D>;
-    friend class FunctionTree<D>;
+    friend class TreeBuilder<D, T>;
+    friend class MultiplicationCalculator<D, T>;
+    friend class NodeAllocator<D, T>;
+    friend class MWTree<D, T>;
+    friend class FunctionTree<D, T>;
     friend class OperatorTree;
-    friend class FunctionNode<D>;
+    friend class FunctionNode<D, T>;
     friend class OperatorNode;
-    friend class DerivativeCalculator<D>;
+    friend class DerivativeCalculator<D, T>;
+    bool isComplex = false; //TODO put as one of the flags
 
 protected:
-    MWTree<D> *tree{nullptr};    ///< Tree the node belongs to
-    MWNode<D> *parent{nullptr};  ///< Parent node
-    MWNode<D> *children[1 << D]; ///< 2^D children
+    MWTree<D, T> *tree{nullptr};    ///< Tree the node belongs to
+    MWNode<D , T> *parent{nullptr};  ///< Parent node
+    MWNode<D , T> *children[1 << D]; ///< 2^D children
 
     double squareNorm{-1.0};       ///< Squared norm of all 2^D (k+1)^D coefficients
     double componentNorms[1 << D]; ///< Squared norms of the separeted 2^D components
     double maxSquareNorm{-1.0};    ///< Largest squared norm among itself and descendants.
     double maxWSquareNorm{-1.0};   ///< Largest wavelet squared norm among itself and descendants.
                                    ///< NB: must be set before used.
-    double *coefs{nullptr};     ///< the 2^D (k+1)^D MW coefficients
-                                ///< For example, in case of a one dimensional function \f$ f \f$
-                                ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$,
-                                ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$
-                                ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$.
-                                ///< Here \f$ n, l \f$ are unique for every node.
+    T *coefs{nullptr};     ///< the 2^D (k+1)^D MW coefficients
+                           ///< For example, in case of a one dimensional function \f$ f \f$
+                           ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$,
+                           ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$
+                           ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$.
+                           ///< Here \f$ n, l \f$ are unique for every node.
     int n_coefs{0};
 
     int serialIx{-1};       ///< index in serial Tree
@@ -192,9 +194,9 @@ template <int D> class MWNode {
     HilbertPath<D> hilbertPath; ///< To be documented
 
     MWNode();
-    MWNode(MWTree<D> *tree, int rIdx);
-    MWNode(MWTree<D> *tree, const NodeIndex<D> &idx);
-    MWNode(MWNode<D> *parent, int cIdx);
+    MWNode(MWTree<D, T> *tree, int rIdx);
+    MWNode(MWTree<D, T> *tree, const NodeIndex<D> &idx);
+    MWNode(MWNode<D , T> *parent, int cIdx);
     virtual void dealloc();
 
     bool crop(double prec, double splitFac, bool absPrec);
@@ -218,20 +220,20 @@ template <int D> class MWNode {
     int getChildIndex(const NodeIndex<D> &nIdx) const;
     int getChildIndex(const Coord<D> &r) const;
 
-    bool diffBranch(const MWNode<D> &rhs) const;
+    bool diffBranch(const MWNode<D , T> &rhs) const;
 
-    MWNode<D> *retrieveNode(const Coord<D> &r, int depth);
-    MWNode<D> *retrieveNode(const NodeIndex<D> &idx);
-    MWNode<D> *retrieveParent(const NodeIndex<D> &idx);
+    MWNode<D , T> *retrieveNode(const Coord<D> &r, int depth);
+    MWNode<D , T> *retrieveNode(const NodeIndex<D> &idx);
+    MWNode<D , T> *retrieveParent(const NodeIndex<D> &idx);
 
-    const MWNode<D> *retrieveNodeNoGen(const NodeIndex<D> &idx) const;
-    MWNode<D> *retrieveNodeNoGen(const NodeIndex<D> &idx);
+    const MWNode<D , T> *retrieveNodeNoGen(const NodeIndex<D> &idx) const;
+    MWNode<D , T> *retrieveNodeNoGen(const NodeIndex<D> &idx);
 
-    const MWNode<D> *retrieveNodeOrEndNode(const Coord<D> &r, int depth) const;
-    MWNode<D> *retrieveNodeOrEndNode(const Coord<D> &r, int depth);
+    const MWNode<D , T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth) const;
+    MWNode<D , T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth);
 
-    const MWNode<D> *retrieveNodeOrEndNode(const NodeIndex<D> &idx) const;
-    MWNode<D> *retrieveNodeOrEndNode(const NodeIndex<D> &idx);
+    const MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx) const;
+    MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx);
 
     void threadSafeGenChildren();
     void deleteGenerated();
diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp
index 583fb1fc1..652f9c2cd 100644
--- a/src/trees/MWTree.cpp
+++ b/src/trees/MWTree.cpp
@@ -49,8 +49,8 @@ namespace mrcpp {
  * root nodes. The information for the root node configuration to use
  * is in the mra object which is passed to the constructor.
  */
-template <int D>
-MWTree<D>::MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n)
+  template <int D, typename T>
+MWTree<D, T>::MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n)
         : MRA(mra)
         , order(mra.getOrder()) /// polynomial order
         , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) ///nr of scaling coefficients \f$ (k+1)^D \f$
@@ -61,7 +61,7 @@ MWTree<D>::MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n)
 }
 
 /** @brief MWTree destructor. */
-template <int D> MWTree<D>::~MWTree() {
+template <int D, typename T> MWTree<D, T>::~MWTree() {
     this->endNodeTable.clear();
     if (this->nodesAtDepth.size() != 1) MSG_ERROR("Nodes at depth != 1 -> " << this->nodesAtDepth.size());
     if (this->nodesAtDepth[0] != 0) MSG_ERROR("Nodes at depth 0 != 0 -> " << this->nodesAtDepth[0]);
@@ -73,9 +73,9 @@ template <int D> MWTree<D>::~MWTree() {
   * including the root nodes. Derived classes will call this method
   * when the object is deleted.
   */
-template <int D> void MWTree<D>::deleteRootNodes() {
+template <int D, typename T> void MWTree<D, T>::deleteRootNodes() {
     for (int i = 0; i < this->rootBox.size(); i++) {
-        MWNode<D> &root = this->getRootMWNode(i);
+        MWNode<D, T> &root = this->getRootMWNode(i);
         root.deleteChildren();
         root.dealloc();
         this->rootBox.clearNode(i);
@@ -90,9 +90,9 @@ template <int D> void MWTree<D>::deleteRootNodes() {
  * nodes, (nodeChunks in NodeAllocator) is NOT released, but is
  * immediately available to the new function.
  */
-template <int D> void MWTree<D>::clear() {
+template <int D, typename T> void MWTree<D, T>::clear() {
     for (int i = 0; i < this->rootBox.size(); i++) {
-        MWNode<D> &root = this->getRootMWNode(i);
+        MWNode<D, T> &root = this->getRootMWNode(i);
         root.deleteChildren();
         root.clearHasCoefs();
         root.clearNorms();
@@ -106,10 +106,10 @@ template <int D> void MWTree<D>::clear() {
  * @details The norm is calculated using endNodes only. The specific
  * type of norm which is computed will depend on the derived class
  */
-template <int D> void MWTree<D>::calcSquareNorm() {
+template <int D, typename T> void MWTree<D, T>::calcSquareNorm() {
     double treeNorm = 0.0;
     for (int n = 0; n < this->getNEndNodes(); n++) {
-        const MWNode<D> &node = getEndMWNode(n);
+        const MWNode<D, T> &node = getEndMWNode(n);
         assert(node.hasCoefs());
         treeNorm += node.getSquareNorm();
     }
@@ -139,7 +139,7 @@ template <int D> void MWTree<D>::calcSquareNorm() {
  * }
  * \f]
  */
-template <int D> void MWTree<D>::mwTransform(int type, bool overwrite) {
+template <int D, typename T> void MWTree<D, T>::mwTransform(int type, bool overwrite) {
     switch (type) {
         case TopDown:
             mwTransformDown(overwrite);
@@ -162,8 +162,8 @@ template <int D> void MWTree<D>::mwTransform(int type, bool overwrite) {
  * projection to purify the coefficients obtained by quadrature at
  * coarser scales which are therefore not precise enough.
  */
-template <int D> void MWTree<D>::mwTransformUp() {
-    std::vector<MWNodeVector<D>> nodeTable;
+template <int D, typename T> void MWTree<D, T>::mwTransformUp() {
+    std::vector<MWNodeVector<D, T>> nodeTable;
     tree_utils::make_node_table(*this, nodeTable);
 #pragma omp parallel shared(nodeTable) num_threads(mrcpp_get_num_threads())
     {
@@ -172,7 +172,7 @@ template <int D> void MWTree<D>::mwTransformUp() {
             int nNodes = nodeTable[n].size();
 #pragma omp for schedule(guided)
             for (int i = 0; i < nNodes; i++) {
-                MWNode<D> &node = *nodeTable[n][i];
+                MWNode<D, T> &node = *nodeTable[n][i];
                 if (node.isBranchNode()) { node.reCompress(); }
             }
         }
@@ -190,8 +190,8 @@ template <int D> void MWTree<D>::mwTransformUp() {
  * operation is generally used after the operator application.
  *
  */
-template <int D> void MWTree<D>::mwTransformDown(bool overwrite) {
-    std::vector<MWNodeVector<D>> nodeTable;
+template <int D, typename T> void MWTree<D, T>::mwTransformDown(bool overwrite) {
+    std::vector<MWNodeVector<D, T>> nodeTable;
     tree_utils::make_node_table(*this, nodeTable);
 #pragma omp parallel shared(nodeTable) num_threads(mrcpp_get_num_threads())
     {
@@ -199,7 +199,7 @@ template <int D> void MWTree<D>::mwTransformDown(bool overwrite) {
             int n_nodes = nodeTable[n].size();
 #pragma omp for schedule(guided)
             for (int i = 0; i < n_nodes; i++) {
-                MWNode<D> &node = *nodeTable[n][i];
+                MWNode<D, T> &node = *nodeTable[n][i];
                 if (node.isBranchNode()) {
                     if (this->getRootScale() > node.getScale()) {
                         int reverse = n_nodes - 1;
@@ -220,10 +220,10 @@ template <int D> void MWTree<D>::mwTransformDown(bool overwrite) {
  * function is representable at depth zero. One should then use \ref cropTree to remove
  * unnecessary nodes.
  */
-template <int D> void MWTree<D>::setZero() {
-    TreeIterator<D> it(*this);
+template <int D, typename T> void MWTree<D, T>::setZero() {
+    TreeIterator<D, T> it(*this);
     while (it.next()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         node.zeroCoefs();
     }
     this->squareNorm = 0.0;
@@ -236,7 +236,7 @@ template <int D> void MWTree<D>::setZero() {
  * safe, and must NEVER be called outside a critical region in parallel.
  * It's way. way too expensive to lock the tree, so don't even think
  * about it. */
-template <int D> void MWTree<D>::incrementNodeCount(int scale) {
+template <int D, typename T> void MWTree<D, T>::incrementNodeCount(int scale) {
     int depth = scale - getRootScale();
     if (depth < 0) {
         int n = this->nodesAtNegativeDepth.size();
@@ -261,7 +261,7 @@ template <int D> void MWTree<D>::incrementNodeCount(int scale) {
  * It's way. way too expensive to lock the tree, so don't even think
  * about it.
  */
-template <int D> void MWTree<D>::decrementNodeCount(int scale) {
+template <int D, typename T> void MWTree<D, T>::decrementNodeCount(int scale) {
     int depth = scale - getRootScale();
     if (depth < 0) {
         assert(-depth - 1 < this->nodesAtNegativeDepth.size());
@@ -280,7 +280,7 @@ template <int D> void MWTree<D>::decrementNodeCount(int scale) {
  *
  * @param[in] depth: Tree depth (0 depth is the coarsest scale) to count.
  */
-template <int D> int MWTree<D>::getNNodesAtDepth(int depth) const {
+template <int D, typename T> int MWTree<D, T>::getNNodesAtDepth(int depth) const {
     int N = 0;
     if (depth < 0) {
         if (this->nodesAtNegativeDepth.size() >= -depth) N = this->nodesAtNegativeDepth[-depth];
@@ -291,9 +291,9 @@ template <int D> int MWTree<D>::getNNodesAtDepth(int depth) const {
 }
 
 /** @returns Size of all MW coefs in the tree, in kB */
-template <int D> int MWTree<D>::getSizeNodes() const {
+template <int D, typename T> int MWTree<D, T>::getSizeNodes() const {
     auto nCoefs = 1ll * getNNodes() * getTDim() * getKp1_d();
-    return sizeof(double) * nCoefs / 1024;
+    return sizeof(T) * nCoefs / 1024;
 }
 
 /** @brief Finds and returns the node pointer with the given \ref NodeIndex, const version.
@@ -303,11 +303,11 @@ template <int D> int MWTree<D>::getSizeNodes() const {
  * pointer if the node does not exist, or if it is a
  * GenNode. Recursion starts at the appropriate rootNode.
  */
-template <int D> const MWNode<D> *MWTree<D>::findNode(NodeIndex<D> idx) const {
+template <int D, typename T> const MWNode<D, T> *MWTree<D, T>::findNode(NodeIndex<D> idx) const {
     if (getRootBox().isPeriodic()) { periodic::index_manipulation<D>(idx, getRootBox().getPeriodic()); }
     int rIdx = getRootBox().getBoxIndex(idx);
     if (rIdx < 0) return nullptr;
-    const MWNode<D> &root = this->rootBox.getNode(rIdx);
+    const MWNode<D, T> &root = this->rootBox.getNode(rIdx);
     assert(root.isAncestor(idx));
     return root.retrieveNodeNoGen(idx);
 }
@@ -319,11 +319,11 @@ template <int D> const MWNode<D> *MWTree<D>::findNode(NodeIndex<D> idx) const {
  * pointer if the node does not exist, or if it is a
  * GenNode. Recursion starts at the appropriate rootNode.
  */
-template <int D> MWNode<D> *MWTree<D>::findNode(NodeIndex<D> idx) {
+template <int D, typename T> MWNode<D, T> *MWTree<D, T>::findNode(NodeIndex<D> idx) {
     if (getRootBox().isPeriodic()) { periodic::index_manipulation<D>(idx, getRootBox().getPeriodic()); }
     int rIdx = getRootBox().getBoxIndex(idx);
     if (rIdx < 0) return nullptr;
-    MWNode<D> &root = this->rootBox.getNode(rIdx);
+    MWNode<D, T> &root = this->rootBox.getNode(rIdx);
     assert(root.isAncestor(idx));
     return root.retrieveNodeNoGen(idx);
 }
@@ -335,11 +335,11 @@ template <int D> MWNode<D> *MWTree<D>::findNode(NodeIndex<D> idx) {
  * transform. Recursion starts at the appropriate rootNode and descends
  * from this.
  */
-template <int D> MWNode<D> &MWTree<D>::getNode(NodeIndex<D> idx) {
+template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> idx) {
     if (getRootBox().isPeriodic()) periodic::index_manipulation<D>(idx, getRootBox().getPeriodic());
 
-    MWNode<D> *out = nullptr;
-    MWNode<D> &root = getRootBox().getNode(idx);
+    MWNode<D, T> *out = nullptr;
+    MWNode<D, T> &root = getRootBox().getNode(idx);
     if (idx.getScale() < getRootScale()) {
 #pragma omp critical(gen_parent)
         out = root.retrieveParent(idx);
@@ -357,9 +357,9 @@ template <int D> MWNode<D> &MWTree<D>::getNode(NodeIndex<D> idx) {
  * GenNodes.  Recursion starts at the appropriate rootNode and decends
  * from this.
  */
-template <int D> MWNode<D> &MWTree<D>::getNodeOrEndNode(NodeIndex<D> idx) {
+template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNodeOrEndNode(NodeIndex<D> idx) {
     if (getRootBox().isPeriodic()) { periodic::index_manipulation<D>(idx, getRootBox().getPeriodic()); }
-    MWNode<D> &root = getRootBox().getNode(idx);
+    MWNode<D, T> &root = getRootBox().getNode(idx);
     assert(root.isAncestor(idx));
     return *root.retrieveNodeOrEndNode(idx);
 }
@@ -371,9 +371,9 @@ template <int D> MWNode<D> &MWTree<D>::getNodeOrEndNode(NodeIndex<D> idx) {
  * transform. Recursion starts at the appropriate rootNode and decends
  * from this.
  */
-template <int D> const MWNode<D> &MWTree<D>::getNodeOrEndNode(NodeIndex<D> idx) const {
+template <int D, typename T> const MWNode<D, T> &MWTree<D, T>::getNodeOrEndNode(NodeIndex<D> idx) const {
     if (getRootBox().isPeriodic()) { periodic::index_manipulation<D>(idx, getRootBox().getPeriodic()); }
-    const MWNode<D> &root = getRootBox().getNode(idx);
+    const MWNode<D, T> &root = getRootBox().getNode(idx);
     assert(root.isAncestor(idx));
     return *root.retrieveNodeOrEndNode(idx);
 }
@@ -387,8 +387,8 @@ template <int D> const MWNode<D> &MWTree<D>::getNodeOrEndNode(NodeIndex<D> idx)
  * generate nodes that do not exist. Recursion starts at the
  * appropriate rootNode and decends from this.
  */
-template <int D> MWNode<D> &MWTree<D>::getNode(Coord<D> r, int depth) {
-    MWNode<D> &root = getRootBox().getNode(r);
+template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(Coord<D> r, int depth) {
+    MWNode<D, T> &root = getRootBox().getNode(r);
     if (depth >= 0) {
         return *root.retrieveNode(r, depth);
     } else {
@@ -405,11 +405,11 @@ template <int D> MWNode<D> &MWTree<D>::getNode(Coord<D> r, int depth) {
  * the path to the requested node, and will never create or return GenNodes.
  * Recursion starts at the appropriate rootNode and decends from this.
  */
-template <int D> MWNode<D> &MWTree<D>::getNodeOrEndNode(Coord<D> r, int depth) {
+template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNodeOrEndNode(Coord<D> r, int depth) {
 
     if (getRootBox().isPeriodic()) { periodic::coord_manipulation<D>(r, getRootBox().getPeriodic()); }
 
-    MWNode<D> &root = getRootBox().getNode(r);
+    MWNode<D, T> &root = getRootBox().getNode(r);
     return *root.retrieveNodeOrEndNode(r, depth);
 }
 
@@ -422,10 +422,10 @@ template <int D> MWNode<D> &MWTree<D>::getNodeOrEndNode(Coord<D> r, int depth) {
  * the path to the requested node, and will never create or return GenNodes.
  * Recursion starts at the appropriate rootNode and decends from this.
  */
-template <int D> const MWNode<D> &MWTree<D>::getNodeOrEndNode(Coord<D> r, int depth) const {
+template <int D, typename T> const MWNode<D, T> &MWTree<D, T>::getNodeOrEndNode(Coord<D> r, int depth) const {
 
     if (getRootBox().isPeriodic()) { periodic::coord_manipulation<D>(r, getRootBox().getPeriodic()); }
-    const MWNode<D> &root = getRootBox().getNode(r);
+    const MWNode<D, T> &root = getRootBox().getNode(r);
     return *root.retrieveNodeOrEndNode(r, depth);
 }
 
@@ -434,10 +434,10 @@ template <int D> const MWNode<D> &MWTree<D>::getNodeOrEndNode(Coord<D> r, int de
  * @details copies the list of all EndNode pointers into a new vector
  * and retunrs it.
  */
-template <int D> MWNodeVector<D> *MWTree<D>::copyEndNodeTable() {
-    auto *nVec = new MWNodeVector<D>;
+template <int D, typename T> MWNodeVector<D, T> *MWTree<D, T>::copyEndNodeTable() {
+    auto *nVec = new MWNodeVector<D, T>;
     for (int n = 0; n < getNEndNodes(); n++) {
-        MWNode<D> &node = getEndMWNode(n);
+        MWNode<D, T> &node = getEndMWNode(n);
         nVec->push_back(&node);
     }
     return nVec;
@@ -449,27 +449,27 @@ template <int D> MWNodeVector<D> *MWTree<D>::copyEndNodeTable() {
  * scratch. It makes use of the TreeIterator to traverse the tree.
  * 
  */
-template <int D> void MWTree<D>::resetEndNodeTable() {
+template <int D, typename T> void MWTree<D, T>::resetEndNodeTable() {
     clearEndNodeTable();
-    TreeIterator<D> it(*this, TopDown, Hilbert);
+    TreeIterator<D, T> it(*this, TopDown, Hilbert);
     it.setReturnGenNodes(false);
     while (it.next()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         if (node.isEndNode()) { this->endNodeTable.push_back(&node); }
     }
 }
 
 
-template <int D> int MWTree<D>::countBranchNodes(int depth) {
+template <int D, typename T> int MWTree<D, T>::countBranchNodes(int depth) {
     NOT_IMPLEMENTED_ABORT;
 }
 
-template <int D> int MWTree<D>::countLeafNodes(int depth) {
+template <int D, typename T> int MWTree<D, T>::countLeafNodes(int depth) {
     NOT_IMPLEMENTED_ABORT;
     //    int nNodes = 0;
-    //    TreeIterator<D> it(*this);
+    //    TreeIterator<D, T> it(*this);
     //    while (it.next()) {
-    //        MWNode<D> &node = it.getNode();
+    //        MWNode<D, T> &node = it.getNode();
     //        if (node.getDepth() == depth or depth < 0) {
     //            if (node.isLeafNode()) {
     //                nNodes++;
@@ -480,12 +480,12 @@ template <int D> int MWTree<D>::countLeafNodes(int depth) {
 }
 
 /* Traverse tree and count nodes belonging to this rank. */
-template <int D> int MWTree<D>::countNodes(int depth) {
+template <int D, typename T> int MWTree<D, T>::countNodes(int depth) {
     NOT_IMPLEMENTED_ABORT;
-    //    TreeIterator<D> it(*this);
+    //    TreeIterator<D, T> it(*this);
     //    int count = 0;
     //    while (it.next()) {
-    //        MWNode<D> &node = it.getNode();
+    //        MWNode<D, T> &node = it.getNode();
     //        if (node.isGenNode()) {
     //            continue;
     //        }
@@ -497,12 +497,12 @@ template <int D> int MWTree<D>::countNodes(int depth) {
 }
 
 /* Traverse tree and count nodes with allocated coefficients. */
-template <int D> int MWTree<D>::countAllocNodes(int depth) {
+template <int D, typename T> int MWTree<D, T>::countAllocNodes(int depth) {
     NOT_IMPLEMENTED_ABORT;
-    //    TreeIterator<D> it(*this);
+    //    TreeIterator<D, T> it(*this);
     //    int count = 0;
     //    while (it.next()) {
-    //        MWNode<D> &node = it.getNode();
+    //        MWNode<D, T> &node = it.getNode();
     //        if (node.isGenNode()) {
     //            continue;
     //        }
@@ -515,7 +515,7 @@ template <int D> int MWTree<D>::countAllocNodes(int depth) {
 
 /** @brief Prints a summary of the tree structure on the output file
  */
-template <int D> std::ostream &MWTree<D>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &MWTree<D, T>::print(std::ostream &o) const {
     o << "  square norm: " << this->squareNorm << std::endl;
     o << "  root scale: " << this->getRootScale() << std::endl;
     o << "  order: " << this->order << std::endl;
@@ -532,9 +532,9 @@ template <int D> std::ostream &MWTree<D>::print(std::ostream &o) const {
  * @details it defines the upper bound of the squared norm \f$
  * ||f||^2_{\ldots} \f$ in this node or its descendents
  */
-template <int D> void MWTree<D>::makeMaxSquareNorms() {
-    NodeBox<D> &rBox = this->getRootBox();
-    MWNode<D> **roots = rBox.getNodes();
+template <int D, typename T> void MWTree<D, T>::makeMaxSquareNorms() {
+    NodeBox<D, T> &rBox = this->getRootBox();
+    MWNode<D, T> **roots = rBox.getNodes();
     for (int rIdx = 0; rIdx < rBox.size(); rIdx++) {
         // recursively set value of children and descendants
         roots[rIdx]->setMaxSquareNorm();
@@ -543,15 +543,16 @@ template <int D> void MWTree<D>::makeMaxSquareNorms() {
 
 /** @brief gives serialIx of a node from its NodeIndex
  *
- * @details Peter will document this!
+ * @details Gives a unique integer for each node, corresponding to the position
+ * of the node in the serialized representation.
  */
-template <int D> int MWTree<D>::getIx(NodeIndex<D> nIdx) {
+template <int D, typename T> int MWTree<D, T>::getIx(NodeIndex<D> nIdx) {
     if (this->isLocal == false) MSG_ERROR("getIx only implemented in local representation");
     if(NodeIndex2serialIx.count(nIdx) == 0) return -1;
     else return NodeIndex2serialIx[nIdx];
 }
 
-template <int D> void MWTree<D>::getNodeCoeff(NodeIndex<D> nIdx, double *data) {
+template <int D, typename T> void MWTree<D, T>::getNodeCoeff(NodeIndex<D> nIdx, double *data) {
     assert(this->isLocal);
     int size = (1 << D) * kp1_d;
     int id = 0;
@@ -559,8 +560,13 @@ template <int D> void MWTree<D>::getNodeCoeff(NodeIndex<D> nIdx, double *data) {
     this->NodesCoeff->get_data(id, size, data);
 }
 
-template class MWTree<1>;
-template class MWTree<2>;
-template class MWTree<3>;
+template class MWTree<1, double>;
+template class MWTree<2, double>;
+template class MWTree<3, double>;
+
+
+template class MWTree<1, ComplexDouble>;
+template class MWTree<2, ComplexDouble>;
+template class MWTree<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index 1587aa644..631cbfe68 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -61,11 +61,11 @@ class BankAccount;
  * present. See specific methods for details.
  *
  */
-template <int D> class MWTree {
+  template <int D, typename T> class MWTree {
 public:
     MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n);
-    MWTree(const MWTree<D> &tree) = delete;
-    MWTree<D> &operator=(const MWTree<D> &tree) = delete;
+    MWTree(const MWTree<D, T> &tree) = delete;
+    MWTree<D, T> &operator=(const MWTree<D, T> &tree) = delete;
     virtual ~MWTree();
 
     void setZero();
@@ -90,8 +90,8 @@ template <int D> class MWTree {
     int getSizeNodes() const;
 
     /** @returns */
-    NodeBox<D> &getRootBox() { return this->rootBox; }
-    const NodeBox<D> &getRootBox() const { return this->rootBox; }
+    NodeBox<D, T> &getRootBox() { return this->rootBox; }
+    const NodeBox<D, T> &getRootBox() const { return this->rootBox; }
     const MultiResolutionAnalysis<D> &getMRA() const { return this->MRA; }
 
     void mwTransform(int type, bool overwrite = true);
@@ -102,28 +102,28 @@ template <int D> class MWTree {
     int getRootIndex(Coord<D> r) const { return this->rootBox.getBoxIndex(r); }
     int getRootIndex(NodeIndex<D> nIdx) const { return this->rootBox.getBoxIndex(nIdx); }
 
-    MWNode<D> *findNode(NodeIndex<D> nIdx);
-    const MWNode<D> *findNode(NodeIndex<D> nIdx) const;
+    MWNode<D, T> *findNode(NodeIndex<D> nIdx);
+    const MWNode<D, T> *findNode(NodeIndex<D> nIdx) const;
 
-    MWNode<D> &getNode(NodeIndex<D> nIdx);
-    MWNode<D> &getNodeOrEndNode(NodeIndex<D> nIdx);
-    const MWNode<D> &getNodeOrEndNode(NodeIndex<D> nIdx) const;
+    MWNode<D, T> &getNode(NodeIndex<D> nIdx);
+    MWNode<D, T> &getNodeOrEndNode(NodeIndex<D> nIdx);
+    const MWNode<D, T> &getNodeOrEndNode(NodeIndex<D> nIdx) const;
 
-    MWNode<D> &getNode(Coord<D> r, int depth = -1);
-    MWNode<D> &getNodeOrEndNode(Coord<D> r, int depth = -1);
-    const MWNode<D> &getNodeOrEndNode(Coord<D> r, int depth = -1) const;
+    MWNode<D, T> &getNode(Coord<D> r, int depth = -1);
+    MWNode<D, T> &getNodeOrEndNode(Coord<D> r, int depth = -1);
+    const MWNode<D, T> &getNodeOrEndNode(Coord<D> r, int depth = -1) const;
 
     int getNEndNodes() const { return this->endNodeTable.size(); }
     int getNRootNodes() const { return this->rootBox.size(); }
-    MWNode<D> &getEndMWNode(int i) { return *this->endNodeTable[i]; }
-    MWNode<D> &getRootMWNode(int i) { return this->rootBox.getNode(i); }
-    const MWNode<D> &getEndMWNode(int i) const { return *this->endNodeTable[i]; }
-    const MWNode<D> &getRootMWNode(int i) const { return this->rootBox.getNode(i); }
+    MWNode<D, T> &getEndMWNode(int i) { return *this->endNodeTable[i]; }
+    MWNode<D, T> &getRootMWNode(int i) { return this->rootBox.getNode(i); }
+    const MWNode<D, T> &getEndMWNode(int i) const { return *this->endNodeTable[i]; }
+    const MWNode<D, T> &getRootMWNode(int i) const { return this->rootBox.getNode(i); }
 
     bool isPeriodic() const { return this->MRA.getWorldBox().isPeriodic(); }
 
-    MWNodeVector<D> *copyEndNodeTable();
-    MWNodeVector<D> *getEndNodeTable() { return &this->endNodeTable; }
+    MWNodeVector<D, T> *copyEndNodeTable();
+    MWNodeVector<D, T> *getEndNodeTable() { return &this->endNodeTable; }
 
     void deleteRootNodes();
     void resetEndNodeTable();
@@ -138,19 +138,19 @@ template <int D> class MWTree {
 
     void makeMaxSquareNorms(); // sets values for maxSquareNorm and maxWSquareNorm in all nodes
 
-    NodeAllocator<D> &getNodeAllocator() { return *this->nodeAllocator_p; }
-    const NodeAllocator<D> &getNodeAllocator() const { return *this->nodeAllocator_p; }
-    MWNodeVector<D> endNodeTable;          ///< Final projected nodes
+    NodeAllocator<D, T> &getNodeAllocator() { return *this->nodeAllocator_p; }
+    const NodeAllocator<D, T> &getNodeAllocator() const { return *this->nodeAllocator_p; }
+    MWNodeVector<D, T> endNodeTable;          ///< Final projected nodes
 
     void getNodeCoeff(NodeIndex<D> nIdx, double *data); // fetch coefficient from a specific node stored in Bank
 
-    friend std::ostream &operator<<(std::ostream &o, const MWTree<D> &tree) { return tree.print(o); }
+    friend std::ostream &operator<<(std::ostream &o, const MWTree<D, T> &tree) { return tree.print(o); }
 
-    friend class MWNode<D>;
-    friend class FunctionNode<D>;
+    friend class MWNode<D, T>;
+    friend class FunctionNode<D, T>;
     friend class OperatorNode;
-    friend class TreeBuilder<D>;
-    friend class NodeAllocator<D>;
+    friend class TreeBuilder<D, T>;
+    friend class NodeAllocator<D, T>;
 
 protected:
     // Parameters that are set in construction and should never change
@@ -165,11 +165,11 @@ template <int D> class MWTree {
     // Parameters that are dynamic and can be set by user
     std::string name;
 
-    std::unique_ptr<NodeAllocator<D>> nodeAllocator_p{nullptr};
+    std::unique_ptr<NodeAllocator<D, T>> nodeAllocator_p{nullptr};
 
     // Tree data
     double squareNorm;
-    NodeBox<D> rootBox;                    ///< The actual container of nodes
+    NodeBox<D, T> rootBox;                    ///< The actual container of nodes
     std::vector<int> nodesAtDepth;         ///< Node counter
     std::vector<int> nodesAtNegativeDepth; ///< Node counter
 
diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp
index 9ca79f0b4..b33d5ccf1 100644
--- a/src/trees/NodeAllocator.cpp
+++ b/src/trees/NodeAllocator.cpp
@@ -38,7 +38,7 @@
 
 namespace mrcpp {
 
-template <int D> NodeAllocator<D>::NodeAllocator(FunctionTree<D> *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk)
+template <int D, typename T> NodeAllocator<D, T>::NodeAllocator(FunctionTree<D, T> *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk)
         : coefsPerNode(coefsPerNode)
         , maxNodesPerChunk(nodesPerChunk)
         , tree_p(tree)
@@ -47,14 +47,14 @@ template <int D> NodeAllocator<D>::NodeAllocator(FunctionTree<D> *tree, SharedMe
     this->nodeChunks.reserve(100);
     this->coefChunks.reserve(100);
 
-    FunctionNode<D> tmp;
+    FunctionNode<D, T> tmp;
     this->cvptr = *(char **)(&tmp);
-    this->sizeOfNode = sizeof(FunctionNode<D>);
+    this->sizeOfNode = sizeof(FunctionNode<D, T>);
 
     MRCPP_INIT_OMP_LOCK();
 }
 
-template <> NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk)
+template <> NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory<double> *mem, int coefsPerNode, int nodesPerChunk)
         : coefsPerNode(coefsPerNode)
         , maxNodesPerChunk(nodesPerChunk)
         , tree_p(tree)
@@ -70,11 +70,11 @@ template <> NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory *me
     MRCPP_INIT_OMP_LOCK();
 }
 
-template <int D> NodeAllocator<D>::NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk) {
+template <int D, typename T> NodeAllocator<D, T>::NodeAllocator(OperatorTree *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk) {
     NOT_REACHED_ABORT;
 }
 
-template <int D> NodeAllocator<D>::~NodeAllocator() {
+template <int D, typename T> NodeAllocator<D, T>::~NodeAllocator() {
     for (auto &chunk : this->nodeChunks) delete[](char *) chunk;
     if (not isShared()) // if the data is shared, it must be freed by MPI_Win_free
         for (auto &chunk : this->coefChunks) delete[] chunk;
@@ -82,35 +82,35 @@ template <int D> NodeAllocator<D>::~NodeAllocator() {
     MRCPP_DESTROY_OMP_LOCK();
 }
 
-template <int D> MWNode<D> * NodeAllocator<D>::getNode_p(int sIdx) {
+template <int D, typename T> MWNode<D, T> * NodeAllocator<D, T>::getNode_p(int sIdx) {
     MRCPP_SET_OMP_LOCK();
     auto *node = getNodeNoLock(sIdx);
     MRCPP_UNSET_OMP_LOCK();
     return node;
 }
 
-template <int D> double * NodeAllocator<D>::getCoef_p(int sIdx) {
+template <int D, typename T> T * NodeAllocator<D, T>::getCoef_p(int sIdx) {
     MRCPP_SET_OMP_LOCK();
     auto *coefs = getCoefNoLock(sIdx);
     MRCPP_UNSET_OMP_LOCK();
     return coefs;
 }
 
-template <int D> MWNode<D> * NodeAllocator<D>::getNodeNoLock(int sIdx) {
+template <int D, typename T> MWNode<D, T> * NodeAllocator<D, T>::getNodeNoLock(int sIdx) {
     if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr;
     int chunk = sIdx / this->maxNodesPerChunk; // which chunk
     int cIdx = sIdx % this->maxNodesPerChunk;  // position in chunk
     return this->nodeChunks[chunk] + cIdx;
 }
 
-template <int D> double * NodeAllocator<D>::getCoefNoLock(int sIdx) {
+template <int D, typename T> T * NodeAllocator<D, T>::getCoefNoLock(int sIdx) {
     if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr;
     int chunk = sIdx / this->maxNodesPerChunk; // which chunk
     int idx = sIdx % this->maxNodesPerChunk;   // position in chunk
     return this->coefChunks[chunk] + idx * this->coefsPerNode;
 }
 
-template <int D> int NodeAllocator<D>::alloc(int nNodes, bool coefs) {
+template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coefs) {
     MRCPP_SET_OMP_LOCK();
     if (nNodes <= 0 or nNodes > this->maxNodesPerChunk) MSG_ABORT("Cannot allocate " << nNodes << " nodes");
 
@@ -143,7 +143,7 @@ template <int D> int NodeAllocator<D>::alloc(int nNodes, bool coefs) {
     return sIdx;
 }
 
-template <int D> void NodeAllocator<D>::dealloc(int sIdx) {
+template <int D, typename T> void NodeAllocator<D, T>::dealloc(int sIdx) {
     MRCPP_SET_OMP_LOCK();
     if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx);
     auto *node_p = getNodeNoLock(sIdx);
@@ -161,7 +161,7 @@ template <int D> void NodeAllocator<D>::dealloc(int sIdx) {
     MRCPP_UNSET_OMP_LOCK();
 }
 
-template <int D> void NodeAllocator<D>::deallocAllCoeff() {
+template <int D, typename T> void NodeAllocator<D, T>::deallocAllCoeff() {
     if (not this->isShared())
         for (auto &chunk : this->coefChunks) delete[] chunk;
     else delete this->shmem_p;
@@ -170,7 +170,7 @@ template <int D> void NodeAllocator<D>::deallocAllCoeff() {
 
 }
 
-template <int D> void NodeAllocator<D>::init(int nChunks, bool coefs) {
+template <int D, typename T> void NodeAllocator<D, T>::init(int nChunks, bool coefs) {
     MRCPP_SET_OMP_LOCK();
     if (nChunks <= 0) MSG_ABORT("Invalid number of chunks: " << nChunks);
     for (int i = getNChunks(); i < nChunks; i++) appendChunk(coefs);
@@ -182,10 +182,10 @@ template <int D> void NodeAllocator<D>::init(int nChunks, bool coefs) {
     MRCPP_UNSET_OMP_LOCK();
 }
 
-template <int D> void NodeAllocator<D>::appendChunk(bool coefs) {
+template <int D, typename T> void NodeAllocator<D, T>::appendChunk(bool coefs) {
     // make coeff chunk
     if (coefs) {
-        double *c_chunk = nullptr;
+        T *c_chunk = nullptr;
         if (this->isShared()) {
             // for coefficients, take from the shared memory block
             c_chunk = this->shmem_p->sh_end_ptr;
@@ -193,13 +193,13 @@ template <int D> void NodeAllocator<D>::appendChunk(bool coefs) {
             // may increase size dynamically in the future
             if (this->shmem_p->sh_max_ptr < this->shmem_p->sh_end_ptr) MSG_ABORT("Shared block too small");
         } else {
-            c_chunk = new double[getCoefChunkSize() / sizeof(double)];
+            c_chunk = new T[getCoefChunkSize() / sizeof(T)];
         }
         this->coefChunks.push_back(c_chunk);
     }
 
     // make node chunk
-    auto n_chunk = (MWNode<D> *)new char[getNodeChunkSize()];
+    auto n_chunk = (MWNode<D, T> *)new char[getNodeChunkSize()];
     for (int i = 0; i < this->maxNodesPerChunk; i++) {
         n_chunk[i].serialIx = -1;
         n_chunk[i].parentSerialIx = -1;
@@ -215,7 +215,7 @@ template <int D> void NodeAllocator<D>::appendChunk(bool coefs) {
 }
 
 /** Fill all holes in the chunks with occupied nodes, then remove all empty chunks */
-template <int D> int NodeAllocator<D>::compress() {
+template <int D, typename T> int NodeAllocator<D, T>::compress() {
     MRCPP_SET_OMP_LOCK();
     int nNodes = (1 << D);
     if (this->maxNodesPerChunk * this->nodeChunks.size() <=
@@ -249,7 +249,7 @@ template <int D> int NodeAllocator<D>::compress() {
     return nChunksDeleted;
 }
 
-template <int D> int NodeAllocator<D>::deleteUnusedChunks() {
+template <int D, typename T> int NodeAllocator<D, T>::deleteUnusedChunks() {
     // number of occupied chunks
     int nChunksTotal = getNChunks();
     int nChunksUsed = getNChunksUsed();
@@ -271,7 +271,7 @@ template <int D> int NodeAllocator<D>::deleteUnusedChunks() {
     return nChunksTotal - nChunksUsed;
 }
 
-template <int D> void NodeAllocator<D>::moveNodes(int nNodes, int srcIdx, int dstIdx) {
+template <int D, typename T> void NodeAllocator<D, T>::moveNodes(int nNodes, int srcIdx, int dstIdx) {
     assert(nNodes > 0);
     assert(nNodes <= this->maxNodesPerChunk);
 
@@ -288,7 +288,7 @@ template <int D> void NodeAllocator<D>::moveNodes(int nNodes, int srcIdx, int ds
     for (int i = 0; i < nNodes * this->sizeOfNode; i++) ((char *)dstNode)[i] = ((char *)srcNode)[i];
 
     // coefs have new adresses
-    double *coefs_p = getCoefNoLock(dstIdx);
+    T *coefs_p = getCoefNoLock(dstIdx);
     if (coefs_p == nullptr) NOT_IMPLEMENTED_ABORT; // Nodes without coefs not handled atm
     for (int i = 0; i < nNodes; i++) (dstNode + i)->coefs = coefs_p + i * getNCoefs();
 
@@ -325,7 +325,7 @@ template <int D> void NodeAllocator<D>::moveNodes(int nNodes, int srcIdx, int ds
 }
 
 // Last positions on a chunk cannot be used if there is no place for nNodes siblings on the same chunk
-template <int D> int NodeAllocator<D>::findNextAvailable(int sIdx, int nNodes) const {
+template <int D, typename T> int NodeAllocator<D, T>::findNextAvailable(int sIdx, int nNodes) const {
     assert(sIdx >= 0);
     assert(sIdx < this->stackStatus.size());
     assert(nNodes >= 0);
@@ -343,7 +343,7 @@ template <int D> int NodeAllocator<D>::findNextAvailable(int sIdx, int nNodes) c
     return sIdx;
 }
 
-template <int D> int NodeAllocator<D>::findNextOccupied(int sIdx) const {
+template <int D, typename T> int NodeAllocator<D, T>::findNextOccupied(int sIdx) const {
     assert(sIdx >= 0);
     assert(sIdx < this->stackStatus.size());
     bool endOfStack = (sIdx >= this->topStack);
@@ -359,17 +359,17 @@ template <int D> int NodeAllocator<D>::findNextOccupied(int sIdx) const {
 }
 
 /** Traverse tree and redefine pointer, counter and tables. */
-template <int D> void NodeAllocator<D>::reassemble() {
+template <int D, typename T> void NodeAllocator<D, T>::reassemble() {
     MRCPP_SET_OMP_LOCK();
     this->nNodes = 0;
     getTree().nodesAtDepth.clear();
     getTree().squareNorm = 0.0;
     getTree().clearEndNodeTable();
 
-    NodeBox<D> &rootbox = getTree().getRootBox();
-    MWNode<D> **roots = rootbox.getNodes();
+    NodeBox<D, T> &rootbox = getTree().getRootBox();
+    MWNode<D, T> **roots = rootbox.getNodes();
 
-    std::stack<MWNode<D> *> stack;
+    std::stack<MWNode<D, T> *> stack;
     for (int rIdx = 0; rIdx < rootbox.size(); rIdx++) {
         auto *root_p = getNodeNoLock(rIdx);
         assert(root_p != nullptr);
@@ -414,7 +414,7 @@ template <int D> void NodeAllocator<D>::reassemble() {
     MRCPP_UNSET_OMP_LOCK();
 }
 
-template <int D> void NodeAllocator<D>::print() const {
+template <int D, typename T> void NodeAllocator<D, T>::print() const {
     int n = 0;
     for (int iChunk = 0; iChunk < getNChunks(); iChunk++) {
         int iShift = iChunk * this->maxNodesPerChunk;
@@ -436,8 +436,12 @@ template <int D> void NodeAllocator<D>::print() const {
     }
 }
 
-template class NodeAllocator<1>;
-template class NodeAllocator<2>;
-template class NodeAllocator<3>;
+template class NodeAllocator<1, double>;
+template class NodeAllocator<2, double>;
+template class NodeAllocator<3, double>;
+
+template class NodeAllocator<1, ComplexDouble>;
+template class NodeAllocator<2, ComplexDouble>;
+template class NodeAllocator<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/NodeAllocator.h b/src/trees/NodeAllocator.h
index 38e4ba7eb..b426d0021 100644
--- a/src/trees/NodeAllocator.h
+++ b/src/trees/NodeAllocator.h
@@ -40,12 +40,12 @@
 
 namespace mrcpp {
 
-template <int D> class NodeAllocator final {
+  template <int D, typename T> class NodeAllocator final {
 public:
-    NodeAllocator(OperatorTree *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk);
-    NodeAllocator(FunctionTree<D> *tree, SharedMemory *mem, int coefsPerNode, int nodesPerChunk);
-    NodeAllocator(const NodeAllocator<D> &tree) = delete;
-    NodeAllocator<D> &operator=(const NodeAllocator<D> &tree) = delete;
+    NodeAllocator(OperatorTree *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk);
+    NodeAllocator(FunctionTree<D, T> *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk);
+    NodeAllocator(const NodeAllocator<D, T> &tree) = delete;
+    NodeAllocator<D, T> &operator=(const NodeAllocator<D, T> &tree) = delete;
     ~NodeAllocator();
 
     int alloc(int nNodes, bool coefs = true);
@@ -63,13 +63,13 @@ template <int D> class NodeAllocator final {
     int getNChunks() const { return this->nodeChunks.size(); }
     int getNChunksUsed() const { return (this->topStack + this->maxNodesPerChunk - 1) / this->maxNodesPerChunk; }
     int getNodeChunkSize() const { return this->maxNodesPerChunk * this->sizeOfNode; }
-    int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(double); }
+    int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); }
 
-    double * getCoef_p(int sIdx);
-    MWNode<D> * getNode_p(int sIdx);
+    T * getCoef_p(int sIdx);
+    MWNode<D, T> * getNode_p(int sIdx);
 
-    double * getCoefChunk(int i) { return this->coefChunks[i]; }
-    MWNode<D> * getNodeChunk(int i) { return this->nodeChunks[i]; }
+    T * getCoefChunk(int i) { return this->coefChunks[i]; }
+    MWNode<D, T> * getNodeChunk(int i) { return this->nodeChunks[i]; }
 
     void print() const;
 
@@ -81,20 +81,20 @@ template <int D> class NodeAllocator final {
     int maxNodesPerChunk{0};        // max number of nodes per allocation
 
     std::vector<int> stackStatus{};
-    std::vector<double *> coefChunks{};
-    std::vector<MWNode<D> *> nodeChunks{};
+    std::vector<T *> coefChunks{};
+    std::vector<MWNode<D, T> *> nodeChunks{};
 
     char *cvptr{nullptr};           // pointer to virtual table
-    MWNode<D> *last_p{nullptr};     // pointer just after the last active node, i.e. where to put next node
-    MWTree<D> *tree_p{nullptr};     // pointer to external object
-    SharedMemory *shmem_p{nullptr}; // pointer to external object
+    MWNode<D, T> *last_p{nullptr};     // pointer just after the last active node, i.e. where to put next node
+    MWTree<D, T> *tree_p{nullptr};     // pointer to external object
+    SharedMemory<T> *shmem_p{nullptr}; // pointer to external object
 
     bool isShared() const { return (this->shmem_p != nullptr); }
-    MWTree<D> &getTree() { return *this->tree_p; }
-    SharedMemory &getMemory() { return *this->shmem_p; }
+    MWTree<D, T> &getTree() { return *this->tree_p; }
+    SharedMemory<T> &getMemory() { return *this->shmem_p; }
 
-    double * getCoefNoLock(int sIdx);
-    MWNode<D> * getNodeNoLock(int sIdx);
+    T * getCoefNoLock(int sIdx);
+    MWNode<D, T> * getNodeNoLock(int sIdx);
 
     void moveNodes(int nNodes, int srcIdx, int dstIdx);
     void appendChunk(bool coefs);
diff --git a/src/trees/NodeBox.cpp b/src/trees/NodeBox.cpp
index cc247d58e..bf747c4fc 100644
--- a/src/trees/NodeBox.cpp
+++ b/src/trees/NodeBox.cpp
@@ -36,50 +36,50 @@
 
 namespace mrcpp {
 
-template <int D>
-NodeBox<D>::NodeBox(const NodeIndex<D> &idx, const std::array<int, D> &nb)
+template <int D, typename T>
+NodeBox<D, T>::NodeBox(const NodeIndex<D> &idx, const std::array<int, D> &nb)
         : BoundingBox<D>(idx, nb)
         , nOccupied(0)
         , nodes(nullptr) {
     allocNodePointers();
 }
 
-template <int D>
-NodeBox<D>::NodeBox(const BoundingBox<D> &box)
+template <int D, typename T>
+NodeBox<D, T>::NodeBox(const BoundingBox<D> &box)
         : BoundingBox<D>(box)
         , nOccupied(0)
         , nodes(nullptr) {
     allocNodePointers();
 }
 
-template <int D>
-NodeBox<D>::NodeBox(const NodeBox<D> &box)
+template <int D, typename T>
+NodeBox<D, T>::NodeBox(const NodeBox<D, T> &box)
         : BoundingBox<D>(box)
         , nOccupied(0)
         , nodes(nullptr) {
     allocNodePointers();
 }
 
-template <int D> void NodeBox<D>::allocNodePointers() {
+template <int D, typename T> void NodeBox<D, T>::allocNodePointers() {
     assert(this->nodes == nullptr);
     int nNodes = this->size();
-    this->nodes = new MWNode<D> *[nNodes];
+    this->nodes = new MWNode<D, T> *[nNodes];
     for (int n = 0; n < nNodes; n++) { this->nodes[n] = nullptr; }
     this->nOccupied = 0;
 }
 
-template <int D> NodeBox<D>::~NodeBox() {
+template <int D, typename T> NodeBox<D, T>::~NodeBox() {
     deleteNodes();
 }
 
-template <int D> void NodeBox<D>::deleteNodes() {
+template <int D, typename T> void NodeBox<D, T>::deleteNodes() {
     if (this->nodes == nullptr) { return; }
     for (int n = 0; n < this->size(); n++) { clearNode(n); }
     delete[] this->nodes;
     this->nodes = nullptr;
 }
 
-template <int D> void NodeBox<D>::setNode(int bIdx, MWNode<D> **node) {
+template <int D, typename T> void NodeBox<D, T>::setNode(int bIdx, MWNode<D, T> **node) {
     assert(bIdx >= 0);
     assert(bIdx < this->totBoxes);
     clearNode(bIdx);
@@ -89,44 +89,48 @@ template <int D> void NodeBox<D>::setNode(int bIdx, MWNode<D> **node) {
     *node = nullptr;
 }
 
-template <int D> MWNode<D> &NodeBox<D>::getNode(NodeIndex<D> nIdx) {
+template <int D, typename T> MWNode<D, T> &NodeBox<D, T>::getNode(NodeIndex<D> nIdx) {
     int bIdx = this->getBoxIndex(nIdx);
     return getNode(bIdx);
 }
 
-template <int D> MWNode<D> &NodeBox<D>::getNode(Coord<D> r) {
+template <int D, typename T> MWNode<D, T> &NodeBox<D, T>::getNode(Coord<D> r) {
     int bIdx = this->getBoxIndex(r);
     if (bIdx < 0) MSG_ERROR("Coord out of bounds");
     return getNode(bIdx);
 }
 
-template <int D> MWNode<D> &NodeBox<D>::getNode(int bIdx) {
+template <int D, typename T> MWNode<D, T> &NodeBox<D, T>::getNode(int bIdx) {
     assert(bIdx >= 0);
     assert(bIdx < this->totBoxes);
     assert(this->nodes[bIdx] != nullptr);
     return *this->nodes[bIdx];
 }
 
-template <int D> const MWNode<D> &NodeBox<D>::getNode(NodeIndex<D> nIdx) const {
+template <int D, typename T> const MWNode<D, T> &NodeBox<D, T>::getNode(NodeIndex<D> nIdx) const {
     int bIdx = this->getBoxIndex(nIdx);
     return getNode(bIdx);
 }
 
-template <int D> const MWNode<D> &NodeBox<D>::getNode(Coord<D> r) const {
+template <int D, typename T> const MWNode<D, T> &NodeBox<D, T>::getNode(Coord<D> r) const {
     int bIdx = this->getBoxIndex(r);
     if (bIdx < 0) MSG_ERROR("Coord out of bounds");
     return getNode(bIdx);
 }
 
-template <int D> const MWNode<D> &NodeBox<D>::getNode(int bIdx) const {
+template <int D, typename T> const MWNode<D, T> &NodeBox<D, T>::getNode(int bIdx) const {
     assert(bIdx >= 0);
     assert(bIdx < this->totBoxes);
     assert(this->nodes[bIdx] != nullptr);
     return *this->nodes[bIdx];
 }
 
-template class NodeBox<1>;
-template class NodeBox<2>;
-template class NodeBox<3>;
+template class NodeBox<1, double>;
+template class NodeBox<2, double>;
+template class NodeBox<3, double>;
+
+template class NodeBox<1, ComplexDouble>;
+template class NodeBox<2, ComplexDouble>;
+template class NodeBox<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/NodeBox.h b/src/trees/NodeBox.h
index dfb0dc20c..3b53da538 100644
--- a/src/trees/NodeBox.h
+++ b/src/trees/NodeBox.h
@@ -30,31 +30,31 @@
 
 namespace mrcpp {
 
-template <int D> class NodeBox final : public BoundingBox<D> {
+  template <int D, typename T> class NodeBox final : public BoundingBox<D> {
 public:
     NodeBox(const NodeIndex<D> &idx, const std::array<int, D> &nb = {});
-    NodeBox(const NodeBox<D> &box);
+    NodeBox(const NodeBox<D, T> &box);
     NodeBox(const BoundingBox<D> &box);
-    NodeBox<D> &operator=(const NodeBox<D> &box) = delete;
+    NodeBox<D, T> &operator=(const NodeBox<D, T> &box) = delete;
     ~NodeBox() override;
 
-    void setNode(int idx, MWNode<D> **node);
+    void setNode(int idx, MWNode<D, T> **node);
     void clearNode(int idx) { this->nodes[idx] = nullptr; }
 
-    MWNode<D> &getNode(NodeIndex<D> idx);
-    MWNode<D> &getNode(Coord<D> r);
-    MWNode<D> &getNode(int i = 0);
+    MWNode<D, T> &getNode(NodeIndex<D> idx);
+    MWNode<D, T> &getNode(Coord<D> r);
+    MWNode<D, T> &getNode(int i = 0);
 
-    const MWNode<D> &getNode(NodeIndex<D> idx) const;
-    const MWNode<D> &getNode(Coord<D> r) const;
-    const MWNode<D> &getNode(int i = 0) const;
+    const MWNode<D, T> &getNode(NodeIndex<D> idx) const;
+    const MWNode<D, T> &getNode(Coord<D> r) const;
+    const MWNode<D, T> &getNode(int i = 0) const;
 
     int getNOccupied() const { return this->nOccupied; }
-    MWNode<D> **getNodes() { return this->nodes; }
+    MWNode<D, T> **getNodes() { return this->nodes; }
 
 protected:
     int nOccupied;     ///< Number of non-zero pointers in box
-    MWNode<D> **nodes; ///< Container of nodes
+    MWNode<D, T> **nodes; ///< Container of nodes
 
     void allocNodePointers();
     void deleteNodes();
diff --git a/src/trees/TreeIterator.cpp b/src/trees/TreeIterator.cpp
index 9bf9fb054..f7f88e03b 100644
--- a/src/trees/TreeIterator.cpp
+++ b/src/trees/TreeIterator.cpp
@@ -29,7 +29,7 @@
 
 namespace mrcpp {
 
-template <int D> TreeIterator<D>::TreeIterator(int traverse, int iterator)
+template <int D, typename T> TreeIterator<D, T>::TreeIterator(int traverse, int iterator)
         : root(0)
         , nRoots(0)
         , mode(traverse)
@@ -38,7 +38,7 @@ template <int D> TreeIterator<D>::TreeIterator(int traverse, int iterator)
         , state(nullptr)
         , initialState(nullptr) {}
 
-template <int D> TreeIterator<D>::TreeIterator(MWTree<D> &tree, int traverse, int iterator)
+template <int D, typename T> TreeIterator<D, T>::TreeIterator(MWTree<D, T> &tree, int traverse, int iterator)
         : root(0)
         , nRoots(0)
         , mode(traverse)
@@ -49,23 +49,23 @@ template <int D> TreeIterator<D>::TreeIterator(MWTree<D> &tree, int traverse, in
     init(tree);
 }
 
-template <int D> TreeIterator<D>::~TreeIterator() {
+template <int D, typename T> TreeIterator<D, T>::~TreeIterator() {
     if (this->initialState != nullptr) delete this->initialState;
 }
 
-template<int D> int TreeIterator<D>::getChildIndex(int i) const {
-    const MWNode<D> &node = *this->state->node;
+  template<int D, typename T> int TreeIterator<D, T>::getChildIndex(int i) const {
+    const MWNode<D, T> &node = *this->state->node;
     const HilbertPath<D> &h = node.getHilbertPath();
     // Legesgue type returns i, Hilbert type returns Hilbert index
     return (this->type == Hilbert) ? h.getZIndex(i) : i;
 }
 
-template <int D> bool TreeIterator<D>::next() {
+template <int D, typename T> bool TreeIterator<D, T>::next() {
     if (not this->state) return false;
     if (this->mode == TopDown) {
         if (this->tryNode()) return true;
     }
-    MWNode<D> &node = *this->state->node;
+    MWNode<D, T> &node = *this->state->node;
     if (checkDepth(node) and checkGenerated(node)) {
         const int nChildren = 1 << D;
         for (int i = 0; i < nChildren; i++) {
@@ -80,12 +80,12 @@ template <int D> bool TreeIterator<D>::next() {
     this->removeState();
     return next();
 }
-template <int D> bool TreeIterator<D>::nextParent() {
+template <int D, typename T> bool TreeIterator<D, T>::nextParent() {
     if (not this->state) return false;
     if (this->mode == BottomUp) {
         if (this->tryNode()) return true;
     }
-    MWNode<D> &node = *this->state->node;
+    MWNode<D, T> &node = *this->state->node;
     if (this->tryNextRootParent()) return true;
     if (checkDepth(node)) {
         if (this->tryParent()) return true;
@@ -97,73 +97,73 @@ template <int D> bool TreeIterator<D>::nextParent() {
     return nextParent();
 }
 
-template <int D> void TreeIterator<D>::init(MWTree<D> &tree) {
+template <int D, typename T> void TreeIterator<D, T>::init(MWTree<D, T> &tree) {
     this->root = 0;
     this->maxDepth = -1;
     this->nRoots = tree.getRootBox().size();
-    this->state = new IteratorNode<D>(&tree.getRootBox().getNode(this->root));
+    this->state = new IteratorNode<D, T>(&tree.getRootBox().getNode(this->root));
     // Save the first state so it can be properly deleted later
     this->initialState = this->state;
 }
 
-template <int D> bool TreeIterator<D>::tryNode() {
+template <int D, typename T> bool TreeIterator<D, T>::tryNode() {
     if (not this->state) { return false; }
     if (this->state->doneNode) { return false; }
     this->state->doneNode = true;
     return true;
 }
 
-template <int D> bool TreeIterator<D>::tryChild(int i) {
+template <int D, typename T> bool TreeIterator<D, T>::tryChild(int i) {
     if (not this->state) { return false; }
     if (this->state->doneChild[i]) { return false; }
     this->state->doneChild[i] = true;
     if (this->state->node->isLeafNode()) { return false; }
-    MWNode<D> *child = &this->state->node->getMWChild(i);
-    this->state = new IteratorNode<D>(child, this->state);
+    MWNode<D, T> *child = &this->state->node->getMWChild(i);
+    this->state = new IteratorNode<D, T>(child, this->state);
     return next();
 }
 
-template <int D> bool TreeIterator<D>::tryParent() {
+template <int D, typename T> bool TreeIterator<D, T>::tryParent() {
     if (not this->state) return false;
     if (this->state->doneParent) return false;
     this->state->doneParent = true;
     if (not this->state->node->hasParent()) return false;
-    MWNode<D> *parent = &this->state->node->getMWParent();
-    this->state = new IteratorNode<D>(parent, this->state);
+    MWNode<D, T> *parent = &this->state->node->getMWParent();
+    this->state = new IteratorNode<D, T>(parent, this->state);
     return nextParent();
 }
 
-template <int D> bool TreeIterator<D>::tryNextRoot() {
+template <int D, typename T> bool TreeIterator<D, T>::tryNextRoot() {
     if (not this->state) { return false; }
     if (not this->state->node->isRootNode()) { return false; }
     this->root++;
     if (this->root >= this->nRoots) { return false; }
-    MWNode<D> *nextRoot = &state->node->getMWTree().getRootBox().getNode(root);
-    this->state = new IteratorNode<D>(nextRoot, this->state);
+    MWNode<D, T> *nextRoot = &state->node->getMWTree().getRootBox().getNode(root);
+    this->state = new IteratorNode<D, T>(nextRoot, this->state);
     return next();
 }
 
-template <int D> bool TreeIterator<D>::tryNextRootParent() {
+template <int D, typename T> bool TreeIterator<D, T>::tryNextRootParent() {
     if (not this->state) { return false; }
     if (not this->state->node->isRootNode()) { return false; }
     this->root++;
     if (this->root >= this->nRoots) { return false; }
-    MWNode<D> *nextRoot = &state->node->getMWTree().getRootBox().getNode(root);
-    this->state = new IteratorNode<D>(nextRoot, this->state);
+    MWNode<D, T> *nextRoot = &state->node->getMWTree().getRootBox().getNode(root);
+    this->state = new IteratorNode<D, T>(nextRoot, this->state);
     return nextParent();
 }
 
-template <int D> void TreeIterator<D>::removeState() {
+template <int D, typename T> void TreeIterator<D, T>::removeState() {
     if (this->state == this->initialState) { this->initialState = nullptr; }
     if (this->state != nullptr) {
-        IteratorNode<D> *spare = this->state;
+        IteratorNode<D, T> *spare = this->state;
         this->state = spare->next;
         spare->next = nullptr;
         delete spare;
     }
 }
 
-template <int D> void TreeIterator<D>::setTraverse(int traverse) {
+template <int D, typename T> void TreeIterator<D, T>::setTraverse(int traverse) {
     switch (traverse) {
         case TopDown:
             this->mode = TopDown;
@@ -177,7 +177,7 @@ template <int D> void TreeIterator<D>::setTraverse(int traverse) {
     }
 }
 
-template <int D> void TreeIterator<D>::setIterator(int iterator) {
+template <int D, typename T> void TreeIterator<D, T>::setIterator(int iterator) {
     switch (iterator) {
         case Lebesgue:
             this->type = Lebesgue;
@@ -191,7 +191,7 @@ template <int D> void TreeIterator<D>::setIterator(int iterator) {
     }
 }
 
-template <int D> bool TreeIterator<D>::checkDepth(const MWNode<D> &node) const {
+template <int D, typename T> bool TreeIterator<D, T>::checkDepth(const MWNode<D, T> &node) const {
     if (this->maxDepth < 0) {
         return true;
     } else if (node.getDepth() < this->maxDepth) {
@@ -201,7 +201,7 @@ template <int D> bool TreeIterator<D>::checkDepth(const MWNode<D> &node) const {
     }
 }
 
-template <int D> bool TreeIterator<D>::checkGenerated(const MWNode<D> &node) const {
+template <int D, typename T> bool TreeIterator<D, T>::checkGenerated(const MWNode<D, T> &node) const {
     if (node.isEndNode() and not this->returnGenNodes) {
         return false;
     } else {
@@ -209,8 +209,8 @@ template <int D> bool TreeIterator<D>::checkGenerated(const MWNode<D> &node) con
     }
 }
 
-template <int D>
-IteratorNode<D>::IteratorNode(MWNode<D> *nd, IteratorNode<D> *nx)
+template <int D, typename T>
+IteratorNode<D, T>::IteratorNode(MWNode<D, T> *nd, IteratorNode<D, T> *nx)
         : node(nd)
         , next(nx)
         , doneNode(false)
@@ -219,8 +219,12 @@ IteratorNode<D>::IteratorNode(MWNode<D> *nd, IteratorNode<D> *nx)
     for (int i = 0; i < nChildren; i++) { this->doneChild[i] = false; }
 }
 
-template class TreeIterator<1>;
-template class TreeIterator<2>;
-template class TreeIterator<3>;
+template class TreeIterator<1, double>;
+template class TreeIterator<2, double>;
+template class TreeIterator<3, double>;
+
+template class TreeIterator<1, ComplexDouble>;
+template class TreeIterator<2, ComplexDouble>;
+template class TreeIterator<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/TreeIterator.h b/src/trees/TreeIterator.h
index d82db82a0..a79fe8412 100644
--- a/src/trees/TreeIterator.h
+++ b/src/trees/TreeIterator.h
@@ -30,10 +30,10 @@
 
 namespace mrcpp {
 
-template <int D> class TreeIterator {
+template <int D, typename T> class TreeIterator {
 public:
     TreeIterator(int traverse = TopDown, int iterator = Lebesgue);
-    TreeIterator(MWTree<D> &tree, int traverse = TopDown, int iterator = Lebesgue);
+    TreeIterator(MWTree<D, T> &tree, int traverse = TopDown, int iterator = Lebesgue);
     virtual ~TreeIterator();
 
     void setReturnGenNodes(bool i = true) { this->returnGenNodes = i; }
@@ -41,12 +41,12 @@ template <int D> class TreeIterator {
     void setTraverse(int traverse);
     void setIterator(int iterator);
 
-    void init(MWTree<D> &tree);
+    void init(MWTree<D, T> &tree);
     bool next();
     bool nextParent();
-    MWNode<D> &getNode() { return *this->state->node; }
+    MWNode<D, T> &getNode() { return *this->state->node; }
 
-    friend class IteratorNode<D>;
+  friend class IteratorNode<D, T>;
 
 protected:
     int root;
@@ -55,8 +55,8 @@ template <int D> class TreeIterator {
     int type;
     int maxDepth;
     bool returnGenNodes{true};
-    IteratorNode<D> *state;
-    IteratorNode<D> *initialState;
+    IteratorNode<D, T> *state;
+    IteratorNode<D, T> *initialState;
 
     int getChildIndex(int i) const;
 
@@ -66,19 +66,19 @@ template <int D> class TreeIterator {
     bool tryNextRoot();
     bool tryNextRootParent();
     void removeState();
-    bool checkDepth(const MWNode<D> &node) const;
-    bool checkGenerated(const MWNode<D> &node) const;
+    bool checkDepth(const MWNode<D, T> &node) const;
+    bool checkGenerated(const MWNode<D, T> &node) const;
 };
 
-template <int D> class IteratorNode final {
+template <int D, typename T> class IteratorNode final {
 public:
-    MWNode<D> *node;
-    IteratorNode<D> *next;
+    MWNode<D, T> *node;
+    IteratorNode<D, T> *next;
     bool doneNode;
     bool doneParent;
     bool doneChild[1 << D];
 
-    IteratorNode(MWNode<D> *nd, IteratorNode<D> *nx = nullptr);
+    IteratorNode(MWNode<D, T> *nd, IteratorNode<D, T> *nx = nullptr);
     ~IteratorNode() { delete this->next; }
 };
 
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index cf33cadfb..7cfac3a8b 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -22,7 +22,7 @@ class MPI_FuncVector;
 namespace mrcpp {
 
 class BankAccount;
-template <int D> class FunctionTree;
+  template <int D, typename T> class FunctionTree;
 template <int D> class MultiResolutionAnalysis;
 
 using ComplexDouble = std::complex<double>;
@@ -58,8 +58,8 @@ class TreePtr final {
         if (this->func_data.is_shared and mpi::share_size > 1) {
             // Memory size in MB defined in input. Virtual memory, does not cost anything if not used.
 #ifdef MRCPP_HAS_MPI
-            this->shared_mem_re = new mrcpp::SharedMemory(mpi::comm_share, mpi::shared_memory_size);
-            this->shared_mem_im = new mrcpp::SharedMemory(mpi::comm_share, mpi::shared_memory_size);
+            this->shared_mem_re = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
+            this->shared_mem_im = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
 #endif
         }
     }
@@ -75,8 +75,8 @@ class TreePtr final {
 
 private:
     FunctionData func_data;
-    mrcpp::SharedMemory *shared_mem_re;
-    mrcpp::SharedMemory *shared_mem_im;
+    mrcpp::SharedMemory<double> *shared_mem_re;
+    mrcpp::SharedMemory<double> *shared_mem_im;
     mrcpp::FunctionTree<3> *re; ///< Real part of function
     mrcpp::FunctionTree<3> *im; ///< Imaginary part of function
 
diff --git a/src/utils/Plotter.cpp b/src/utils/Plotter.cpp
index 455bb57e6..b24f2a643 100644
--- a/src/utils/Plotter.cpp
+++ b/src/utils/Plotter.cpp
@@ -37,24 +37,24 @@ namespace mrcpp {
  *
  *  @param[in] o: Plot origin, default `(0, 0, ... , 0)`
  */
-template <int D>
-Plotter<D>::Plotter(const Coord<D> &o)
+template <int D, typename T>
+Plotter<D, T>::Plotter(const Coord<D> &o)
         : O(o) {
-    setSuffix(Plotter<D>::Line, ".line");
-    setSuffix(Plotter<D>::Surface, ".surf");
-    setSuffix(Plotter<D>::Cube, ".cube");
-    setSuffix(Plotter<D>::Grid, ".grid");
+    setSuffix(Plotter<D, T>::Line, ".line");
+    setSuffix(Plotter<D, T>::Surface, ".surf");
+    setSuffix(Plotter<D, T>::Cube, ".cube");
+    setSuffix(Plotter<D, T>::Grid, ".grid");
 }
 
 /** @brief Set file extension for output file
  *
- *  @param[in] t: Plot type (`Plotter<D>::Line`, `::Surface`, `::Cube`, `::Grid`)
+ *  @param[in] t: Plot type (`Plotter<D, T>::Line`, `::Surface`, `::Cube`, `::Grid`)
  *  @param[in] s: Extension string, default `.line`, `.surf`, `.cube`, `.grid`
  *
  *  @details The file name you decide for the output will get a predefined
  *  suffix that differentiates between different types of plot.
  */
-template <int D> void Plotter<D>::setSuffix(int t, const std::string &s) {
+template <int D, typename T> void Plotter<D, T>::setSuffix(int t, const std::string &s) {
     this->suffix.insert(std::pair<int, std::string>(t, s));
 }
 
@@ -62,7 +62,7 @@ template <int D> void Plotter<D>::setSuffix(int t, const std::string &s) {
  *
  *  @param[in] o: Plot origin, default `(0, 0, ... , 0)`
  */
-template <int D> void Plotter<D>::setOrigin(const Coord<D> &o) {
+template <int D, typename T> void Plotter<D, T>::setOrigin(const Coord<D> &o) {
     this->O = o;
 }
 
@@ -72,7 +72,7 @@ template <int D> void Plotter<D>::setOrigin(const Coord<D> &o) {
  *  @param[in] b: B vector
  *  @param[in] c: C vector
  */
-template <int D> void Plotter<D>::setRange(const Coord<D> &a, const Coord<D> &b, const Coord<D> &c) {
+template <int D, typename T> void Plotter<D, T>::setRange(const Coord<D> &a, const Coord<D> &b, const Coord<D> &c) {
     this->A = a;
     this->B = b;
     this->C = c;
@@ -89,10 +89,10 @@ template <int D> void Plotter<D>::setRange(const Coord<D> &a, const Coord<D> &b,
  *  separate file, and will print only nodes owned by itself (pluss the
  *  rootNodes).
  */
-template <int D> void Plotter<D>::gridPlot(const MWTree<D> &tree, const std::string &fname) {
+template <int D, typename T> void Plotter<D, T>::gridPlot(const MWTree<D, T> &tree, const std::string &fname) {
     println(20, "----------Grid Plot-----------");
     std::stringstream file;
-    file << fname << this->suffix[Plotter<D>::Grid];
+    file << fname << this->suffix[Plotter<D, T>::Grid];
     openPlot(file.str());
     writeGrid(tree);
     closePlot();
@@ -109,16 +109,16 @@ template <int D> void Plotter<D>::gridPlot(const MWTree<D> &tree, const std::str
  *  vector A starting from the origin O to a file named fname + file extension
  *  (".line" as default).
  */
-template <int D>
-void Plotter<D>::linePlot(const std::array<int, 1> &npts,
-                          const RepresentableFunction<D> &func,
+template <int D, typename T>
+void Plotter<D, T>::linePlot(const std::array<int, 1> &npts,
+                          const RepresentableFunction<D, T> &func,
                           const std::string &fname) {
     println(20, "----------Line Plot-----------");
     std::stringstream file;
-    file << fname << this->suffix[Plotter<D>::Line];
+    file << fname << this->suffix[Plotter<D, T>::Line];
     if (verifyRange(1)) { // Verifies only A vector
         Eigen::MatrixXd coords = calcLineCoordinates(npts[0]);
-        Eigen::VectorXd values = evaluateFunction(func, coords);
+        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeData(coords, values);
         closePlot();
@@ -138,16 +138,16 @@ void Plotter<D>::linePlot(const std::array<int, 1> &npts,
  *  vectors A (npts[0] points) and B (npts[1] points), starting from the
  *  origin O, to a file named fname + file extension (".surf" as default).
  */
-template <int D>
-void Plotter<D>::surfPlot(const std::array<int, 2> &npts,
-                          const RepresentableFunction<D> &func,
+template <int D, typename T>
+void Plotter<D, T>::surfPlot(const std::array<int, 2> &npts,
+                          const RepresentableFunction<D, T> &func,
                           const std::string &fname) {
     println(20, "--------Surface Plot----------");
     std::stringstream file;
-    file << fname << this->suffix[Plotter<D>::Surface];
+    file << fname << this->suffix[Plotter<D, T>::Surface];
     if (verifyRange(2)) { // Verifies A and B vectors
         Eigen::MatrixXd coords = calcSurfCoordinates(npts[0], npts[1]);
-        Eigen::VectorXd values = evaluateFunction(func, coords);
+        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeData(coords, values);
         closePlot();
@@ -168,16 +168,16 @@ void Plotter<D>::surfPlot(const std::array<int, 2> &npts,
  *  starting from the origin O, to a file named fname + file extension
  *  (".cube" as default).
  */
-template <int D>
-void Plotter<D>::cubePlot(const std::array<int, 3> &npts,
-                          const RepresentableFunction<D> &func,
+template <int D, typename T>
+void Plotter<D, T>::cubePlot(const std::array<int, 3> &npts,
+                          const RepresentableFunction<D, T> &func,
                           const std::string &fname) {
     println(20, "----------Cube Plot-----------");
     std::stringstream file;
-    file << fname << this->suffix[Plotter<D>::Cube];
+    file << fname << this->suffix[Plotter<D, T>::Cube];
     if (verifyRange(3)) { // Verifies A, B and C vectors
         Eigen::MatrixXd coords = calcCubeCoordinates(npts[0], npts[1], npts[2]);
-        Eigen::VectorXd values = evaluateFunction(func, coords);
+        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeCube(npts, values);
         closePlot();
@@ -192,7 +192,7 @@ void Plotter<D>::cubePlot(const std::array<int, 3> &npts,
  *  @details Generating a vector of pts_a equidistant coordinates that makes
  *  up the vector A in D dimensions, starting from the origin O.
  */
-template <int D> Eigen::MatrixXd Plotter<D>::calcLineCoordinates(int pts_a) const {
+template <int D, typename T> Eigen::MatrixXd Plotter<D, T>::calcLineCoordinates(int pts_a) const {
     MatrixXd coords;
     if (pts_a > 0) {
         Coord<D> a = calcStep(this->A, pts_a);
@@ -211,7 +211,7 @@ template <int D> Eigen::MatrixXd Plotter<D>::calcLineCoordinates(int pts_a) cons
  *  @details Generating a vector of equidistant coordinates that makes up the
  *  area spanned by vectors A and B in D dimensions, starting from the origin O.
  */
-template <int D> Eigen::MatrixXd Plotter<D>::calcSurfCoordinates(int pts_a, int pts_b) const {
+template <int D, typename T> Eigen::MatrixXd Plotter<D, T>::calcSurfCoordinates(int pts_a, int pts_b) const {
     if (D < 2) MSG_ERROR("Cannot surfPlot less than 2D");
 
     MatrixXd coords;
@@ -240,7 +240,7 @@ template <int D> Eigen::MatrixXd Plotter<D>::calcSurfCoordinates(int pts_a, int
  *  volume spanned by vectors A, B and C in D dimensions, starting from
  *  the origin O.
  */
-template <int D> Eigen::MatrixXd Plotter<D>::calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const {
+template <int D, typename T> Eigen::MatrixXd Plotter<D, T>::calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const {
     if (D < 3) MSG_ERROR("Cannot cubePlot less than 3D function");
 
     MatrixXd coords;
@@ -272,12 +272,12 @@ template <int D> Eigen::MatrixXd Plotter<D>::calcCubeCoordinates(int pts_a, int
  *  this routine evaluates the function in these points and stores the results
  *  in the vector "values".
  */
-template <int D>
-Eigen::VectorXd Plotter<D>::evaluateFunction(const RepresentableFunction<D> &func,
+template <int D, typename T>
+Eigen::Matrix< T, Eigen::Dynamic, 1 > Plotter<D, T>::evaluateFunction(const RepresentableFunction<D, T> &func,
                                              const Eigen::MatrixXd &coords) const {
     auto npts = coords.rows();
     if (npts == 0) MSG_ERROR("Empty coordinates");
-    Eigen::VectorXd values = VectorXd::Zero(npts);
+    Eigen::Matrix< T, Eigen::Dynamic, 1 > values = Eigen::Matrix< T, Eigen::Dynamic, 1 >::Zero(npts);
 #pragma omp parallel for schedule(static) num_threads(mrcpp_get_num_threads())
     for (auto i = 0; i < npts; i++) {
         Coord<D> r{};
@@ -294,7 +294,7 @@ Eigen::VectorXd Plotter<D>::evaluateFunction(const RepresentableFunction<D> &fun
  *  point number (between 0 and nPoints), coordinates 1 through D and the
  *  function value.
  */
-template <int D> void Plotter<D>::writeData(const Eigen::MatrixXd &coords, const Eigen::VectorXd &values) {
+  template <int D, typename T> void Plotter<D, T>::writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values) {
     if (coords.rows() != values.size()) INVALID_ARG_ABORT;
     std::ofstream &o = *this->fout;
     for (auto i = 0; i < values.size(); i++) {
@@ -308,17 +308,17 @@ template <int D> void Plotter<D>::writeData(const Eigen::MatrixXd &coords, const
 }
 
 // Specialized for D=3 below
-template <int D> void Plotter<D>::writeCube(const std::array<int, 3> &npts, const Eigen::VectorXd &values) {
+template <int D, typename T> void Plotter<D, T>::writeCube(const std::array<int, 3> &npts, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values) {
     NOT_IMPLEMENTED_ABORT
 }
 
 // Specialized for D=3 below
-template <int D> void Plotter<D>::writeNodeGrid(const MWNode<D> &node, const std::string &color) {
+template <int D, typename T> void Plotter<D, T>::writeNodeGrid(const MWNode<D, T> &node, const std::string &color) {
     NOT_IMPLEMENTED_ABORT
 }
 
 // Specialized for D=3 below
-template <int D> void Plotter<D>::writeGrid(const MWTree<D> &tree) {
+template <int D, typename T> void Plotter<D, T>::writeGrid(const MWTree<D, T> &tree) {
     NOT_IMPLEMENTED_ABORT
 }
 
@@ -326,7 +326,7 @@ template <int D> void Plotter<D>::writeGrid(const MWTree<D> &tree) {
  *
  *  @details Opens a file output stream fout for file named fname.
  */
-template <int D> void Plotter<D>::openPlot(const std::string &fname) {
+template <int D, typename T> void Plotter<D, T>::openPlot(const std::string &fname) {
     if (fname.empty()) {
         if (this->fout == nullptr) {
             MSG_ERROR("Plot file not set!");
@@ -350,7 +350,7 @@ template <int D> void Plotter<D>::openPlot(const std::string &fname) {
  *
  *  @details Closes the file output stream fout.
  */
-template <int D> void Plotter<D>::closePlot() {
+template <int D, typename T> void Plotter<D, T>::closePlot() {
     if (this->fout != nullptr) this->fout->close();
     this->fout = nullptr;
 }
@@ -462,7 +462,7 @@ template <> void Plotter<3>::writeGrid(const MWTree<3> &tree) {
 }
 
 /** @brief Checks the validity of the plotting range */
-template <int D> bool Plotter<D>::verifyRange(int dim) const {
+template <int D, typename T> bool Plotter<D, T>::verifyRange(int dim) const {
 
     auto is_len_zero = [](Coord<D> vec) {
         double vec_sq = 0.0;
@@ -483,14 +483,18 @@ template <int D> bool Plotter<D>::verifyRange(int dim) const {
 }
 
 /** @brief Compute step length to cover vector with `pts` points, including edges */
-template <int D> Coord<D> Plotter<D>::calcStep(const Coord<D> &vec, int pts) const {
+template <int D, typename T> Coord<D> Plotter<D, T>::calcStep(const Coord<D> &vec, int pts) const {
     Coord<D> step;
     for (auto d = 0; d < D; d++) step[d] = vec[d] / (pts - 1.0);
     return step;
 }
 
-template class Plotter<1>;
-template class Plotter<2>;
-template class Plotter<3>;
+template class Plotter<1, double>;
+template class Plotter<2, double>;
+template class Plotter<3, double>;
+
+template class Plotter<1, ComplexDouble>;
+template class Plotter<2, ComplexDouble>;
+template class Plotter<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/utils/Plotter.h b/src/utils/Plotter.h
index d38941b27..547150197 100644
--- a/src/utils/Plotter.h
+++ b/src/utils/Plotter.h
@@ -56,7 +56,7 @@ namespace mrcpp {
  *
  */
 
-template <int D> class Plotter {
+template <int D, typename T> class Plotter {
 public:
     explicit Plotter(const Coord<D> &o = {});
     virtual ~Plotter() = default;
@@ -65,10 +65,10 @@ template <int D> class Plotter {
     void setOrigin(const Coord<D> &o);
     void setRange(const Coord<D> &a, const Coord<D> &b = {}, const Coord<D> &c = {});
 
-    void gridPlot(const MWTree<D> &tree, const std::string &fname);
-    void linePlot(const std::array<int, 1> &npts, const RepresentableFunction<D> &func, const std::string &fname);
-    void surfPlot(const std::array<int, 2> &npts, const RepresentableFunction<D> &func, const std::string &fname);
-    void cubePlot(const std::array<int, 3> &npts, const RepresentableFunction<D> &func, const std::string &fname);
+    void gridPlot(const MWTree<D, T> &tree, const std::string &fname);
+    void linePlot(const std::array<int, 1> &npts, const RepresentableFunction<D, T> &func, const std::string &fname);
+    void surfPlot(const std::array<int, 2> &npts, const RepresentableFunction<D, T> &func, const std::string &fname);
+    void cubePlot(const std::array<int, 3> &npts, const RepresentableFunction<D, T> &func, const std::string &fname);
 
     enum type { Line, Surface, Cube, Grid };
 
@@ -86,13 +86,13 @@ template <int D> class Plotter {
     Eigen::MatrixXd calcSurfCoordinates(int pts_a, int pts_b) const;
     Eigen::MatrixXd calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const;
 
-    Eigen::VectorXd evaluateFunction(const RepresentableFunction<D> &func, const Eigen::MatrixXd &coords) const;
+    Eigen::Matrix< T, Eigen::Dynamic, 1 > evaluateFunction(const RepresentableFunction<D, T> &func, const Eigen::MatrixXd &coords) const;
 
-    void writeData(const Eigen::MatrixXd &coords, const Eigen::VectorXd &values);
-    virtual void writeCube(const std::array<int, 3> &npts, const Eigen::VectorXd &values);
+    void writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values);
+    virtual void writeCube(const std::array<int, 3> &npts, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values);
 
-    void writeGrid(const MWTree<D> &tree);
-    void writeNodeGrid(const MWNode<D> &node, const std::string &color);
+    void writeGrid(const MWTree<D, T> &tree);
+    void writeNodeGrid(const MWNode<D, T> &node, const std::string &color);
 
 private:
     bool verifyRange(int dim) const;
diff --git a/src/utils/Printer.cpp b/src/utils/Printer.cpp
index d9d04f4bd..957d7322b 100644
--- a/src/utils/Printer.cpp
+++ b/src/utils/Printer.cpp
@@ -265,7 +265,7 @@ void print::tree(int level, const std::string &txt, int n, int m, double t) {
  * @param[in] tree: Tree to be printed
  * @param[in] timer: Timer to be evaluated
  */
-template <int D> void print::tree(int level, const std::string &txt, const MWTree<D> &tree, const Timer &timer) {
+  template <int D, typename T> void print::tree(int level, const std::string &txt, const MWTree<D, T> &tree, const Timer &timer) {
     if (level > Printer::getPrintLevel()) return;
 
     auto n = tree.getNNodes();
diff --git a/src/utils/Printer.h b/src/utils/Printer.h
index dc4935aa8..c021155e8 100644
--- a/src/utils/Printer.h
+++ b/src/utils/Printer.h
@@ -39,7 +39,7 @@
 namespace mrcpp {
 
 class Timer;
-template <int D> class MWTree;
+template <int D, typename T> class MWTree;
 
 /** @class Printer
  *
@@ -128,7 +128,7 @@ void memory(int level, const std::string &txt);
 void value(int level, const std::string &txt, double v, const std::string &unit = "", int p = -1, bool sci = true);
 void time(int level, const std::string &txt, const Timer &timer);
 void tree(int level, const std::string &txt, int n, int m, double t);
-template <int D> void tree(int level, const std::string &txt, const MWTree<D> &tree, const Timer &timer);
+template <int D, typename T> void tree(int level, const std::string &txt, const MWTree<D, T> &tree, const Timer &timer);
 } // namespace print
 
 // clang-format off
diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp
index 8506be298..5ee9294f6 100644
--- a/src/utils/math_utils.cpp
+++ b/src/utils/math_utils.cpp
@@ -185,6 +185,20 @@ void math_utils::apply_filter(double *out, double *in, const MatrixXd &filter, i
 #endif
 }
 
+void math_utils::apply_filter(ComplexDouble *out, ComplexDouble *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
+    // Complex-coefficient overload: g (out, kp1_dm1 x kp1) = or += f^T (in, kp1 x kp1_dm1) * filter.
+    // NOTE(review): the cblas_dgemm branch of the double overload is not used here,
+    // presumably because dgemm cannot take ComplexDouble buffers -- confirm before adding BLAS.
+    Map<MatrixXcd> f(in, kp1, kp1_dm1);
+    Map<MatrixXcd> g(out, kp1_dm1, kp1);
+    if (fac < MachineZero) {
+        g.noalias() = f.transpose() * filter;
+    } else {
+        g.noalias() += f.transpose() * filter;
+    }
+    // fac only selects the mode: below MachineZero overwrites out, otherwise accumulates into it.
+}
+
 /** Make a nD-representation from 1D-representations of separable functions.
  *
  * This method uses the "output" vector as initial input, in order to
diff --git a/src/utils/math_utils.h b/src/utils/math_utils.h
index 9c371aa51..9dcdb6956 100644
--- a/src/utils/math_utils.h
+++ b/src/utils/math_utils.h
@@ -67,6 +67,7 @@ double matrix_norm_1(const Eigen::MatrixXd &M);
 double matrix_norm_2(const Eigen::MatrixXd &M);
 
 void apply_filter(double *out, double *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
+void apply_filter(ComplexDouble *out, ComplexDouble *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
 
 void tensor_expand_coefs(int dim, int dir, int kp1, int kp1_d, const Eigen::MatrixXd &primitive, Eigen::VectorXd &expanded);
 
diff --git a/src/utils/mpi_utils.cpp b/src/utils/mpi_utils.cpp
index d61f2bd23..d50b15868 100644
--- a/src/utils/mpi_utils.cpp
+++ b/src/utils/mpi_utils.cpp
@@ -36,7 +36,7 @@ namespace mrcpp {
  *  @param[in] comm: Communicator sharing resources
  *  @param[in] sh_size: Memory size, in MB
  */
-SharedMemory::SharedMemory(mrcpp::mpi_comm comm, int sh_size)
+template <typename T> SharedMemory<T>::SharedMemory(mrcpp::mpi_comm comm, int sh_size)
         : sh_start_ptr(nullptr)
         , sh_end_ptr(nullptr)
         , sh_max_ptr(nullptr)
@@ -57,18 +57,18 @@ SharedMemory::SharedMemory(mrcpp::mpi_comm comm, int sh_size)
     int qdisp = 0;
     MPI_Win_shared_query(this->sh_win, 0, &qsize, &qdisp, &this->sh_start_ptr);
     MPI_Win_fence(0, this->sh_win);
-    this->sh_max_ptr = this->sh_start_ptr + qsize / sizeof(double);
+    this->sh_max_ptr = this->sh_start_ptr + qsize / sizeof(T);
     this->sh_end_ptr = this->sh_start_ptr;
 #endif
 }
 
-void SharedMemory::clear() {
+template <typename T> void SharedMemory<T>::clear() {
 #ifdef MRCPP_HAS_MPI
     this->sh_end_ptr = this->sh_start_ptr;
 #endif
 }
 
-SharedMemory::~SharedMemory() {
+template <typename T> SharedMemory<T>::~SharedMemory() {
 #ifdef MRCPP_HAS_MPI
     // deallocates the memory block
     MPI_Win_free(&this->sh_win);
@@ -88,7 +88,7 @@ SharedMemory::~SharedMemory() {
  *  to speed up communication, otherwise it will be communicated in a separate
  *  step before the main communication.
  */
-template <int D> void send_tree(FunctionTree<D> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) {
+template <int D, typename T> void send_tree(FunctionTree<D, T> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) {
 #ifdef MRCPP_HAS_MPI
     auto &allocator = tree.getNodeAllocator();
 
@@ -121,7 +121,7 @@ template <int D> void send_tree(FunctionTree<D> &tree, int dst, int tag, mrcpp::
  *  to speed up communication, otherwise it will be communicated in a separate
  *  step before the main communication.
  */
-template <int D> void recv_tree(FunctionTree<D> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) {
+template <int D, typename T> void recv_tree(FunctionTree<D, T> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
     auto &allocator = tree.getNodeAllocator();
@@ -157,7 +157,7 @@ template <int D> void recv_tree(FunctionTree<D> &tree, int src, int tag, mrcpp::
  *  @details This function should be called every time a shared function is
  *  updated, in order to update the local memory of each MPI process.
  */
-template <int D> void share_tree(FunctionTree<D> &tree, int src, int tag, mrcpp::mpi_comm comm) {
+template <int D, typename T> void share_tree(FunctionTree<D, T> &tree, int src, int tag, mrcpp::mpi_comm comm) {
 #ifdef MRCPP_HAS_MPI
     Timer t1;
     auto &allocator = tree.getNodeAllocator();
@@ -197,7 +197,9 @@ template <int D> void share_tree(FunctionTree<D> &tree, int src, int tag, mrcpp:
     println(10, " Time share                  " << std::setw(30) << t1.elapsed());
 #endif
 }
-
+template class SharedMemory<double>;
+template class SharedMemory<ComplexDouble>;
+
 template void send_tree<1>(FunctionTree<1> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
 template void send_tree<2>(FunctionTree<2> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
 template void send_tree<3>(FunctionTree<3> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
diff --git a/src/utils/mpi_utils.h b/src/utils/mpi_utils.h
index 1b94b0dc9..93211fd1a 100644
--- a/src/utils/mpi_utils.h
+++ b/src/utils/mpi_utils.h
@@ -74,26 +74,26 @@ namespace mrcpp {
  *  communicator. In order to allocate a FunctionTree in shared memory,
  *  simply pass a SharedMemory object to the FunctionTree constructor.
  */
-class SharedMemory {
+template <typename T> class SharedMemory {
 public:
     SharedMemory(mrcpp::mpi_comm comm, int sh_size);
     SharedMemory(const SharedMemory &mem) = delete;
-    SharedMemory &operator=(const SharedMemory &mem) = delete;
+    SharedMemory<T> &operator=(const SharedMemory<T> &mem) = delete;
     ~SharedMemory();
 
     void clear(); // show shared memory as entirely available
 
-    double *sh_start_ptr;  // start of shared block
-    double *sh_end_ptr;    // end of used part
-    double *sh_max_ptr;    // end of shared block
+    T *sh_start_ptr;       // start of shared block
+    T *sh_end_ptr;         // end of used part (clear() resets it to sh_start_ptr)
+    T *sh_max_ptr;         // end of shared block (sh_start_ptr + byte size / sizeof(T))
     mrcpp::mpi_win sh_win; // MPI window object
     int rank;              // rank among shared group
 };
 
-template <int D> class FunctionTree;
+template <int D, typename T> class FunctionTree;
 
-template <int D> void send_tree(FunctionTree<D> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true);
-template <int D> void recv_tree(FunctionTree<D> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true);
-template <int D> void share_tree(FunctionTree<D> &tree, int src, int tag, mrcpp::mpi_comm comm);
+template <int D, typename T> void send_tree(FunctionTree<D, T> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true);
+template <int D, typename T> void recv_tree(FunctionTree<D, T> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks = -1, bool coeff = true);
+template <int D, typename T> void share_tree(FunctionTree<D, T> &tree, int src, int tag, mrcpp::mpi_comm comm);
 
 } // namespace mrcpp
diff --git a/src/utils/tree_utils.cpp b/src/utils/tree_utils.cpp
index 523d3e263..f45fcc158 100644
--- a/src/utils/tree_utils.cpp
+++ b/src/utils/tree_utils.cpp
@@ -44,7 +44,7 @@ namespace mrcpp {
  * Calculates the threshold that has to be met in the wavelet norm in order to
  * guarantee the precision in the function representation. Depends on the
  * square norm of the function and the requested relative accuracy. */
-template <int D> bool tree_utils::split_check(const MWNode<D> &node, double prec, double split_fac, bool abs_prec) {
+template <int D, typename T> bool tree_utils::split_check(const MWNode<D, T> &node, double prec, double split_fac, bool abs_prec) {
     bool split = false;
     if (prec > 0.0) {
         double t_norm = 1.0;
@@ -66,40 +66,40 @@ template <int D> bool tree_utils::split_check(const MWNode<D> &node, double prec
 
 /** Traverse tree along the Hilbert path and find nodes of any rankId.
  * Returns one nodeVector for the whole tree. GenNodes disregarded. */
-template <int D> void tree_utils::make_node_table(MWTree<D> &tree, MWNodeVector<D> &table) {
-    TreeIterator<D> it(tree, TopDown, Hilbert);
+template <int D, typename T> void tree_utils::make_node_table(MWTree<D, T> &tree, MWNodeVector<D, T> &table) {
+    TreeIterator<D, T> it(tree, TopDown, Hilbert);
     it.setReturnGenNodes(false);
     while (it.nextParent()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         if (node.getDepth() == 0) continue;
         table.push_back(&node);
     }
     it.init(tree);
     while (it.next()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         table.push_back(&node);
     }
 }
 
 /** Traverse tree along the Hilbert path and find nodes of any rankId.
  * Returns one nodeVector per scale. GenNodes disregarded. */
-template <int D> void tree_utils::make_node_table(MWTree<D> &tree, std::vector<MWNodeVector<D>> &table) {
-    TreeIterator<D> it(tree, TopDown, Hilbert);
+template <int D, typename T> void tree_utils::make_node_table(MWTree<D, T> &tree, std::vector<MWNodeVector<D, T>> &table) {
+    TreeIterator<D, T> it(tree, TopDown, Hilbert);
     it.setReturnGenNodes(false);
     while (it.nextParent()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         if (node.getDepth() == 0) continue;
         int depth = node.getDepth() + tree.getNNegScales();
         // Add one more element
-        if (depth + 1 > table.size()) table.push_back(MWNodeVector<D>());
+        if (depth + 1 > table.size()) table.push_back(MWNodeVector<D, T>());
         table[depth].push_back(&node);
     }
     it.init(tree);
     while (it.next()) {
-        MWNode<D> &node = it.getNode();
+        MWNode<D, T> &node = it.getNode();
         int depth = node.getDepth() + tree.getNNegScales();
         // Add one more element
-        if (depth + 1 > table.size()) table.push_back(MWNodeVector<D>());
+        if (depth + 1 > table.size()) table.push_back(MWNodeVector<D, T>());
         table[depth].push_back(&node);
     }
 }
@@ -110,7 +110,7 @@ template <int D> void tree_utils::make_node_table(MWTree<D> &tree, std::vector<M
  * The output is written directly into the 8 children scaling coefficients.
  * NB: ASSUMES that the children coefficients are separated by Children_Stride!
  */
-template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite) {
+template <int D, typename T> void tree_utils::mw_transform(const MWTree<D, T> &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite) {
     int operation = Reconstruction;
     int kp1 = tree.getKp1();
     int kp1_d = tree.getKp1_d();
@@ -118,8 +118,8 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     const MWFilter &filter = tree.getMRA().getFilter();
     double overwrite = 0.0;
-    double tmpcoeff[kp1_d * tDim];
-    double tmpcoeff2[kp1_d * tDim];
+    T tmpcoeff[kp1_d * tDim];
+    T tmpcoeff2[kp1_d * tDim];
     int ftlim = tDim;
     int ftlim2 = tDim;
     int ftlim3 = tDim;
@@ -135,13 +135,13 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
     int i = 0;
     int mask = 1;
     for (int gt = 0; gt < tDim; gt++) {
-        double *out = tmpcoeff + gt * kp1_d;
+        T *out = tmpcoeff + gt * kp1_d;
         for (int ft = 0; ft < ftlim; ft++) {
             // Operate in direction i only if the bits along other
             // directions are identical. The bit of the direction we
             // operate on determines the appropriate filter/operator
             if ((gt | mask) == (ft | mask)) {
-                double *in = coeff_in + ft * kp1_d;
+                T *in = coeff_in + ft * kp1_d;
                 int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                 const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -155,13 +155,13 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
         i++;
         mask = 2; // 1 << i;
         for (int gt = 0; gt < tDim; gt++) {
-            double *out = tmpcoeff2 + gt * kp1_d;
+            T *out = tmpcoeff2 + gt * kp1_d;
             for (int ft = 0; ft < ftlim2; ft++) {
                 // Operate in direction i only if the bits along other
                 // directions are identical. The bit of the direction we
                 // operate on determines the appropriate filter/operator
                 if ((gt | mask) == (ft | mask)) {
-                    double *in = tmpcoeff + ft * kp1_d;
+                    T *in = tmpcoeff + ft * kp1_d;
                     int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                     const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -178,13 +178,13 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
         i++;
         mask = 4; // 1 << i;
         for (int gt = 0; gt < tDim; gt++) {
-            double *out = coeff_out + gt * stride; // write right into children
+            T *out = coeff_out + gt * stride; // write right into children
             for (int ft = 0; ft < ftlim3; ft++) {
                 // Operate in direction i only if the bits along other
                 // directions are identical. The bit of the direction we
                 // operate on determines the appropriate filter/operator
                 if ((gt | mask) == (ft | mask)) {
-                    double *in = tmpcoeff2 + ft * kp1_d;
+                    T *in = tmpcoeff2 + ft * kp1_d;
                     int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                     const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -200,7 +200,7 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
     if (D > 3) MSG_ABORT("D>3 NOT IMPLEMENTED for S_mwtransform");
 
     if (D < 3) {
-        double *out;
+        T *out;
         if (D == 1) out = tmpcoeff;
         if (D == 2) out = tmpcoeff2;
         if (b_overwrite) {
@@ -216,9 +216,9 @@ template <int D> void tree_utils::mw_transform(const MWTree<D> &tree, double *co
 }
 
 // Specialized for D=3 below.
-template <int D> void tree_utils::mw_transform_back(MWTree<D> &tree, double *coeff_in, double *coeff_out, int stride) {
-    NOT_IMPLEMENTED_ABORT;
-}
+// NOTE: the generic-D definition of mw_transform_back (which only called
+// NOT_IMPLEMENTED_ABORT) has been dropped; the function below is an overload
+// for MWTree<3, T>, templated on the coefficient type T only.
 
 /** Make parent from children scaling coefficients
  * Other node info are not used/set
@@ -226,7 +226,7 @@ template <int D> void tree_utils::mw_transform_back(MWTree<D> &tree, double *coe
  * The output is read directly from the 8 children scaling coefficients.
  * NB: ASSUMES that the children coefficients are separated by Children_Stride!
  */
-template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff_in, double *coeff_out, int stride) {
+template <typename T>  void tree_utils::mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride) {
     int operation = Compression;
     int kp1 = tree.getKp1();
     int kp1_d = tree.getKp1_d();
@@ -234,7 +234,7 @@ template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff
     int kp1_dm1 = math_utils::ipow(kp1, 2);
     const MWFilter &filter = tree.getMRA().getFilter();
     double overwrite = 0.0;
-    double tmpcoeff[kp1_d * tDim];
+    T tmpcoeff[kp1_d * tDim];
 
     int ftlim = tDim;
     int ftlim2 = tDim;
@@ -243,13 +243,13 @@ template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff
     int i = 0;
     int mask = 1;
     for (int gt = 0; gt < tDim; gt++) {
-        double *out = coeff_out + gt * kp1_d;
+        T *out = coeff_out + gt * kp1_d;
         for (int ft = 0; ft < ftlim; ft++) {
             // Operate in direction i only if the bits along other
             // directions are identical. The bit of the direction we
             // operate on determines the appropriate filter/operator
             if ((gt | mask) == (ft | mask)) {
-                double *in = coeff_in + ft * stride;
+                T *in = coeff_in + ft * stride;
                 int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                 const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -262,13 +262,13 @@ template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff
     i++;
     mask = 2; // 1 << i;
     for (int gt = 0; gt < tDim; gt++) {
-        double *out = tmpcoeff + gt * kp1_d;
+        T *out = tmpcoeff + gt * kp1_d;
         for (int ft = 0; ft < ftlim2; ft++) {
             // Operate in direction i only if the bits along other
             // directions are identical. The bit of the direction we
             // operate on determines the appropriate filter/operator
             if ((gt | mask) == (ft | mask)) {
-                double *in = coeff_out + ft * kp1_d;
+                T *in = coeff_out + ft * kp1_d;
                 int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                 const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -281,14 +281,14 @@ template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff
     i++;
     mask = 4; // 1 << i;
     for (int gt = 0; gt < tDim; gt++) {
-        double *out = coeff_out + gt * kp1_d;
-        // double *out = coeff_out + gt * N_coeff;
+        T *out = coeff_out + gt * kp1_d;
+        // T *out = coeff_out + gt * N_coeff;
         for (int ft = 0; ft < ftlim3; ft++) {
             // Operate in direction i only if the bits along other
             // directions are identical. The bit of the direction we
             // operate on determines the appropriate filter/operator
             if ((gt | mask) == (ft | mask)) {
-                double *in = tmpcoeff + ft * kp1_d;
+                T *in = tmpcoeff + ft * kp1_d;
                 int filter_index = 2 * ((gt >> i) & 1) + ((ft >> i) & 1);
                 const Eigen::MatrixXd &oper = filter.getSubFilter(filter_index, operation);
 
@@ -300,24 +300,46 @@ template <> void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff
     }
 }
 
-template bool tree_utils::split_check<1>(const MWNode<1> &node, double prec, double split_fac, bool abs_prec);
-template bool tree_utils::split_check<2>(const MWNode<2> &node, double prec, double split_fac, bool abs_prec);
-template bool tree_utils::split_check<3>(const MWNode<3> &node, double prec, double split_fac, bool abs_prec);
 
-template void tree_utils::make_node_table<1>(MWTree<1> &tree, MWNodeVector<1> &table);
-template void tree_utils::make_node_table<2>(MWTree<2> &tree, MWNodeVector<2> &table);
-template void tree_utils::make_node_table<3>(MWTree<3> &tree, MWNodeVector<3> &table);
+template void tree_utils::make_node_table<1, double>(MWTree<1, double> &tree, MWNodeVector<1, double> &table);
+template void tree_utils::make_node_table<2, double>(MWTree<2, double> &tree, MWNodeVector<2, double> &table);
+template void tree_utils::make_node_table<3, double>(MWTree<3, double> &tree, MWNodeVector<3, double> &table);
+
+template void tree_utils::make_node_table<1, double>(MWTree<1, double> &tree, std::vector<MWNodeVector<1, double>> &table);
+template void tree_utils::make_node_table<2, double>(MWTree<2, double> &tree, std::vector<MWNodeVector<2, double>> &table);
+template void tree_utils::make_node_table<3, double>(MWTree<3, double> &tree, std::vector<MWNodeVector<3, double>> &table);
+
+template bool tree_utils::split_check<1, double>(const MWNode<1, double> &node, double prec, double split_fac, bool abs_prec);
+template bool tree_utils::split_check<2, double>(const MWNode<2, double> &node, double prec, double split_fac, bool abs_prec);
+template bool tree_utils::split_check<3, double>(const MWNode<3, double> &node, double prec, double split_fac, bool abs_prec);
+
+template void tree_utils::mw_transform<1, double>(const MWTree<1, double> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+template void tree_utils::mw_transform<2, double>(const MWTree<2, double> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+template void tree_utils::mw_transform<3, double>(const MWTree<3, double> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+
+//template void tree_utils::mw_transform_back<1, double>(MWTree<1, double> &tree, double *coeff_in, double *coeff_out, int stride);
+//template void tree_utils::mw_transform_back<2, double>(MWTree<2, double> &tree, double *coeff_in, double *coeff_out, int stride);
+template void tree_utils::mw_transform_back<double>(MWTree<3, double> &tree, double *coeff_in, double *coeff_out, int stride);
+
+  
+template void tree_utils::make_node_table<1, ComplexDouble>(MWTree<1, ComplexDouble> &tree, MWNodeVector<1, ComplexDouble> &table);
+template void tree_utils::make_node_table<2, ComplexDouble>(MWTree<2, ComplexDouble> &tree, MWNodeVector<2, ComplexDouble> &table);
+template void tree_utils::make_node_table<3, ComplexDouble>(MWTree<3, ComplexDouble> &tree, MWNodeVector<3, ComplexDouble> &table);
+
+template void tree_utils::make_node_table<1, ComplexDouble>(MWTree<1, ComplexDouble> &tree, std::vector<MWNodeVector<1, ComplexDouble>> &table);
+template void tree_utils::make_node_table<2, ComplexDouble>(MWTree<2, ComplexDouble> &tree, std::vector<MWNodeVector<2, ComplexDouble>> &table);
+template void tree_utils::make_node_table<3, ComplexDouble>(MWTree<3, ComplexDouble> &tree, std::vector<MWNodeVector<3, ComplexDouble>> &table);
 
-template void tree_utils::make_node_table<1>(MWTree<1> &tree, std::vector<MWNodeVector<1>> &table);
-template void tree_utils::make_node_table<2>(MWTree<2> &tree, std::vector<MWNodeVector<2>> &table);
-template void tree_utils::make_node_table<3>(MWTree<3> &tree, std::vector<MWNodeVector<3>> &table);
+template bool tree_utils::split_check<1, ComplexDouble>(const MWNode<1, ComplexDouble> &node, double prec, double split_fac, bool abs_prec);
+template bool tree_utils::split_check<2, ComplexDouble>(const MWNode<2, ComplexDouble> &node, double prec, double split_fac, bool abs_prec);
+template bool tree_utils::split_check<3, ComplexDouble>(const MWNode<3, ComplexDouble> &node, double prec, double split_fac, bool abs_prec);
 
-template void tree_utils::mw_transform<1>(const MWTree<1> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
-template void tree_utils::mw_transform<2>(const MWTree<2> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
-template void tree_utils::mw_transform<3>(const MWTree<3> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+template void tree_utils::mw_transform<1, ComplexDouble>(const MWTree<1, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+template void tree_utils::mw_transform<2, ComplexDouble>(const MWTree<2, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
+template void tree_utils::mw_transform<3, ComplexDouble>(const MWTree<3, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
 
-template void tree_utils::mw_transform_back<1>(MWTree<1> &tree, double *coeff_in, double *coeff_out, int stride);
-template void tree_utils::mw_transform_back<2>(MWTree<2> &tree, double *coeff_in, double *coeff_out, int stride);
-template void tree_utils::mw_transform_back<3>(MWTree<3> &tree, double *coeff_in, double *coeff_out, int stride);
+//template void tree_utils::mw_transform_back<1, ComplexDouble>(MWTree<1, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
+//template void tree_utils::mw_transform_back<2, ComplexDouble>(MWTree<2, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
+template void tree_utils::mw_transform_back<ComplexDouble>(MWTree<3, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
 
 } // namespace mrcpp
diff --git a/src/utils/tree_utils.h b/src/utils/tree_utils.h
index 8f2c4220a..90ff2a418 100644
--- a/src/utils/tree_utils.h
+++ b/src/utils/tree_utils.h
@@ -25,18 +25,21 @@
 
 #pragma once
 
+#include "utils/math_utils.h"
 #include "MRCPP/mrcpp_declarations.h"
 
 namespace mrcpp {
 namespace tree_utils {
 
-template <int D> bool split_check(const MWNode<D> &node, double prec, double split_fac, bool abs_prec);
+template <int D, typename T> bool split_check(const MWNode<D, T> &node, double prec, double split_fac, bool abs_prec);
 
-template <int D> void make_node_table(MWTree<D> &tree, MWNodeVector<D> &table);
-template <int D> void make_node_table(MWTree<D> &tree, std::vector<MWNodeVector<D>> &table);
+template <int D, typename T> void make_node_table(MWTree<D, T> &tree, MWNodeVector<D, T> &table);
+template <int D, typename T> void make_node_table(MWTree<D, T> &tree, std::vector<MWNodeVector<D, T>> &table);
+
+template <int D, typename T> void mw_transform(const MWTree<D, T> &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool overwrite = true);
+//template <int D, typename T> void mw_transform_back(MWTree<D, T> &tree, T *coeff_in, T *coeff_out, int stride);
+template <typename T> void mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride);
 
-template <int D> void mw_transform(const MWTree<D> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool overwrite = true);
-template <int D> void mw_transform_back(MWTree<D> &tree, double *coeff_in, double *coeff_out, int stride);
 
 } // namespace tree_utils
 } // namespace mrcpp
diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index 5f4400d1b..7e35db405 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -102,10 +102,10 @@ template <int D> void testDifferentiationABGV(double a, double b) {
     };
 
     FunctionTree<D> f_tree(*mra);
-    project<D>(prec / 10, f_tree, f);
+    project<D, double>(prec / 10, f_tree, f);
 
     FunctionTree<D> df_tree(*mra);
-    project<D>(prec / 10, df_tree, df);
+    project<D, double>(prec / 10, df_tree, df);
 
     FunctionTree<D> dg_tree(*mra);
     apply(dg_tree, diff, f_tree, 0);
@@ -143,10 +143,10 @@ template <int D> void testDifferentiationPH(int order) {
     };
 
     FunctionTree<D> f_tree(*mra);
-    project<D>(prec / 10, f_tree, f);
+    project<D, double>(prec / 10, f_tree, f);
 
     FunctionTree<D> df_tree(*mra);
-    project<D>(prec / 10, df_tree, df);
+    project<D, double>(prec / 10, df_tree, df);
 
     FunctionTree<D> dg_tree(*mra);
     apply(dg_tree, diff, f_tree, 0);
@@ -174,7 +174,7 @@ template <int D> void testDifferentiationPeriodicABGV(double a, double b) {
     FunctionTree<D> g_tree(*mra);
     FunctionTree<D> dg_tree(*mra);
 
-    project<D>(prec, g_tree, g_func);
+    project<D, double>(prec, g_tree, g_func);
 
     apply(dg_tree, diff, g_tree, 0);
     refine_grid(dg_tree, 1); // for accurate evalf
@@ -202,7 +202,7 @@ template <int D> void testDifferentiationPeriodicPH(int order) {
     FunctionTree<D> g_tree(*mra);
     FunctionTree<D> dg_tree(*mra);
 
-    project<D>(prec, g_tree, g_func);
+    project<D, double>(prec, g_tree, g_func);
 
     apply(dg_tree, diff, g_tree, 0);
     refine_grid(dg_tree, 1); // for accurate evalf
@@ -237,10 +237,10 @@ template <int D> void testDifferentiationBS(int order) {
     };
 
     FunctionTree<D> f_tree(*mra);
-    project<D>(prec / 10, f_tree, f);
+    project<D, double>(prec / 10, f_tree, f);
 
     FunctionTree<D> df_tree(*mra);
-    project<D>(prec / 10, df_tree, df);
+    project<D, double>(prec / 10, df_tree, df);
 
     FunctionTree<D> dg_tree(*mra);
     apply(dg_tree, diff, f_tree, 0);
@@ -335,7 +335,7 @@ TEST_CASE("Gradient operator", "[derivative_operator], [gradient_operator]") {
     };
 
     FunctionTree<3> f_tree(*mra);
-    project<3>(prec, f_tree, f);
+    project<3, double>(prec, f_tree, f);
 
     auto grad_f = gradient(diff, f_tree);
     REQUIRE(grad_f.size() == 3);
@@ -373,7 +373,7 @@ TEST_CASE("Divergence operator", "[derivative_operator], [divergence_operator]")
     };
 
     FunctionTree<3> f_tree(*mra);
-    project<3>(prec, f_tree, f);
+    project<3, double>(prec, f_tree, f);
     FunctionTreeVector<3> f_vec;
     f_vec.push_back(std::make_tuple(1.0, &f_tree));
     f_vec.push_back(std::make_tuple(2.0, &f_tree));
@@ -389,6 +389,6 @@ TEST_CASE("Divergence operator", "[derivative_operator], [divergence_operator]")
     }
 
     delete mra;
-}
+    }
 
 } // namespace derivative_operator
diff --git a/tests/operators/helmholtz_operator.cpp b/tests/operators/helmholtz_operator.cpp
index 7a0dc0243..8f570691d 100644
--- a/tests/operators/helmholtz_operator.cpp
+++ b/tests/operators/helmholtz_operator.cpp
@@ -169,14 +169,14 @@ TEST_CASE("Apply Helmholtz' operator", "[apply_helmholtz], [helmholtz_operator],
         return R_0 * Y_00;
     };
     FunctionTree<3> psi_n(MRA);
-    project<3>(proj_prec, psi_n, hFunc);
+    project<3, double>(proj_prec, psi_n, hFunc);
 
     auto f = [Z](const Coord<3> &r) -> double {
         double x = std::sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2]);
         return -Z / x;
     };
     FunctionTree<3> V(MRA);
-    project<3>(proj_prec, V, f);
+    project<3, double>(proj_prec, V, f);
 
     FunctionTree<3> Vpsi(MRA);
     copy_grid(Vpsi, psi_n);
@@ -222,7 +222,7 @@ TEST_CASE("Apply Periodic Helmholtz' operator", "[apply_periodic_helmholtz], [he
     auto source = [mu](const mrcpp::Coord<3> &r) { return 3.0 * cos(r[0]) * cos(r[1]) * cos(r[2]) / (4.0 * pi) + mu * mu * cos(r[0]) * cos(r[1]) * cos(r[2]) / (4.0 * pi); };
 
     FunctionTree<3> source_tree(MRA);
-    project<3>(proj_prec, source_tree, source);
+    project<3, double>(proj_prec, source_tree, source);
 
     FunctionTree<3> sol_tree(MRA);
     FunctionTree<3> in_tree(MRA);
@@ -265,7 +265,7 @@ TEST_CASE("Apply negative scale Helmholtz' operator", "[apply_periodic_helmholtz
     auto source = [mu](const mrcpp::Coord<3> &r) { return 3.0 * cos(r[0]) * cos(r[1]) * cos(r[2]) / (4.0 * pi) + mu * mu * cos(r[0]) * cos(r[1]) * cos(r[2]) / (4.0 * pi); };
 
     FunctionTree<3> source_tree(MRA);
-    project<3>(proj_prec, source_tree, source);
+    project<3, double>(proj_prec, source_tree, source);
 
     FunctionTree<3> sol_tree(MRA);
 
@@ -274,4 +274,5 @@ TEST_CASE("Apply negative scale Helmholtz' operator", "[apply_periodic_helmholtz
     REQUIRE(sol_tree.evalf({0.0, 0.0, 0.0}) == Catch::Approx(1.0).epsilon(apply_prec));
     REQUIRE(sol_tree.evalf({pi, 0.0, 0.0}) == Catch::Approx(-1.0).epsilon(apply_prec));
 }
+
 } // namespace helmholtz_operator
diff --git a/tests/operators/poisson_operator.cpp b/tests/operators/poisson_operator.cpp
index df841a625..eab986886 100644
--- a/tests/operators/poisson_operator.cpp
+++ b/tests/operators/poisson_operator.cpp
@@ -187,7 +187,7 @@ TEST_CASE("Apply Periodic Poisson' operator", "[apply_periodic_Poisson], [poisso
     auto source = [](const mrcpp::Coord<3> &r) { return 3.0 * cos(r[0]) * cos(r[1]) * cos(r[2]) / (4.0 * pi); };
 
     FunctionTree<3> source_tree(MRA);
-    project<3>(proj_prec, source_tree, source);
+    project<3, double>(proj_prec, source_tree, source);
 
     FunctionTree<3> sol_tree(MRA);
 
@@ -195,6 +195,6 @@ TEST_CASE("Apply Periodic Poisson' operator", "[apply_periodic_Poisson], [poisso
 
     REQUIRE(sol_tree.evalf({0.0, 0.0, 0.0}) == Catch::Approx(1.0).epsilon(apply_prec));
     REQUIRE(sol_tree.evalf({pi, 0.0, 0.0}) == Catch::Approx(-1.0).epsilon(apply_prec));
-}
+    }
 
 } // namespace poisson_operator
diff --git a/tests/operators/schrodinger_evolution_operator.cpp b/tests/operators/schrodinger_evolution_operator.cpp
index c986ec756..477065062 100644
--- a/tests/operators/schrodinger_evolution_operator.cpp
+++ b/tests/operators/schrodinger_evolution_operator.cpp
@@ -38,7 +38,6 @@
 
 namespace schrodinger_evolution_operator {
 
-
 TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolution], [schrodinger_evolution_operator], [mw_operator]") {
     const auto min_scale = 0;
     const auto max_depth = 25;
@@ -87,13 +86,13 @@ TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolutio
 
     // Projecting functions
     mrcpp::FunctionTree<1> Re_f_tree(MRA);
-    mrcpp::project<1>(prec, Re_f_tree, Re_f);
+    mrcpp::project<1, double>(prec, Re_f_tree, Re_f);
     mrcpp::FunctionTree<1> Im_f_tree(MRA);
-    mrcpp::project<1>(prec, Im_f_tree, Im_f);
+    mrcpp::project<1, double>(prec, Im_f_tree, Im_f);
     mrcpp::FunctionTree<1> Re_g_tree(MRA);
-    mrcpp::project<1>(prec, Re_g_tree, Re_g);
+    mrcpp::project<1, double>(prec, Re_g_tree, Re_g);
     mrcpp::FunctionTree<1> Im_g_tree(MRA);
-    mrcpp::project<1>(prec, Im_g_tree, Im_g);
+    mrcpp::project<1, double>(prec, Im_g_tree, Im_g);
 
     // Output function trees
     mrcpp::FunctionTree<1> Re_fout_tree(MRA);
@@ -129,5 +128,4 @@ TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolutio
     REQUIRE(Im_sq_norm == Catch::Approx(0.0).margin(tolerance));
 }
 
-
-} // namespace schrodinger_evolution_operator
\ No newline at end of file
+} // namespace schrodinger_evolution_operator
diff --git a/tests/treebuilders/map.cpp b/tests/treebuilders/map.cpp
index 0745db2e6..c3c333bba 100644
--- a/tests/treebuilders/map.cpp
+++ b/tests/treebuilders/map.cpp
@@ -77,7 +77,7 @@ template <int D> void testMapping() {
     const double inp_int = inp_tree.integrate();
     const double inp_norm = inp_tree.getSquareNorm();
 
-    auto fmap = [](double val) { return val * val; };
+    FMap<double,double> fmap = [](double val) { return val * val; };
 
     WHEN("the function is mapped") {
         FunctionTree<D> out_tree(*mra);
diff --git a/tests/treebuilders/multiplication.cpp b/tests/treebuilders/multiplication.cpp
index 5fa2178e1..7e8437f77 100644
--- a/tests/treebuilders/multiplication.cpp
+++ b/tests/treebuilders/multiplication.cpp
@@ -202,7 +202,7 @@ template <int D> void testSquare() {
     }
     finalize(&mra);
 }
-
+  
 TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot]") {
     MultiResolutionAnalysis<3> *mra = nullptr;
     initialize<3>(&mra);
@@ -221,14 +221,14 @@ TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot
         double r2 = (r[0] * r[0] + r[1] * r[1] + r[2] * r[2]);
         return r[0] * r[1] * std::exp(-2.0 * r2);
     };
-
+    
     FunctionTree<3> fx_tree(*mra);
     FunctionTree<3> fy_tree(*mra);
     FunctionTree<3> fz_tree(*mra);
 
-    project<3>(prec, fx_tree, fx);
-    project<3>(prec, fy_tree, fy);
-    project<3>(prec, fz_tree, fz);
+    project<3, double>(prec, fx_tree, fx);
+    project<3, double>(prec, fy_tree, fy);
+    project<3, double>(prec, fz_tree, fz);
 
     FunctionTreeVector<3> vec_a;
     vec_a.push_back(std::make_tuple(1.0, &fx_tree));
@@ -252,6 +252,6 @@ TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot
     }
 
     finalize(&mra);
-}
+    }
 
 } // namespace multiplication

From 0760089657e0d81830981d4a393931094db5a0ab Mon Sep 17 00:00:00 2001
From: gitpeterwind <peterw@met.no>
Date: Fri, 26 Apr 2024 16:27:48 +0200
Subject: [PATCH 02/38] update examples for native complex trees

---
 examples/derivative.cpp              | 4 ++--
 examples/mpi_matrix.cpp              | 2 +-
 examples/mpi_send_tree.cpp           | 2 +-
 examples/mpi_shared_tree.cpp         | 4 ++--
 examples/projection.cpp              | 2 +-
 examples/scf.cpp                     | 4 ++--
 examples/schrodinger_semigroup1d.cpp | 8 ++++----
 examples/tree_cleaner.cpp            | 5 +++--
 8 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/examples/derivative.cpp b/examples/derivative.cpp
index bc0475db9..bd33bcfe7 100644
--- a/examples/derivative.cpp
+++ b/examples/derivative.cpp
@@ -51,8 +51,8 @@ int main(int argc, char **argv) {
     mrcpp::FunctionTree<D> err_tree(MRA);
 
     // Projecting functions
-    mrcpp::project<D>(prec, f_tree, f);
-    mrcpp::project<D>(prec, df_tree, df);
+    mrcpp::project<D, double>(prec, f_tree, f);
+    mrcpp::project<D, double>(prec, df_tree, df);
 
     // Applying derivative operator
     mrcpp::apply(dg_tree, D_00, f_tree, 0);
diff --git a/examples/mpi_matrix.cpp b/examples/mpi_matrix.cpp
index f3580d158..69c370a70 100644
--- a/examples/mpi_matrix.cpp
+++ b/examples/mpi_matrix.cpp
@@ -54,7 +54,7 @@ int main(int argc, char **argv) {
         };
         mrcpp::FunctionTree<3> *tree = new mrcpp::FunctionTree<3>(MRA);
         if (i % wsize == wrank) {
-            mrcpp::project<3>(prec, *tree, f);
+	  mrcpp::project<3, double>(prec, *tree, f);
             tree->normalize();
         }
         f_vec.push_back(std::make_tuple(1.0, tree));
diff --git a/examples/mpi_send_tree.cpp b/examples/mpi_send_tree.cpp
index 44ffd0dec..aff8be379 100644
--- a/examples/mpi_send_tree.cpp
+++ b/examples/mpi_send_tree.cpp
@@ -55,7 +55,7 @@ int main(int argc, char **argv) {
     mrcpp::FunctionTree<D> f_tree(MRA);
 
     // Only rank 0 projects the function
-    if (wrank == 0) mrcpp::project<D>(prec, f_tree, f);
+    if (wrank == 0) mrcpp::project<D, double>(prec, f_tree, f);
 
     { // Print data before send
         auto integral = f_tree.integrate();
diff --git a/examples/mpi_shared_tree.cpp b/examples/mpi_shared_tree.cpp
index aa59d8204..ba7f7db5e 100644
--- a/examples/mpi_shared_tree.cpp
+++ b/examples/mpi_shared_tree.cpp
@@ -63,12 +63,12 @@ int main(int argc, char **argv) {
     };
 
     // Initialize a shared memory tree, max 100MB
-    auto shared_mem = new mrcpp::SharedMemory(scomm, 100);
+    auto shared_mem = new mrcpp::SharedMemory<double>(scomm, 100);
     mrcpp::FunctionTree<D> f_tree(MRA, shared_mem);
 
     // Only first rank projects
     auto frank = 0;
-    if (srank == frank) mrcpp::project<D>(prec, f_tree, f);
+    if (srank == frank) mrcpp::project<D, double>(prec, f_tree, f);
     mrcpp::share_tree(f_tree, frank, 0, scomm);
 
     { // Print data after share
diff --git a/examples/projection.cpp b/examples/projection.cpp
index 92f1a7b53..9243485fb 100644
--- a/examples/projection.cpp
+++ b/examples/projection.cpp
@@ -37,7 +37,7 @@ int main(int argc, char **argv) {
 
     // Projecting function
     mrcpp::FunctionTree<D> f_tree(MRA);
-    mrcpp::project<D>(prec, f_tree, f, -1);
+    mrcpp::project<D, double>(prec, f_tree, f, -1);
     auto integral = f_tree.integrate();
 
     mrcpp::print::header(0, "Projecting analytic function");
diff --git a/examples/scf.cpp b/examples/scf.cpp
index f91058f14..fe34d936b 100644
--- a/examples/scf.cpp
+++ b/examples/scf.cpp
@@ -31,7 +31,7 @@ void setupNuclearPotential(double Z, FunctionTree<D> &V) {
     };
 
     // Projecting function
-    project<D>(prec, V, f);
+    project<D, double>(prec, V, f);
 
     print::footer(0, timer, 2);
     Printer::setPrintLevel(oldlevel);
@@ -48,7 +48,7 @@ void setupInitialGuess(FunctionTree<D> &phi) {
     };
 
     // Projecting and normalizing function
-    project<D>(prec, phi, f);
+    project<D, double>(prec, phi, f);
     phi.normalize();
 
     print::footer(0, timer, 2);
diff --git a/examples/schrodinger_semigroup1d.cpp b/examples/schrodinger_semigroup1d.cpp
index 2c1de2fa8..6035aa3c3 100644
--- a/examples/schrodinger_semigroup1d.cpp
+++ b/examples/schrodinger_semigroup1d.cpp
@@ -93,13 +93,13 @@ int main(int argc, char **argv)
 
     // Projecting functions
     mrcpp::FunctionTree<1> Re_f_tree(MRA);
-    mrcpp::project<1>(prec, Re_f_tree, Re_f);
+    mrcpp::project<1, double>(prec, Re_f_tree, Re_f);
     mrcpp::FunctionTree<1> Im_f_tree(MRA);
-    mrcpp::project<1>(prec, Im_f_tree, Im_f);
+    mrcpp::project<1, double>(prec, Im_f_tree, Im_f);
     mrcpp::FunctionTree<1> Re_g_tree(MRA);
-    mrcpp::project<1>(prec, Re_g_tree, Re_g);
+    mrcpp::project<1, double>(prec, Re_g_tree, Re_g);
     mrcpp::FunctionTree<1> Im_g_tree(MRA);
-    mrcpp::project<1>(prec, Im_g_tree, Im_g);
+    mrcpp::project<1, double>(prec, Im_g_tree, Im_g);
 
     // Output function trees
     mrcpp::FunctionTree<1> Re_fout_tree(MRA);
diff --git a/examples/tree_cleaner.cpp b/examples/tree_cleaner.cpp
index 6d970c5e3..dd4d85a05 100644
--- a/examples/tree_cleaner.cpp
+++ b/examples/tree_cleaner.cpp
@@ -9,6 +9,7 @@ const auto order = 7;
 const auto prec = 1.0e-5;
 
 const auto D = 3;
+
 int main(int argc, char **argv) {
     auto timer = mrcpp::Timer();
 
@@ -42,14 +43,14 @@ int main(int argc, char **argv) {
     auto iter = 0;
     auto n_nodes = 1;
     while (n_nodes > 0) {
-        mrcpp::project<D>(-1.0, f_tree, f);         // Projecting on fixed grid
+      mrcpp::project<D, double>(-1.0, f_tree, f);         // Projecting on fixed grid
         n_nodes = mrcpp::refine_grid(f_tree, prec); // Refine grid
         mrcpp::clear_grid(f_tree);                  // Clear MW coefs
         printout(0, " iter " << std::setw(3) << iter++ << std::setw(45));
         printout(0, " n_nodes " << std::setw(5) << n_nodes << std::endl);
     }
     // Projecting on final converged grid
-    mrcpp::project<D>(-1.0, f_tree, f);
+    mrcpp::project<D, double>(-1.0, f_tree, f);
 
     auto integral = f_tree.integrate();
     auto sq_norm = f_tree.getSquareNorm();

From f271ea9ddae368a6b6928008fce9d7b90a3de6e0 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 9 Jul 2024 10:56:25 +0200
Subject: [PATCH 03/38] remove Complex template from Gaussians

---
 api/mrcpp_declarations.h         |   8 +-
 src/functions/GaussExp.cpp       | 142 +++++++++++++++----------------
 src/functions/GaussExp.h         |  58 ++++++-------
 src/functions/GaussFunc.cpp      |  47 +++++-----
 src/functions/GaussFunc.h        |  38 ++++-----
 src/functions/GaussPoly.cpp      |  88 +++++++++----------
 src/functions/GaussPoly.h        |  30 +++----
 src/functions/Gaussian.cpp       |  42 +++++----
 src/functions/Gaussian.h         |  28 +++---
 src/functions/function_utils.cpp |  13 +--
 src/functions/function_utils.h   |   2 +-
 src/treebuilders/grid.cpp        |  19 ++---
 src/treebuilders/grid.h          |   2 +-
 13 files changed, 246 insertions(+), 271 deletions(-)

diff --git a/api/mrcpp_declarations.h b/api/mrcpp_declarations.h
index a96880da3..d21058409 100644
--- a/api/mrcpp_declarations.h
+++ b/api/mrcpp_declarations.h
@@ -36,10 +36,10 @@ class Timer;
 class Printer;
 template <int D, typename T = double> class Plotter;
 
-template <int D, typename T = double> class Gaussian;
-template <int D, typename T = double> class GaussFunc;
-template <int D, typename T = double> class GaussPoly;
-template <int D, typename T = double> class GaussExp;
+template <int D> class Gaussian;
+template <int D> class GaussFunc;
+template <int D> class GaussPoly;
+template <int D> class GaussExp;
 
 template <int D> class BoundingBox;
 template <int D, typename T = double> class NodeBox;
diff --git a/src/functions/GaussExp.cpp b/src/functions/GaussExp.cpp
index a51372801..a57fe6708 100644
--- a/src/functions/GaussExp.cpp
+++ b/src/functions/GaussExp.cpp
@@ -39,21 +39,21 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D, typename T> double GaussExp<D, T>::defaultScreening = 10.0;
+template <int D> double GaussExp<D>::defaultScreening = 10.0;
 
-template <int D, typename T> GaussExp<D, T>::GaussExp(int nTerms, double prec) {
+template <int D> GaussExp<D>::GaussExp(int nTerms, double prec) {
     for (int i = 0; i < nTerms; i++) { this->funcs.push_back(nullptr); }
 }
 
-template <int D, typename T> GaussExp<D, T>::GaussExp(const GaussExp<D, T> &gexp) {
+template <int D> GaussExp<D>::GaussExp(const GaussExp<D> &gexp) {
     screening = gexp.screening;
     for (unsigned int i = 0; i < gexp.size(); i++) {
-        Gaussian<D, T> *gauss = gexp.funcs[i]->copy();
+        Gaussian<D> *gauss = gexp.funcs[i]->copy();
         this->funcs.push_back(gauss);
     }
 }
 
-template <int D, typename T> GaussExp<D, T>::~GaussExp() {
+template <int D> GaussExp<D>::~GaussExp() {
     for (int i = 0; i < size(); i++) {
         if (this->funcs[i] != nullptr) {
             delete this->funcs[i];
@@ -62,7 +62,7 @@ template <int D, typename T> GaussExp<D, T>::~GaussExp() {
     }
 }
 
-template <int D, typename T> GaussExp<D, T> &GaussExp<D, T>::operator=(const GaussExp<D, T> &gexp) {
+template <int D> GaussExp<D> &GaussExp<D>::operator=(const GaussExp<D> &gexp) {
     if (&gexp == this) return *this;
     // screening = gexp.screening;
     this->funcs.clear();
@@ -70,77 +70,77 @@ template <int D, typename T> GaussExp<D, T> &GaussExp<D, T>::operator=(const Gau
         if (gexp.funcs[i] == nullptr) {
             this->funcs.push_back(nullptr);
         } else {
-            Gaussian<D, T> *gauss = gexp.getFunc(i).copy();
+            Gaussian<D> *gauss = gexp.getFunc(i).copy();
             this->funcs.push_back(gauss);
         }
     }
     return *this;
 }
 
-template <int D, typename T> T GaussExp<D, T>::evalf(const Coord<D> &r) const {
-    T val = 0.0;
+template <int D> double GaussExp<D>::evalf(const Coord<D> &r) const {
+    double val = 0.0;
     for (int i = 0; i < this->size(); i++) { val += this->getFunc(i).evalf(r); }
     return val;
 }
 
-template <int D, typename T> bool GaussExp<D, T>::isVisibleAtScale(int scale, int nPts) const {
+template <int D> bool GaussExp<D>::isVisibleAtScale(int scale, int nPts) const {
     for (unsigned int i = 0; i < this->size(); i++) {
         if (not this->getFunc(i).isVisibleAtScale(scale, nPts)) { return false; }
     }
     return true;
 }
 
-template <int D, typename T> bool GaussExp<D, T>::isZeroOnInterval(const double *lb, const double *ub) const {
+template <int D> bool GaussExp<D>::isZeroOnInterval(const double *lb, const double *ub) const {
     for (unsigned int i = 0; i < this->size(); i++) {
         if (not this->getFunc(i).isZeroOnInterval(lb, ub)) { return false; }
     }
     return true;
 }
 
-template <int D, typename T> void GaussExp<D, T>::setFunc(int i, const GaussPoly<D, T> &g, double c) {
+template <int D> void GaussExp<D>::setFunc(int i, const GaussPoly<D> &g, double c) {
     if (i < 0 or i > (this->size() - 1)) {
         MSG_ERROR("Index out of bounds!");
         return;
     }
     if (this->funcs[i] != nullptr) { delete this->funcs[i]; }
-    this->funcs[i] = new GaussPoly<D, T>(g);
+    this->funcs[i] = new GaussPoly<D>(g);
     double coef = this->funcs[i]->getCoef();
     this->funcs[i]->setCoef(c * coef);
 }
 
-template <int D, typename T> void GaussExp<D, T>::setFunc(int i, const GaussFunc<D, T> &g, double c) {
+template <int D> void GaussExp<D>::setFunc(int i, const GaussFunc<D> &g, double c) {
     if (i < 0 or i > (this->size() - 1)) {
         MSG_ERROR("Index out of bounds!");
         return;
     }
     if (this->funcs[i] != nullptr) { delete this->funcs[i]; }
-    this->funcs[i] = new GaussFunc<D, T>(g);
+    this->funcs[i] = new GaussFunc<D>(g);
     double coef = this->funcs[i]->getCoef();
     this->funcs[i]->setCoef(c * coef);
 }
 
-template <int D, typename T> void GaussExp<D, T>::append(const Gaussian<D, T> &g) {
-    Gaussian<D, T> *gp = g.copy();
+template <int D> void GaussExp<D>::append(const Gaussian<D> &g) {
+    Gaussian<D> *gp = g.copy();
     this->funcs.push_back(gp);
 }
 
-template <int D, typename T> void GaussExp<D, T>::append(const GaussExp<D, T> &g) {
+template <int D> void GaussExp<D>::append(const GaussExp<D> &g) {
     for (int i = 0; i < g.size(); i++) {
-        Gaussian<D, T> *gp = g.getFunc(i).copy();
+        Gaussian<D> *gp = g.getFunc(i).copy();
         this->funcs.push_back(gp);
     }
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::differentiate(int dir) const {
+template <int D> GaussExp<D> GaussExp<D>::differentiate(int dir) const {
     assert(dir >= 0 and dir < D);
-    GaussExp<D, T> result;
+    GaussExp<D> result;
     for (int i = 0; i < this->size(); i++) result.append(this->getFunc(i).differentiate(dir));
     return result;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::add(GaussExp<D, T> &g) {
+template <int D> GaussExp<D> GaussExp<D>::add(GaussExp<D> &g) {
     int nsum = this->size() + g.size();
-    GaussExp<D, T> sum = GaussExp<D, T>(nsum);
+    GaussExp<D> sum = GaussExp<D>(nsum);
 
     int n = 0;
     for (int i = 0; i < this->size(); i++) {
@@ -155,34 +155,34 @@ template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::add(GaussExp<D, T> &
     return sum;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::add(Gaussian<D, T> &g) {
+template <int D> GaussExp<D> GaussExp<D>::add(Gaussian<D> &g) {
     int nsum = this->size() + 1;
-    GaussExp<D, T> sum = GaussExp<D, T>(nsum);
+    GaussExp<D> sum = GaussExp<D>(nsum);
     for (int n = 0; n < this->size(); n++) { sum.funcs[n] = this->getFunc(n).copy(); }
     sum.funcs[this->size()] = g.copy();
     return sum;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussExp<D, T> &gexp) {
-    GaussExp<D, T> result;
+template <int D> GaussExp<D> GaussExp<D>::mult(GaussExp<D> &gexp) {
+    GaussExp<D> result;
     for (int i = 0; i < this->size(); i++) {
         for (int j = 0; j < gexp.size(); j++) {
-            if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[i])) {
-                if (auto *g = dynamic_cast<GaussFunc<D, T> *>(gexp.funcs[j])) {
-                    GaussPoly<D, T> newTerm = (*g) * (*f);
+            if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[i])) {
+                if (auto *g = dynamic_cast<GaussFunc<D> *>(gexp.funcs[j])) {
+                    GaussPoly<D> newTerm = (*g) * (*f);
                     result.append(newTerm);
-                } else if (auto *g = dynamic_cast<GaussPoly<D, T> *>(gexp.funcs[j])) {
-                    GaussPoly<D, T> newTerm = (*g) * (*f);
+                } else if (auto *g = dynamic_cast<GaussPoly<D> *>(gexp.funcs[j])) {
+                    GaussPoly<D> newTerm = (*g) * (*f);
                     result.append(newTerm);
                 } else {
                     MSG_ABORT("Invalid Gaussian type!");
                 }
-            } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[i])) {
-                if (auto *g = dynamic_cast<GaussFunc<D, T> *>(gexp.funcs[j])) {
-                    GaussPoly<D, T> newTerm = (*f) * (*g);
+            } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[i])) {
+                if (auto *g = dynamic_cast<GaussFunc<D> *>(gexp.funcs[j])) {
+                    GaussPoly<D> newTerm = (*f) * (*g);
                     result.append(newTerm);
-                } else if (auto *g = dynamic_cast<GaussPoly<D, T> *>(gexp.funcs[j])) {
-                    GaussPoly<D, T> newTerm = (*f) * (*g);
+                } else if (auto *g = dynamic_cast<GaussPoly<D> *>(gexp.funcs[j])) {
+                    GaussPoly<D> newTerm = (*f) * (*g);
                     result.append(newTerm);
                 } else {
                     MSG_ABORT("Invalid Gaussian type!");
@@ -195,15 +195,15 @@ template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussExp<D, T>
     return result;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussFunc<D, T> &g) {
-    GaussExp<D, T> result;
+template <int D> GaussExp<D> GaussExp<D>::mult(GaussFunc<D> &g) {
+    GaussExp<D> result;
     int nTerms = this->size();
     for (int n = 0; n < nTerms; n++) {
-        if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[n])) {
-            GaussPoly<D, T> newTerm = *f * g;
+        if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[n])) {
+            GaussPoly<D> newTerm = *f * g;
             result.append(newTerm);
-        } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[n])) {
-            GaussPoly<D, T> newTerm = *f * g;
+        } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[n])) {
+            GaussPoly<D> newTerm = *f * g;
             result.append(newTerm);
         } else {
             MSG_ABORT("Invalid Gaussian type!");
@@ -211,15 +211,15 @@ template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussFunc<D, T>
     }
     return result;
 }
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussPoly<D, T> &g) {
+template <int D> GaussExp<D> GaussExp<D>::mult(GaussPoly<D> &g) {
     int nTerms = this->size();
-    GaussExp<D, T> result(nTerms);
+    GaussExp<D> result(nTerms);
     for (int n = 0; n < nTerms; n++) {
-        if (auto *f = dynamic_cast<GaussFunc<D, T> *>(this->funcs[n])) {
-            GaussPoly<D, T> newTerm(g * *f);
+        if (auto *f = dynamic_cast<GaussFunc<D> *>(this->funcs[n])) {
+            GaussPoly<D> newTerm(g * *f);
             result.append(newTerm);
-        } else if (auto *f = dynamic_cast<GaussPoly<D, T> *>(this->funcs[n])) {
-            GaussPoly<D, T> newTerm(g * *f);
+        } else if (auto *f = dynamic_cast<GaussPoly<D> *>(this->funcs[n])) {
+            GaussPoly<D> newTerm(g * *f);
             result.append(newTerm);
         } else {
             MSG_ABORT("Invalid Gaussian type!");
@@ -228,17 +228,17 @@ template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(GaussPoly<D, T>
     return result;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::mult(double d) {
-    GaussExp<D, T> prod = *this;
+template <int D> GaussExp<D> GaussExp<D>::mult(double d) {
+    GaussExp<D> prod = *this;
     for (int i = 0; i < this->size(); i++) prod.funcs[i]->multConstInPlace(d);
     return prod;
 }
 
-template <int D, typename T> void GaussExp<D, T>::multInPlace(double d) {
+template <int D> void GaussExp<D>::multInPlace(double d) {
     for (int i = 0; i < this->size(); i++) this->funcs[i]->multConstInPlace(d);
 }
 
-template <int D, typename T> double GaussExp<D, T>::calcSquareNorm() const {
+template <int D> double GaussExp<D>::calcSquareNorm() const {
     /* computing the squares */
     double norm = 0.0;
     for (int i = 0; i < this->size(); i++) {
@@ -247,13 +247,13 @@ template <int D, typename T> double GaussExp<D, T>::calcSquareNorm() const {
     }
     /* computing the double products */
     for (int i = 0; i < this->size(); i++) {
-        GaussExp<D, T> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc
+        GaussExp<D> funcs_i = getFunc(i).asGaussExp(); // Make sure all entries are GaussFunc
         for (int fi = 0; fi < funcs_i.size(); fi++) {
-            GaussFunc<D, T> &func_i = static_cast<GaussFunc<D, T> &>(funcs_i.getFunc(fi));
+            GaussFunc<D> &func_i = static_cast<GaussFunc<D> &>(funcs_i.getFunc(fi));
             for (int j = i + 1; j < this->size(); j++) {
-                GaussExp<D, T> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc
+                GaussExp<D> funcs_j = getFunc(j).asGaussExp(); // Make sure all entries are GaussFunc
                 for (int fj = 0; fj < funcs_j.size(); fj++) {
-                    GaussFunc<D, T> &func_j = static_cast<GaussFunc<D, T> &>(funcs_j.getFunc(fj));
+                    GaussFunc<D> &func_j = static_cast<GaussFunc<D> &>(funcs_j.getFunc(fj));
                     double overlap = func_i.calcOverlap(func_j);
                     norm += 2.0 * overlap;
                 }
@@ -263,7 +263,7 @@ template <int D, typename T> double GaussExp<D, T>::calcSquareNorm() const {
     return norm;
 }
 
-template <int D, typename T> void GaussExp<D, T>::normalize() {
+template <int D> void GaussExp<D>::normalize() {
     double norm = std::sqrt(this->calcSquareNorm());
     for (int i = 0; i < this->size(); i++) {
         double coef = this->funcs[i]->getCoef();
@@ -271,12 +271,12 @@ template <int D, typename T> void GaussExp<D, T>::normalize() {
     }
 }
 
-template <int D, typename T> void GaussExp<D, T>::calcScreening(double nStdDev) {
+template <int D> void GaussExp<D>::calcScreening(double nStdDev) {
     screening = nStdDev;
     for (int i = 0; i < this->size(); i++) { this->funcs[i]->calcScreening(nStdDev); }
 }
 
-template <int D, typename T> void GaussExp<D, T>::setScreen(bool screen) {
+template <int D> void GaussExp<D>::setScreen(bool screen) {
     if (screen) {
         this->screening = std::abs(this->screening);
     } else {
@@ -290,7 +290,7 @@ template <int D, typename T> void GaussExp<D, T>::setScreen(bool screen) {
 // is not separable, we have to do the projection term by term.
 /*
 template<int D>
-void GaussExp<D, T>::calcWaveletCoefs(MWNode<D, T> &node) {
+void GaussExp<D>::calcWaveletCoefs(MWNode<D> &node) {
     static const int tDim = 1 << D;
     const ScalingBasis &sf = node.getMWTree().getScalingFunctions();
     MatrixXd &scaling = node.getMWTree().getTmpScalingCoefs();
@@ -319,12 +319,12 @@ void GaussExp<D, T>::calcWaveletCoefs(MWNode<D, T> &node) {
 }
 */
 
-template <int D, typename T> void GaussExp<D, T>::setDefaultScreening(double screen) {
+template <int D> void GaussExp<D>::setDefaultScreening(double screen) {
     if (screen < 0) { MSG_ERROR("Screening constant cannot be negative!"); }
     defaultScreening = screen;
 }
 
-template <int D, typename T> std::ostream &GaussExp<D, T>::print(std::ostream &o) const {
+template <int D> std::ostream &GaussExp<D>::print(std::ostream &o) const {
     o << "Gaussian expansion: " << size() << " terms" << std::endl;
     for (int i = 0; i < size(); i++) {
         o << "Term" << std::setw(3) << i << " :" << std::endl;
@@ -338,7 +338,7 @@ template <int D, typename T> std::ostream &GaussExp<D, T>::print(std::ostream &o
  *  @note Each Gaussian must be normalized to unit charge
  *  \f$ c = (\alpha/\pi)^{D/2} \f$ for this to be correct!
  */
-template <int D, typename T> double GaussExp<D, T>::calcCoulombEnergy() const {
+template <int D> double GaussExp<D>::calcCoulombEnergy() const {
     NOT_IMPLEMENTED_ABORT
 }
 
@@ -362,8 +362,8 @@ template <> double GaussExp<3>::calcCoulombEnergy() const {
     return energy;
 }
 
-template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::periodify(const std::array<double, D> &period, double nStdDev) const {
-    GaussExp<D, T> out_exp;
+template <int D> GaussExp<D> GaussExp<D>::periodify(const std::array<double, D> &period, double nStdDev) const {
+    GaussExp<D> out_exp;
     for (const auto &gauss : *this) {
         auto periodic_gauss = gauss->periodify(period, nStdDev);
         out_exp.append(periodic_gauss);
@@ -371,12 +371,8 @@ template <int D, typename T> GaussExp<D, T> GaussExp<D, T>::periodify(const std:
     return out_exp;
 }
 
-template class GaussExp<1, double>;
-template class GaussExp<2, double>;
-template class GaussExp<3, double>;
-
-template class GaussExp<1, ComplexDouble>;
-template class GaussExp<2, ComplexDouble>;
-template class GaussExp<3, ComplexDouble>;
+template class GaussExp<1>;
+template class GaussExp<2>;
+template class GaussExp<3>;
 
 } // namespace mrcpp
diff --git a/src/functions/GaussExp.h b/src/functions/GaussExp.h
index 58f5d7dd2..f33549ec1 100644
--- a/src/functions/GaussExp.h
+++ b/src/functions/GaussExp.h
@@ -51,11 +51,11 @@ namespace mrcpp {
  *
  */
 
-template <int D, typename T> class GaussExp : public RepresentableFunction<D, T> {
+    template <int D> class GaussExp : public RepresentableFunction<D, double> {
 public:
     GaussExp(int nTerms = 0, double prec = GAUSS_EXP_PREC);
-    GaussExp(const GaussExp<D, T> &gExp);
-    GaussExp &operator=(const GaussExp<D, T> &gExp);
+    GaussExp(const GaussExp<D> &gExp);
+    GaussExp &operator=(const GaussExp<D> &gExp);
     ~GaussExp() override;
 
     auto begin() { return funcs.begin(); }
@@ -70,25 +70,25 @@ template <int D, typename T> class GaussExp : public RepresentableFunction<D, T>
 
     void calcScreening(double nStdDev = defaultScreening);
 
-    T evalf(const Coord<D> &r) const override;
+    double evalf(const Coord<D> &r) const override;
 
-    GaussExp<D, T> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
-    GaussExp<D, T> differentiate(int dir) const;
+    GaussExp<D> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
+    GaussExp<D> differentiate(int dir) const;
 
-    GaussExp<D, T> add(GaussExp<D, T> &g);
-    GaussExp<D, T> add(Gaussian<D, T> &g);
-    GaussExp<D, T> mult(GaussExp<D, T> &g);
-    GaussExp<D, T> mult(GaussFunc<D, T> &g);
-    GaussExp<D, T> mult(GaussPoly<D, T> &g);
-    GaussExp<D, T> mult(double d);
+    GaussExp<D> add(GaussExp<D> &g);
+    GaussExp<D> add(Gaussian<D> &g);
+    GaussExp<D> mult(GaussExp<D> &g);
+    GaussExp<D> mult(GaussFunc<D> &g);
+    GaussExp<D> mult(GaussPoly<D> &g);
+    GaussExp<D> mult(double d);
     void multInPlace(double d);
 
-    GaussExp<D, T> operator+(GaussExp<D, T> &g) { return this->add(g); }
-    GaussExp<D, T> operator+(Gaussian<D, T> &g) { return this->add(g); }
-    GaussExp<D, T> operator*(GaussExp<D, T> &g) { return this->mult(g); }
-    GaussExp<D, T> operator*(GaussFunc<D, T> &g) { return this->mult(g); }
-    GaussExp<D, T> operator*(GaussPoly<D, T> &g) { return this->mult(g); }
-    GaussExp<D, T> operator*(double d) { return this->mult(d); }
+    GaussExp<D> operator+(GaussExp<D> &g) { return this->add(g); }
+    GaussExp<D> operator+(Gaussian<D> &g) { return this->add(g); }
+    GaussExp<D> operator*(GaussExp<D> &g) { return this->mult(g); }
+    GaussExp<D> operator*(GaussFunc<D> &g) { return this->mult(g); }
+    GaussExp<D> operator*(GaussPoly<D> &g) { return this->mult(g); }
+    GaussExp<D> operator*(double d) { return this->mult(d); }
     void operator*=(double d) { this->multInPlace(d); }
 
     double getScreening() const { return screening; }
@@ -98,14 +98,14 @@ template <int D, typename T> class GaussExp : public RepresentableFunction<D, T>
     const std::array<double, D> &getPos(int i) const { return this->funcs[i]->getPos(); }
 
     int size() const { return this->funcs.size(); }
-    Gaussian<D, T> &getFunc(int i) { return *this->funcs[i]; }
-    const Gaussian<D, T> &getFunc(int i) const { return *this->funcs[i]; }
+    Gaussian<D> &getFunc(int i) { return *this->funcs[i]; }
+    const Gaussian<D> &getFunc(int i) const { return *this->funcs[i]; }
 
-    Gaussian<D, T> *operator[](int i) { return this->funcs[i]; }
-    const Gaussian<D, T> *operator[](int i) const { return this->funcs[i]; }
+    Gaussian<D> *operator[](int i) { return this->funcs[i]; }
+    const Gaussian<D> *operator[](int i) const { return this->funcs[i]; }
 
-    void setFunc(int i, const GaussPoly<D, T> &g, double c = 1.0);
-    void setFunc(int i, const GaussFunc<D, T> &g, double c = 1.0);
+    void setFunc(int i, const GaussPoly<D> &g, double c = 1.0);
+    void setFunc(int i, const GaussFunc<D> &g, double c = 1.0);
 
     void setDefaultScreening(double screen);
     void setScreen(bool screen);
@@ -115,15 +115,15 @@ template <int D, typename T> class GaussExp : public RepresentableFunction<D, T>
     void setPos(int i, const std::array<double, D> &pos) { this->funcs[i]->setPos(pos); }
 
     /** @brief Append Gaussian to expansion */
-    void append(const Gaussian<D, T> &g);
+    void append(const Gaussian<D> &g);
     /** @brief Append GaussExp to expansion */
-    void append(const GaussExp<D, T> &g);
+    void append(const GaussExp<D> &g);
 
-    friend std::ostream &operator<<(std::ostream &o, const GaussExp<D, T> &gExp) { return gExp.print(o); }
-    friend class Gaussian<D, T>;
+    friend std::ostream &operator<<(std::ostream &o, const GaussExp<D> &gExp) { return gExp.print(o); }
+    friend class Gaussian<D>;
 
 protected:
-    std::vector<Gaussian<D, T> *> funcs;
+    std::vector<Gaussian<D> *> funcs;
     static double defaultScreening;
     double screening{0.0};
 
diff --git a/src/functions/GaussFunc.cpp b/src/functions/GaussFunc.cpp
index dedf563c3..28736be58 100644
--- a/src/functions/GaussFunc.cpp
+++ b/src/functions/GaussFunc.cpp
@@ -39,12 +39,12 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D, typename T> Gaussian<D, T> *GaussFunc<D, T>::copy() const {
-    auto *gauss = new GaussFunc<D, T>(*this);
+template <int D> Gaussian<D> *GaussFunc<D>::copy() const {
+    auto *gauss = new GaussFunc<D>(*this);
     return gauss;
 }
 
-template <int D, typename T> T GaussFunc<D, T>::evalf(const Coord<D> &r) const {
+template <int D> double GaussFunc<D>::evalf(const Coord<D> &r) const {
     if (this->getScreen()) {
         for (int d = 0; d < D; d++) {
             if (r[d] < this->A[d] or r[d] > this->B[d]) { return 0.0; }
@@ -65,7 +65,7 @@ template <int D, typename T> T GaussFunc<D, T>::evalf(const Coord<D> &r) const {
     return this->coef * p2 * std::exp(-q2);
 }
 
-template <int D, typename T> T GaussFunc<D, T>::evalf1D(double r, int d) const {
+template <int D> double GaussFunc<D>::evalf1D(double r, int d) const {
     if (this->getScreen()) {
         if ((r < this->A[d]) or (r > this->B[d])) { return 0.0; }
     }
@@ -85,7 +85,7 @@ template <int D, typename T> T GaussFunc<D, T>::evalf1D(double r, int d) const {
     return result;
 }
 
-template <int D, typename T> double GaussFunc<D, T>::calcSquareNorm() const {
+template <int D> double GaussFunc<D>::calcSquareNorm() const {
     double norm = 1.0;
     for (int d = 0; d < D; d++) {
         double a = 2.0 * this->alpha[d];
@@ -105,14 +105,14 @@ template <int D, typename T> double GaussFunc<D, T>::calcSquareNorm() const {
     return norm * this->coef * this->coef;
 }
 
-template<int D, typename T> GaussExp<D, T> GaussFunc<D, T>::asGaussExp() const {
-    GaussExp<D, T> gexp;
+template<int D> GaussExp<D> GaussFunc<D>::asGaussExp() const {
+    GaussExp<D> gexp;
     gexp.append(*this);
     return gexp;
 }
 
-template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::differentiate(int dir) const {
-    GaussPoly<D, T> result(*this);
+template <int D> GaussPoly<D> GaussFunc<D>::differentiate(int dir) const {
+    GaussPoly<D> result(*this);
     int oldPow = this->getPower(dir);
 
     Polynomial newPoly(oldPow + 1);
@@ -123,8 +123,8 @@ template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::differentiate(int
     return result;
 }
 
-template <int D, typename T> void GaussFunc<D, T>::multInPlace(const GaussFunc<D, T> &rhs) {
-    GaussFunc<D, T> &lhs = *this;
+template <int D> void GaussFunc<D>::multInPlace(const GaussFunc<D> &rhs) {
+    GaussFunc<D> &lhs = *this;
     for (int d = 0; d < D; d++) {
         if (lhs.getPos()[d] != rhs.getPos()[d]) {
             MSG_ABORT("Cannot multiply GaussFuncs of different center in-place");
@@ -148,9 +148,9 @@ template <int D, typename T> void GaussFunc<D, T>::multInPlace(const GaussFunc<D
  *  @param[in] rhs: Right hand side of multiply
  *  @returns New GaussPoly
  */
-template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::mult(const GaussFunc<D, T> &rhs) {
-    GaussFunc<D, T> &lhs = *this;
-    GaussPoly<D, T> result;
+template <int D> GaussPoly<D> GaussFunc<D>::mult(const GaussFunc<D> &rhs) {
+    GaussFunc<D> &lhs = *this;
+    GaussPoly<D> result;
     result.multPureGauss(lhs, rhs);
     for (int d = 0; d < D; d++) {
         double newPos = result.getPos()[d];
@@ -167,13 +167,13 @@ template <int D, typename T> GaussPoly<D, T> GaussFunc<D, T>::mult(const GaussFu
  *  @param[in] c: Scalar to multiply
  *  @returns New GaussFunc
  */
-template <int D, typename T> GaussFunc<D, T> GaussFunc<D, T>::mult(double c) {
-    GaussFunc<D, T> g = *this;
+template <int D> GaussFunc<D> GaussFunc<D>::mult(double c) {
+    GaussFunc<D> g = *this;
     g.coef *= c;
     return g;
 }
 
-template <int D, typename T> std::ostream &GaussFunc<D, T>::print(std::ostream &o) const {
+template <int D> std::ostream &GaussFunc<D>::print(std::ostream &o) const {
     auto is_array = details::are_all_equal<D>(this->getExp());
 
     // If all of the values in the exponential are the same only
@@ -203,7 +203,7 @@ template <int D, typename T> std::ostream &GaussFunc<D, T>::print(std::ostream &
  *  @note Both Gaussians must be normalized to unit charge
  *  \f$ \alpha = (\beta/\pi)^{D/2} \f$ for this to be correct!
  */
-template <int D, typename T> double GaussFunc<D, T>::calcCoulombEnergy(const GaussFunc<D, T> &gf) const {
+template <int D> double GaussFunc<D>::calcCoulombEnergy(const GaussFunc<D> &gf) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
@@ -236,12 +236,7 @@ template <> double GaussFunc<3>::calcCoulombEnergy(const GaussFunc<3> &gf) const
     return std::sqrt(4.0 * alpha / pi) * boysFac;
 }
 
-template class GaussFunc<1, double>;
-template class GaussFunc<2, double>;
-template class GaussFunc<3, double>;
-
-template class GaussFunc<1, ComplexDouble>;
-template class GaussFunc<2, ComplexDouble>;
-template class GaussFunc<3, ComplexDouble>;
-
+template class GaussFunc<1>;
+template class GaussFunc<2>;
+template class GaussFunc<3>;
 } // namespace mrcpp
diff --git a/src/functions/GaussFunc.h b/src/functions/GaussFunc.h
index fce4f6fd2..874bb3850 100644
--- a/src/functions/GaussFunc.h
+++ b/src/functions/GaussFunc.h
@@ -40,12 +40,12 @@ namespace mrcpp {
  *
  * \f$ g(x) = \alpha (x-x_0)^a e^{-\beta (x-x_0)^2} \f$
  *
- * - Multidimensional Gaussian (GaussFunc<D, T>):
+ * - Multidimensional Gaussian (GaussFunc<D>):
  *
  * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$
  */
 
-template <int D, typename T> class GaussFunc : public Gaussian<D, T> {
+template <int D> class GaussFunc : public Gaussian<D> {
 public:
     /** @returns New GaussFunc object
      *  @param[in] beta: Exponent, \f$ e^{-\beta r^2} \f$
@@ -54,32 +54,32 @@ template <int D, typename T> class GaussFunc : public Gaussian<D, T> {
      *  @param[in] pow: Monomial power, \f$ x^{pow[0]}, y^{pow[1]}, ... \f$
      */
     GaussFunc(double beta, double alpha, const Coord<D> &pos = {}, const std::array<int, D> &pow = {})
-            : Gaussian<D, T>(beta, alpha, pos, pow) {}
+            : Gaussian<D>(beta, alpha, pos, pow) {}
     GaussFunc(const std::array<double, D> &beta,
               double alpha,
               const Coord<D> &pos = {},
               const std::array<int, D> &pow = {})
-            : Gaussian<D, T>(beta, alpha, pos, pow) {}
-    GaussFunc(const GaussFunc<D, T> &gf)
-            : Gaussian<D, T>(gf) {}
-    GaussFunc<D, T> &operator=(const GaussFunc<D, T> &rhs) = delete;
-    Gaussian<D, T> *copy() const override;
+            : Gaussian<D>(beta, alpha, pos, pow) {}
+    GaussFunc(const GaussFunc<D> &gf)
+            : Gaussian<D>(gf) {}
+    GaussFunc<D> &operator=(const GaussFunc<D> &rhs) = delete;
+    Gaussian<D> *copy() const override;
 
-    double calcCoulombEnergy(const GaussFunc<D, T> &rhs) const;
+    double calcCoulombEnergy(const GaussFunc<D> &rhs) const;
     double calcSquareNorm() const override;
 
-    T evalf(const Coord<D> &r) const override;
-    T evalf1D(double r, int dir) const override;
+    double evalf(const Coord<D> &r) const override;
+    double evalf1D(double r, int dir) const override;
 
-    GaussExp<D, T> asGaussExp() const override;
-    GaussPoly<D, T> differentiate(int dir) const override;
+    GaussExp<D> asGaussExp() const override;
+    GaussPoly<D> differentiate(int dir) const override;
 
-    void multInPlace(const GaussFunc<D, T> &rhs);
-    void operator*=(const GaussFunc<D, T> &rhs) { multInPlace(rhs); }
-    GaussPoly<D, T> mult(const GaussFunc<D, T> &rhs);
-    GaussFunc<D, T> mult(double c);
-    GaussPoly<D, T> operator*(const GaussFunc<D, T> &rhs) { return this->mult(rhs); }
-    GaussFunc<D, T> operator*(double c) { return this->mult(c); }
+    void multInPlace(const GaussFunc<D> &rhs);
+    void operator*=(const GaussFunc<D> &rhs) { multInPlace(rhs); }
+    GaussPoly<D> mult(const GaussFunc<D> &rhs);
+    GaussFunc<D> mult(double c);
+    GaussPoly<D> operator*(const GaussFunc<D> &rhs) { return this->mult(rhs); }
+    GaussFunc<D> operator*(double c) { return this->mult(c); }
 
     void setPow(int d, int power) override { this->power[d] = power; }
     void setPow(const std::array<int, D> &power) override { this->power = power; }
diff --git a/src/functions/GaussPoly.cpp b/src/functions/GaussPoly.cpp
index 3e780ba3b..0dfeaf2cd 100644
--- a/src/functions/GaussPoly.cpp
+++ b/src/functions/GaussPoly.cpp
@@ -43,9 +43,9 @@ namespace mrcpp {
  *  @param[in] pos: Position \f$ (x - pos[0]), (y - pos[1]), ... \f$
  *  @param[in] pow: Max polynomial degree, \f$ P_0(x), P_1(y), ... \f$
  */
-template <int D, typename T>
-GaussPoly<D, T>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const std::array<int, D> &power)
-        : Gaussian<D, T>(beta, alpha, pos, power) {
+template <int D>
+GaussPoly<D>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const std::array<int, D> &power)
+        : Gaussian<D>(beta, alpha, pos, power) {
     for (auto d = 0; d < D; d++) {
         if (power != std::array<int, D>{}) {
             this->poly[d] = new Polynomial(this->power[d]);
@@ -55,12 +55,12 @@ GaussPoly<D, T>::GaussPoly(double beta, double alpha, const Coord<D> &pos, const
     }
 }
 
-template <int D, typename T>
-GaussPoly<D, T>::GaussPoly(const std::array<double, D> &beta,
+template <int D>
+GaussPoly<D>::GaussPoly(const std::array<double, D> &beta,
                         double alpha,
                         const Coord<D> &pos,
                         const std::array<int, D> &pow)
-        : Gaussian<D, T>(beta, alpha, pos, pow) {
+        : Gaussian<D>(beta, alpha, pos, pow) {
     for (auto d = 0; d < D; d++) {
         if (pow != std::array<int, D>{}) {
             this->poly[d] = new Polynomial(this->power[d]);
@@ -70,15 +70,15 @@ GaussPoly<D, T>::GaussPoly(const std::array<double, D> &beta,
     }
 }
 
-template <int D, typename T>
-GaussPoly<D, T>::GaussPoly(const GaussPoly<D, T> &gp)
-        : Gaussian<D, T>(gp) {
+template <int D>
+GaussPoly<D>::GaussPoly(const GaussPoly<D> &gp)
+        : Gaussian<D>(gp) {
     for (int d = 0; d < D; d++) { poly[d] = new Polynomial(gp.getPoly(d)); }
 }
 
-template <int D, typename T>
-GaussPoly<D, T>::GaussPoly(const GaussFunc<D, T> &gf)
-        : Gaussian<D, T>(gf) {
+template <int D>
+GaussPoly<D>::GaussPoly(const GaussFunc<D> &gf)
+        : Gaussian<D>(gf) {
     for (int d = 0; d < D; d++) {
         int order = this->getPower(d);
         poly[d] = new Polynomial(order);
@@ -89,29 +89,29 @@ GaussPoly<D, T>::GaussPoly(const GaussFunc<D, T> &gf)
     }
 }
 
-template <int D, typename T> GaussPoly<D, T>::~GaussPoly() {
+template <int D> GaussPoly<D>::~GaussPoly() {
     for (int i = 0; i < D; i++) { delete poly[i]; }
 }
 
-template <int D, typename T> Gaussian<D, T> *GaussPoly<D, T>::copy() const {
-    auto *gauss = new GaussPoly<D, T>(*this);
+template <int D> Gaussian<D> *GaussPoly<D>::copy() const {
+    auto *gauss = new GaussPoly<D>(*this);
     return gauss;
 }
 
-template <int D, typename T> double GaussPoly<D, T>::calcSquareNorm() const {
-    GaussExp<D, T> this_exp = this->asGaussExp();
+template<int D> double GaussPoly<D>::calcSquareNorm() const {
+    GaussExp<D> this_exp = this->asGaussExp();
     double norm = 0.0;
     for (int i = 0; i < this_exp.size(); i++) {
-        auto func_i = static_cast<GaussFunc<D, T> &>(this_exp.getFunc(i));
+        auto func_i = static_cast<GaussFunc<D> &>(this_exp.getFunc(i));
         for (int j = 0; j < this_exp.size(); j++) {
-            auto func_j = static_cast<GaussFunc<D, T> &>(this_exp.getFunc(j));
+            auto func_j = static_cast<GaussFunc<D> &>(this_exp.getFunc(j));
             norm += function_utils::calc_overlap(func_i, func_j);
         }
     }
     return norm;
 }
 
-template <int D, typename T> T GaussPoly<D, T>::evalf(const Coord<D> &r) const {
+template <int D> double GaussPoly<D>::evalf(const Coord<D> &r) const {
     if (this->getScreen()) {
         for (int d = 0; d < D; d++) {
             if (r[d] < this->A[d] or r[d] > this->B[d]) { return 0.0; }
@@ -127,7 +127,7 @@ template <int D, typename T> T GaussPoly<D, T>::evalf(const Coord<D> &r) const {
     return this->coef * p2 * std::exp(-q2);
 }
 
-template <int D, typename T> T GaussPoly<D, T>::evalf1D(const double r, int d) const {
+template <int D> double GaussPoly<D>::evalf1D(const double r, int d) const {
     // NOTE!
     //     This function evaluation will give the first dimension the full coef
     //     amplitude, leaving all other directions with amplitude 1.0. This is to
@@ -146,7 +146,7 @@ template <int D, typename T> T GaussPoly<D, T>::evalf1D(const double r, int d) c
     return p2 * std::exp(-this->alpha[d] * q2);
 }
 
-template <int D, typename T> GaussExp<D, T> GaussPoly<D, T>::asGaussExp() const {
+template <int D> GaussExp<D> GaussPoly<D>::asGaussExp() const {
     std::array<int, D> pow;
     std::array<double, D> pos;
     auto alpha = this->getExp();
@@ -162,12 +162,12 @@ template <int D, typename T> GaussExp<D, T> GaussPoly<D, T>::asGaussExp() const
 
     fillCoefPowVector(coefs, power, pow, D);
 
-    GaussExp<D, T> gexp;
+    GaussExp<D> gexp;
     for (int i = 0; i < nTerms; i++) {
         double coef = coefs[i];
         for (int d = 0; d < D; d++) pow[d] = power[i][d];
         if (coef != 0.0) {
-            GaussFunc<D, T> gFunc(alpha, coef, pos, pow);
+            GaussFunc<D> gFunc(alpha, coef, pos, pow);
             gexp.append(gFunc);
         }
     }
@@ -175,16 +175,16 @@ template <int D, typename T> GaussExp<D, T> GaussPoly<D, T>::asGaussExp() const
     return gexp;
 }
 
-template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::differentiate(int dir) const {
+template <int D> GaussPoly<D> GaussPoly<D>::differentiate(int dir) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
-template <int D, typename T> void GaussPoly<D, T>::multInPlace(const GaussPoly<D, T> &rhs) {
+template <int D> void GaussPoly<D>::multInPlace(const GaussPoly<D> &rhs) {
     NOT_IMPLEMENTED_ABORT;
 }
 
-template <int D, typename T>
-void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs, std::vector<int *> &power, int pow[D], int dir) const {
+template <int D>
+void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs, std::vector<int *> &power, int pow[D], int dir) const {
     dir--;
     for (int i = 0; i < this->getPower(dir) + 1; i++) {
         pow[dir] = i;
@@ -204,8 +204,8 @@ void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs, std::vector<
     }
 }
 
-template <int D, typename T>
-void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs,
+template <int D>
+void GaussPoly<D>::fillCoefPowVector(std::vector<double> &coefs,
                                      std::vector<int *> &power,
                                      std::array<int, D> &pow,
                                      int dir) const {
@@ -228,11 +228,11 @@ void GaussPoly<D, T>::fillCoefPowVector(std::vector<double> &coefs,
     }
 }
 
-template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::mult(const GaussPoly<D, T> &rhs) {
+template <int D> GaussPoly<D> GaussPoly<D>::mult(const GaussPoly<D> &rhs) {
     NOT_IMPLEMENTED_ABORT;
     /*
-    GaussPoly<D, T> &lhs = *this;
-    GaussPoly<D, T> result;
+    GaussPoly<D> &lhs = *this;
+    GaussPoly<D> result;
     result.multPureGauss(lhs, rhs);
     for (int d = 0; d < D; d++) {
         double newPos = result.getPos()[d];
@@ -265,18 +265,18 @@ template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::mult(const GaussPo
  *  @param[in] c: Scalar to multiply
  *  @returns New GaussPoly
  */
-template <int D, typename T> GaussPoly<D, T> GaussPoly<D, T>::mult(double c) {
-    GaussPoly<D, T> g = *this;
+template <int D> GaussPoly<D> GaussPoly<D>::mult(double c) {
+    GaussPoly<D> g = *this;
     g.coef *= c;
     return g;
 }
 
-template <int D, typename T> void GaussPoly<D, T>::setPow(int d, int pow) {
+template <int D> void GaussPoly<D>::setPow(int d, int pow) {
     if (poly[d] != nullptr) { delete poly[d]; }
     poly[d] = new Polynomial(pow);
 }
 
-template <int D, typename T> void GaussPoly<D, T>::setPow(const std::array<int, D> &pow) {
+template <int D> void GaussPoly<D>::setPow(const std::array<int, D> &pow) {
     for (int d = 0; d < D; d++) {
         if (poly[d] != nullptr) { delete poly[d]; }
         poly[d] = new Polynomial(pow[d]);
@@ -288,13 +288,13 @@ template <int D, typename T> void GaussPoly<D, T>::setPow(const std::array<int,
  *  @param[in] d: Cartesian direction
  *  @param[in] poly: Polynomial to set
  */
-template <int D, typename T> void GaussPoly<D, T>::setPoly(int d, Polynomial &poly) {
+template <int D> void GaussPoly<D>::setPoly(int d, Polynomial &poly) {
     if (this->poly[d] != nullptr) { delete this->poly[d]; }
     this->poly[d] = new Polynomial(poly);
     this->power[d] = poly.getOrder();
 }
 
-template <int D, typename T> std::ostream &GaussPoly<D, T>::print(std::ostream &o) const {
+template <int D> std::ostream &GaussPoly<D>::print(std::ostream &o) const {
     auto is_array = details::are_all_equal<D>(this->getExp());
 
     // If all of the values in the exponential are the same only
@@ -316,12 +316,8 @@ template <int D, typename T> std::ostream &GaussPoly<D, T>::print(std::ostream &
     return o;
 }
 
-template class GaussPoly<1, double>;
-template class GaussPoly<2, double>;
-template class GaussPoly<3, double>;
-
-template class GaussPoly<1, ComplexDouble>;
-template class GaussPoly<2, ComplexDouble>;
-template class GaussPoly<3, ComplexDouble>;
+template class GaussPoly<1>;
+template class GaussPoly<2>;
+template class GaussPoly<3>;
 
 } // namespace mrcpp
diff --git a/src/functions/GaussPoly.h b/src/functions/GaussPoly.h
index d70b93474..97ed6f47d 100644
--- a/src/functions/GaussPoly.h
+++ b/src/functions/GaussPoly.h
@@ -43,38 +43,38 @@ namespace mrcpp {
  *
  * \f$ g(x) = \alpha P(x-x_0) e^{-\beta (x-x_0)^2} \f$
  *
- * - Multidimensional Gaussian (GaussFunc<D, T>):
+ * - Multidimensional Gaussian (GaussPoly<D>):
  *
  * \f$ G(x) = \prod_{d=1}^D g^d(x^d) \f$
  */
 
-template <int D, typename T> class GaussPoly : public Gaussian<D, T> {
+template <int D> class GaussPoly : public Gaussian<D> {
 public:
     GaussPoly(double alpha = 0.0, double coef = 1.0, const Coord<D> &pos = {}, const std::array<int, D> &power = {});
     GaussPoly(const std::array<double, D> &alpha,
               double coef,
               const Coord<D> &pos = {},
               const std::array<int, D> &power = {});
-    GaussPoly(const GaussPoly<D, T> &gp);
-    GaussPoly(const GaussFunc<D, T> &gf);
-    GaussPoly<D, T> &operator=(const GaussPoly<D, T> &gp) = delete;
-    Gaussian<D, T> *copy() const override;
+    GaussPoly(const GaussPoly<D> &gp);
+    GaussPoly(const GaussFunc<D> &gf);
+    GaussPoly<D> &operator=(const GaussPoly<D> &gp) = delete;
+    Gaussian<D> *copy() const override;
     ~GaussPoly();
 
     double calcSquareNorm() const override;
 
-    T evalf(const Coord<D> &r) const override;
-    T evalf1D(double r, int dim) const override;
+    double evalf(const Coord<D> &r) const override;
+    double evalf1D(double r, int dim) const override;
 
-    GaussExp<D, T> asGaussExp() const override;
+    GaussExp<D> asGaussExp() const override;
     GaussPoly differentiate(int dir) const override;
 
-    void multInPlace(const GaussPoly<D, T> &rhs);
-    void operator*=(const GaussPoly<D, T> &rhs) { multInPlace(rhs); }
-    GaussPoly<D, T> mult(const GaussPoly<D, T> &rhs);
-    GaussPoly<D, T> mult(double c);
-    GaussPoly<D, T> operator*(const GaussPoly<D, T> &rhs) { return mult(rhs); }
-    GaussPoly<D, T> operator*(double c) { return mult(c); }
+    void multInPlace(const GaussPoly<D> &rhs);
+    void operator*=(const GaussPoly<D> &rhs) { multInPlace(rhs); }
+    GaussPoly<D> mult(const GaussPoly<D> &rhs);
+    GaussPoly<D> mult(double c);
+    GaussPoly<D> operator*(const GaussPoly<D> &rhs) { return mult(rhs); }
+    GaussPoly<D> operator*(double c) { return mult(c); }
 
     const Eigen::VectorXd &getPolyCoefs(int i) const { return poly[i]->getCoefs(); }
     Eigen::VectorXd &getPolyCoefs(int i) { return poly[i]->getCoefs(); }
diff --git a/src/functions/Gaussian.cpp b/src/functions/Gaussian.cpp
index 2583f7c56..6dbfa7c5b 100644
--- a/src/functions/Gaussian.cpp
+++ b/src/functions/Gaussian.cpp
@@ -46,8 +46,8 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D, typename T>
-Gaussian<D, T>::Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p)
+template <int D>
+Gaussian<D>::Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p)
         : screen(false)
         , coef(c)
         , power(p)
@@ -55,15 +55,15 @@ Gaussian<D, T>::Gaussian(double a, double c, const Coord<D> &r, const std::array
     this->alpha.fill(a);
 }
 
-template <int D, typename T>
-Gaussian<D, T>::Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p)
+template <int D>
+Gaussian<D>::Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p)
         : screen(false)
         , coef(c)
         , power(p)
         , alpha(a)
         , pos(r) {}
 
-template <int D, typename T> void Gaussian<D, T>::multPureGauss(const Gaussian<D, T> &lhs, const Gaussian<D, T> &rhs) {
+template <int D> void Gaussian<D>::multPureGauss(const Gaussian<D> &lhs, const Gaussian<D> &rhs) {
 
     auto newAlpha = std::array<double, D>{};
     auto mju = std::array<double, D>{};
@@ -85,7 +85,7 @@ template <int D, typename T> void Gaussian<D, T>::multPureGauss(const Gaussian<D
     setCoef(newCoef);
 }
 
-template <int D, typename T> void Gaussian<D, T>::calcScreening(double nStdDev) {
+template <int D> void Gaussian<D>::calcScreening(double nStdDev) {
     assert(nStdDev > 0);
     if (not this->isBounded()) {
         this->bounded = true;
@@ -100,7 +100,7 @@ template <int D, typename T> void Gaussian<D, T>::calcScreening(double nStdDev)
     screen = true;
 }
 
-template <int D, typename T> bool Gaussian<D, T>::checkScreen(int n, const int *l) const {
+template <int D> bool Gaussian<D>::checkScreen(int n, const int *l) const {
     if (not getScreen()) { return false; }
     double length = std::pow(2.0, -n);
     const double *A = this->getLowerBounds();
@@ -113,7 +113,7 @@ template <int D, typename T> bool Gaussian<D, T>::checkScreen(int n, const int *
     return false;
 }
 
-template <int D, typename T> bool Gaussian<D, T>::isVisibleAtScale(int scale, int nQuadPts) const {
+template <int D> bool Gaussian<D>::isVisibleAtScale(int scale, int nQuadPts) const {
     for (auto &alp : this->alpha) {
         double stdDeviation = std::pow(2.0 * alp, -0.5);
         auto visibleScale = static_cast<int>(-std::floor(std::log2(nQuadPts * 0.5 * stdDeviation)));
@@ -124,7 +124,7 @@ template <int D, typename T> bool Gaussian<D, T>::isVisibleAtScale(int scale, in
     return true;
 }
 
-template <int D, typename T> bool Gaussian<D, T>::isZeroOnInterval(const double *a, const double *b) const {
+template <int D> bool Gaussian<D>::isZeroOnInterval(const double *a, const double *b) const {
     for (int i = 0; i < D; i++) {
         double stdDeviation = std::pow(2.0 * this->alpha[i], -0.5);
         double gaussBoxMin = this->pos[i] - 5.0 * stdDeviation;
@@ -134,7 +134,7 @@ template <int D, typename T> bool Gaussian<D, T>::isZeroOnInterval(const double
     return false;
 }
 
-template <int D, typename T> void Gaussian<D, T>::evalf(const MatrixXd &points, Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &values) const {
+template <int D> void Gaussian<D>::evalf(const MatrixXd &points, MatrixXd &values) const {
     assert(points.cols() == D);
     assert(points.cols() == values.cols());
     assert(points.rows() == values.rows());
@@ -143,7 +143,7 @@ template <int D, typename T> void Gaussian<D, T>::evalf(const MatrixXd &points,
     }
 }
 
-template <int D, typename T> double Gaussian<D, T>::getMaximumStandardDiviation() const {
+template <int D> double Gaussian<D>::getMaximumStandardDiviation() const {
 
     if (details::are_all_equal<D>(this->getExp())) {
         auto exponent = this->getExp()[0];
@@ -156,15 +156,15 @@ template <int D, typename T> double Gaussian<D, T>::getMaximumStandardDiviation(
     }
 }
 
-template <int D, typename T> double Gaussian<D, T>::calcOverlap(const Gaussian<D, T> &inp) const {
+template <int D> double Gaussian<D>::calcOverlap(const Gaussian<D> &inp) const {
     const auto &bra_exp = this->asGaussExp(); // Make sure all entries are GaussFunc
     const auto &ket_exp = inp.asGaussExp();   // Make sure all entries are GaussFunc
 
     double S = 0.0;
     for (int i = 0; i < bra_exp.size(); i++) {
-        const auto &bra_i = static_cast<const GaussFunc<D, T> &>(bra_exp.getFunc(i));
+        const auto &bra_i = static_cast<const GaussFunc<D> &>(bra_exp.getFunc(i));
         for (int j = 0; j < ket_exp.size(); j++) {
-            const auto &ket_j = static_cast<const GaussFunc<D, T> &>(ket_exp.getFunc(j));
+            const auto &ket_j = static_cast<const GaussFunc<D> &>(ket_exp.getFunc(j));
             S += function_utils::calc_overlap(bra_i, ket_j);
         }
     }
@@ -181,8 +181,8 @@ template <int D, typename T> double Gaussian<D, T>::calcOverlap(const Gaussian<D
  * integral is conserved with respect to the integration limits.
  *
  */
-template <int D, typename T> GaussExp<D, T> Gaussian<D, T>::periodify(const std::array<double, D> &period, double nStdDev) const {
-    GaussExp<D, T> gauss_exp;
+template <int D> GaussExp<D> Gaussian<D>::periodify(const std::array<double, D> &period, double nStdDev) const {
+    GaussExp<D> gauss_exp;
     auto pos_vec = std::vector<Coord<D>>();
 
     auto x_std = nStdDev * this->getMaximumStandardDiviation();
@@ -239,12 +239,8 @@ template <int D, typename T> GaussExp<D, T> Gaussian<D, T>::periodify(const std:
     return gauss_exp;
 }
 
-template class Gaussian<1, double>;
-template class Gaussian<2, double>;
-template class Gaussian<3, double>;
-
-template class Gaussian<1, ComplexDouble>;
-template class Gaussian<2, ComplexDouble>;
-template class Gaussian<3, ComplexDouble>;
+template class Gaussian<1>;
+template class Gaussian<2>;
+template class Gaussian<3>;
 
 } // namespace mrcpp
diff --git a/src/functions/Gaussian.h b/src/functions/Gaussian.h
index 7d5bf7dca..7e79e052a 100644
--- a/src/functions/Gaussian.h
+++ b/src/functions/Gaussian.h
@@ -40,28 +40,28 @@
 
 namespace mrcpp {
 
-template <int D, typename T> class Gaussian : public RepresentableFunction<D, T> {
+    template <int D> class Gaussian : public RepresentableFunction<D, double> {
 public:
     Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p);
     Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p);
-    Gaussian<D, T> &operator=(const Gaussian<D, T> &gp) = delete;
-    virtual Gaussian<D, T> *copy() const = 0;
+    Gaussian<D> &operator=(const Gaussian<D> &gp) = delete;
+    virtual Gaussian<D> *copy() const = 0;
     virtual ~Gaussian() = default;
 
-    virtual T evalf(const Coord<D> &r) const = 0;
-    virtual T evalf1D(double r, int dim) const = 0;
-    void evalf(const Eigen::MatrixXd &points, Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic> &values) const;
- 
-    double calcOverlap(const Gaussian<D, T> &inp) const;
+    virtual double evalf(const Coord<D> &r) const = 0;
+    virtual double evalf1D(double r, int dim) const = 0;
+    void evalf(const Eigen::MatrixXd &points, Eigen::MatrixXd &values) const;
+
+    double calcOverlap(const Gaussian<D> &inp) const;
     virtual double calcSquareNorm() const = 0;
-    virtual GaussExp<D, T> asGaussExp() const = 0;
-    GaussExp<D, T> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
+    virtual GaussExp<D> asGaussExp() const = 0;
+    GaussExp<D> periodify(const std::array<double, D> &period, double nStdDev = 4.0) const;
 
     /** @brief Compute analytic derivative of Gaussian
      *  @param[in] dir: Cartesian direction of derivative
      *  @returns New GaussPoly
      */
-    virtual GaussPoly<D, T> differentiate(int dir) const = 0;
+    virtual GaussPoly<D> differentiate(int dir) const = 0;
 
     void calcScreening(double stdDeviations);
 
@@ -70,7 +70,7 @@ template <int D, typename T> class Gaussian : public RepresentableFunction<D, T>
         double norm = std::sqrt(calcSquareNorm());
         multConstInPlace(1.0 / norm);
     }
-    void multPureGauss(const Gaussian<D, T> &lhs, const Gaussian<D, T> &rhs);
+    void multPureGauss(const Gaussian<D> &lhs, const Gaussian<D> &rhs);
     void multConstInPlace(double c) { this->coef *= c; }
     void operator*=(double c) { multConstInPlace(c); }
 
@@ -92,9 +92,9 @@ template <int D, typename T> class Gaussian : public RepresentableFunction<D, T>
     void setExp(const std::array<double, D> &_alpha) { this->alpha = _alpha; }
     void setPos(const std::array<double, D> &r) { this->pos = r; }
 
-    friend std::ostream &operator<<(std::ostream &o, const Gaussian<D, T> &gauss) { return gauss.print(o); }
+    friend std::ostream &operator<<(std::ostream &o, const Gaussian<D> &gauss) { return gauss.print(o); }
 
-    friend class GaussExp<D, T>;
+    friend class GaussExp<D>;
 
 protected:
     bool screen;
diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp
index 641216915..39f30a938 100644
--- a/src/functions/function_utils.cpp
+++ b/src/functions/function_utils.cpp
@@ -31,7 +31,7 @@ namespace function_utils {
 double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, double expo_a, double expo_b);
 } // namespace function_utils
 
-template <int D, typename T> double function_utils::calc_overlap(const GaussFunc<D, T> &a, const GaussFunc<D, T> &b) {
+template <int D> double function_utils::calc_overlap(const GaussFunc<D> &a, const GaussFunc<D> &b) {
     double S = 1.0;
     for (int d = 0; d < D; d++) {
         S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]);
@@ -114,13 +114,8 @@ double function_utils::ObaraSaika_ab(int power_a, int power_b, double pos_a, dou
     return s_coeff[power_b + 2 * power_a];
 }
 
-template double function_utils::calc_overlap<1, double>(const GaussFunc<1, double> &a, const GaussFunc<1, double> &b);
-template double function_utils::calc_overlap<2, double>(const GaussFunc<2, double> &a, const GaussFunc<2, double> &b);
-template double function_utils::calc_overlap<3, double>(const GaussFunc<3, double> &a, const GaussFunc<3, double> &b);
-
-template double function_utils::calc_overlap<1, ComplexDouble>(const GaussFunc<1, ComplexDouble> &a, const GaussFunc<1, ComplexDouble> &b);
-template double function_utils::calc_overlap<2, ComplexDouble>(const GaussFunc<2, ComplexDouble> &a, const GaussFunc<2, ComplexDouble> &b);
-template double function_utils::calc_overlap<3, ComplexDouble>(const GaussFunc<3, ComplexDouble> &a, const GaussFunc<3, ComplexDouble> &b);
-
+template double function_utils::calc_overlap<1>(const GaussFunc<1> &a, const GaussFunc<1> &b);
+template double function_utils::calc_overlap<2>(const GaussFunc<2> &a, const GaussFunc<2> &b);
+template double function_utils::calc_overlap<3>(const GaussFunc<3> &a, const GaussFunc<3> &b);
 
 } // namespace mrcpp
diff --git a/src/functions/function_utils.h b/src/functions/function_utils.h
index 38955af9b..896c06257 100644
--- a/src/functions/function_utils.h
+++ b/src/functions/function_utils.h
@@ -28,6 +28,6 @@
 
 namespace mrcpp {
 namespace function_utils {
-template <int D, typename T> double calc_overlap(const GaussFunc<D, T> &a, const GaussFunc<D, T> &b);
+template <int D> double calc_overlap(const GaussFunc<D> &a, const GaussFunc<D> &b);
 } // namespace function_utils
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index f37730701..86f71f41c 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -103,13 +103,13 @@ template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const Repr
  * term. Higher exponent means finer resolution.
  *
  */
-template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const GaussExp<D, T> &inp, int maxIter) {
+template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, int maxIter) {
     if (!out.getMRA().getWorldBox().isPeriodic()) {
         auto maxScale = out.getMRA().getMaxScale();
-        TreeBuilder<D, T> builder;
-        DefaultCalculator<D, T> calculator;
+        TreeBuilder<D> builder;
+        DefaultCalculator<D> calculator;
         for (auto i = 0; i < inp.size(); i++) {
-	  AnalyticAdaptor<D, T> adaptor(inp.getFunc(i), maxScale);
+	  AnalyticAdaptor<D> adaptor(inp.getFunc(i), maxScale);
             builder.build(out, calculator, adaptor, maxIter);
         }
     } else {
@@ -327,9 +327,9 @@ template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, const Repr
 template void build_grid<1, double>(FunctionTree<1, double> &out, int scales);
 template void build_grid<2, double>(FunctionTree<2, double> &out, int scales);
 template void build_grid<3, double>(FunctionTree<3, double> &out, int scales);
-template void build_grid<1, double>(FunctionTree<1, double> &out, const GaussExp<1, double> &inp, int maxIter);
-template void build_grid<2, double>(FunctionTree<2, double> &out, const GaussExp<2, double> &inp, int maxIter);
-template void build_grid<3, double>(FunctionTree<3, double> &out, const GaussExp<3, double> &inp, int maxIter);
+template void build_grid<1>(FunctionTree<1> &out, const GaussExp<1> &inp, int maxIter);
+template void build_grid<2>(FunctionTree<2> &out, const GaussExp<2> &inp, int maxIter);
+template void build_grid<3>(FunctionTree<3> &out, const GaussExp<3> &inp, int maxIter);
 template void build_grid<1, double>(FunctionTree<1, double> &out, const RepresentableFunction<1, double> &inp, int maxIter);
 template void build_grid<2, double>(FunctionTree<2, double> &out, const RepresentableFunction<2, double> &inp, int maxIter);
 template void build_grid<3, double>(FunctionTree<3, double> &out, const RepresentableFunction<3, double> &inp, int maxIter);
@@ -368,9 +368,6 @@ template int refine_grid<3, double>(FunctionTree<3, double> &out, const Represen
 template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, int scales);
 template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, int scales);
 template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, int scales);
-template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const GaussExp<1, ComplexDouble> &inp, int maxIter);
-template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const GaussExp<2, ComplexDouble> &inp, int maxIter);
-template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const GaussExp<3, ComplexDouble> &inp, int maxIter);
 template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const RepresentableFunction<1, ComplexDouble> &inp, int maxIter);
 template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp, int maxIter);
 template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp, int maxIter);
@@ -404,5 +401,5 @@ template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out,
 template int refine_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, const RepresentableFunction<1, ComplexDouble> &inp);
 template int refine_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, const RepresentableFunction<2, ComplexDouble> &inp);
 template int refine_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, const RepresentableFunction<3, ComplexDouble> &inp);
-  
+
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.h b/src/treebuilders/grid.h
index 8bb683fd2..42f54aa0a 100644
--- a/src/treebuilders/grid.h
+++ b/src/treebuilders/grid.h
@@ -31,7 +31,7 @@
 
 namespace mrcpp {
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, int scales);
-template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const GaussExp<D, T> &inp, int maxIter = -1);
+template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, int maxIter = -1);
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, const RepresentableFunction<D, T> &inp, int maxIter = -1);
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter = -1);
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter = -1);

From 2627f64ce25efca2bb2996ce672095aaf02dc7b0 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 9 Jul 2024 11:42:20 +0200
Subject: [PATCH 04/38] test with derivative of Complex Exponential

---
 tests/operators/derivative_operator.cpp | 50 +++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index 7e35db405..4e158e8ab 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -122,6 +122,48 @@ template <int D> void testDifferentiationABGV(double a, double b) {
     delete mra;
 }
 
+template <int D> void testDifferentiationCplxABGV(double a, double b) {
+    MultiResolutionAnalysis<D> *mra = initializeMRA<D>();
+
+    double prec = 1.0e-3;
+    ABGVOperator<D> diff(*mra, a, b);
+
+    Coord<D> r_0;
+    for (auto &x : r_0) x = pi;
+
+    auto f = [r_0](const Coord<D> &r) {
+        ComplexDouble s = {1.1, 1.3};
+        double R = math_utils::calc_distance<D>(r, r_0);
+        return std::exp(-R * R * s);
+    };
+
+    auto df = [r_0](const Coord<D> &r) {
+        ComplexDouble s = {1.1, 1.3};
+        double R = math_utils::calc_distance<D>(r, r_0);
+        return -2.0 * s * std::exp(-R * R * s) * (r[0] - r_0[0]);
+    };
+
+    FunctionTree<D, ComplexDouble> f_tree(*mra);
+    project<D, ComplexDouble>(prec / 10, f_tree, f);
+
+    FunctionTree<D, ComplexDouble> df_tree(*mra);
+    project<D, ComplexDouble>(prec / 10, df_tree, df);
+
+    FunctionTree<D, ComplexDouble> dg_tree(*mra);
+    apply(dg_tree, diff, f_tree, 0);
+
+    FunctionTree<D, ComplexDouble> err_tree(*mra);
+    add(-1.0, err_tree, 1.0, df_tree, -1.0, dg_tree);
+
+    double df_norm = std::sqrt(df_tree.getSquareNorm());
+    double abs_err = std::sqrt(err_tree.getSquareNorm());
+    double rel_err = abs_err / df_norm;
+
+    REQUIRE(rel_err == Catch::Approx(0.0).margin(prec));
+
+    delete mra;
+}
+
 template <int D> void testDifferentiationPH(int order) {
     MultiResolutionAnalysis<D> *mra = initializeMRA<D>();
 
@@ -271,6 +313,14 @@ TEST_CASE("ABGV differentiantion center difference", "[derivative_operator], [ce
     SECTION("3D derivative test") { testDifferentiationABGV<3>(0, 0); }
 }
 
+
+TEST_CASE("ABGV differentiantion of Complex function", "[derivative_operator], [Complex]") {
+    // a = b = 0.5 selects the semi-local central difference (a = b = 0 is the strictly local one)
+    SECTION("1D derivative test") { testDifferentiationCplxABGV<1>(0.5, 0.5); }
+    SECTION("2D derivative test") { testDifferentiationCplxABGV<2>(0.5, 0.5); }
+    SECTION("3D derivative test") { testDifferentiationCplxABGV<3>(0.5, 0.5); }
+}
+
 TEST_CASE("PH differentiantion first order", "[derivative_operator], [PH_first_order]") {
     SECTION("1D derivative test") { testDifferentiationPH<1>(1); }
     SECTION("2D derivative test") { testDifferentiationPH<2>(1); }

From fa478e80d0e070150f7865277a15c4e6fc98c219 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peterw@met.no>
Date: Wed, 10 Jul 2024 18:05:06 +0200
Subject: [PATCH 05/38] mrchem compatibility

---
 src/functions/AnalyticFunction.h        |  2 +-
 src/functions/RepresentableFunction.h   |  2 +-
 src/treebuilders/map.cpp                | 20 ++++++++------------
 src/treebuilders/map.h                  |  5 ++---
 src/utils/ComplexFunction.h             | 22 +++++++++++-----------
 tests/operators/derivative_operator.cpp |  7 +++----
 6 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/src/functions/AnalyticFunction.h b/src/functions/AnalyticFunction.h
index adb0a307c..7043d7fe6 100644
--- a/src/functions/AnalyticFunction.h
+++ b/src/functions/AnalyticFunction.h
@@ -32,7 +32,7 @@
 
 namespace mrcpp {
 
-template <int D, typename T> class AnalyticFunction : public RepresentableFunction<D, T> {
+template <int D, typename T = double> class AnalyticFunction : public RepresentableFunction<D, T> {
 public:
     AnalyticFunction() = default;
     ~AnalyticFunction() override = default;
diff --git a/src/functions/RepresentableFunction.h b/src/functions/RepresentableFunction.h
index c22d86292..82381beaa 100644
--- a/src/functions/RepresentableFunction.h
+++ b/src/functions/RepresentableFunction.h
@@ -38,7 +38,7 @@
 #include "MRCPP/constants.h"
 #include "MRCPP/mrcpp_declarations.h"
 #include "trees/NodeIndex.h"
-#include "utils/math_utils.h"
+#include "MRCPP/utils/math_utils.h"
 
 namespace mrcpp {
 
diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp
index d4cb0b900..ba064ca39 100644
--- a/src/treebuilders/map.cpp
+++ b/src/treebuilders/map.cpp
@@ -65,13 +65,13 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D, typename T>
-void map(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, FMap<T, T> fmap, int maxIter, bool absPrec) {
+template <int D>
+void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec) {
 
     int maxScale = out.getMRA().getMaxScale();
-    TreeBuilder<D, T> builder;
-    WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
-    MapCalculator<D, T> calculator(fmap, inp);
+    TreeBuilder<D, double> builder;
+    WaveletAdaptor<D, double> adaptor(prec, maxScale, absPrec);
+    MapCalculator<D, double> calculator(fmap, inp);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -89,12 +89,8 @@ void map(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, FMap<T,
     print::separator(10, ' ');
 }
 
-template void map<1, double>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
-template void map<2, double>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
-template void map<3, double>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
-
-template void map<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
-template void map<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
-template void map<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &inp, FMap<ComplexDouble, ComplexDouble> fmap, int maxIter, bool absPrec);
+template void map<1>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
+template void map<2>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
+template void map<3>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec);
 
 } // Namespace mrcpp
diff --git a/src/treebuilders/map.h b/src/treebuilders/map.h
index db2788c27..4fe3cf72d 100644
--- a/src/treebuilders/map.h
+++ b/src/treebuilders/map.h
@@ -28,10 +28,9 @@
 #include "trees/FunctionTreeVector.h"
 
 namespace mrcpp {
-template <int D, typename T> class RepresentableFunction;
 template <int D, typename T> class FunctionTree;
 
-template <int D, typename T>
-void map(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, FMap<T, T> fmap, int maxIter = -1, bool absPrec = false);
+template <int D>
+void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter = -1, bool absPrec = false);
 
 } // namespace mrcpp
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index 7cfac3a8b..c43d3475c 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -77,8 +77,8 @@ class TreePtr final {
     FunctionData func_data;
     mrcpp::SharedMemory<double> *shared_mem_re;
     mrcpp::SharedMemory<double> *shared_mem_im;
-    mrcpp::FunctionTree<3> *re; ///< Real part of function
-    mrcpp::FunctionTree<3> *im; ///< Imaginary part of function
+    mrcpp::FunctionTree<3, double> *re; ///< Real part of function
+    mrcpp::FunctionTree<3, double> *im; ///< Imaginary part of function
 
     void flushFuncData() {
         this->func_data.real_size = 0;
@@ -121,10 +121,10 @@ class ComplexFunction {
     FunctionData &getFunctionData();
     int occ() const { return this->func_ptr->func_data.occ; }
     int spin() const { return this->func_ptr->func_data.spin; }
-    FunctionTree<3> &real() { return *this->func_ptr->re; }
-    FunctionTree<3> &imag() { return *this->func_ptr->im; }
-    const FunctionTree<3> &real() const { return *this->func_ptr->re; }
-    const FunctionTree<3> &imag() const { return *this->func_ptr->im; }
+    FunctionTree<3, double> &real() { return *this->func_ptr->re; }
+    FunctionTree<3, double> &imag() { return *this->func_ptr->im; }
+    const FunctionTree<3, double> &real() const { return *this->func_ptr->re; }
+    const FunctionTree<3, double> &imag() const { return *this->func_ptr->im; }
     void release() { this->func_ptr.reset(); }
     bool conjugate() const { return this->conj; }
     MultiResolutionAnalysis<3> *funcMRA = nullptr;
@@ -141,8 +141,8 @@ class ComplexFunction {
     int getSizeNodes(int type) const;
     int getNNodes(int type) const;
 
-    void setReal(mrcpp::FunctionTree<3> *tree);
-    void setImag(mrcpp::FunctionTree<3> *tree);
+    void setReal(mrcpp::FunctionTree<3, double> *tree);
+    void setImag(mrcpp::FunctionTree<3, double> *tree);
 
     double norm() const;
     double squaredNorm() const;
@@ -172,8 +172,8 @@ void project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double
 void multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
 void multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
 void multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3> &f, double prec, int nrefine = 0);
-void multiply(ComplexFunction &out, FunctionTree<3> &inp_a, RepresentableFunction<3> &f, double prec, int nrefine = 0);
+void multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
+void multiply(ComplexFunction &out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
 void linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec);
 } // namespace cplxfunc
 
@@ -187,7 +187,7 @@ class MPI_FuncVector : public std::vector<ComplexFunction> {
 namespace mpifuncvec {
 void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
 void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec = -1.0);
-void save_nodes(MPI_FuncVector &Phi, mrcpp::FunctionTree<3> &refTree, BankAccount &account, int sizes = -1);
+void save_nodes(MPI_FuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
 MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
 ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
 ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi);
diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index 4e158e8ab..80a1d8a31 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -127,18 +127,17 @@ template <int D> void testDifferentiationCplxABGV(double a, double b) {
 
     double prec = 1.0e-3;
     ABGVOperator<D> diff(*mra, a, b);
+    ComplexDouble s = {1.1, 1.3};
 
     Coord<D> r_0;
     for (auto &x : r_0) x = pi;
 
-    auto f = [r_0](const Coord<D> &r) {
-        ComplexDouble s = {1.1, 1.3};
+    auto f = [r_0, s](const Coord<D> &r) {
         double R = math_utils::calc_distance<D>(r, r_0);
         return std::exp(-R * R * s);
     };
 
-    auto df = [r_0](const Coord<D> &r) {
-        ComplexDouble s = {1.1, 1.3};
+    auto df = [r_0, s](const Coord<D> &r) {
         double R = math_utils::calc_distance<D>(r, r_0);
         return -2.0 * s * std::exp(-R * R * s) * (r[0] - r_0[0]);
     };

From a595da6bc942d8ec212c26528b599452d182db5a Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 12 Jul 2024 11:44:48 +0200
Subject: [PATCH 06/38] Four components class and apply

---
 src/treebuilders/apply.cpp | 158 +++++++++++++++++++++++++++++++++++++
 src/treebuilders/apply.h   |   9 +++
 src/trees/FunctionTree.cpp |   2 +-
 src/trees/FunctionTree.h   |   2 +-
 4 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 04075d021..599a14c8c 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -91,6 +91,46 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
+
+/** @brief Application of MW integral convolution operator on a four-component function
+ *
+ * @param[in] prec: Build precision of output function
+ * @param[out] out: Output function to be built
+ * @param[in] oper: Convolution operator to apply
+ * @param[in] inp: Input function
+ * @param[in] metric: 4x4 array of coefficients, indexed as metric[icomp][ocomp], relating the in and out components
+ * @param[in] maxIter: Maximum number of refinement iterations in output tree, default -1
+ * @param[in] absPrec: Build output tree based on absolute precision, default false
+ *
+ * @details The output function will be computed using the general algorithm:
+ * - For each non-null input component, apply the operator once per non-negligible metric entry
+ * - Compute MW coefs on current grid
+ * - Refine grid where necessary based on `prec`
+ * - Repeat until convergence or `maxIter` is reached
+ * - `prec < 0` or `maxIter = 0` means NO refinement
+ * - `maxIter < 0` means no bound
+ * - After application, multiply by the metric coefficient (skipped when it equals one) in the relevant output component
+ *
+ * @note This algorithm will start at whatever grid is present in the `out`
+ * tree when the function is called (this grid should however be EMPTY, i.e.
+ * no coefs).
+ * NOTE(review): several input components coupled to the same output component all target the same output tree - confirm intended.
+ */
+template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
+            const T coef = metric[icomp][ocomp];
+            if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+            apply(prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
+            // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+            if (std::abs(coef - 1.0) > MachinePrec) {
+                out.Comp[ocomp]->rescale(coef);
+            }
+        }
+    }
+}
+
 /** @brief Application of MW integral convolution operator
  *
  * @param[in] inside: Use points inside (true) or outside (false) the unitcell
@@ -205,6 +245,21 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
+// Four-component variant of the precTrees-driven apply: for every non-null input component and every non-negligible
+// metric[icomp][ocomp], applies `oper` into out.Comp[ocomp] using precTrees[icomp], then scales by that coefficient.
+template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, FunctionTreeVector<D, T> *precTrees, T **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
+            const T coef = metric[icomp][ocomp];
+            if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+            apply(prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], precTrees[icomp], maxIter, absPrec);
+            // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+            if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+        }
+    }
+}
+
 /** @brief Application of MW integral convolution operator on a periodic cell,
            excluding contributions inside the unit cell.
  *
@@ -231,6 +286,21 @@ template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T
     apply_on_unit_cell<D>(false, prec, out, oper, inp, maxIter, absPrec);
 }
 
+// Four-component far-field application: for every non-null input component and every non-negligible
+// metric[icomp][ocomp], calls apply_on_unit_cell with inside = false into out.Comp[ocomp], then scales by that coefficient.
+template <int D, typename T> void apply_far_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
+            const T coef = metric[icomp][ocomp];
+            if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+            apply_on_unit_cell<D>(false, prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
+            // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+            if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+        }
+    }
+}
+
 /** @brief Application of MW integral convolution operator on a periodic cell,
            excluding contributions outside the unit cell.
  *
@@ -257,6 +327,22 @@ template <int D, typename T> void apply_near_field(double prec, FunctionTree<D,
     apply_on_unit_cell<D>(true, prec, out, oper, inp, maxIter, absPrec);
 }
 
+
+// Four-component near-field application: for every non-null input component and every non-negligible
+// metric[icomp][ocomp], calls apply_on_unit_cell with inside = true into out.Comp[ocomp], then scales by that coefficient.
+template <int D, typename T> void apply_near_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
+            const T coef = metric[icomp][ocomp];
+            if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+            apply_on_unit_cell<D>(true, prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
+            // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+            if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+        }
+    }
+}
+
 /** @brief Application of MW derivative operator
  *
  * @param[out] out: Output function to be built
@@ -308,6 +394,21 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
     print::separator(10, ' ');
 }
 
+// Four-component derivative application: for every non-null input component and every non-negligible
+// metric[icomp][ocomp], applies `oper` along direction `dir` into out.Comp[ocomp], then scales by that coefficient.
+template <int D, typename T> void apply(CompFunction<D, T> &out, DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int dir) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
+            const T coef = metric[icomp][ocomp];
+            if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+            apply(*out.Comp[ocomp], oper, *inp.Comp[icomp], dir);
+            // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+            if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+        }
+    }
+}
+
 /** @brief Calculation of gradient vector of a function
  *
  * @param[in] oper: Derivative operator to apply
@@ -330,6 +431,28 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
     return out;
 }
 
+// Builds one CompFunction per direction d = 0..D-1 holding the derivative of `inp` along d,
+// with input components coupled to output components (and scaled) through metric[icomp][ocomp].
+template <int D, typename T> CompFunctionVector<D, T> gradient(DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric) {
+    CompFunctionVector<D, T> out;
+    for (int d = 0; d < D; d++) {
+        CompFunction<D, T> *grad_d = new CompFunction<D, T>();
+        for (int icomp = 0; icomp < 4; icomp++) {
+            if (inp.Comp[icomp] == nullptr) continue; // skip null input components
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
+                const T coef = metric[icomp][ocomp];
+                if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+                // NOTE(review): a second coupling to the same ocomp replaces the tree allocated earlier without freeing it - confirm.
+                grad_d->Comp[ocomp] = new FunctionTree<D, T>(inp.getMRA());
+                apply(*grad_d->Comp[ocomp], oper, *inp.Comp[icomp], d); // tree-level apply takes a reference, so dereference the pointer
+                if (std::abs(coef - 1.0) > MachinePrec) grad_d->Comp[ocomp]->rescale(coef); // std::abs: bare abs may truncate for T = double
+            }
+        }
+        out.push_back(grad_d); // was misspelled `oush_back`; NOTE(review): assumes CompFunctionVector stores pointers - confirm
+    }
+    return out;
+}
+
 /** @brief Calculation of divergence of a function vector
  *
  * @param[out] out: Output function
@@ -364,15 +487,47 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
     clear(tmp_vec, true);
 }
 
+// Four-component divergence. NOTE(review): `inp` is indexed per input component; the nullptr test assumes inp[icomp] is pointer-like - confirm.
+template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, T **metric) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp[icomp] != nullptr) {
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
+                const T coef = metric[icomp][ocomp];
+                if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+                divergence(*out.Comp[ocomp], oper, inp[icomp]);
+                // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+                if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+            }
+        }
+    }
+}
+
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp) {
     FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     divergence(out, oper, inp_vec);
 }
+// Four-component divergence. NOTE(review): inp[icomp] is a std::vector here, so the nullptr test looks ill-formed once instantiated - confirm.
+template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, T **metric) {
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp[icomp] != nullptr) {
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
+                const T coef = metric[icomp][ocomp];
+                if (!(std::norm(coef) > MachinePrec)) continue; // negligible coupling, nothing to apply
+                divergence(*out.Comp[ocomp], oper, inp[icomp]); // was `apply`; no visible apply overload accepts a vector of trees
+                // std::abs rather than bare abs: an unqualified call may bind to the integer ::abs and truncate for T = double
+                if (std::abs(coef - 1.0) > MachinePrec) out.Comp[ocomp]->rescale(coef);
+            }
+        }
+    }
+}
 
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
+template void apply<1, double>(double prec, CompFunction<1, double> &out, ConvolutionOperator<1> &oper, CompFunction<1, double> &inp, double **metric, int maxIter, bool absPrec); // defaults are declared in apply.h
+template void apply<2, double>(double prec, CompFunction<2, double> &out, ConvolutionOperator<2> &oper, CompFunction<2, double> &inp, double **metric, int maxIter, bool absPrec);
+template void apply<3, double>(double prec, CompFunction<3, double> &out, ConvolutionOperator<3> &oper, CompFunction<3, double> &inp, double **metric, int maxIter, bool absPrec);
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
@@ -400,6 +555,9 @@ template FunctionTreeVector<3, double> gradient<3>(DerivativeOperator<3> &oper,
 template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
+template void apply<1, ComplexDouble>(double prec, CompFunction<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, CompFunction<1, ComplexDouble> &inp, ComplexDouble **metric, int maxIter, bool absPrec); // defaults are declared in apply.h
+template void apply<2, ComplexDouble>(double prec, CompFunction<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, CompFunction<2, ComplexDouble> &inp, ComplexDouble **metric, int maxIter, bool absPrec);
+template void apply<3, ComplexDouble>(double prec, CompFunction<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, CompFunction<3, ComplexDouble> &inp, ComplexDouble **metric, int maxIter, bool absPrec);
 template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, FunctionTreeVector<1, ComplexDouble> &precTrees, int maxIter, bool absPrec);
 template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, FunctionTreeVector<2, ComplexDouble> &precTrees, int maxIter, bool absPrec);
 template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec);
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index f6217e381..8c0d4039f 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -26,6 +26,7 @@
 #pragma once
 
 #include "trees/FunctionTreeVector.h"
+#include "utils/CompFunction.h"
 
 namespace mrcpp {
 
@@ -35,13 +36,21 @@ template <int D> class DerivativeOperator;
 template <int D> class ConvolutionOperator;
 
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, FunctionTreeVector<D, T> *precTrees, T **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply_far_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_near_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply_near_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTree<D, T> &inp, int dir = -1);
+template <int D, typename T> void apply(CompFunction<D, T> &out, DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int dir = -1);
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> &inp);
+template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, T **metric);
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp);
+template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, T **metric);
 template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp);
+template <int D, typename T> CompFunctionVector<D, T> gradient(DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric);
 // clang-format on
 
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 42adc76fa..c39fcb86e 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -363,7 +363,7 @@ template <int D, typename T> void FunctionTree<D, T>::power(double p) {
  * in-place multiplied by the given coefficient, no grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::rescale(double c) {
+template <int D, typename T> void FunctionTree<D, T>::rescale(T c) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads())
     {
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 0df33685c..191c47f16 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -77,7 +77,7 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     // In place operations
     void square();
     void power(double p);
-    void rescale(double c);
+    void rescale(T c);
     void normalize();
     void add(double c, FunctionTree<D, T> &inp);
     void absadd(double c, FunctionTree<D, T> &inp);

From 303a5457aa52c7067ffe57842eaa92cb3f53f825 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 12 Jul 2024 14:33:51 +0200
Subject: [PATCH 07/38] More native Complex tree operations: add, mult, Bank,
 send, rescale

---
 src/treebuilders/AdditionCalculator.h       |  2 +-
 src/treebuilders/MultiplicationCalculator.h |  2 +-
 src/treebuilders/add.cpp                    | 18 ++--
 src/treebuilders/add.h                      |  4 +-
 src/treebuilders/apply.cpp                  |  2 +-
 src/treebuilders/multiply.cpp               | 15 ++--
 src/treebuilders/multiply.h                 |  2 +-
 src/trees/FunctionTree.cpp                  | 39 ++++++--
 src/trees/FunctionTree.h                    |  8 +-
 src/trees/FunctionTreeVector.h              |  4 +-
 src/trees/MWNode.h                          |  4 +-
 src/trees/MWTree.cpp                        | 10 +--
 src/trees/MWTree.h                          |  2 +-
 src/utils/Bank.cpp                          | 75 +++++++++++++++-
 src/utils/Bank.h                            |  4 +
 src/utils/mpi_utils.cpp                     | 30 ++++---
 src/utils/parallel.cpp                      | 98 ++++++++++++++++++++-
 src/utils/parallel.h                        |  9 +-
 tests/operators/derivative_operator.cpp     |  2 +-
 19 files changed, 267 insertions(+), 63 deletions(-)

diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h
index eb0322947..a7804a761 100644
--- a/src/treebuilders/AdditionCalculator.h
+++ b/src/treebuilders/AdditionCalculator.h
@@ -43,7 +43,7 @@ template <int D, typename T> class AdditionCalculator final : public TreeCalcula
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         T *coefs_o = node_o.getCoefs();
         for (int i = 0; i < this->sum_vec.size(); i++) {
-            double c_i = get_coef(this->sum_vec, i);
+            T c_i = get_coef(this->sum_vec, i);
             FunctionTree<D, T> &func_i = get_func(this->sum_vec, i);
             // This generates missing nodes
             const MWNode<D, T> &node_i = func_i.getNode(idx);
diff --git a/src/treebuilders/MultiplicationCalculator.h b/src/treebuilders/MultiplicationCalculator.h
index dac957822..4f82756c2 100644
--- a/src/treebuilders/MultiplicationCalculator.h
+++ b/src/treebuilders/MultiplicationCalculator.h
@@ -43,7 +43,7 @@ template <int D, typename T> class MultiplicationCalculator final : public TreeC
         T *coefs_o = node_o.getCoefs();
         for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
         for (int i = 0; i < this->prod_vec.size(); i++) {
-            double c_i = get_coef(this->prod_vec, i);
+            T c_i = get_coef(this->prod_vec, i);
             FunctionTree<D, T> &func_i = get_func(this->prod_vec, i);
             // This generates missing nodes
             MWNode<D, T> node_i = func_i.getNode(idx); // Copy node
diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp
index 4278b46ca..584e61e68 100644
--- a/src/treebuilders/add.cpp
+++ b/src/treebuilders/add.cpp
@@ -64,9 +64,9 @@ namespace mrcpp {
 template <int D, typename T>
 void add(double prec,
          FunctionTree<D, T> &out,
-         double a,
+         T a,
          FunctionTree<D, T> &inp_a,
-         double b,
+         T b,
          FunctionTree<D, T> &inp_b,
          int maxIter,
          bool absPrec) {
@@ -190,29 +190,27 @@ template void add<3, double>(double prec,
                      bool absPrec);
 
 
-
-  
 template void add<1, ComplexDouble>(double prec,
                      FunctionTree<1, ComplexDouble> &out,
-                     double a,
+                     ComplexDouble a,
                      FunctionTree<1, ComplexDouble> &tree_a,
-                     double b,
+                     ComplexDouble b,
                      FunctionTree<1, ComplexDouble> &tree_b,
                      int maxIter,
                      bool absPrec);
 template void add<2, ComplexDouble>(double prec,
                      FunctionTree<2, ComplexDouble> &out,
-                     double a,
+                     ComplexDouble a,
                      FunctionTree<2, ComplexDouble> &tree_a,
-                     double b,
+                     ComplexDouble b,
                      FunctionTree<2, ComplexDouble> &tree_b,
                      int maxIter,
                      bool absPrec);
 template void add<3, ComplexDouble>(double prec,
                      FunctionTree<3, ComplexDouble> &out,
-                     double a,
+                     ComplexDouble a,
                      FunctionTree<3, ComplexDouble> &tree_a,
-                     double b,
+                     ComplexDouble b,
                      FunctionTree<3, ComplexDouble> &tree_b,
                      int maxIter,
                      bool absPrec);
diff --git a/src/treebuilders/add.h b/src/treebuilders/add.h
index 68a3b3560..dae1b366a 100644
--- a/src/treebuilders/add.h
+++ b/src/treebuilders/add.h
@@ -30,9 +30,9 @@ namespace mrcpp {
 
 template <int D, typename T> void add(double prec,
                           FunctionTree<D, T> &out,
-                          double a,
+                          T a,
                           FunctionTree<D, T> &tree_a,
-                          double b,
+                          T b,
                           FunctionTree<D, T> &tree_b,
                           int maxIter = -1,
                           bool absPrec = false);
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 04075d021..4a072f694 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -353,7 +353,7 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
 
     FunctionTreeVector<D, T> tmp_vec;
     for (int d = 0; d < D; d++) {
-        double coef_d = get_coef(inp, d);
+        T coef_d = get_coef(inp, d);
         FunctionTree<D, T> &func_d = get_func(inp, d);
         auto *out_d = new FunctionTree<D, T>(func_d.getMRA());
         apply(*out_d, oper, func_d, d);
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index 3ab96cd64..6cbf58b72 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -71,7 +71,7 @@ namespace mrcpp {
 template <int D, typename T>
 void multiply(double prec,
               FunctionTree<D, T> &out,
-              double c,
+              T c,
               FunctionTree<D, T> &inp_a,
               FunctionTree<D, T> &inp_b,
               int maxIter,
@@ -278,13 +278,14 @@ void dot(double prec,
 
     FunctionTreeVector<D, T> tmp_vec;
     for (int d = 0; d < inp_a.size(); d++) {
-        double coef_a = get_coef(inp_a, d);
-        double coef_b = get_coef(inp_b, d);
+        T coef_a = get_coef(inp_a, d);
+        T coef_b = get_coef(inp_b, d);
         FunctionTree<D, T> &tree_a = get_func(inp_a, d);
         FunctionTree<D, T> &tree_b = get_func(inp_b, d);
         auto *out_d = new FunctionTree<D, T>(out.getMRA());
         build_grid(*out_d, out);
-        multiply(prec, *out_d, 1.0, tree_a, tree_b, maxIter, absPrec);
+        T One = 1.0;
+        multiply(prec, *out_d, One, tree_a, tree_b, maxIter, absPrec);
         tmp_vec.push_back({coef_a * coef_b, out_d});
     }
     build_grid(out, tmp_vec);
@@ -509,7 +510,7 @@ template double node_norm_dot<3, double>(FunctionTree<3, double> &bra, FunctionT
 
 template void multiply<1, ComplexDouble>(double prec,
                           FunctionTree<1, ComplexDouble> &out,
-                          double c,
+                          ComplexDouble c,
                           FunctionTree<1, ComplexDouble> &tree_a,
                           FunctionTree<1, ComplexDouble> &tree_b,
                           int maxIter,
@@ -517,7 +518,7 @@ template void multiply<1, ComplexDouble>(double prec,
                           bool useMaxNorms);
 template void multiply<2, ComplexDouble>(double prec,
                           FunctionTree<2, ComplexDouble> &out,
-                          double c,
+                          ComplexDouble c,
                           FunctionTree<2, ComplexDouble> &tree_a,
                           FunctionTree<2, ComplexDouble> &tree_b,
                           int maxIter,
@@ -525,7 +526,7 @@ template void multiply<2, ComplexDouble>(double prec,
                           bool useMaxNorms);
 template void multiply<3, ComplexDouble>(double prec,
                           FunctionTree<3, ComplexDouble> &out,
-                          double c,
+                          ComplexDouble c,
                           FunctionTree<3, ComplexDouble> &tree_a,
                           FunctionTree<3, ComplexDouble> &tree_b,
                           int maxIter,
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index 3f66cd3ad..96a956f3b 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -47,7 +47,7 @@ template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra,
 
 template <int D, typename T> void multiply(double prec,
                                FunctionTree<D, T> &out,
-                               double c,
+                               T c,
                                FunctionTree<D, T> &inp_a,
                                FunctionTree<D, T> &inp_b,
                                int maxIter = -1,
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 42adc76fa..1c91cf2cd 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -363,7 +363,7 @@ template <int D, typename T> void FunctionTree<D, T>::power(double p) {
  * in-place multiplied by the given coefficient, no grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::rescale(double c) {
+template <int D, typename T> void FunctionTree<D, T>::rescale(T c) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) num_threads(mrcpp_get_num_threads())
     {
@@ -399,7 +399,7 @@ template <int D, typename T> void FunctionTree<D, T>::normalize() {
  * the function, i.e. no further grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::add(double c, FunctionTree<D, T> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::add(T c, FunctionTree<D, T> &inp) {
     if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
@@ -428,7 +428,7 @@ template <int D, typename T> void FunctionTree<D, T>::add(double c, FunctionTree
  * function, i.e. no further grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::absadd(double c, FunctionTree<D, T> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::absadd (T c, FunctionTree<D, T> &inp) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
     {
@@ -443,7 +443,7 @@ template <int D, typename T> void FunctionTree<D, T>::absadd(double c, FunctionT
             inp_node.cvTransform(Forward);
             T *out_coefs = out_node.getCoefs();
             const T *inp_coefs = inp_node.getCoefs();
-            for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] = abs(out_coefs[i]) + c * abs(inp_coefs[i]); }
+            for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] = std::abs(out_coefs[i]) + c * std::abs(inp_coefs[i]); } // std::abs is the magnitude; std::norm would give its square
             out_node.cvTransform(Backward);
             out_node.mwTransform(Compression);
             out_node.calcNorms();
@@ -463,7 +463,7 @@ template <int D, typename T> void FunctionTree<D, T>::absadd(double c, FunctionT
  * of the function, i.e. no further grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::multiply(double c, FunctionTree<D, T> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::multiply(T c, FunctionTree<D, T> &inp) {
     if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
@@ -763,16 +763,39 @@ template <int D, typename T> void FunctionTree<D, T>::deleteGeneratedParents() {
     for (int n = 0; n < this->getRootBox().size(); n++) this->getRootMWNode(n).deleteParent();
 }
 
-template <> int FunctionTree<3>::saveNodesAndRmCoeff() {
+template <> int FunctionTree<3, double>::saveNodesAndRmCoeff() {
     if (this->isLocal) MSG_INFO("Tree is already in local representation");
     NodesCoeff = new BankAccount; // NB: must be a collective call!
     int stack_p = 0;
     if (mpi::wrk_rank == 0) {
         int sizecoeff = (1 << 3) * this->getKp1_d();
-        std::vector<MWNode<3> *> stack; // nodes from this Tree
+        std::vector<MWNode<3, double> *> stack; // nodes from this Tree
         for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) { stack.push_back(this->getRootBox().getNodes()[rIdx]); }
         while (stack.size() > stack_p) {
-            MWNode<3> *Node = stack[stack_p++];
+            MWNode<3, double> *Node = stack[stack_p++];
+            int id = 0;
+            NodesCoeff->put_data(Node->getNodeIndex(), sizecoeff, Node->getCoefs());
+            for (int i = 0; i < Node->getNChildren(); i++) { stack.push_back(Node->children[i]); }
+        }
+    }
+    this->nodeAllocator_p->deallocAllCoeff();
+    mpi::broadcast_Tree_noCoeff(*this, mpi::comm_wrk);
+    this->isLocal = true;
+    assert(this->NodeIndex2serialIx.size() == getNNodes());
+    return this->NodeIndex2serialIx.size();
+}
+
+template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
+    if (this->isLocal) MSG_INFO("Tree is already in local representation");
+    NodesCoeff = new BankAccount; // NB: must be a collective call!
+    int stack_p = 0;
+    if (mpi::wrk_rank == 0) {
+        int sizecoeff = (1 << 3) * this->getKp1_d();
+        sizecoeff *= 2; // double->ComplexDouble. Saved as twice as many doubles. NOTE(review): put_data(..., ComplexDouble *) also writes size * 2 into its header, so the count may be doubled twice - confirm
+        std::vector<MWNode<3, ComplexDouble> *> stack; // nodes from this Tree
+        for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) { stack.push_back(this->getRootBox().getNodes()[rIdx]); }
+        while (stack.size() > stack_p) {
+            MWNode<3, ComplexDouble> *Node = stack[stack_p++];
             int id = 0;
             NodesCoeff->put_data(Node->getNodeIndex(), sizecoeff, Node->getCoefs());
             for (int i = 0; i < Node->getNChildren(); i++) { stack.push_back(Node->children[i]); }
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 0df33685c..c9e8ecde8 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -77,11 +77,11 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     // In place operations
     void square();
     void power(double p);
-    void rescale(double c);
+    void rescale(T c);
     void normalize();
-    void add(double c, FunctionTree<D, T> &inp);
-    void absadd(double c, FunctionTree<D, T> &inp);
-    void multiply(double c, FunctionTree<D, T> &inp);
+    void add(T c, FunctionTree<D, T> &inp);
+    void absadd(T c, FunctionTree<D, T> &inp);
+    void multiply(T c, FunctionTree<D, T> &inp);
     void map(FMap<T, T> fmap);
 
     int getNChunks() { return this->getNodeAllocator().getNChunks(); }
diff --git a/src/trees/FunctionTreeVector.h b/src/trees/FunctionTreeVector.h
index c0a4a3a76..a9ed84d91 100644
--- a/src/trees/FunctionTreeVector.h
+++ b/src/trees/FunctionTreeVector.h
@@ -32,7 +32,7 @@
 
 namespace mrcpp {
 
-template <int D, typename T = double> using CoefsFunctionTree = std::tuple<double, FunctionTree<D, T> *>;
+template <int D, typename T = double> using CoefsFunctionTree = std::tuple<T, FunctionTree<D, T> *>;
 template <int D, typename T = double> using FunctionTreeVector = std::vector<CoefsFunctionTree<D, T>>;
 
 /** @brief Remove all entries in the vector
@@ -77,7 +77,7 @@ template <int D, typename T> int get_size_nodes(const FunctionTreeVector<D, T> &
  *  @param[in] fs: Vector to fetch from
  *  @param[in] i: Position in vector
  */
-template <int D, typename T> double get_coef(const FunctionTreeVector<D, T> &fs, int i) {
+template <int D, typename T> T get_coef(const FunctionTreeVector<D, T> &fs, int i) {
     return std::get<0>(fs[i]);
 }
 
diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h
index 769545bfa..ed25762b4 100644
--- a/src/trees/MWNode.h
+++ b/src/trees/MWNode.h
@@ -50,9 +50,9 @@ namespace mrcpp {
  * translation index, the norm, pointers to parent node and child
  * nodes, pointer to the corresponding MWTree etc... See member and
  * data descriptions for details.
- * 
+ *
  */
-  template <int D, typename T> class MWNode {
+template <int D, typename T> class MWNode {
 public:
     MWNode(const MWNode<D, T> &node, bool allocCoef = true, bool SetCoef = true);
     MWNode<D , T> &operator=(const MWNode<D , T> &node) = delete;
diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp
index 652f9c2cd..c849517da 100644
--- a/src/trees/MWTree.cpp
+++ b/src/trees/MWTree.cpp
@@ -126,9 +126,9 @@ template <int D, typename T> void MWTree<D, T>::calcSquareNorm() {
  * @details It performs a Multiwavlet transform of the whole tree. The
  * input parameters will specify the direction (upwards or downwards)
  * and whether the result is added to the coefficients or it
- * overwrites them. See the documentation for the #mwTransformUp 
+ * overwrites them. See the documentation for the #mwTransformUp
  * and #mwTransformDown for details.
- * \f[ 
+ * \f[
  * \pmatrix{
  * s_{nl}\\
  * d_{nl}
@@ -215,7 +215,7 @@ template <int D, typename T> void MWTree<D, T>::mwTransformDown(bool overwrite)
 }
 
 /** @brief Set the MW coefficients to zero, keeping the same tree structure
- *   
+ *
  * @details Keeps the node structure of the tree, even though the zero
  * function is representable at depth zero. One should then use \ref cropTree to remove
  * unnecessary nodes.
@@ -447,7 +447,7 @@ template <int D, typename T> MWNodeVector<D, T> *MWTree<D, T>::copyEndNodeTable(
  *
  * @details the endNodeTable is first deleted and then rebuilt from
  * scratch. It makes use of the TreeIterator to traverse the tree.
- * 
+ *
  */
 template <int D, typename T> void MWTree<D, T>::resetEndNodeTable() {
     clearEndNodeTable();
@@ -552,7 +552,7 @@ template <int D, typename T> int MWTree<D, T>::getIx(NodeIndex<D> nIdx) {
     else return NodeIndex2serialIx[nIdx];
 }
 
-template <int D, typename T> void MWTree<D, T>::getNodeCoeff(NodeIndex<D> nIdx, double *data) {
+template <int D, typename T> void MWTree<D, T>::getNodeCoeff(NodeIndex<D> nIdx, T *data) {
     assert(this->isLocal);
     int size = (1 << D) * kp1_d;
     int id = 0;
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index d8158b589..c2f231ccf 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -142,7 +142,7 @@ class BankAccount;
     const NodeAllocator<D, T> &getNodeAllocator() const { return *this->nodeAllocator_p; }
     MWNodeVector<D, T> endNodeTable;          ///< Final projected nodes
 
-    void getNodeCoeff(NodeIndex<D> nIdx, double *data); // fetch coefficient from a specific node stored in Bank
+    void getNodeCoeff(NodeIndex<D> nIdx, T *data); // fetch coefficient from a specific node stored in Bank
 
     friend std::ostream &operator<<(std::ostream &o, const MWTree<D, T> &tree) { return tree.print(o); }
 
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index a774c44ff..c00338a9c 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -385,7 +385,6 @@ void Bank::open() {
             deposits[ix].source = status.MPI_SOURCE;
             if (message == SAVE_FUNCTION) {
                 recv_function(*deposits[ix].orb, deposits[ix].source, 1, comm_bank);
-                cout<<"recv ORB size "<<deposits[ix].orb->getSizeNodes(NUMBER::Total)<<endl;
                 if (exist_flag == 0) {
                     currentsize[account] += deposits[ix].orb->getSizeNodes(NUMBER::Total);
                     totcurrentsize += deposits[ix].orb->getSizeNodes(NUMBER::Total);
@@ -721,6 +720,23 @@ int BankAccount::put_data(int id, int size, double *data) {
     return 1;
 }
 
+// Save complex data in Bank with identity id; the header announces size * 2 doubles. datasize MUST have been set already. NB: not tested. Does nothing without MPI; always returns 1.
+int BankAccount::put_data(int id, int size, ComplexDouble *data) {
+#ifdef MRCPP_HAS_MPI
+    // for now we distribute according to id
+    int messages[message_size];
+
+    messages[0] = SAVE_DATA;
+    messages[1] = account_id;
+    messages[2] = id;
+    messages[3] = size * 2; // announced payload: saved as twice as many doubles
+    messages[4] = MIN_SCALE; // to indicate that it is defined by id
+    MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); // header, tag 0, to the bank rank selected by id
+    MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); // NOTE(review): sends `size` doubles while the header announces size * 2 and get_data() receives size * 2 - confirm
+#endif
+    return 1;
+}
+
 // save data in Bank with identity nIdx. datasize MUST have been set already. NB:not tested
 int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) {
 #ifdef MRCPP_HAS_MPI
@@ -740,6 +756,26 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, double *data) {
     return 1;
 }
 
+// Save complex data in Bank with identity nIdx; the header announces size * 2 doubles. datasize MUST have been set already. NB: not tested. Does nothing without MPI; always returns 1.
+int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) {
+#ifdef MRCPP_HAS_MPI
+    // for now we distribute according to id
+    int messages[message_size];
+    messages[0] = SAVE_DATA;
+    messages[1] = account_id;
+    messages[2] = nIdx.getTranslation(0);
+    messages[3] = size * 2; // announced payload: saved as twice as many doubles
+    messages[4] = nIdx.getScale();
+    messages[5] = nIdx.getTranslation(1);
+    messages[6] = nIdx.getTranslation(2);
+    int id = std::abs(nIdx.getTranslation(0) + nIdx.getTranslation(1) + nIdx.getTranslation(2)); // selects the bank rank below
+    MPI_Send(messages, 7, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); // header, tag 0
+    MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank); // NOTE(review): sends `size` doubles while the header announces size * 2 and get_data() receives size * 2 - confirm
+#endif
+    return 1;
+}
+
+
 // get data with identity id
 int BankAccount::get_data(int id, int size, double *data) {
 #ifdef MRCPP_HAS_MPI
@@ -755,6 +791,23 @@ int BankAccount::get_data(int id, int size, double *data) {
     return 1;
 }
 
+
+// Get complex data with identity id; receives size * 2 doubles into data. Does nothing without MPI; always returns 1.
+int BankAccount::get_data(int id, int size, ComplexDouble *data) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    int messages[message_size];
+    messages[0] = GET_DATA;
+    messages[1] = account_id;
+    messages[2] = id;
+    messages[3] = MIN_SCALE;
+    MPI_Send(messages, 4, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); // request, tag 0, to the bank rank selected by id
+    // fetch as twice as many doubles
+    MPI_Recv(data, size*2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
+#endif
+    return 1;
+}
+
 // get data with identity id
 int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) {
 #ifdef MRCPP_HAS_MPI
@@ -774,6 +827,26 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, double *data) {
     return 1;
 }
 
+// Get complex data with identity nIdx; receives size * 2 doubles into data. Does nothing without MPI; always returns 1.
+int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    int messages[message_size];
+    int id = std::abs(nIdx.getTranslation(0) + nIdx.getTranslation(1) + nIdx.getTranslation(2)); // selects the bank rank below
+    messages[0] = GET_DATA;
+    messages[1] = account_id;
+    messages[2] = id;
+    messages[3] = nIdx.getScale();
+    messages[4] = nIdx.getTranslation(0);
+    messages[5] = nIdx.getTranslation(1);
+    messages[6] = nIdx.getTranslation(2);
+    MPI_Send(messages, 7, MPI_INT, bankmaster[id % bank_size], 0, comm_bank); // request, tag 0
+    // fetch as twice as many doubles
+    MPI_Recv(data, size*2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
+#endif
+    return 1;
+}
+
 // save data in Bank with identity id as part of block with identity nodeid.
 int BankAccount::put_nodedata(int id, int nodeid, int size, double *data) {
 #ifdef MRCPP_HAS_MPI
diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index 501faa7a0..7293d73ab 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -100,9 +100,13 @@ class BankAccount {
     int put_func(int id, ComplexFunction &func);
     int get_func(int id, ComplexFunction &func, int wait = 0);
     int put_data(int id, int size, double *data);
+    int put_data(int id, int size, ComplexDouble *data);
     int get_data(int id, int size, double *data);
+    int get_data(int id, int size, ComplexDouble *data);
     int put_data(NodeIndex<3> nIdx, int size, double *data);
+    int put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data);
     int get_data(NodeIndex<3> nIdx, int size, double *data);
+    int get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data);
     int put_nodedata(int id, int nodeid, int size, double *data);
     int get_nodedata(int id, int nodeid, int size, double *data, std::vector<int> &idVec);
     int get_nodeblock(int nodeid, double *data, std::vector<int> &idVec);
diff --git a/src/utils/mpi_utils.cpp b/src/utils/mpi_utils.cpp
index d50b15868..e193aea3a 100644
--- a/src/utils/mpi_utils.cpp
+++ b/src/utils/mpi_utils.cpp
@@ -199,15 +199,25 @@ template <int D, typename T> void share_tree(FunctionTree<D, T> &tree, int src,
 }
 template class SharedMemory<double>;
 template class SharedMemory<ComplexDouble>;
-  
-template void send_tree<1>(FunctionTree<1> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void send_tree<2>(FunctionTree<2> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void send_tree<3>(FunctionTree<3> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void recv_tree<1>(FunctionTree<1> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void recv_tree<2>(FunctionTree<2> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void recv_tree<3>(FunctionTree<3> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
-template void share_tree<1>(FunctionTree<1> &tree, int src, int tag, mrcpp::mpi_comm comm);
-template void share_tree<2>(FunctionTree<2> &tree, int src, int tag, mrcpp::mpi_comm comm);
-template void share_tree<3>(FunctionTree<3> &tree, int src, int tag, mrcpp::mpi_comm comm);
+
+template void send_tree<1>(FunctionTree<1, double> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void send_tree<2>(FunctionTree<2, double> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void send_tree<3>(FunctionTree<3, double> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<1>(FunctionTree<1, double> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<2>(FunctionTree<2, double> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<3>(FunctionTree<3, double> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void share_tree<1>(FunctionTree<1, double> &tree, int src, int tag, mrcpp::mpi_comm comm);
+template void share_tree<2>(FunctionTree<2, double> &tree, int src, int tag, mrcpp::mpi_comm comm);
+template void share_tree<3>(FunctionTree<3, double> &tree, int src, int tag, mrcpp::mpi_comm comm);
+
+template void send_tree<1>(FunctionTree<1, ComplexDouble> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void send_tree<2>(FunctionTree<2, ComplexDouble> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void send_tree<3>(FunctionTree<3, ComplexDouble> &tree, int dst, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<1>(FunctionTree<1, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<2>(FunctionTree<2, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void recv_tree<3>(FunctionTree<3, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm, int nChunks, bool coeff);
+template void share_tree<1>(FunctionTree<1, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm);
+template void share_tree<2>(FunctionTree<2, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm);
+template void share_tree<3>(FunctionTree<3, ComplexDouble> &tree, int src, int tag, mrcpp::mpi_comm comm);
 
 } // namespace mrcpp
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 2877d12e9..85afe2426 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -406,7 +406,7 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, MPI_Comm comm) {
+void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -446,10 +446,51 @@ void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, MPI_Comm comm) {
 #endif
 }
 
+/** @brief make union tree and send into rank zero */
+void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+/* 1) Each odd rank send to the left rank
+   2) All odd ranks are "deleted" (can exit routine)
+   3) new "effective" ranks are defined within the non-deleted ranks
+      effective rank = rank/fac , where fac are powers of 2
+   4) repeat
+ */
+#ifdef MRCPP_HAS_MPI
+    int comm_size, comm_rank;
+    MPI_Comm_rank(comm, &comm_rank);
+    MPI_Comm_size(comm, &comm_size);
+    if (comm_size == 1) return;
+
+    int fac = 1; // powers of 2
+    while (fac < comm_size) {
+        if ((comm_rank / fac) % 2 == 0) {
+            // receive
+            int src = comm_rank + fac;
+            if (src < comm_size) {
+                int tag = 3333 + src;
+                mrcpp::FunctionTree<3, ComplexDouble> tree_i(tree.getMRA());
+                mrcpp::recv_tree(tree_i, src, tag, comm, -1, false);
+                tree.appendTreeNoCoeff(tree_i); // make union grid
+            }
+        }
+        if ((comm_rank / fac) % 2 == 1) {
+            // send
+            int dest = comm_rank - fac;
+            if (dest >= 0) {
+                int tag = 3333 + comm_rank;
+                mrcpp::send_tree(tree, dest, tag, comm, -1, false);
+                break; // once data is sent we are done
+            }
+        }
+        fac *= 2;
+    }
+    MPI_Barrier(comm);
+#endif
+}
+
 /** @brief make union tree without coeff and send to all
  *  Include both real and imaginary parts
  */
-void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
+void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -465,6 +506,25 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, vector<ComplexFun
     mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
 }
 
+
+/** @brief make union tree without coeff and send to all
+ *  Include both real and imaginary parts
+ */
+    void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
+    /* 1) make union grid of own orbitals
+       2) make union grid with others orbitals (sent to rank zero)
+       3) rank zero broadcast func to everybody
+     */
+
+    int N = Phi.size();
+    for (int j = 0; j < N; j++) {
+        if (not mpi::my_orb(j)) continue;
+        tree.appendTreeNoCoeff(Phi[j]);
+    }
+    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+}
+
 /** @brief Distribute rank zero function to all ranks */
 void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
@@ -498,7 +558,39 @@ void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, MPI_Comm comm) {
+void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+/* use same strategy as a reduce, but in reverse order */
+#ifdef MRCPP_HAS_MPI
+    int comm_size, comm_rank;
+    MPI_Comm_rank(comm, &comm_rank);
+    MPI_Comm_size(comm, &comm_size);
+    if (comm_size == 1) return;
+
+    int fac = 1; // powers of 2
+    while (fac < comm_size) fac *= 2;
+    fac /= 2;
+
+    while (fac > 0) {
+        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) {
+            // receive
+            int src = comm_rank - fac;
+            int tag = 4334 + comm_rank;
+            mrcpp::recv_tree(tree, src, tag, comm, -1, false);
+        }
+        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
+            // send
+            int dst = comm_rank + fac;
+            int tag = 4334 + dst;
+            if (dst < comm_size) mrcpp::send_tree(tree, dst, tag, comm, -1, false);
+        }
+        fac /= 2;
+    }
+    MPI_Barrier(comm);
+#endif
+}
+
+/** @brief Distribute rank zero function to all ranks */
+void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 78a3e2fd9..50c5ad581 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -54,9 +54,12 @@ void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
 void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm);
 void broadcast_function(ComplexFunction &func, MPI_Comm comm);
 
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 
 void allreduce_vector(IntVector &vec, MPI_Comm comm);
 void allreduce_vector(DoubleVector &vec, MPI_Comm comm);
diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index 80a1d8a31..773c12ec9 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -152,7 +152,7 @@ template <int D> void testDifferentiationCplxABGV(double a, double b) {
     apply(dg_tree, diff, f_tree, 0);
 
     FunctionTree<D, ComplexDouble> err_tree(*mra);
-    add(-1.0, err_tree, 1.0, df_tree, -1.0, dg_tree);
+    add(-1.0, err_tree, {1.0, 0.0}, df_tree, {-1.0, 0.0}, dg_tree);
 
     double df_norm = std::sqrt(df_tree.getSquareNorm());
     double abs_err = std::sqrt(err_tree.getSquareNorm());

From fbc96f7a40aee720bcf50caed0b37edbbce92c0a Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 16 Jul 2024 08:49:10 +0200
Subject: [PATCH 08/38] FunctionTree deepcopy, addinplace, creator from
 components function

---
 api/mrcpp_declarations.h      |  2 +
 src/trees/FunctionTree.cpp    | 87 +++++++++++++++++++++++++++++++++++
 src/trees/FunctionTree.h      |  4 ++
 src/utils/CMakeLists.txt      |  3 ++
 src/utils/ComplexFunction.cpp | 21 ++++++++-
 src/utils/ComplexFunction.h   |  5 +-
 6 files changed, 120 insertions(+), 2 deletions(-)

diff --git a/api/mrcpp_declarations.h b/api/mrcpp_declarations.h
index d21058409..060c5ec50 100644
--- a/api/mrcpp_declarations.h
+++ b/api/mrcpp_declarations.h
@@ -62,6 +62,8 @@ template <int D, typename T = double> class NodeAllocator;
 
 template <int D, typename T = double> class MWNode;
 template <int D, typename T = double> class FunctionNode;
+template <int D, typename T> class CompFunction;
+class ComplexFunction;
 class OperatorNode;
 
 template <int D> class IdentityConvolution;
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 1c91cf2cd..26a298371 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -36,6 +36,7 @@
 #include "utils/mpi_utils.h"
 #include "utils/periodic_utils.h"
 #include "utils/tree_utils.h"
+#include "treebuilders/grid.h"
 
 using namespace Eigen;
 
@@ -419,6 +420,37 @@ template <int D, typename T> void FunctionTree<D, T>::add(T c, FunctionTree<D, T
     this->calcSquareNorm();
     inp.deleteGenerated();
 }
+/** @brief In-place addition with MW function representations, fixed grid
+ *
+ * @param[in] c: Numerical coefficient of input function
+ * @param[in] inp: Input function to add
+ *
+ * @details The input function is added on the union of the current grid and
+ * the grid of the input function.
+ *
+ */
+template <int D, typename T> void FunctionTree<D, T>::add_inplace(T c, FunctionTree<D, T> &inp) {
+    if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
+    if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
+    build_grid(*this, inp);
+#pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
+    {
+        int nNodes = this->getNEndNodes();
+#pragma omp for schedule(guided)
+        for (int n = 0; n < nNodes; n++) {
+            MWNode<D, T> &out_node = *this->endNodeTable[n];
+            MWNode<D, T> &inp_node = inp.getNode(out_node.getNodeIndex());
+            T *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
+            for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] += c * inp_coefs[i]; }
+            out_node.calcNorms();
+        }
+    }
+    this->mwTransform(BottomUp);
+    this->calcSquareNorm();
+    inp.deleteGenerated();
+}
+
 /** @brief In-place addition of absolute values of MW function representations
  *
  * @param[in] c Numerical coefficient of input function
@@ -808,6 +840,61 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
     return this->NodeIndex2serialIx.size();
 }
 
+/**  @brief Deep copy of inp into the caller-allocated tree out
+ * @param[out] out: Existing tree receiving grid and function; it is not allocated here (the pointer is passed by value)
+ * @details Exact copy without any binding between old and new tree
+ */
+template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out, FunctionTree<D, T> &inp) {
+    if (out == nullptr) MSG_ABORT("Output tree must be allocated by the caller");
+    copy_grid(*out, inp);
+    copy_func(*out, inp);
+}
+
+/**  @brief New tree with only real part; allocated with new, the caller owns the returned pointer
+ * NOTE(review): out starts from a freshly constructed tree and its nodes are fetched with getNode() - confirm the resulting grid is as intended */
+template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Real() {
+    FunctionTree<D, double> *out = new FunctionTree<D, double>(this->getMRA(), this->getName());
+#pragma omp parallel num_threads(mrcpp_get_num_threads())
+    {
+        int nNodes = this->getNEndNodes();
+#pragma omp for schedule(guided)
+        for (int n = 0; n < nNodes; n++) {
+            MWNode<D, T> &inp_node = *this->endNodeTable[n];
+            MWNode<D, double> &out_node = out->getNode(inp_node.getNodeIndex()); // by reference, so the values land in out
+            double *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
+            for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] = std::real(inp_coefs[i]); }
+            out_node.calcNorms();
+        }
+    }
+    out->mwTransform(BottomUp);
+    out->calcSquareNorm();
+    return out;
+}
+
+/**  @brief New tree with only imaginary part; allocated with new, the caller owns the returned pointer
+ * NOTE(review): out starts from a freshly constructed tree and its nodes are fetched with getNode() - confirm the resulting grid is as intended */
+template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag() {
+    FunctionTree<D, double> *out = new FunctionTree<D, double>(this->getMRA(), this->getName());
+#pragma omp parallel num_threads(mrcpp_get_num_threads())
+    {
+        int nNodes = this->getNEndNodes();
+#pragma omp for schedule(guided)
+        for (int n = 0; n < nNodes; n++) {
+            MWNode<D, T> &inp_node = *this->endNodeTable[n];
+            MWNode<D, double> &out_node = out->getNode(inp_node.getNodeIndex()); // by reference, so the values land in out
+            double *out_coefs = out_node.getCoefs();
+            const T *inp_coefs = inp_node.getCoefs();
+            for (int i = 0; i < inp_node.getNCoefs(); i++) { out_coefs[i] = std::imag(inp_coefs[i]); }
+            out_node.calcNorms();
+        }
+    }
+    out->mwTransform(BottomUp);
+    out->calcSquareNorm();
+    return out;
+}
+
+
 template class FunctionTree<1, double>;
 template class FunctionTree<2, double>;
 template class FunctionTree<3, double>;
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index c9e8ecde8..6e9249467 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -80,6 +80,7 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     void rescale(T c);
     void normalize();
     void add(T c, FunctionTree<D, T> &inp);
+    void add_inplace(T c, FunctionTree<D, T> &inp);
     void absadd(T c, FunctionTree<D, T> &inp);
     void multiply(T c, FunctionTree<D, T> &inp);
     void map(FMap<T, T> fmap);
@@ -113,6 +114,9 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
 
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
+    void deep_copy(FunctionTree<D, T> *out, FunctionTree<D, T> &inp);
+    FunctionTree<D, double> *Real();
+    FunctionTree<D, double> *Imag();
 protected:
     std::unique_ptr<NodeAllocator<D, T>> genNodeAllocator_p{nullptr};
     std::ostream &print(std::ostream &o) const override;
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
index bfaa4e0ba..2087a6d35 100644
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -12,6 +12,8 @@ target_sources(mrcpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Bank.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/parallel.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/CompFunction.cpp
   )
 
 get_filename_component(_dirname ${CMAKE_CURRENT_LIST_DIR} NAME)
@@ -29,6 +31,7 @@ list(APPEND ${_dirname}_h
   ${CMAKE_CURRENT_SOURCE_DIR}/Bank.h
   ${CMAKE_CURRENT_SOURCE_DIR}/parallel.h
   ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/CompFunction.h
   )
 
 # Sets install directory for all the headers in the list
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
index 63d855727..e2b110d1c 100644
--- a/src/utils/ComplexFunction.cpp
+++ b/src/utils/ComplexFunction.cpp
@@ -1,4 +1,3 @@
-#include "ComplexFunction.h"
 #include "Bank.h"
 #include "Printer.h"
 #include "Timer.h"
@@ -8,6 +7,8 @@
 #include "treebuilders/project.h"
 #include "trees/FunctionNode.h"
 #include "treebuilders/add.h"
+#include "ComplexFunction.h"
+#include "CompFunction.h"
 
 using mrcpp::Timer;
 
@@ -15,6 +16,24 @@ namespace mrcpp {
 
 MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
 
+ComplexFunction::ComplexFunction(CompFunction<3, double> cfunc)
+        : funcMRA(defaultMRA)
+        , func_ptr(std::make_shared<TreePtr>(false))
+        , rank(cfunc.rank) {
+    setSpin(cfunc.data.n1[0]);
+    setOcc(cfunc.data.n2[0]);
+    setReal(cfunc.Comp[0]);
+}
+ComplexFunction::ComplexFunction(CompFunction<3,ComplexDouble> cfunc)
+        : funcMRA(defaultMRA)
+        , func_ptr(std::make_shared<TreePtr>(false))
+        , rank(cfunc.rank) {
+    setSpin(cfunc.data.n1[0]);
+    setOcc(cfunc.data.n2[0]);
+    setReal(cfunc.Comp[0]->Real());
+    setImag(cfunc.Comp[0]->Imag());
+}
+
 ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
         : funcMRA(defaultMRA)
         , func_ptr(funcptr) {}
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index c43d3475c..7326cf235 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -21,8 +21,9 @@ class MPI_FuncVector;
 
 namespace mrcpp {
 
+template <int D, typename T> class CompFunction;
 class BankAccount;
-  template <int D, typename T> class FunctionTree;
+template <int D, typename T> class FunctionTree;
 template <int D> class MultiResolutionAnalysis;
 
 using ComplexDouble = std::complex<double>;
@@ -110,6 +111,8 @@ class TreePtr final {
 
 class ComplexFunction {
 public:
+    ComplexFunction(CompFunction<3, double> cfunc);
+    ComplexFunction(CompFunction<3,ComplexDouble> cfunc);
     ComplexFunction(std::shared_ptr<TreePtr> funcptr);
     ComplexFunction(const ComplexFunction &func);
     ComplexFunction(int spin = 0, int occ = -1, int rank = -1, bool share = false);

From 3aa778a9b69c5dadba06fd518361b07023d1e1cb Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Wed, 17 Jul 2024 15:35:17 +0200
Subject: [PATCH 09/38] Component Functions: mpi send and recv

---
 src/trees/FunctionTree.cpp |   2 +-
 src/utils/parallel.cpp     | 115 +++++++++++++++++++++++++++++++++++++
 src/utils/parallel.h       |  11 +++-
 3 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 26a298371..1e1ccc7f4 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -432,7 +432,7 @@ template <int D, typename T> void FunctionTree<D, T>::add(T c, FunctionTree<D, T
 template <int D, typename T> void FunctionTree<D, T>::add_inplace(T c, FunctionTree<D, T> &inp) {
     if (this->getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
-    build_grid(*this, inp);
+    while (refine_grid(*this, inp)) {};
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
     {
         int nNodes = this->getNEndNodes();
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 85afe2426..7675800a1 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -6,6 +6,7 @@
 
 #include "Bank.h"
 #include "ComplexFunction.h"
+#include "CompFunction.h"
 #include "omp_utils.h"
 #include "parallel.h"
 #include "trees/FunctionTree.h"
@@ -353,6 +354,35 @@ void mpi::recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
 #endif
 }
 
+// send a component function with MPI
+template <typename T>
+void mpi::send_function(CompFunction<3, T> &func, int dst, int tag, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
+    for (int i = 0; i < func.data.Ncomp; i++) {
+        //make sure that Nchunks is up to date
+        func.Nchunks[i] = func.Comp[i]->getNChunks();
+    }
+    MPI_Send(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, dst, 0, comm);
+    for (int i = 0; i < func.data.Ncomp; i++) {
+        mrcpp::send_tree(*func.Comp[i], dst, tag, comm, func.Nchunks[i]);
+    }
+#endif
+}
+
+// receive a component function with MPI
+template <typename T>
+void mpi::recv_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    int func_ncomp_in = func.Ncomp;
+    MPI_Recv(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status);
+    for (int i = 0; i < func.data.Ncomp; i++) {
+        if (func_ncomp_in <= i) func.alloc(i);
+        mrcpp::recv_tree(*func.Comp[i], src, tag, comm, func.Nchunks[i]);
+    }
+#endif
+}
+
 /** Update a shared function after it has been changed by one of the MPI ranks. */
 void mpi::share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
@@ -405,6 +435,50 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 #endif
 }
 
+template <typename T>
+/** @brief Add all mpi function into rank zero */
+void mpi::reduce_function(double prec, CompFunction<3,T> &func, MPI_Comm comm) {
+/* 1) Each odd rank send to the left rank
+   2) All odd ranks are "deleted" (can exit routine)
+   3) new "effective" ranks are defined within the non-deleted ranks
+      effective rank = rank/fac , where fac are powers of 2
+   4) repeat
+ */
+#ifdef MRCPP_HAS_MPI
+    int comm_size, comm_rank;
+    MPI_Comm_rank(comm, &comm_rank);
+    MPI_Comm_size(comm, &comm_size);
+    if (comm_size == 1) return;
+
+    int fac = 1; // powers of 2
+    while (fac < comm_size) {
+        if ((comm_rank / fac) % 2 == 0) {
+            // receive
+            int src = comm_rank + fac;
+            if (src < comm_size) {
+                MultiResolutionAnalysis<3> mra(func.Comp[0]->getMRA());
+                CompFunction<3,T> func_i(mra);
+                int tag = 3333 + src;
+                mpi::recv_function(func_i, src, tag, comm);
+                func.add(1.0, func_i); // add in place using union grid
+                func.crop(prec);
+            }
+        }
+        if ((comm_rank / fac) % 2 == 1) {
+            // send
+            int dest = comm_rank - fac;
+            if (dest >= 0) {
+                int tag = 3333 + comm_rank;
+                mpi::send_function(func, dest, tag, comm);
+                break; // once data is sent we are done
+            }
+        }
+        fac *= 2;
+    }
+    MPI_Barrier(comm);
+#endif
+}
+
 /** @brief make union tree and send into rank zero */
 void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
@@ -557,6 +631,39 @@ void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 #endif
 }
 
+template <typename T>
+/** @brief Distribute rank zero function to all ranks */
+void mpi::broadcast_function(CompFunction<3, T> &func, MPI_Comm comm) {
+/* use same strategy as a reduce, but in reverse order */
+#ifdef MRCPP_HAS_MPI
+    int comm_size, comm_rank;
+    MPI_Comm_rank(comm, &comm_rank);
+    MPI_Comm_size(comm, &comm_size);
+    if (comm_size == 1) return;
+
+    int fac = 1; // powers of 2
+    while (fac < comm_size) fac *= 2;
+    fac /= 2;
+
+    while (fac > 0) {
+        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) {
+            // receive
+            int src = comm_rank - fac;
+            int tag = 4334 + comm_rank;
+            mpi::recv_function(func, src, tag, comm);
+        }
+        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
+            // send
+            int dst = comm_rank + fac;
+            int tag = 4334 + dst;
+            if (dst < comm_size) mpi::send_function(func, dst, tag, comm);
+        }
+        fac /= 2;
+    }
+    MPI_Barrier(comm);
+#endif
+}
+
 /** @brief Distribute rank zero function to all ranks */
 void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
@@ -620,5 +727,13 @@ void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MP
     MPI_Barrier(comm);
 #endif
 }
+    template void mpi::reduce_function(double prec, CompFunction<3, double> &func, MPI_Comm comm);
+    template void mpi::reduce_function(double prec, CompFunction<3, ComplexDouble> &func, MPI_Comm comm);
+    template void mpi::broadcast_function(CompFunction<3, double> &func, MPI_Comm comm);
+    template void mpi::broadcast_function(CompFunction<3, ComplexDouble> &func, MPI_Comm comm);
+    template void mpi::send_function(CompFunction<3, double> &func, int dst, int tag, MPI_Comm comm);
+    template void mpi::send_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
+    template void mpi::recv_function(CompFunction<3, double> &func, int dst, int tag, MPI_Comm comm);
+    template void mpi::recv_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
 
 } // namespace mrcpp
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 50c5ad581..bbda2332a 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -3,6 +3,7 @@
 #include <Eigen/Core>
 
 #include "ComplexFunction.h"
+#include "CompFunction.h"
 #include "mpi_utils.h"
 #include "trees/MultiResolutionAnalysis.h"
 #include <map>
@@ -10,8 +11,6 @@
 
 // define a class for things that can be sent with MPI
 
-template <int D> class MultiResolutionAnalysis;
-
 using namespace Eigen;
 
 using IntVector = Eigen::VectorXi;
@@ -49,10 +48,18 @@ void free_foreign(MPI_FuncVector &Phi);
 
 void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
+template <typename T>
+void send_function(CompFunction<3, T> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
+template <typename T>
+void recv_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
 void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
 
 void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm);
 void broadcast_function(ComplexFunction &func, MPI_Comm comm);
+template <typename T>
+void reduce_function(double prec, CompFunction<3, T> &func, MPI_Comm comm);
+template <typename T>
+void broadcast_function(CompFunction<3, T> &func, MPI_Comm comm);
 
 void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);

From 9ce1b761b83ab33ea26ac93406089608de71b43e Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 19 Jul 2024 10:28:26 +0200
Subject: [PATCH 10/38] Component Functions: norms

---
 src/utils/CompFunction.cpp | 105 +++++++++++++++++++++++++++++++++++++
 src/utils/CompFunction.h   |  63 ++++++++++++++++++++++
 2 files changed, 168 insertions(+)
 create mode 100644 src/utils/CompFunction.cpp
 create mode 100644 src/utils/CompFunction.h

diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
new file mode 100644
index 000000000..0888f438a
--- /dev/null
+++ b/src/utils/CompFunction.cpp
@@ -0,0 +1,105 @@
+#include <fstream>
+#include "Printer.h"
+#include "CompFunction.h"
+#include "ComplexFunction.h"
+
+namespace mrcpp {
+
+  template <int D>
+  MultiResolutionAnalysis<D> *defaultCompMRA; // Global MRA
+
+  template <int D, typename T>
+  template <int D_, typename std::enable_if<D_ == 3, int>::type>
+  CompFunction<D, T>::CompFunction(T value, ComplexFunction cplxfunc)
+      : Ncomp(1){
+      defaultCompMRA<3> = cplxfunc.funcMRA;
+      //we always copy real part
+      Comp[0] = new FunctionTree<D, T>(*cplxfunc.funcMRA);
+      if (not cplxfunc.hasReal()) MSG_ABORT("Input funcion has nor real part");
+      deep_copy(Comp[0], cplxfunc.real());
+      if (std::is_same<T, ComplexDouble>::value){
+          //We add the imaginary part, if it exist in input function
+          if (cplxfunc.hasImag()){
+              ComplexDouble c;
+              if(cplxfunc.conjugate()) c = {0.0, -1.0};
+              else c = {0.0, 1.0};
+              Comp[0].add_inplace(c, cplxfunc.imag());
+          }
+      }
+      // set metadata
+      data.n1[0] = cplxfunc.spin();
+      data.n2[0] = cplxfunc.occ();
+
+      rank = cplxfunc.getRank();
+  }
+  template <int D, typename T>
+  CompFunction<D, T>::CompFunction(MultiResolutionAnalysis<D> &mra)
+  { defaultCompMRA<D> = &mra;
+    data.Ncomp = 0;
+    Comp[0]=nullptr;
+    Comp[1]=nullptr;
+    Comp[2]=nullptr;
+    Comp[3]=nullptr;
+  }
+  template <int D, typename T>
+  double CompFunction<D, T>::norm() {
+     double norm = squaredNorm();
+     for (int i = 0; i < Ncomp; i++) {
+          norm += Comp[i]->getSquareNorm();
+     }
+     if (norm > 0.0) norm = std::sqrt(norm);
+     return norm;
+  }
+  template <int D, typename T>
+  double CompFunction<D, T>::squaredNorm() {
+     double norm = squaredNorm();
+     for (int i = 0; i < Ncomp; i++) {
+          norm += Comp[i]->getSquareNorm();
+     }
+     return norm;
+  }
+  template <int D, typename T>
+  void CompFunction<D, T>::alloc(int i) {
+      Comp[i] = new FunctionTree<D, T> (*defaultCompMRA<D>);
+  }
+
+/** @brief In place addition.
+ *
+ * Output is extended to union grid.
+ *
+ */
+template <int D, typename T>
+void CompFunction<D, T>::add(T c, CompFunction<D, T> inp) {
+    for (int i = 0; i < Ncomp; i++) {
+        if (i >= inp.Ncomp) break;
+        Comp[i]->add_inplace(c,*inp.Comp[i]);
+    }
+    for (int i = Ncomp; i < inp.Ncomp; i++) {
+        alloc(i);
+        Comp[i]->add_inplace(c,*inp.Comp[i]);
+    }
+}
+
+template <int D, typename T>
+int CompFunction<D, T>::crop(double prec) {
+    if (prec < 0.0) return 0;
+    int nChunksremoved = 0;
+    for (int i = 0; i < Ncomp; i++) {
+        nChunksremoved += Comp[i]->crop(prec, 1.0, false);
+    }
+    return nChunksremoved;
+}
+
+
+template class  MultiResolutionAnalysis<1>;
+template class  MultiResolutionAnalysis<2>;
+template class  MultiResolutionAnalysis<3>;
+template class CompFunction<1, double>;
+template class CompFunction<2, double>;
+template class CompFunction<3, double>;
+
+template class CompFunction<1, ComplexDouble>;
+template class CompFunction<2, ComplexDouble>;
+template class CompFunction<3, ComplexDouble>;
+
+} // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
new file mode 100644
index 000000000..5872641b0
--- /dev/null
+++ b/src/utils/CompFunction.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "trees/FunctionTree.h"
+
+using namespace Eigen;
+
+namespace mrcpp {
+
+template <int D>
+struct CompFunctionData {
+    // additional data that describe the overall multicomponent function (defined by user):
+    // occupancy, quantum number, norm, etc.
+    int Ncomp{1}; // number of components defined
+    int rank{-1}; // rank (index) if part of a vector
+    int CompFn1{0};
+    int CompFn2{0};
+    int isreal{1}; // T=double
+    int iscomplex{0}; // T=DoubleComplex
+    double CompFd1{0.0};
+    double CompFd2{0.0};
+    double CompFd3{0.0};
+    // additional data that describe each component (defined by user):
+    // occupancy, quantum number, conjugation, norm, etc.
+    //Note: defined with fixed size to ease copying and MPI send
+    int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
+    int n1[4]{0,0,0,0};
+    int n2[4]{0,0,0,0};
+    int n3[4]{0,0,0,0};
+    int n4[4]{0,0,0,0};
+    double d1[4]{0.0,0.0,0.0,0.0};
+    double d2[4]{0.0,0.0,0.0,0.0};
+    double d3[4]{0.0,0.0,0.0,0.0};
+};
+
+template <int D, typename T> class CompFunction {
+public:
+    CompFunction(MultiResolutionAnalysis<D> &mra);
+    FunctionTree<D, T> *Comp[4];
+
+    std::string name;
+
+    // additional data that describe each component (defined by user):
+    CompFunctionData<D> data;
+    int& Ncomp = data.Ncomp; //number of components defined
+    int& rank = data.rank; // rank (index) if part of a vector
+    int& isreal = data.isreal; // T=double
+    int& iscomplex = data.iscomplex; // T=DoubleComplex
+    int* Nchunks = data.Nchunks; // number of chunks of each component tree
+
+    // ComplexFunctions are only defined for D=3
+    template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
+    CompFunction(T value, ComplexFunction cplxfunc);
+
+    double norm();
+    double squaredNorm();
+    void alloc(int i);
+    void add(T c, CompFunction<D, T> inp);
+    int crop(double prec);
+};
+
+template <int D, typename T = double> using CompFunctionVector = std::vector<CompFunction<D, T> *>;
+
+} // namespace mrcpp

From 3057d0afa1f10f6534d494d27859dda7a5588cf0 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Sat, 20 Jul 2024 11:27:55 +0200
Subject: [PATCH 11/38] CompFunction destructor

---
 src/utils/CompFunction.cpp | 5 +++--
 src/utils/CompFunction.h   | 8 +++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 0888f438a..c66d98073 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -10,8 +10,8 @@ namespace mrcpp {
 
   template <int D, typename T>
   template <int D_, typename std::enable_if<D_ == 3, int>::type>
-  CompFunction<D, T>::CompFunction(T value, ComplexFunction cplxfunc)
-      : Ncomp(1){
+  CompFunction<D, T>::CompFunction(ComplexFunction cplxfunc){
+      Ncomp = 1;
       defaultCompMRA<3> = cplxfunc.funcMRA;
       //we always copy real part
       Comp[0] = new FunctionTree<D, T>(*cplxfunc.funcMRA);
@@ -41,6 +41,7 @@ namespace mrcpp {
     Comp[2]=nullptr;
     Comp[3]=nullptr;
   }
+
   template <int D, typename T>
   double CompFunction<D, T>::norm() {
      double norm = squaredNorm();
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 5872641b0..fc01c68af 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -49,7 +49,13 @@ template <int D, typename T> class CompFunction {
 
     // ComplexFunctions are only defined for D=3
     template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
-    CompFunction(T value, ComplexFunction cplxfunc);
+    CompFunction(ComplexFunction cplxfunc);
+    // CompFunction destructor
+    ~CompFunction() {
+        for (int i = 0; i < Ncomp; i++) {
+            delete Comp[i];
+        }
+    }
 
     double norm();
     double squaredNorm();

From 9b242813bb1e3db43800bdcd1bb5c1096bcf8e46 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Wed, 24 Jul 2024 14:59:18 +0200
Subject: [PATCH 12/38] FourComp for share, add, multiply rescale, dot

---
 api/MWFunctions               |   1 +
 api/mrcpp_declarations.h      |   2 +-
 src/trees/FunctionNode.cpp    |  71 +++++++--
 src/trees/FunctionNode.h      |  18 ++-
 src/trees/FunctionTree.cpp    |   9 +-
 src/trees/FunctionTree.h      |   2 +-
 src/utils/CompFunction.cpp    | 263 ++++++++++++++++++++++++++++++++--
 src/utils/CompFunction.h      |  53 ++++++-
 src/utils/ComplexFunction.cpp |  36 ++++-
 src/utils/ComplexFunction.h   |  15 +-
 src/utils/parallel.cpp        |  15 ++
 src/utils/parallel.h          |   2 +
 12 files changed, 449 insertions(+), 38 deletions(-)

diff --git a/api/MWFunctions b/api/MWFunctions
index b41efc1c3..13a53214a 100644
--- a/api/MWFunctions
+++ b/api/MWFunctions
@@ -28,6 +28,7 @@
 #include "trees/FunctionTree.h"
 #include "trees/FunctionTreeVector.h"
 #include "utils/ComplexFunction.h"
+#include "utils/CompFunction.h"
 
 #include "core/InterpolatingBasis.h"
 #include "core/LegendreBasis.h"
diff --git a/api/mrcpp_declarations.h b/api/mrcpp_declarations.h
index 060c5ec50..0c04989b0 100644
--- a/api/mrcpp_declarations.h
+++ b/api/mrcpp_declarations.h
@@ -62,7 +62,7 @@ template <int D, typename T = double> class NodeAllocator;
 
 template <int D, typename T = double> class MWNode;
 template <int D, typename T = double> class FunctionNode;
-template <int D, typename T> class CompFunction;
+template <int D = 3, typename T = double> class CompFunction;
 class ComplexFunction;
 class OperatorNode;
 
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index 98858e503..0749ff37e 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -405,50 +405,101 @@ template <> void FunctionNode<3>::reCompress() {
  * Integrates the product of the functions represented by the scaling basis on
  * the node on the full support of the nodes. The scaling basis is fully
  * orthonormal, and the inner product is simply the dot product of the
- * coefficient vectors. Assumes the nodes have identical support. */
-template <int D, typename T> T dot_scaling(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket) {
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: will take conjugate of bra in case of complex values.
+ */
+    template <int D> double dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
-    const T *a = bra.getCoefs();
-    const T *b = ket.getCoefs();
+    const double *a = bra.getCoefs();
+    const double *b = ket.getCoefs();
 
     int size = bra.getKp1_d();
 #ifdef HAVE_BLAS
     return cblas_ddot(size, a, 1, b, 1);
 #else
-    T result = 0.0;
+    double result = 0.0;
     for (int i = 0; i < size; i++) result += a[i] * b[i];
     return result;
 #endif
 }
 
+
+/** Inner product of the functions represented by the scaling basis of the nodes.
+ *
+ * Integrates the product of the functions represented by the scaling basis on
+ * the node on the full support of the nodes. The scaling basis is fully
+ * orthonormal, and the inner product is simply the dot product of the
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: will take conjugate of bra in case of complex values.
+ */
+    template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const ComplexDouble *a = bra.getCoefs();
+    const ComplexDouble *b = ket.getCoefs();
+
+    int size = bra.getKp1_d();
+    ComplexDouble result = 0.0;
+    for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
+    return result;
+}
+
 /** Inner product of the functions represented by the wavelet basis of the nodes.
  *
  * Integrates the product of the functions represented by the wavelet basis on
  * the node on the full support of the nodes. The wavelet basis is fully
  * orthonormal, and the inner product is simply the dot product of the
- * coefficient vectors. Assumes the nodes have identical support. */
-template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket) {
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: will take conjugate of bra in case of complex values.
+ */
+    template <int D>
+    double dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
     if (bra.isGenNode() or ket.isGenNode()) return 0.0;
 
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
-    const T *a = bra.getCoefs();
-    const T *b = ket.getCoefs();
+    const double *a = bra.getCoefs();
+    const double *b = ket.getCoefs();
 
     int start = bra.getKp1_d();
     int size = (bra.getTDim() - 1) * start;
 #ifdef HAVE_BLAS
     return cblas_ddot(size, &a[start], 1, &b[start], 1);
 #else
-    T result = 0.0;
+    double result = 0.0;
     for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
     return result;
 #endif
 }
 
+/** Inner product of the functions represented by the wavelet basis of the nodes.
+ *
+ * Integrates the product of the functions represented by the wavelet basis on
+ * the node on the full support of the nodes. The wavelet basis is fully
+ * orthonormal, and the inner product is simply the dot product of the
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: will take conjugate of bra in case of complex values.
+ */
+    template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+    if (bra.isGenNode() or ket.isGenNode()) return 0.0;
+
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const ComplexDouble *a = bra.getCoefs();
+    const ComplexDouble *b = ket.getCoefs();
+
+    int start = bra.getKp1_d();
+    int size = (bra.getTDim() - 1) * start;
+    ComplexDouble result = 0.0;
+    for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
+    return result;
+}
+
 template double dot_scaling(const FunctionNode<1, double> &bra, const FunctionNode<1, double> &ket);
 template double dot_scaling(const FunctionNode<2, double> &bra, const FunctionNode<2, double> &ket);
 template double dot_scaling(const FunctionNode<3, double> &bra, const FunctionNode<3, double> &ket);
diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h
index 14c44fb7e..8f2ff4fb5 100644
--- a/src/trees/FunctionNode.h
+++ b/src/trees/FunctionNode.h
@@ -79,8 +79,22 @@ template <int D, typename T> class FunctionNode final : public MWNode<D, T> {
     T integrateInterpolating() const;
     T integrateValues() const;
 };
+template <int D>
+double dot_scaling (const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
+template <int D>
+double dot_wavelet(const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
 
-template <int D, typename T> T dot_scaling(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);
-template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);
+template <int D>
+ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
+template <int D>
+ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
 
+    /*double FunctionNode<D, double>::dot_scaling (const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
+double FunctionNode<D, double>::dot_scaling(const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
+ComplexDouble FunctionNode<D, ComplexDouble>::dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
+ComplexDouble FunctionNode<D, ComplexDouble>::dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
+
+
+    template <int D, typename T> T dot_scaling(const FunctionNode<D, T > &bra, const FunctionNode<D, T> &ket);
+    template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);    */
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 1e1ccc7f4..46ef40897 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -844,10 +844,11 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
  *
  * @details Exact copy without any binding between old and new tree
  */
-template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out,FunctionTree<D, T> &inp){
-    out = new FunctionTree<D, T> (inp.getMRA(), inp.getName());
-    copy_grid(*out, inp);
-    copy_func(*out, inp);
+template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out){
+    delete out;
+    out = new FunctionTree<D, T> (this->getMRA(), this->getName());
+    copy_grid(*out, *this);
+    copy_func(*out, *this);
 }
 
 /**  @brief New tree with only real part
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 6e9249467..4c9e632c6 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -114,7 +114,7 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
 
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
-    void deep_copy(FunctionTree<D, T> *out, FunctionTree<D, T> &inp);
+    void deep_copy(FunctionTree<D, T> *out);
     FunctionTree<D, double> *Real();
     FunctionTree<D, double> *Imag();
 protected:
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index c66d98073..d475b6cae 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -1,40 +1,74 @@
 #include <fstream>
 #include "Printer.h"
+#include "parallel.h"
+#include "treebuilders/project.h"
+#include "treebuilders/add.h"
+#include "treebuilders/multiply.h"
 #include "CompFunction.h"
 #include "ComplexFunction.h"
 
 namespace mrcpp {
 
   template <int D>
-  MultiResolutionAnalysis<D> *defaultCompMRA; // Global MRA
+  MultiResolutionAnalysis<D> *defaultCompMRA = nullptr; // Global MRA
 
   template <int D, typename T>
   template <int D_, typename std::enable_if<D_ == 3, int>::type>
   CompFunction<D, T>::CompFunction(ComplexFunction cplxfunc){
       Ncomp = 1;
+      if (std::is_same<T, ComplexDouble>::value) {
+          isreal = 0;
+          iscomplex = 1;
+      } else {
+          isreal = 1;
+          iscomplex = 0;
+      }
       defaultCompMRA<3> = cplxfunc.funcMRA;
       //we always copy real part
       Comp[0] = new FunctionTree<D, T>(*cplxfunc.funcMRA);
-      if (not cplxfunc.hasReal()) MSG_ABORT("Input funcion has nor real part");
-      deep_copy(Comp[0], cplxfunc.real());
-      if (std::is_same<T, ComplexDouble>::value){
+      if (not cplxfunc.hasReal()) MSG_ABORT("Input function has no real part");
+      FunctionTree<D, T>::deep_copy(Comp[0], cplxfunc.real());
+      if ( iscomplex ){
           //We add the imaginary part, if it exist in input function
           if (cplxfunc.hasImag()){
               ComplexDouble c;
-              if(cplxfunc.conjugate()) c = {0.0, -1.0};
-              else c = {0.0, 1.0};
-              Comp[0].add_inplace(c, cplxfunc.imag());
+              if(cplxfunc.conjugate())  MSG_ERROR("onjugaison not implemented");
+              Comp[0].add_inplace(1.0, cplxfunc.imag());
           }
-      }
+      } else if (cplxfunc.hasImag()) MSG_WARN("Complex part is truncated")
       // set metadata
       data.n1[0] = cplxfunc.spin();
       data.n2[0] = cplxfunc.occ();
 
       rank = cplxfunc.getRank();
   }
+
+  template <int D, typename T>
+  CompFunction<D, T>::CompFunction()
+  { if (std::is_same<T, ComplexDouble>::value) {
+          isreal = 0;
+          iscomplex = 1;
+      } else {
+          isreal = 1;
+          iscomplex = 0;
+      }
+    data.Ncomp = 0;
+    Comp[0]=nullptr;
+    Comp[1]=nullptr;
+    Comp[2]=nullptr;
+    Comp[3]=nullptr;
+  }
+
   template <int D, typename T>
   CompFunction<D, T>::CompFunction(MultiResolutionAnalysis<D> &mra)
-  { defaultCompMRA<D> = &mra;
+  { if (std::is_same<T, ComplexDouble>::value) {
+          isreal = 0;
+          iscomplex = 1;
+      } else {
+          isreal = 1;
+          iscomplex = 0;
+      }
+    defaultCompMRA<D> = &mra;
     data.Ncomp = 0;
     Comp[0]=nullptr;
     Comp[1]=nullptr;
@@ -42,8 +76,69 @@ namespace mrcpp {
     Comp[3]=nullptr;
   }
 
+/** @brief Copy constructor
+ *
+ * Shallow copy: meta data is copied along with the component pointers,
+ * NO transfer of ownership.
+ */
+  template <int D, typename T>
+  CompFunction<D, T>::CompFunction(const CompFunction<D, T> &compfunc) {
+      data = compfunc.data;
+      Comp[0] = compfunc.Comp[0];
+      Comp[1] = compfunc.Comp[1];
+      Comp[2] = compfunc.Comp[2];
+      Comp[3] = compfunc.Comp[3];
+  }
+
+/** @brief Move constructor
+ *
+ * Shallow copy: meta data is copied along with the component pointers,
+ * NO transfer of ownership.
+ */
+  template <int D, typename T>
+  CompFunction<D, T>::CompFunction(CompFunction<D, T> && compfunc) {
+      data = compfunc.data;
+      Comp[0] = compfunc.Comp[0];
+      Comp[1] = compfunc.Comp[1];
+      Comp[2] = compfunc.Comp[2];
+      Comp[3] = compfunc.Comp[3];
+  }
+
+  template <int D, typename T>
+  CompFunction<D, T> &CompFunction<D, T>::operator=(const CompFunction<D, T> &func) {
+      if (this != &func) {
+          this->data = func.data;
+          for (int i = 0; i < Ncomp; i++) {
+              this->Comp[i] = func.Comp[i];
+          }
+      }
+      return *this;
+  }
+
+    template <int D, typename T>
+    template <int D_, typename std::enable_if<D_ == 3, int>::type>
+    CompFunction<D, T>::operator ComplexFunction() const {
+        return ComplexFunction(*this); // const conversion
+    }
+    //    template <int D, typename T>
+   //    template <int D_, typename std::enable_if<D_ == 3, int>::type>
+   //    CompFunction<D, T>::operator ComplexFunction() {
+   //        return ComplexFunction(std::move(*this)); // non-const conversion
+   //    }
+  //
+
+    template CompFunction<3,double>::operator ComplexFunction() const;
+//template CompFunction<3,double>::operator ComplexFunction();
+
+    template <int D, typename T>
+    void CompFunction<D, T>::flushFuncData() {
+      for (int i = 0; i < Ncomp; i++) {
+          Nchunks[i] = Comp[i]->getNChunksUsed();
+      }
+      for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
+    }
   template <int D, typename T>
-  double CompFunction<D, T>::norm() {
-     double norm = squaredNorm();
+  double CompFunction<D, T>::norm() const {
+     double norm = 0.0; // start from zero: the loop below already adds every component's square norm
      for (int i = 0; i < Ncomp; i++) {
           norm += Comp[i]->getSquareNorm();
@@ -52,7 +147,7 @@ namespace mrcpp {
      return norm;
   }
   template <int D, typename T>
-  double CompFunction<D, T>::squaredNorm() {
+  double CompFunction<D, T>::squaredNorm() const {
      double norm = squaredNorm();
      for (int i = 0; i < Ncomp; i++) {
           norm += Comp[i]->getSquareNorm();
@@ -61,6 +156,7 @@ namespace mrcpp {
   }
   template <int D, typename T>
   void CompFunction<D, T>::alloc(int i) {
+      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
       Comp[i] = new FunctionTree<D, T> (*defaultCompMRA<D>);
   }
 
@@ -81,6 +177,7 @@ void CompFunction<D, T>::add(T c, CompFunction<D, T> inp) {
     }
 }
 
+
 template <int D, typename T>
 int CompFunction<D, T>::crop(double prec) {
     if (prec < 0.0) return 0;
@@ -91,6 +188,17 @@ int CompFunction<D, T>::crop(double prec) {
     return nChunksremoved;
 }
 
+/** @brief Multiply every component by the scalar c, in place. */
+template <int D, typename T>
+void CompFunction<D, T>::rescale(T c) {
+    // Only a non-shared function, or the share master, does the work.
+    if (isShared() and not(mpi::share_master())) {
+        MSG_ERROR("Not implemented");
+        return;
+    }
+    for (int comp = 0; comp < Ncomp; comp++) Comp[comp]->rescale(c);
+}
+
 
 template class  MultiResolutionAnalysis<1>;
 template class  MultiResolutionAnalysis<2>;
@@ -103,4 +211,137 @@ template class CompFunction<1, ComplexDouble>;
 template class CompFunction<2, ComplexDouble>;
 template class CompFunction<3, ComplexDouble>;
 
+
+namespace compfunc {
+
+
+/** @brief Deep copy: meta data is copied along with the content of each component.
+ *  Each component of out is replaced by a newly allocated tree before it is filled.
+ */
+  template <int D, typename T>
+  void deep_copy(CompFunction<D, T> *out, const CompFunction<D, T> &inp) {
+      out->data = inp.data;
+      for (int i = 0; i < inp.Ncomp; i++) {
+          delete out->Comp[i]; // the old tree is gone: this pointer must not be handed on
+          out->Comp[i] = new FunctionTree<D, T>(inp.Comp[i]->getMRA());
+          inp.Comp[i]->deep_copy(out->Comp[i]);
+      }
+  }
+
+
+/** @brief out = a*inp_a + b*inp_b
+ *
+ * Thin wrapper that packs both terms and forwards them to linear_combination.
+ *
+ */
+template <int D, typename T>
+void add(CompFunction<D, T> &out, T a, CompFunction<D, T> inp_a, T b, CompFunction<D, T> inp_b, double prec) {
+    // Coefficients, in the same order as the functions below.
+    const std::vector<T> weights = {a, b};
+
+    // NB: a plain std::vector and not a CompFunctionVector, because this is not run in parallel!
+    std::vector<CompFunction<D, T>> terms;
+    terms.push_back(inp_a);
+    terms.push_back(inp_b);
+
+    linear_combination(out, weights, terms, prec);
+}
+
+/** @brief out = c_0*inp_0 + c_1*inp_1 + ... + c_N*inp_N, component by component
+ *
+ * Terms with std::norm(c_i) < MachineZero are skipped. OMP parallel, but not MPI parallel
+ */
+template <int D, typename T>
+    void linear_combination(CompFunction<D, T> &out, const std::vector<T> &c, std::vector<CompFunction<D, T>> &inp, double prec) {
+    if (inp.empty()) return; // nothing to combine; inp[0] below would be out of range
+    bool need_to_add = not(out.isShared()) or mpi::share_master();
+    for (int comp = 0; comp < inp[0].Ncomp; comp++) {
+        FunctionTreeVector<D, T> fvec; // one component vector
+        for (std::size_t i = 0; i < inp.size(); i++) {
+            if (std::norm(c[i]) < MachineZero) continue;
+            if (out.iscomplex and inp[i].data.conj) MSG_ERROR("conjugaison not implemented");
+            fvec.push_back(std::make_tuple(c[i], inp[i].Comp[comp]));
+        }
+        if (need_to_add) {
+            if (fvec.size() > 0) {
+                if (prec < 0.0) {
+                    build_grid(*out.Comp[comp], fvec); // grid of the component being built, not always component 0
+                    mrcpp::add(prec, *out.Comp[comp], fvec, 0);
+                } else {
+                    mrcpp::add(prec, *out.Comp[comp], fvec);
+                }
+            } else if (out.hasReal()) {
+                out.Comp[comp]->setZero();
+            }
+        }
+    }
+    mpi::share_function(out, 0, 9911, mpi::comm_share); // once, after all components are updated
+}
+
+/** @brief out = inp_a * inp_b, component by component
+ *
+ * prec < 0 multiplies on the union grid of both inputs; otherwise the grid is built adaptively.
+ */
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, CompFunction<D, T> inp_a, CompFunction<D, T> inp_b, double prec, bool absPrec, bool useMaxNorms) {
+    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
+    for (int comp = 0; comp < inp_a.Ncomp; comp++) {
+        delete out.Comp[comp];
+        FunctionTree<D, T> *tree = new FunctionTree<D, T>(inp_a.Comp[0]->getMRA()); // D, not a fixed 3, to fit out.Comp
+        T coef = 1.0;
+        if (need_to_multiply) {
+            if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
+            if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
+            if (prec < 0.0) {
+                // Union grid
+                build_grid(*tree, *inp_a.Comp[comp]);
+                build_grid(*tree, *inp_b.Comp[comp]);
+                mrcpp::multiply(prec, *tree, coef, *inp_a.Comp[comp], *inp_b.Comp[comp], 0);
+            } else {
+                // Adaptive grid
+                mrcpp::multiply(prec, *tree, coef, *inp_a.Comp[comp], *inp_b.Comp[comp], -1, absPrec, useMaxNorms);
+            }
+        }
+        out.Comp[comp] = tree;
+    }
+    mpi::share_function(out, 0, 9911, mpi::comm_share);
+}
+
+/** @brief out = inp_a * f (placeholder: only reports "Not implemented")
+ *
+ *  Intended behaviour: each component is multiplied. No argument is used yet.
+ */
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, CompFunction<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
+    MSG_ERROR("Not implemented");
+}
+
+/** @brief out = inp_a * f, with inp_a a single tree (placeholder: only reports "Not implemented")
+ *
+ */
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
+    MSG_ERROR("Not implemented");
+}
+
+
+/** @brief Compute <bra|ket> = int bra^\dag(r) * ket(r) dr.
+ *
+ *  Sum over components of the tree dot products mrcpp::dot(bra_i, ket_i).
+ *  Notice that the <bra| position is already complex conjugated.
+ *  A set conj flag on either input is not supported and is reported as an error.
+ */
+template <int D, typename T>
+T dot(CompFunction<D, T> bra, CompFunction<D, T> ket) {
+    T dotprod = 0.0;
+    if (bra.data.conj or ket.data.conj) MSG_ERROR("dot with conjugaison not implemented");
+    for (int comp = 0; comp < bra.Ncomp; comp++) {
+        dotprod += mrcpp::dot(*bra.Comp[comp], *ket.Comp[comp]);
+    }
+    return dotprod;
+}
+
+
+} // namespace compfunc
+
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index fc01c68af..8c196b145 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "trees/FunctionTree.h"
+#include "ComplexFunction.h"
 
 using namespace Eigen;
 
@@ -12,6 +13,7 @@ struct CompFunctionData {
     // occupancy, quantum number, norm, etc.
     int Ncomp{1}; // number of components defined
     int rank{-1}; // rank (index) if part of a vector
+    int conj{0}; // conjugate of all components
     int CompFn1{0};
     int CompFn2{0};
     int isreal{1}; // T=double
@@ -20,9 +22,8 @@ struct CompFunctionData {
     double CompFd2{0.0};
     double CompFd3{0.0};
     // additional data that describe each component (defined by user):
-    // occupancy, quantum number, conjugation, norm, etc.
+    // occupancy, quantum number, norm, etc.
     //Note: defined with fixed size to ease copying and MPI send
-    int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
     int n1[4]{0,0,0,0};
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
@@ -30,11 +31,22 @@ struct CompFunctionData {
     double d1[4]{0.0,0.0,0.0,0.0};
     double d2[4]{0.0,0.0,0.0,0.0};
     double d3[4]{0.0,0.0,0.0,0.0};
+    // used internally
+    int shared{0};
+    int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
 };
 
+
 template <int D, typename T> class CompFunction {
 public:
+    CompFunction();
     CompFunction(MultiResolutionAnalysis<D> &mra);
+    CompFunction(const CompFunction<D, T> &compfunc);
+    CompFunction(CompFunction<D, T> && compfunc);
+
+    ComplexFunction *CPXfct; // temporary solution
+
+
     FunctionTree<D, T> *Comp[4];
 
     std::string name;
@@ -46,10 +58,12 @@ template <int D, typename T> class CompFunction {
     int& isreal = data.isreal; // T=double
     int& iscomplex = data.iscomplex; // T=DoubleComplex
     int* Nchunks = data.Nchunks; // number of chunks of each component tree
-
     // ComplexFunctions are only defined for D=3
     template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
     CompFunction(ComplexFunction cplxfunc);
+    template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
+    operator ComplexFunction() const; // D_ defaults to D so the conversion is only enabled for D == 3
+    CompFunction<D, T> &operator=(const CompFunction<D, T> &func);
     // CompFunction destructor
     ~CompFunction() {
         for (int i = 0; i < Ncomp; i++) {
@@ -57,13 +71,42 @@ template <int D, typename T> class CompFunction {
         }
     }
 
-    double norm();
-    double squaredNorm();
+    double norm() const;
+    double squaredNorm() const;
     void alloc(int i);
     void add(T c, CompFunction<D, T> inp);
+
     int crop(double prec);
+    void rescale(T c);
+
+    //NB: Everything below should be revised. Kept for now only for backwards compatibility with the ComplexFunction class
+    bool hasReal()  const {return isreal;}
+    bool hasImag()  const {return iscomplex;}
+    bool isShared() const {return data.shared;}
+
+    FunctionTree<D, T> &real() {return *Comp[0];}
+    FunctionTree<D, T> &imag() {return *Comp[0];}
+    void free(int type) {delete Comp[0]; Comp[0] = nullptr;}
+    void flushFuncData();
 };
 
 template <int D, typename T = double> using CompFunctionVector = std::vector<CompFunction<D, T> *>;
 
+namespace compfunc {
+template <int D, typename T>
+void deep_copy(CompFunction<D, T> *out, const CompFunction<D, T> &inp);
+template <int D, typename T>
+void add(CompFunction<D, T> &out, T a, CompFunction<D, T> inp_a, T b, CompFunction<D, T> inp_b, double prec);
+template <int D, typename T>
+void linear_combination(CompFunction<D, T> &out, const std::vector<T> &c, std::vector<CompFunction<D, T>> &inp, double prec);
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, CompFunction<D, T> inp_a, CompFunction<D, T> inp_b, double prec, bool absPrec, bool useMaxNorms);
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, CompFunction<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+template <int D, typename T>
+void multiply(CompFunction<D, T> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+template <int D, typename T>
+T dot(CompFunction<D, T> bra, CompFunction<D, T> ket);
+
+} // namespace compfunc
 } // namespace mrcpp
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
index e2b110d1c..f7f6a11d9 100644
--- a/src/utils/ComplexFunction.cpp
+++ b/src/utils/ComplexFunction.cpp
@@ -15,8 +15,8 @@ using mrcpp::Timer;
 namespace mrcpp {
 
 MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
-
-ComplexFunction::ComplexFunction(CompFunction<3, double> cfunc)
+  /*    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+    ComplexFunction::ComplexFunction(CompFunction<3, T> cfunc)
         : funcMRA(defaultMRA)
         , func_ptr(std::make_shared<TreePtr>(false))
         , rank(cfunc.rank) {
@@ -24,7 +24,8 @@ ComplexFunction::ComplexFunction(CompFunction<3, double> cfunc)
     setOcc(cfunc.data.n2[0]);
     setReal(cfunc.Comp[0]);
 }
-ComplexFunction::ComplexFunction(CompFunction<3,ComplexDouble> cfunc)
+       template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
+     ComplexFunction::ComplexFunction( CompFunction<3, T> cfunc) {
         : funcMRA(defaultMRA)
         , func_ptr(std::make_shared<TreePtr>(false))
         , rank(cfunc.rank) {
@@ -32,7 +33,36 @@ ComplexFunction::ComplexFunction(CompFunction<3,ComplexDouble> cfunc)
     setOcc(cfunc.data.n2[0]);
     setReal(cfunc.Comp[0]->Real());
     setImag(cfunc.Comp[0]->Imag());
+    }*/
+
+   //  template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+    ComplexFunction::ComplexFunction(CompFunction<3, double>& cfunc)
+        : funcMRA(defaultMRA)
+        , func_ptr(std::make_shared<TreePtr>(false))
+        , rank(cfunc.rank) {
+    setSpin(cfunc.data.n1[0]);
+    setOcc(cfunc.data.n2[0]);
+    setReal(cfunc.Comp[0]);
 }
+    //    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+ //     ComplexFunction::ComplexFunction(CompFunction<3, double> && cfunc)
+//          : funcMRA(defaultMRA)
+//          , func_ptr(std::make_shared<TreePtr>(false))
+//          , rank(cfunc.rank) {
+//      setSpin(cfunc.data.n1[0]);
+//      setOcc(cfunc.data.n2[0]);
+//      setReal(cfunc.Comp[0]);
+//  }
+/*  template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
+     ComplexFunction::ComplexFunction(const CompFunction<3, T>& cfunc) {
+        : funcMRA(defaultMRA)
+        , func_ptr(std::make_shared<TreePtr>(false))
+        , rank(cfunc.rank) {
+    setSpin(cfunc.data.n1[0]);
+    setOcc(cfunc.data.n2[0]);
+    setReal(cfunc.Comp[0]->Real());
+    setImag(cfunc.Comp[0]->Imag());
+    }*/
 
 ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
         : funcMRA(defaultMRA)
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index 7326cf235..2a998617c 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -111,10 +111,23 @@ class TreePtr final {
 
 class ComplexFunction {
 public:
-    ComplexFunction(CompFunction<3, double> cfunc);
+    //  template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+  //ComplexFunction(CompFunction<3, double> cfunc) ;
+   //   template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+      ComplexFunction(CompFunction<3, double>& cfunc);
+  //    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
+  //  ComplexFunction(CompFunction<3, double>&& cfunc);
+    /* template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
+  ComplexFunction( CompFunction<3, T> cfunc);
+  template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
+  ComplexFunction(const  CompFunction<3, T> &cfunc) ;
+     ComplexFunction(CompFunction<3, double> cfunc);
     ComplexFunction(CompFunction<3,ComplexDouble> cfunc);
+    ComplexFunction(const CompFunction<3, double> &cfunc);
+    ComplexFunction(const CompFunction<3, ComplexDouble> &cfunc);*/
     ComplexFunction(std::shared_ptr<TreePtr> funcptr);
     ComplexFunction(const ComplexFunction &func);
+    ComplexFunction(ComplexFunction && func);
     ComplexFunction(int spin = 0, int occ = -1, int rank = -1, bool share = false);
     ComplexFunction &operator=(const ComplexFunction &func);
     ComplexFunction paramCopy() const;
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 7675800a1..89ce66baa 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -393,6 +393,19 @@ void mpi::share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
     }
 }
 
+
+/** Update a shared function after it has been changed by one of the MPI ranks. Does nothing without MRCPP_HAS_MPI. */
+template <typename T>
+    void mpi::share_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm) {
+    if (func.isShared()) {
+#ifdef MRCPP_HAS_MPI
+        for (int comp = 0; comp < func.Ncomp; comp++) {
+            mrcpp::share_tree(*func.Comp[comp], src, tag, comm);
+        }
+#endif
+    }
+}
+
 /** @brief Add all mpi function into rank zero */
 void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
@@ -735,5 +748,7 @@ void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MP
     template void mpi::send_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
     template void mpi::recv_function(CompFunction<3, double> &func, int dst, int tag, MPI_Comm comm);
     template void mpi::recv_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
+    template void mpi::share_function(CompFunction<3, double> &func, int src, int tag, MPI_Comm comm);
+    template void mpi::share_function(CompFunction<3, ComplexDouble> &func, int src, int tag, MPI_Comm comm);
 
 } // namespace mrcpp
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index bbda2332a..170d5b972 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -53,6 +53,8 @@ void send_function(CompFunction<3, T> &func, int dst, int tag, MPI_Comm comm = m
 template <typename T>
 void recv_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
 void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
+template <typename T>
+void share_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm);
 
 void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm);
 void broadcast_function(ComplexFunction &func, MPI_Comm comm);

From f7343744d51855d0a8c5a222079d2716127a0707 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 26 Jul 2024 15:02:37 +0200
Subject: [PATCH 13/38] removed typename template and upgrades

---
 api/mrcpp_declarations.h      |    2 +-
 src/treebuilders/apply.cpp    |  153 +--
 src/treebuilders/apply.h      |   16 +-
 src/utils/Bank.cpp            |   18 +-
 src/utils/Bank.h              |   10 +-
 src/utils/CompFunction.cpp    | 1895 ++++++++++++++++++++++++++++++---
 src/utils/CompFunction.h      |  104 +-
 src/utils/ComplexFunction.cpp |   51 +-
 src/utils/ComplexFunction.h   |   18 +-
 src/utils/parallel.cpp        |   94 +-
 src/utils/parallel.h          |   22 +-
 11 files changed, 1980 insertions(+), 403 deletions(-)

diff --git a/api/mrcpp_declarations.h b/api/mrcpp_declarations.h
index 0c04989b0..8296da045 100644
--- a/api/mrcpp_declarations.h
+++ b/api/mrcpp_declarations.h
@@ -62,7 +62,7 @@ template <int D, typename T = double> class NodeAllocator;
 
 template <int D, typename T = double> class MWNode;
 template <int D, typename T = double> class FunctionNode;
-template <int D = 3, typename T = double> class CompFunction;
+template <int D = 3> class CompFunction;
 class ComplexFunction;
 class OperatorNode;
 
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index a292ad378..478dd7f15 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -116,14 +116,19 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
  * no coefs).
  *
  */
-template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp.Comp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
-                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply(prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
+template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+        for (int ocomp = 0; ocomp < 4; ocomp++){
+            if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
+                if (inp.isreal) {
+                    apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                        out.CompD[ocomp]->rescale(metric[icomp][ocomp].real());
+                    }
+                } else {
+                    apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
                     }
                 }
             }
@@ -245,14 +250,19 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
-template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, FunctionTreeVector<D, T> *precTrees, T **metric, int maxIter, bool absPrec) {
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp.Comp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
-                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply(prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], precTrees[icomp], maxIter, absPrec);
+template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble **metric, int maxIter, bool absPrec) {
+    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+        for (int ocomp = 0; ocomp < 4; ocomp++){
+            if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
+                if (inp.isreal) {
+                    apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], precTrees[icomp], maxIter, absPrec);
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                        out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                    }
+                } else {
+                    apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], precTrees[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
                     }
                 }
             }
@@ -286,14 +296,21 @@ template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T
     apply_on_unit_cell<D>(false, prec, out, oper, inp, maxIter, absPrec);
 }
 
-template <int D, typename T> void apply_far_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
+template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
     for (int icomp = 0; icomp < 4; icomp++){
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply_on_unit_cell<D>(false, prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                    if (inp.isreal) {
+                        apply_on_unit_cell<D>(false, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                        }
+                    } else {
+                        apply_on_unit_cell<D>(false, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
+                        }
                     }
                 }
             }
@@ -328,14 +345,21 @@ template <int D, typename T> void apply_near_field(double prec, FunctionTree<D,
 }
 
 
-template <int D, typename T> void apply_near_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter, bool absPrec) {
+template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
     for (int icomp = 0; icomp < 4; icomp++){
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply_on_unit_cell<D>(true, prec, *out.Comp[ocomp], oper, *inp.Comp[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                    if (inp.isreal) {
+                        apply_on_unit_cell<D>(true, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                        }
+                    } else {
+                        apply_on_unit_cell<D>(true, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
+                        }
                     }
                 }
             }
@@ -394,14 +418,19 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
     print::separator(10, ' ');
 }
 
-template <int D, typename T> void apply(CompFunction<D, T> &out, DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int dir) {
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp.Comp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
-                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply(*out.Comp[ocomp], oper, *inp.Comp[icomp], dir);
+template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int dir) {
+    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+        for (int ocomp = 0; ocomp < 4; ocomp++){
+            if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
+                if (inp.isreal) {
+                    apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                        out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                    }
+                } else {
+                    apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
                     }
                 }
             }
@@ -431,18 +460,30 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
     return out;
 }
 
-template <int D, typename T> CompFunctionVector<D, T> gradient(DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric) {
-    CompFunctionVector<D, T> out;
+template <int D> CompFunctionVector<D> gradient(DerivativeOperator<D> &oper, CompFunction<D> &inp,ComplexDouble  **metric) {
+    CompFunctionVector<D> out;
     for (int d = 0; d < D; d++) {
-        CompFunction<D, T> *grad_d = new CompFunction<D, T>();
+        CompFunction<D> *grad_d = new CompFunction<D>();
         for (int icomp = 0; icomp < 4; icomp++){
             if (inp.Comp[icomp]!=nullptr) {
                 for (int ocomp = 0; ocomp < 4; ocomp++){
                     if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                        grad_d->Comp[ocomp] = new FunctionTree<D, T>(inp.getMRA());
-                        apply(grad_d->Comp[ocomp], oper, *inp.Comp[icomp], d);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            grad_d->Comp[ocomp]->rescale(metric[icomp][ocomp]);
+                        if (inp.isreal) {
+                            grad_d->isreal = 1;
+                            grad_d->iscomplex = 0;
+                            grad_d->CompD[ocomp] = new FunctionTree<D, double>(inp.getMRA());
+                            apply(grad_d->CompD[ocomp], oper, *inp.CompD[icomp], d);
+                            if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                                grad_d->CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                            }
+                        } else {
+                            grad_d->isreal = 0;
+                            grad_d->iscomplex = 1;
+                            grad_d->CompC[ocomp] = new FunctionTree<D, ComplexDouble>(inp.getMRA());
+                            apply(grad_d->CompC[ocomp], oper, *inp.CompC[icomp], d);
+                            if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                                grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]);
+                            }
                         }
                     }
                 }
@@ -487,19 +528,8 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
     clear(tmp_vec, true);
 }
 
-template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, T **metric) {
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
-                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    divergence(*out.Comp[ocomp], oper, inp[icomp]);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
-                    }
-                }
-            }
-        }
-    }
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble **metric) {
+    MSG_ABORT("not implemented");
 }
 
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp) {
@@ -507,27 +537,16 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     divergence(out, oper, inp_vec);
 }
-template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, T **metric) {
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
-                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    apply(*out.Comp[ocomp], oper, inp[icomp]);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.Comp[ocomp]->rescale(metric[icomp][ocomp]);
-                    }
-                }
-            }
-        }
-    }
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble **metric) {
+    MSG_ABORT("not implemented");
 }
 
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
-template void apply<1, double>(double prec, CompFunction<1, double> &out, ConvolutionOperator<1> &oper, CompFunction<1, double> &inp, double **metric, int maxIter = -1, bool absPrec = false);
-template void apply<2, double>(double prec, CompFunction<2, double> &out, ConvolutionOperator<2> &oper, CompFunction<2, double> &inp, double **metric, int maxIter = -1, bool absPrec = false);
-template void apply<3, double>(double prec, CompFunction<3, double> &out, ConvolutionOperator<3> &oper, CompFunction<3, double> &inp, double **metric, int maxIter = -1, bool absPrec = false);
+template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, CompFunction<1> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, CompFunction<2> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, CompFunction<3> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
@@ -555,9 +574,7 @@ template FunctionTreeVector<3, double> gradient<3>(DerivativeOperator<3> &oper,
 template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
-template void apply<1, ComplexDouble>(double prec, CompFunction<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, CompFunction<1, ComplexDouble> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
-template void apply<2, ComplexDouble>(double prec, CompFunction<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, CompFunction<2, ComplexDouble> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
-template void apply<3, ComplexDouble>(double prec, CompFunction<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, CompFunction<3, ComplexDouble> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+
 template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, FunctionTreeVector<1, ComplexDouble> &precTrees, int maxIter, bool absPrec);
 template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, FunctionTreeVector<2, ComplexDouble> &precTrees, int maxIter, bool absPrec);
 template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec);
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index 8c0d4039f..d66ada61d 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -36,21 +36,21 @@ template <int D> class DerivativeOperator;
 template <int D> class ConvolutionOperator;
 
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter = -1, bool absPrec = false);
-template <int D, typename T> void apply(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, FunctionTreeVector<D, T> *precTrees, T **metric, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D, typename T> void apply_far_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_near_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D, typename T> void apply_near_field(double prec, CompFunction<D, T> &out, ConvolutionOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTree<D, T> &inp, int dir = -1);
-template <int D, typename T> void apply(CompFunction<D, T> &out, DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric, int dir = -1);
+template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int dir = -1);
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> &inp);
-template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, T **metric);
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble **metric);
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp);
-template <int D, typename T> void divergence(CompFunction<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, T **metric);
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble **metric);
 template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp);
-template <int D, typename T> CompFunctionVector<D, T> gradient(DerivativeOperator<D> &oper, CompFunction<D, T> &inp, T **metric);
+template <int D> CompFunctionVector<D> gradient(DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric);
 // clang-format on
 
 } // namespace mrcpp
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index c00338a9c..f924b971b 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -301,9 +301,9 @@ void Bank::open() {
                     }
                     send_function(*deposits[ix].orb, status.MPI_SOURCE, 1, comm_bank);
                     if (message == GET_FUNCTION_AND_DELETE) {
-                        currentsize[account] -= deposits[ix].orb->getSizeNodes(NUMBER::Total);
-                        totcurrentsize -= deposits[ix].orb->getSizeNodes(NUMBER::Total);
-                        deposits[ix].orb->free(NUMBER::Total);
+                        currentsize[account] -= deposits[ix].orb->getSizeNodes();
+                        totcurrentsize -= deposits[ix].orb->getSizeNodes();
+                        deposits[ix].orb->free();
                         id2ix[id] = 0;
                     }
                 }
@@ -370,7 +370,7 @@ void Bank::open() {
             } else {
                 ix = deposits.size(); // NB: ix is now index of last element + 1
                 deposits.resize(ix + 1);
-                if (message == SAVE_FUNCTION) deposits[ix].orb = new ComplexFunction(0);
+                if (message == SAVE_FUNCTION) deposits[ix].orb = new CompFunction<3>(0);
                 if (message == SAVE_DATA) {
                     datasize = messages[3];
                     deposits[ix].data = mem[account]->get_mem(datasize);//new double[datasize];
@@ -386,8 +386,8 @@ void Bank::open() {
             if (message == SAVE_FUNCTION) {
                 recv_function(*deposits[ix].orb, deposits[ix].source, 1, comm_bank);
                 if (exist_flag == 0) {
-                    currentsize[account] += deposits[ix].orb->getSizeNodes(NUMBER::Total);
-                    totcurrentsize += deposits[ix].orb->getSizeNodes(NUMBER::Total);
+                    currentsize[account] += deposits[ix].orb->getSizeNodes();
+                    totcurrentsize += deposits[ix].orb->getSizeNodes();
                     this->maxsize = std::max(totcurrentsize, this->maxsize);
                 }
             }
@@ -641,7 +641,7 @@ std::vector<int> Bank::get_totalsize() {
 // get orbital with identity id.
 // If wait=0, return immediately with value zero if not available (default)
 // else, wait until available
-int BankAccount::get_func(int id, ComplexFunction &func, int wait) {
+int BankAccount::get_func(int id, CompFunction<3> &func, int wait) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
     int messages[message_size];
@@ -669,7 +669,7 @@ int BankAccount::get_func(int id, ComplexFunction &func, int wait) {
 
 // get orbital with identity id, and delete from bank.
 // return immediately with value zero if not available
-int BankAccount::get_func_del(int id, ComplexFunction &orb) {
+int BankAccount::get_func_del(int id, CompFunction<3> &orb) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
     int messages[message_size];
@@ -690,7 +690,7 @@ int BankAccount::get_func_del(int id, ComplexFunction &orb) {
 }
 
 // save function in Bank with identity id
-int BankAccount::put_func(int id, ComplexFunction &func) {
+int BankAccount::put_func(int id, CompFunction<3> &func) {
 #ifdef MRCPP_HAS_MPI
     // for now we distribute according to id
     int messages[message_size];
diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index 7293d73ab..dc52791b3 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "ComplexFunction.h"
+#include "CompFunction.h"
 #include "parallel.h"
 #include "trees/NodeIndex.h"
 
@@ -9,7 +9,7 @@ namespace mrcpp {
 using namespace mpi;
 
 struct deposit {
-    ComplexFunction *orb;
+    CompFunction<3> *orb;
     double *data; // for pure data arrays
     bool hasdata;
     int datasize;
@@ -96,9 +96,9 @@ class BankAccount {
     void clear(int i = wrk_rank, MPI_Comm comm = comm_wrk);
     //    int put_orb(int id, ComplexFunction &orb);
     //    int get_orb(int id, ComplexFunction &orb, int wait = 0);
-    int get_func_del(int id, ComplexFunction &orb);
-    int put_func(int id, ComplexFunction &func);
-    int get_func(int id, ComplexFunction &func, int wait = 0);
+    int get_func_del(int id, CompFunction<3> &orb);
+    int put_func(int id, CompFunction<3> &func);
+    int get_func(int id, CompFunction<3> &func, int wait = 0);
     int put_data(int id, int size, double *data);
     int put_data(int id, int size, ComplexDouble *data);
     int get_data(int id, int size, double *data);
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index d475b6cae..5be3003fd 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -1,79 +1,72 @@
 #include <fstream>
 #include "Printer.h"
 #include "parallel.h"
+#include "Bank.h"
+#include "treebuilders/grid.h"
+#include "trees/FunctionNode.h"
 #include "treebuilders/project.h"
 #include "treebuilders/add.h"
 #include "treebuilders/multiply.h"
 #include "CompFunction.h"
-#include "ComplexFunction.h"
 
 namespace mrcpp {
 
   template <int D>
   MultiResolutionAnalysis<D> *defaultCompMRA = nullptr; // Global MRA
 
-  template <int D, typename T>
-  template <int D_, typename std::enable_if<D_ == 3, int>::type>
-  CompFunction<D, T>::CompFunction(ComplexFunction cplxfunc){
-      Ncomp = 1;
-      if (std::is_same<T, ComplexDouble>::value) {
-          isreal = 0;
-          iscomplex = 1;
-      } else {
-          isreal = 1;
-          iscomplex = 0;
-      }
-      defaultCompMRA<3> = cplxfunc.funcMRA;
-      //we always copy real part
-      Comp[0] = new FunctionTree<D, T>(*cplxfunc.funcMRA);
-      if (not cplxfunc.hasReal()) MSG_ABORT("Input function has no real part");
-      FunctionTree<D, T>::deep_copy(Comp[0], cplxfunc.real());
-      if ( iscomplex ){
-          //We add the imaginary part, if it exist in input function
-          if (cplxfunc.hasImag()){
-              ComplexDouble c;
-              if(cplxfunc.conjugate())  MSG_ERROR("onjugaison not implemented");
-              Comp[0].add_inplace(1.0, cplxfunc.imag());
-          }
-      } else if (cplxfunc.hasImag()) MSG_WARN("Complex part is truncated")
-      // set metadata
-      data.n1[0] = cplxfunc.spin();
-      data.n2[0] = cplxfunc.occ();
+  template <int D>
+  CompFunction<D>::CompFunction(MultiResolutionAnalysis<D> &mra)
+  { defaultCompMRA<D> = &mra;
+    data.Ncomp = 0;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+  }
 
-      rank = cplxfunc.getRank();
+  template <int D>
+  CompFunction<D>::CompFunction()
+  { data.Ncomp = 0;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
   }
 
-  template <int D, typename T>
-  CompFunction<D, T>::CompFunction()
-  { if (std::is_same<T, ComplexDouble>::value) {
-          isreal = 0;
-          iscomplex = 1;
-      } else {
-          isreal = 1;
-          iscomplex = 0;
-      }
-    data.Ncomp = 0;
-    Comp[0]=nullptr;
-    Comp[1]=nullptr;
-    Comp[2]=nullptr;
-    Comp[3]=nullptr;
+/*
+ * Empty functions (no components defined)
+ */
+  template <int D>
+  CompFunction<D>::CompFunction(int n1)
+  { data.Ncomp = 0;
+      data.n1[0] = n1;
+      data.n2[0] = -1;
+      data.n3[0] = 0;
+      rank = 0;
+      isreal = 1;
+      iscomplex = 0;
+      data.shared = false;
+      //      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
+      //CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+
   }
 
-  template <int D, typename T>
-  CompFunction<D, T>::CompFunction(MultiResolutionAnalysis<D> &mra)
-  { if (std::is_same<T, ComplexDouble>::value) {
-          isreal = 0;
-          iscomplex = 1;
-      } else {
-          isreal = 1;
-          iscomplex = 0;
-      }
-    defaultCompMRA<D> = &mra;
-    data.Ncomp = 0;
-    Comp[0]=nullptr;
-    Comp[1]=nullptr;
-    Comp[2]=nullptr;
-    Comp[3]=nullptr;
+/*
+ * Empty functions (no components defined)
+ */
+  template <int D>
+  CompFunction<D>::CompFunction(int n1, bool share)
+  { data.Ncomp = 0;
+      data.n1[0] = n1;
+      data.n2[0] = -1;
+      data.n3[0] = 0;
+      rank = 0;
+      isreal = 1;
+      iscomplex = 0;
+      data.shared = share;
+     if (share) MSG_ABORT("Not yet implemented");
+      //CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+
   }
 
 /** @brief Copy constructor
@@ -81,13 +74,11 @@ namespace mrcpp {
  * Shallow copy: meta data is copied along with the component pointers,
  * NO transfer of ownership.
  */
-  template <int D, typename T>
-  CompFunction<D, T>::CompFunction(const CompFunction<D, T> &compfunc) {
+  template <int D>
+  CompFunction<D>::CompFunction(const CompFunction<D> &compfunc) {
       data = compfunc.data;
-      Comp[0] = compfunc.Comp[0];
-      Comp[1] = compfunc.Comp[1];
-      Comp[2] = compfunc.Comp[2];
-      Comp[3] = compfunc.Comp[3];
+    for (int i = 0; i < 4; i++) CompD[i] = compfunc.CompD[i];
+    for (int i = 0; i < 4; i++) CompC[i] = compfunc.CompC[i];
   }
 
 /** @brief Copy constructor
@@ -95,31 +86,30 @@ namespace mrcpp {
  * Shallow copy: meta data is copied along with the component pointers,
  * NO transfer of ownership.
  */
-  template <int D, typename T>
-  CompFunction<D, T>::CompFunction(CompFunction<D, T> && compfunc) {
+  template <int D>
+  CompFunction<D>::CompFunction(CompFunction<D> && compfunc) {
       data = compfunc.data;
-      Comp[0] = compfunc.Comp[0];
-      Comp[1] = compfunc.Comp[1];
-      Comp[2] = compfunc.Comp[2];
-      Comp[3] = compfunc.Comp[3];
+      for (int i = 0; i < 4; i++) CompD[i]=compfunc.CompD[i];
+      for (int i = 0; i < 4; i++) CompC[i]=compfunc.CompC[i];
   }
 
-  template <int D, typename T>
-  CompFunction<D, T> &CompFunction<D, T>::operator=(const CompFunction<D, T> &func) {
-      if (this != &func) {
-          this->data = func.data;
+  template <int D>
+  CompFunction<D> &CompFunction<D>::operator=(const CompFunction<D> &compfunc) {
+      if (this != &compfunc) {
+          this->data = compfunc.data;
           for (int i = 0; i < Ncomp; i++) {
-              this->Comp[i] = func.Comp[i];
+              CompD[i] = compfunc.CompD[i];
+              CompC[i] = compfunc.CompC[i];
           }
       }
       return *this;
   }
 
-    template <int D, typename T>
-    template <int D_, typename std::enable_if<D_ == 3, int>::type>
-    CompFunction<D, T>::operator ComplexFunction() const {
-        return ComplexFunction(*this); // const conversion
-    }
+//    template <int D>
+//    template <int D_, typename std::enable_if<D_ == 3, int>::type>
+//    CompFunction<D>::operator ComplexFunction() const {
+ //       return ComplexFunction(*this); // const conversion
+ //   }
     //    template <int D, typename T>
    //    template <int D_, typename std::enable_if<D_ == 3, int>::type>
    //    CompFunction<D, T>::operator ComplexFunction() {
@@ -127,74 +117,156 @@ namespace mrcpp {
    //    }
   //
 
-    template CompFunction<3,double>::operator ComplexFunction() const;
-//template CompFunction<3,double>::operator ComplexFunction();
+ //   template CompFunction<3>::operator ComplexFunction() const;
 
-    template <int D, typename T>
-    void CompFunction<D, T>::flushFuncData() {
+    template <int D>
+    void CompFunction<D>::flushFuncData() {
       for (int i = 0; i < Ncomp; i++) {
-          Nchunks[i] = Comp[i]->getNChunksUsed();
+          if (isreal) {
+              Nchunks[i] = CompD[i]->getNChunksUsed();
+          } else {
+              Nchunks[i] = CompC[i]->getNChunksUsed();
+          }
       }
       for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
     }
-  template <int D, typename T>
-  double CompFunction<D, T>::norm() const {
+  /** @brief Square root of the sum of the square norms of all components. */
+  template <int D> double CompFunction<D>::norm() const {
-     double norm = squaredNorm();
+     double norm = 0.0; // the loop below accumulates the squared norm; seeding with squaredNorm() counted it twice
      for (int i = 0; i < Ncomp; i++) {
-          norm += Comp[i]->getSquareNorm();
+          if (isreal) {
+              norm += CompD[i]->getSquareNorm();
+          } else {
+              norm += CompC[i]->getSquareNorm();
+          }
      }
      if (norm > 0.0) norm = std::sqrt(norm);
      return norm;
   }
-  template <int D, typename T>
-  double CompFunction<D, T>::squaredNorm() const {
+  /** @brief Sum of the square norms of all components. */
+  template <int D> double CompFunction<D>::squaredNorm() const {
-     double norm = squaredNorm();
+     double norm = 0.0; // was an unconditional self-call, i.e. infinite recursion
      for (int i = 0; i < Ncomp; i++) {
-          norm += Comp[i]->getSquareNorm();
+          if (isreal) {
+              norm += CompD[i]->getSquareNorm();
+          } else {
+              norm += CompC[i]->getSquareNorm();
+          }
      }
      return norm;
   }
-  template <int D, typename T>
-  void CompFunction<D, T>::alloc(int i) {
+  /** @brief (Re)allocate component i as an empty tree on the default MRA; any tree previously held in slot i is deleted. */
+  template <int D> void CompFunction<D>::alloc(int i) {
       if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      Comp[i] = new FunctionTree<D, T> (*defaultCompMRA<D>);
-  }
+      // Release whatever slot i held before; delete on nullptr is a no-op.
+      delete CompD[i];
+      delete CompC[i];
+      // Reassign BOTH slots, so the one that is not allocated is reset to nullptr
+      // instead of being left pointing at the tree deleted above.
+      CompD[i] = isreal ? new FunctionTree<D, double>(*defaultCompMRA<D>) : nullptr;
+      CompC[i] = isreal ? nullptr : new FunctionTree<D, ComplexDouble>(*defaultCompMRA<D>);
+      Ncomp = std::max(Ncomp, i + 1);
+ }
+
+/** @brief Delete every component tree and reset its pointer.
+ *  Pointers are nulled after deletion so that a later free() or alloc() on
+ *  this object cannot delete the same tree twice. Ncomp is left unchanged. */
+template <int D> void CompFunction<D>::free() {
+    // TODO: shared memory handling
+    for (int i = 0; i < Ncomp; i++) {
+        delete CompD[i]; // delete on nullptr is a no-op
+        CompD[i] = nullptr;
+        delete CompC[i];
+        CompC[i] = nullptr;
+    }
+}
+
+template <int D>
+int CompFunction<D>::getSizeNodes() const {
+    int size_mb = 0; // Memory size in kB
+    for (int i = 0; i < Ncomp; i++) {
+        if (CompD[i]!= nullptr) size_mb +=CompD[i]->getSizeNodes();
+        if (CompC[i]!= nullptr) size_mb +=CompC[i]->getSizeNodes();
+    }
+    return size_mb;
+}
+
+/** @brief Number of nodes summed over all allocated components (real and complex trees). */
+template <int D> int CompFunction<D>::getNNodes() const {
+    int nNodes = 0;
+    for (int i = 0; i < Ncomp; i++) {
+        if (CompD[i] != nullptr) nNodes += CompD[i]->getNNodes(); // node count, not getSizeNodes()
+        if (CompC[i] != nullptr) nNodes += CompC[i]->getNNodes();
+    }
+    return nNodes;
+}
+
+ /* for backwards compatibility */
+template <int D>
+void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
+      if (CompD[i] != nullptr) delete CompD[i];
+      if (iscomplex) MSG_ERROR("cannot write real tree into complex function");
+      CompD[i] = tree;
+      if (tree != nullptr) {
+          isreal = 1;
+          Ncomp = std::max(Ncomp, i + 1);
+      } else {Ncomp = std::min(Ncomp, i);}
+}
+    /*
+template <int D>
+void CompFunction<D>::set(FunctionTree<D, ComplexDouble> *tree, int i) {
+      if (CompC[i] != nullptr) delete CompC[i];
+      if (isreal) MSG_ERROR("cannot write complex tree into real function");
+      CompC[i] = tree;
+      if (tree != nullptr) {
+          iscomplex = 1;
+          Ncomp = std::max(Ncomp, i + 1);
+      } else {Ncomp = std::min(Ncomp, i);}
+      } */
 
 /** @brief In place addition.
  *
  * Output is extended to union grid.
  *
  */
-template <int D, typename T>
-void CompFunction<D, T>::add(T c, CompFunction<D, T> inp) {
-    for (int i = 0; i < Ncomp; i++) {
-        if (i >= inp.Ncomp) break;
-        Comp[i]->add_inplace(c,*inp.Comp[i]);
-    }
-    for (int i = Ncomp; i < inp.Ncomp; i++) {
-        alloc(i);
-        Comp[i]->add_inplace(c,*inp.Comp[i]);
+template <int D>
+void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
+    for (int i = 0; i < inp.Ncomp; i++) {
+        if (i >= Ncomp) alloc(i);
+        if (isreal) {
+            CompD[i]->add_inplace(c.real(),*inp.CompD[i]);
+        } else {
+            CompC[i]->add_inplace(c,*inp.CompC[i]);
+        }
     }
 }
 
 
-template <int D, typename T>
-int CompFunction<D, T>::crop(double prec) {
+template <int D>
+int CompFunction<D>::crop(double prec) {
     if (prec < 0.0) return 0;
     int nChunksremoved = 0;
     for (int i = 0; i < Ncomp; i++) {
-        nChunksremoved += Comp[i]->crop(prec, 1.0, false);
+        if (isreal) {
+            nChunksremoved += CompD[i]->crop(prec, 1.0, false);
+        } else {
+            nChunksremoved += CompC[i]->crop(prec, 1.0, false);
+        }
     }
     return nChunksremoved;
 }
 
 /** @brief In place multiply with scalar. Fully in-place.*/
-template <int D, typename T>
-void CompFunction<D, T>::rescale(T c) {
+template <int D>
+void CompFunction<D>::rescale(ComplexDouble c) {
     bool need_to_rescale = not(isShared()) or mpi::share_master();
     if (need_to_rescale) {
         for (int i = 0; i < Ncomp; i++) {
-            Comp[i]->rescale(c);
+            if (isreal) {
+                CompD[i]->rescale(c.real());
+            } else {
+                CompC[i]->rescale(c);
+            }
         }
     } else MSG_ERROR("Not implemented");
 }
@@ -203,44 +275,59 @@ void CompFunction<D, T>::rescale(T c) {
 template class  MultiResolutionAnalysis<1>;
 template class  MultiResolutionAnalysis<2>;
 template class  MultiResolutionAnalysis<3>;
-template class CompFunction<1, double>;
-template class CompFunction<2, double>;
-template class CompFunction<3, double>;
-
-template class CompFunction<1, ComplexDouble>;
-template class CompFunction<2, ComplexDouble>;
-template class CompFunction<3, ComplexDouble>;
-
-
-namespace compfunc {
-
+template class CompFunction<1>;
+template class CompFunction<2>;
+template class CompFunction<3>;
 
 /** @brief Deep copy
  *
  * Deep copy: meta data is copied along with the content of each component.
  */
-  template <int D, typename T>
-  void deep_copy(CompFunction<D, T> *out, const CompFunction<D, T> &inp) {
+  template <int D>
+  void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp) {
       out->data = inp.data;
       for (int i = 0; i < inp.Ncomp; i++) {
-          delete out->Comp[i];
-          inp.Comp[i]->deep_copy(out->Comp[i]);
+          if (inp.isreal) {
+              delete out->CompD[i]; // NOTE(review): the slot is not re-allocated before the next line uses it
+              inp.CompD[i]->deep_copy(out->CompD[i]); // receives the pointer deleted above — confirm what deep_copy() expects
+          } else {
+              delete out->CompC[i]; // NOTE(review): same pattern as the real branch
+              inp.CompC[i]->deep_copy(out->CompC[i]); // receives the pointer deleted above — confirm what deep_copy() expects
+          }
       }
   }
 
 
+/** @brief Deep copy
+ *
+ * Deep copy: meta data is copied along with the content of each component.
+ */
+  template <int D>
+  void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp) {
+      out.data = inp.data; // meta data is copied wholesale
+      for (int i = 0; i < inp.Ncomp; i++) {
+          if (inp.isreal) {
+              delete out.CompD[i]; // NOTE(review): the slot is not re-allocated before the next line uses it
+              inp.CompD[i]->deep_copy(out.CompD[i]); // receives the pointer deleted above — confirm what deep_copy() expects
+          } else {
+              delete out.CompC[i]; // NOTE(review): same pattern as the real branch
+              inp.CompC[i]->deep_copy(out.CompC[i]); // receives the pointer deleted above — confirm what deep_copy() expects
+          }
+      }
+  }
+
 /** @brief out = a*inp_a + b*inp_b
  *
  * Recast into linear_combination.
  *
  */
-template <int D, typename T>
-void add(CompFunction<D, T> &out, T a, CompFunction<D, T> inp_a, T b, CompFunction<D, T> inp_b, double prec) {
-    std::vector<T> coefs(2);
+template <int D>
+void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec) {
+    std::vector<ComplexDouble> coefs(2);
     coefs[0] = a;
     coefs[1] = b;
 
-    std::vector<CompFunction<D, T>> funcs; // NB: not a CompFunctionVector, because not run in parallel!
+    std::vector<CompFunction<D>> funcs; // NB: not a CompFunctionVector, because not run in parallel!
     funcs.push_back(inp_a);
     funcs.push_back(inp_b);
 
@@ -251,27 +338,47 @@ void add(CompFunction<D, T> &out, T a, CompFunction<D, T> inp_a, T b, CompFuncti
  *
  * OMP parallel, but not MPI parallel
  */
-template <int D, typename T>
-    void linear_combination(CompFunction<D, T> &out, const std::vector<T> &c, std::vector<CompFunction<D, T>> &inp, double prec) {
+template <int D>
+    void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec) {
     double thrs = MachineZero;
     bool need_to_add = not(out.isShared()) or mpi::share_master();
     for (int comp = 0; comp < inp[0].Ncomp; comp++) {
-        FunctionTreeVector<D, T> fvec; // one component vector
-        for (int i = 0; i < inp.size(); i++) {
-            if (std::norm(c[i]) < thrs) continue;
-            if (out.iscomplex and inp[i].data.conj) MSG_ERROR("conjugaison not implemented");
-            fvec.push_back(std::make_tuple(c[i], inp[i].Comp[comp]));
-        }
-        if (need_to_add) {
-            if (fvec.size() > 0) {
-                if (prec < 0.0) {
-                    build_grid(out.real(), fvec);
-                    mrcpp::add(prec, *out.Comp[comp], fvec, 0);
-                } else {
-                    mrcpp::add(prec, *out.Comp[comp], fvec);
+        if (inp[0].isreal) {
+            FunctionTreeVector<D, double> fvec; // one component vector
+            for (int i = 0; i < inp.size(); i++) {
+                if (std::norm(c[i]) < thrs) continue;
+                fvec.push_back(std::make_tuple(c[i].real(), inp[i].CompD[comp]));
+            }
+            if (need_to_add) {
+                if (fvec.size() > 0) {
+                    if (prec < 0.0) {
+                        build_grid(*out.CompD[comp], fvec); // pass the tree itself, as mrcpp::add below does
+                        mrcpp::add(prec, *out.CompD[comp], fvec, 0);
+                    } else {
+                        mrcpp::add(prec, *out.CompD[comp], fvec);
+                    }
                 }
             } else if (out.hasReal()) {
-                out.Comp[comp]->setZero();
+                out.CompD[comp]->setZero();
+            }
+        } else {
+            FunctionTreeVector<D, ComplexDouble> fvec; // one component vector
+            for (int i = 0; i < inp.size(); i++) {
+                if (std::norm(c[i]) < thrs) continue;
+                if (inp[i].data.conj) MSG_ERROR("conjugaison not implemented");
+                fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp])); // complex branch must use the complex trees
+            }
+            if (need_to_add) {
+                if (fvec.size() > 0) {
+                    if (prec < 0.0) {
+                        build_grid(*out.CompC[comp], fvec); // pass the tree itself, as mrcpp::add below does
+                        mrcpp::add(prec, *out.CompC[comp], fvec, 0);
+                    } else {
+                        mrcpp::add(prec, *out.CompC[comp], fvec);
+                    }
+                }
+            } else if (out.hasReal()) {
+                out.CompC[comp]->setZero();
             }
         }
         mpi::share_function(out, 0, 9911, mpi::comm_share);
@@ -281,27 +388,47 @@ template <int D, typename T>
 /** @brief out = inp_a * inp_b
  *
  */
-template <int D, typename T>
-void multiply(CompFunction<D, T> &out, CompFunction<D, T> inp_a, CompFunction<D, T> inp_b, double prec, bool absPrec, bool useMaxNorms) {
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms) {
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-    for (int comp = 0; comp < inp_a[0].Ncomp; comp++) {
-        delete out.Comp[comp];
-        FunctionTree<3, T> *tree = new FunctionTree<3, T>(inp_a.Comp[0].getMRA());
-        T coef = 1.0;
-         if (need_to_multiply) {
-             if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
-             if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
-             if (prec < 0.0) {
-                 // Union grid
-                 build_grid(*tree, inp_a.Comp[comp]);
-                 build_grid(*tree, inp_b.Comp[comp]);
-                 mrcpp::multiply(prec, *tree, coef, *inp_a.Comp[comp], *inp_b.Comp[comp], 0);
-             } else {
-                // Adaptive grid
-                 mrcpp::multiply(prec, *tree, coef, *inp_a.Comp[comp], *inp_b.Comp[comp], -1, absPrec, useMaxNorms);
-             }
-         }
-         out.Comp[comp] = tree;
+    for (int comp = 0; comp < inp_a.Ncomp; comp++) { // inp_a is a single CompFunction, not a vector
+        if (inp_a.isreal and inp_b.isreal) {
+            delete out.CompD[comp];
+            FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA()); // CompD[0] is a pointer
+            double coef = 1.0;
+            if (need_to_multiply) {
+                if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
+                if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
+                if (prec < 0.0) {
+                    // Union grid (trees are passed dereferenced, as in mrcpp::multiply below)
+                    build_grid(*tree, *inp_a.CompD[comp]);
+                    build_grid(*tree, *inp_b.CompD[comp]);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0);
+                } else {
+                    // Adaptive grid
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], -1, absPrec, useMaxNorms);
+                }
+            }
+            out.CompD[comp] = tree;
+        } else {
+            delete out.CompC[comp];
+            FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0]->getMRA()); // CompC[0] is a pointer
+            ComplexDouble coef = 1.0;
+            if (need_to_multiply) {
+                if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
+                if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
+                if (prec < 0.0) {
+                    // Union grid (trees are passed dereferenced, as in mrcpp::multiply below)
+                    build_grid(*tree, *inp_a.CompC[comp]);
+                    build_grid(*tree, *inp_b.CompC[comp]);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0);
+                } else {
+                    // Adaptive grid
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], -1, absPrec, useMaxNorms);
+                }
+            }
+            out.CompC[comp] = tree;
+        }
     }
     mpi::share_function(out, 0, 9911, mpi::comm_share);
 
@@ -312,7 +439,7 @@ void multiply(CompFunction<D, T> &out, CompFunction<D, T> inp_a, CompFunction<D,
  *  each component is multiplied
  */
 template <int D, typename T>
-void multiply(CompFunction<D, T> &out, CompFunction<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) { // placeholder: the body only reports "Not implemented", no argument is read
     MSG_ERROR("Not implemented");
 }
 
@@ -320,7 +447,7 @@ void multiply(CompFunction<D, T> &out, CompFunction<D, T> &inp_a, RepresentableF
  *
  */
 template <int D, typename T>
-void multiply(CompFunction<D, T>, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
+void multiply(CompFunction<D>, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) { // placeholder: first parameter is unnamed and by value; the body only reports "Not implemented"
     MSG_ERROR("Not implemented");
 }
 
@@ -331,17 +458,1441 @@ void multiply(CompFunction<D, T>, FunctionTree<D, T> &inp_a, RepresentableFuncti
  *  Notice that the <bra| position is already complex conjugated.
  *
  */
-template <int D, typename T>
-T compfunc::dot(CompFunction<D, T> bra, CompFunction<D, T> ket) {
-    T dotprod = 0.0;
+template <int D>
+ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) { // sums mrcpp::dot over the bra.Ncomp components
+    ComplexDouble dotprod = 0.0; // running sum of the per-component dot products
     if (bra.data.conj or ket.data.conj) MSG_ERROR("dot with conjugaison not implemented");
     for (int comp = 0; comp < bra.Ncomp; comp++) {
-        dotprod += mrcpp::dot(bra.Comp[comp], ket.Comp[comp]);
+          if (bra.isreal and ket.isreal) {
+              dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]); // both operands real: use the double trees
+          } else {
+              dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]); // NOTE(review): a mixed real/complex pair also lands here and dereferences CompC of the real operand - confirm it is allocated
+          }
+    }
+    if (bra.isreal and ket.isreal) {
+        return dotprod.real(); // real part only; the return type converts it back to ComplexDouble
+    } else {
+        return dotprod;
     }
-    return dotprod;
 }
 
 
-} // namespace compfunc
+/** @brief Project a real function onto component 0 of out as a real tree; does nothing unless T is double. */
+template <int D, typename T>
+void project(CompFunction<D> &out, std::function<double(const Coord<D> &r)> f, double prec) {
+    if (not std::is_same<T, double>::value) return;
+    bool need_to_project = not(out.isShared()) or mpi::share_master(); // a shared function is projected by the share master only
+    out.isreal = 1;
+    out.iscomplex = 0;
+    if (out.Ncomp < 1) out.alloc(0);
+    if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f); // CompD holds tree pointers, so dereference
+    mpi::share_function(out, 0, 123123, mpi::comm_share);
+}
+
+/** @brief Project a complex function onto component 0 of out as a complex tree; does nothing unless T is ComplexDouble. */
+template <int D, typename T>
+void project(CompFunction<D> &out, std::function<ComplexDouble(const Coord<D> &r)> f, double prec) {
+    if (not std::is_same<T, ComplexDouble>::value) return;
+    bool need_to_project = not(out.isShared()) or mpi::share_master(); // a shared function is projected by the share master only
+    out.isreal = 0;
+    out.iscomplex = 1;
+    if (out.Ncomp < 1) out.alloc(0);
+    if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f); // CompC holds tree pointers, so dereference
+    mpi::share_function(out, 0, 123123, mpi::comm_share);
+}
+/** @brief Project a real RepresentableFunction onto component 0 of out, stored as a real tree. */
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec) {
+    bool need_to_project = not(out.isShared()) or mpi::share_master(); // a shared function is projected by the share master only
+    out.isreal = 1;
+    out.iscomplex = 0;
+    if (out.Ncomp < 1) out.alloc(0);
+    if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f); // CompD holds tree pointers, so dereference
+    mpi::share_function(out, 0, 132231, mpi::comm_share);
+}
+/** @brief Project a complex RepresentableFunction onto component 0 of out, stored as a complex tree. */
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec) {
+    bool need_to_project = not(out.isShared()) or mpi::share_master(); // a shared function is projected by the share master only
+    out.isreal = 0;
+    out.iscomplex = 1;
+    if (out.Ncomp < 1) out.alloc(0);
+    if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f); // CompC holds tree pointers, so dereference
+    mpi::share_function(out, 0, 132231, mpi::comm_share);
+}
+
+// MPI_CompFuncVector
+
+
+MPI_CompFuncVector::MPI_CompFuncVector(int N)
+    : std::vector<CompFunction<3>>(N) {
+    vecMRA = defaultCompMRA<3>; // the vector-wide MRA starts out as defaultCompMRA<3>
+    for (int idx = 0; idx < N; ++idx) (*this)[idx].rank = idx; // element idx gets rank idx
+}
+void MPI_CompFuncVector::distribute() {
+    for (int pos = 0; pos < this->size(); ++pos) (*this)[pos].rank = pos; // rank := current position in the vector
+}
+
+
+/** @brief Make a linear combination of functions: Psi_i = sum_j Phi_j * U(j, i)
+ *
+ * Uses "local" representation: treats one node of the union grid at a time.
+ * For each node, all functions are transformed simultaneously
+ * by a dense matrix multiplication.
+ * @param Phi input functions (N) @param U matrix with at least N rows and M columns @param Psi output functions (M)
+ * @param prec a node is refined when its rotated wavelet norm^2 exceeds (prec * scale factor)^2; prec < 0 always refines
+ */
+void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector &Psi, double prec) {
+
+    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
+    // The routine avoids moving data when possible, and manipulates pointers and indices instead.
+    // MPI version does not use OMP yet, Serial version uses OMP
+    // size of input is N, size of output is M
+    int N = Phi.size();
+    int M = Psi.size();
+    if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
+    if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
+
+    // 1) make union tree without coefficients
+    FunctionTree<3> refTree(*Phi.vecMRA);
+    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+    std::vector<double> scalefac_ref;
+    std::vector<double *> coeffVec_ref; // not used!
+    std::vector<int> indexVec_ref;      // serialIx of the nodes
+    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
+    int max_ix;
+    // get a list of all nodes in union tree, identified by their serialIx indices
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // 2) We work with real numbers only. Make real blocks for U matrix
+    bool UhasReal = false;
+    bool UhasImag = false;
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            if (std::abs(U(i, j).real()) > 10*MachineZero) UhasReal = true;
+            if (std::abs(U(i, j).imag()) > 10*MachineZero) UhasImag = true;
+        }
+    }
+
+    IntVector PsihasReIm = IntVector::Zero(2);
+    for (int j = 0; j < N; j++) {
+        if (!mpi::my_func(j)) continue;
+        if (Phi[j].hasReal()) PsihasReIm[0] = 1; // accumulate: a later function must not reset the flag
+        if (Phi[j].hasImag()) PsihasReIm[1] = 1;
+    }
+    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
+    if (not PsihasReIm[0] and not PsihasReIm[1]) {
+        return; // do nothing
+    }
+
+    bool makeReal = (UhasReal and PsihasReIm[0]) or (UhasImag and PsihasReIm[1]);
+    bool makeImag = (UhasReal and PsihasReIm[1]) or (UhasImag and PsihasReIm[0]);
+
+    for (int j = 0; j < M; j++) {
+        if (!mpi::my_func(j)) continue;
+        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
+        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
+    }
+
+    if (not makeReal and not makeImag) { return; }
+
+    int Neff = N;               // effective number of input orbitals
+    int Meff = M;               // effective number of output orbitals
+    if (makeImag or PsihasReIm[1]) Neff = 2 * N; // imaginary input parts are stored at index j + N, so they need their own slots
+    if (makeImag) Meff = 2 * M; // Imag and Real treated independently. We always use real part of U
+
+    IntVector conjMat = IntVector::Zero(Neff);
+    for (int j = 0; j < Neff; j++) {
+        if (!mpi::my_func(j % N)) continue;
+        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
+    }
+    mpi::allreduce_vector(conjMat, mpi::comm_wrk);
+
+    // we make a real matrix = U,  but organized as one or four real blocks
+    // out_r = U_rr*in_r - U_ir*in_i*conjMat
+    // out_i = U_ri*in_r - U_ii*in_i*conjMat
+    // the first index of U is the one used on input Phi
+    DoubleMatrix Ureal(Neff, Meff); // four blocks, for rr ri ir ii
+    for (int j = 0; j < Neff; j++) {
+        for (int i = 0; i < Meff; i++) {
+            // (j % N, i % M) is the element of the complex U this real block entry is taken from
+            if (j < N and i < M) {
+                // real U applied on real Phi
+                Ureal(j, i) = U.real()(j % N, i % M);
+            } else if (j >= N and i >= M) {
+                // real U applied on imag Phi
+                Ureal(j, i) = conjMat[j] * U.real()(j % N, i % M);
+            } else if (j < N and i >= M) {
+                // imag U applied on real Phi
+                Ureal(j, i) = U.imag()(j % N, i % M);
+            } else {
+                // imag U applied on imag Phi
+                Ureal(j, i) = -1.0 * conjMat[j] * U.imag()(j % N, i % M);
+            }
+        }
+    }
+
+    // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
+    BankAccount nodesPhi;             // to put the original nodes
+    BankAccount nodesRotated;         // to put the rotated nodes
+
+    // used for serial only:
+    std::vector<std::vector<double *>> coeffVec(Neff);
+    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+                                                    // orbital given the node index in the reference tree
+    if (serial) {
+
+        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<double> scalefac;
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j]) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j + N]) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+
+        // send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(Phi, refTree, nodesPhi);
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+    }
+
+    // 4) rotate all the nodes
+    IntMatrix split_serial;                             // in the serial case all split are stored in one array
+    std::vector<std::vector<double *>> coeffpVec(Meff); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::map<int, int>> ix2coef(Meff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    int csize;                                          // size of the current coefficients (different for roots and branches)
+    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+    // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    if (serial) {
+        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
+        split_serial.resize(Meff, max_n); // not used in the MPI case
+        for (int n = 0; n < max_n; n++) {
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            ix2coef_ref[node_ix] = n;
+            for (int i = 0; i < Meff; i++) split_serial(i, n) = 1;
+        }
+
+        std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
+
+        // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
+        // NB: the n must be traversed approximately in right order: Thread n may have to wait until some other preceding
+        // n is finished.
+#pragma omp parallel for schedule(dynamic)
+        for (int n = 0; n < max_n; n++) {
+            int csize;
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n
+            std::vector<int> orbjVec; // to remember which orbital correspond to each orbVec.size();
+            if (node2orbVec[node_ix].size() <= 0) continue;
+            csize = sizecoeffW;
+            if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbjVec.push_back(j);
+            }
+
+            // 4b) make a list of rotated orbitals needed for this node
+            // OMP must wait until parent is ready
+            while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) {
+#pragma omp flush
+            };
+
+            std::vector<int> orbiVec;
+            for (int i = 0; i < Meff; i++) { // loop over all rotated orbitals
+                if (not makeReal and i < M) continue;
+                if (not makeImag and i >= M) continue;
+                if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4c) rotate this node
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
+            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = Ureal(orbjVec[j], orbiVec[i]); }
+            }
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+            // HERE IT HAPPENS!
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 4d) store and make rotated node pointers
+            // for now we allocate in buffer, in future could be directly allocated in the final trees
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // make all norms:
+            for (int i = 0; i < orbiVec.size(); i++) {
+                // check if parent must be split
+                if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) {
+                    // mark this node for this orbital for later split
+#pragma omp critical
+                    {
+                        ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size();
+                        coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers
+                    }
+                    // check norms for split
+                    double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
+                    int kstart = 0;
+                    if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
+                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
+                    if (thres < wnorm or prec < 0)
+                        split_serial(orbiVec[i], n) = 1;
+                    else
+                        split_serial(orbiVec[i], n) = 0;
+                } else {
+                    ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used
+                    split_serial(orbiVec[i], n) = 0;          // do not split if parent does not need to be split
+                }
+            }
+            nodeReady[n] = 1;
+#pragma omp critical
+            {
+                // this ensures that rotatedCoeff is not deleted, when getting out of scope
+                rotatedCoeffVec.push_back(std::move(rotatedCoeff));
+            }
+        }
+    } else { // MPI case
+
+        // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
+        std::vector<double> split(Meff, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibilty with bank
+        std::vector<double> needsplit(Meff, 1.0); // which orbitals need splitting
+        BankAccount nodeSplits;
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+
+        // the coefficient block is allocated per node inside the loop, since its row count depends on the node type
+        max_ix++; // largest node index + 1. to store rotated orbitals with different id
+        TaskManager tasks(max_n);
+        for (int nn = 0; nn < max_n; nn++) {
+            int n = tasks.next_task();
+            if (n < 0) break;
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // 4a) make list of orbitals that should split the parent node, i.e. include this node
+            int parentid = parindexVec_ref[n];
+            if (parentid == -1) {
+                // root node, split if output needed
+                for (int i = 0; i < M; i++) {
+                    if (makeReal)
+                        split[i] = 1.0;
+                    else
+                        split[i] = -1.0;
+                }
+                for (int i = M; i < Meff; i++) { // imaginary outputs occupy the slots M..Meff-1
+                    if (makeImag)
+                        split[i] = 1.0;
+                    else
+                        split[i] = -1.0;
+                }
+                csize = sizecoeff;
+            } else {
+                // note that it will wait until data is available
+                nodeSplits.get_data(parentid, Meff, split.data());
+                csize = sizecoeffW;
+            }
+            std::vector<int> orbiVec;
+            std::vector<int> orbjVec;
+            for (int i = 0; i < Meff; i++) {  // loop over rotated orbitals
+                if (split[i] < 0.0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4b) rotate this node
+            DoubleMatrix coeffBlock(csize, Neff); // largest possible used size
+            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
+            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
+
+            // chunk of U, with reorganized indices and separate blocks for real and imag:
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+
+            for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
+                    Un(j, i) = Ureal(orbjVec[j], orbiVec[i]);
+                }
+            }
+
+            // HERE IT HAPPENS
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 4c) find which orbitals need to further refine this node, and store rotated node (after each other while
+            // in cache).
+            for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals
+                needsplit[orbiVec[i]] = -1.0;          // default, do not split
+                // check if this node/orbital needs further refinement
+                double wnorm = 0.0;
+                int kwstart = csize - sizecoeffW; // do not include scaling
+                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
+                if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
+                nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
+            }
+            nodeSplits.put_data(indexVec_ref[n], Meff, needsplit.data());
+        }
+        mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
+    }
+
+    // 5) reconstruct trees using rotated nodes.
+
+    // only serial case can use OMP, because MPI cannot be used by threads
+    if (serial) {
+        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
+        // operation is writing the coefficient into the tree)
+
+#pragma omp parallel for schedule(static)
+        for (int j = 0; j < Meff; j++) {
+            if (coeffpVec[j].size()==0) continue;
+            if (j < M) {
+                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+            } else {
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+            }
+        }
+
+    } else { // MPI case
+
+        for (int j = 0; j < Meff; j++) {
+            if (not mpi::my_func(j % M)) continue;
+            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
+            std::vector<double *> coeffpVec; //
+            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
+            int ix = 0;
+            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
+                std::vector<int> nodeidVec;
+                double *dataVec; // will be allocated by bank
+                nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
+                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
+                int shift = 0;
+                for (int n = 0; n < nodeidVec.size(); n++) {
+                    assert(nodeidVec[n] - max_ix >= 0);                // unrotated nodes have been deleted
+                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
+                    ix2coef[nodeidVec[n] - max_ix] = ix++;
+                    csize = sizecoeffW;
+                    if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff;
+                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
+                    shift += csize;
+                }
+            }
+            if (j < M) {
+                // Real part
+                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+            } else {
+                // Imag part
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+            }
+            for (double *p : pointerstodelete) delete[] p;
+            pointerstodelete.clear();
+        }
+    }
+}
+
+
+void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, double prec) {
+    // In-place variant: Phi is passed as both the input and the output vector.
+    rotate(Phi, U, Phi, prec);
+}
+
+/** @brief Save the nodes of every function with mpi::my_func(j) true in the bank, keyed by (orbital id, serialIx in refTree).
+ * Imaginary parts use orbital id j + N. If sizes > 0 each node sends `sizes` coefficients from the start of its
+ * array; otherwise root nodes send all coefficients and other nodes only their last sizecoeffW (wavelet) ones. */
+void save_nodes(MPI_CompFuncVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+    // scratch vectors handed to makeCoeffVector for each real/imaginary tree
+    std::vector<double *> coeffVec;
+    std::vector<double> scalefac;
+    std::vector<int> indexVec;    // SerialIx of the node in refOrb
+    std::vector<int> parindexVec; // SerialIx of the parent node
+    int N = Phi.size();
+    int max_ix;
+    for (int j = 0; j < N; j++) {
+        if (not mpi::my_func(j)) continue;
+        // make vector with all coef address and their index in the union grid
+        if (Phi[j].hasReal()) {
+            Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+            int max_n = indexVec.size();
+            // send node coefs from Phi[j] to bank
+            // except for the root nodes, only wavelets are sent
+            for (int i = 0; i < max_n; i++) {
+                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
+                int csize = sizecoeffW;
+                if (parindexVec[i] < 0) csize = sizecoeff;
+                if (sizes > 0) { // fixed size
+                    account.put_nodedata(j, indexVec[i], sizes, coeffVec[i]);
+                } else {
+                    account.put_nodedata(j, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
+                }
+            }
+        }
+        // Imaginary parts are considered as orbitals with an orbid shifted by N
+        if (Phi[j].hasImag()) {
+            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+            int max_n = indexVec.size();
+            // send node coefs from Phi[j] to bank
+            for (int i = 0; i < max_n; i++) {
+                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
+                // NB: the orbital id is shifted (j + N) so these nodes do not collide with the nodes from the real part
+                int csize = sizecoeffW;
+                if (parindexVec[i] < 0) csize = sizecoeff;
+                if (sizes > 0) { // fixed size
+                    account.put_nodedata(j + N, indexVec[i], sizes, coeffVec[i]);
+                } else {
+                    account.put_nodedata(j + N, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
+                }
+            }
+        }
+    }
+}
+
+/** @brief Multiply all orbitals with a function
+ *
+ * @param Phi: orbitals to multiply
+ * @param f  : function to multiply
+ *
+ * Computes the product of each orbital with a function
+ * in parallel using a local representation.
+ * Input trees are extended by one scale at most.
+ */
+MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) {
+
+    int N = Phi.size();
+    const int D = 3;
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
+
+    // 1a) extend grid where f is large (around nuclei)
+    // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree?
+
+    for (int i = 0; i < N; i++) {
+        if (!mpi::my_func(i)) continue;
+        int irefine = 0;
+        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
+        irefine = 0;
+        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
+    }
+
+    // 1b) make union tree without coefficients
+    FunctionTree<D> refTree(*Phi.vecMRA);
+    // refine_grid(refTree, f); //to test
+    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
+
+    int kp1 = refTree.getKp1();
+    int kp1_d = refTree.getKp1_d();
+    int nCoefs = refTree.getTDim() * kp1_d;
+
+    IntVector PsihasReIm = IntVector::Zero(2);
+    for (int i = 0; i < N; i++) {
+        if (!mpi::my_func(i)) continue;
+        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
+    }
+    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
+    MPI_CompFuncVector out(N);
+    MPI_CompFuncVector outtest(N);
+    if (not PsihasReIm[0] and not PsihasReIm[1]) {
+        return out; // do nothing
+    }
+
+    int Neff = N;
+    if (PsihasReIm[1]) Neff = 2 * N; // Imag and Real treated independently. We always treat real part of Psi
+
+    std::vector<double> scalefac_ref;
+    std::vector<double *> coeffVec_ref; // not used!
+    std::vector<int> indexVec_ref;      // serialIx of the nodes
+    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
+    std::vector<MWNode<D> *> refNodes;  // pointers to nodes
+    int max_ix;
+    // get a list of all nodes in union tree, identified by their serialIx indices
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes);
+    int max_n = indexVec_ref.size();
+    std::map<int, int> ix2n; // for a given serialIx, give index in vectors
+    for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn;
+
+    // 2a) send own nodes to bank, identifying them through the serialIx of refTree
+    BankAccount nodesPhi;        // to put the original nodes
+    BankAccount nodesMultiplied; // to put the multiplied nodes
+
+    // used for serial only:
+    std::vector<std::vector<double *>> coeffVec(Neff);
+    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+                                                    // orbital given the node index in the reference tree
+    if (serial) {
+        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<double> scalefac;
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j]) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j + N]) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else {
+        save_nodes(Phi, refTree, nodesPhi, nCoefs);
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+    }
+
+    // 2b) save Func in bank and remove its coefficients
+    if (Func != nullptr and !serial) {
+        // put Func in local representation if not already done
+        if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); }
+    }
+
+    // 3) mutiply for each node
+    std::vector<std::vector<double *>> coeffpVec(Neff); // to put pointers to the multiplied coefficient for each orbital in serial case
+    std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
+    std::vector<std::map<int, int>> ix2coef(Neff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    DoubleVector NODEP = DoubleVector::Zero(nCoefs);
+    DoubleVector NODEF = DoubleVector::Zero(nCoefs);
+
+    if (serial) {
+#pragma omp parallel for schedule(dynamic)
+        for (int n = 0; n < max_n; n++) {
+            MWNode<D> node(*(refNodes[n]), false);
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+
+            // 3a) make values for f at this node
+            // 3a1) get coordinates of quadrature points for this node
+            Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs);
+            double fval[nCoefs];
+            Coord<D> r;
+            double *originalCoef = nullptr;
+            MWNode<3> *Fnode = nullptr;
+            if (Func == nullptr) {
+                node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+                for (int j = 0; j < nCoefs; j++) {
+                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                    fval[j] = f.evalf(r);
+                }
+            } else {
+                Fnode = Func->real().findNode(node.getNodeIndex());
+                if (Fnode == nullptr) {
+                    node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+                    for (int j = 0; j < nCoefs; j++) {
+                        for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                        fval[j] = f.evalf(r);
+                    }
+                } else {
+                    originalCoef = Fnode->getCoefs();
+                    for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j];
+                    Fnode->attachCoefs(fval); // note that each thread has its own copy
+                    Fnode->mwTransform(Reconstruction);
+                    Fnode->cvTransform(Forward);
+                }
+            }
+            DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size());
+            int i = 0;
+            // 3b) fetch all orbitals at this node
+            std::vector<int> orbjVec;            // to remember which orbital correspond to each orbVec.size();
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                orbjVec.push_back(j);
+                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k];
+                // 3c) transform to grid
+                node.attachCoefs(&(multipliedCoeff(0, i)));
+                node.mwTransform(Reconstruction);
+                node.cvTransform(Forward);
+                // 3d) multiply
+                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication?
+                // 3e) transform back to mw
+                node.cvTransform(Backward);
+                node.mwTransform(Compression);
+                i++;
+            }
+            if (Func != nullptr and originalCoef != nullptr) {
+                // restablish original values
+                Fnode->attachCoefs(originalCoef);
+            }
+
+            // 3f) save multiplied nodes
+            for (int i = 0; i < orbjVec.size(); i++) {
+#pragma omp critical
+                {
+                    ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size();
+                    coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers
+                }
+            }
+#pragma omp critical
+            {
+                // this ensures that multipliedCoeff is not deleted, when getting out of scope
+                multipliedCoeffVec.push_back(std::move(multipliedCoeff));
+            }
+            node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
+        }
+    } else {
+        // MPI
+        int count1 = 0;
+        int count2 = 0;
+        TaskManager tasks(max_n);
+        for (int nn = 0; nn < max_n; nn++) {
+            int n = tasks.next_task();
+            if (n < 0) break;
+            MWNode<D> node(*(refNodes[n]), false);
+            // 3a) make values for f
+            // 3a1) get coordinates of quadrature points for this node
+            Eigen::MatrixXd pts;           // Eigen::Zero(D, nCoefs);
+            node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+            double fval[nCoefs];
+            Coord<D> r;
+            MWNode<D> Fnode(*(refNodes[n]), false);
+            if (Func == nullptr) {
+                for (int j = 0; j < nCoefs; j++) {
+                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                    fval[j] = f.evalf(r);
+                }
+            } else {
+                int nIdx = Func->real().getIx(node.getNodeIndex());
+                count1++;
+                if (nIdx < 0) {
+                    // use the function f instead of Func
+                    count2++;
+                    for (int j = 0; j < nCoefs; j++) {
+                        for (int d = 0; d < D; d++) r[d] = pts(d, j);
+                        fval[j] = f.evalf(r);
+                    }
+                } else {
+                    Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank
+                    Fnode.attachCoefs(fval);
+                    Fnode.mwTransform(Reconstruction);
+                    Fnode.cvTransform(Forward);
+                }
+            }
+
+            // 3b) fetch all orbitals at this node
+            DoubleMatrix coeffBlock(nCoefs, Neff); // largest possible used size
+            std::vector<int> orbjVec;
+            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
+            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
+            DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size());
+            // 3c) transform to grid
+            for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ?
+                // TODO: select only nodes that are end nodes?
+                node.attachCoefs(coeffBlock.col(j).data());
+                node.mwTransform(Reconstruction);
+                node.cvTransform(Forward);
+                // 3d) multiply
+                double *coefs = node.getCoefs();
+                for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i];
+                // 3e) transform back to mw
+                node.cvTransform(Backward);
+                node.mwTransform(Compression);
+                // 3f) save multiplied nodes
+                nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs);
+            }
+            node.attachCoefs(nullptr);  // to avoid deletion of valid multipliedCoeff by destructor
+            Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
+        }
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 5) reconstruct trees using multiplied nodes.
+
+    // only serial case can use OMP, because MPI cannot be used by threads
+    if (serial) {
+        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
+        // operation is writing the coefficient into the tree)
+
+#pragma omp parallel for schedule(static)
+        for (int j = 0; j < Neff; j++) {
+            if (j < N) {
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(NUMBER::Real);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    // 6) reconstruct trees from end nodes
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
+                }
+            } else {
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(NUMBER::Imag);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    out[j].imag().mwTransform(BottomUp);
+                    out[j].imag().calcSquareNorm();
+                }
+            }
+        }
+    } else {
+        for (int j = 0; j < Neff; j++) {
+            if (not mpi::my_func(j % N) and not all) continue;
+            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
+            std::vector<double *> coeffpVec; //
+            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx in refTree
+            int ix = 0;
+            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+
+            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
+                std::vector<int> nodeidVec;
+                double *dataVec; // will be allocated by bank
+                nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank);
+                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
+                int shift = 0;
+                for (int n = 0; n < nodeidVec.size(); n++) {
+                    assert(nodeidVec[n] - max_ix >= 0);                // unmultiplied nodes have been deleted
+                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
+                    ix2coef[nodeidVec[n] - max_ix] = ix++;
+                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
+                    shift += nCoefs;
+                }
+            }
+            if (j < N) {
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(NUMBER::Real);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    // 6) reconstruct trees from end nodes
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
+                    out[j].real().resetEndNodeTable();
+                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
+                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
+                }
+            } else {
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(NUMBER::Imag);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    out[j % N].imag().mwTransform(BottomUp);
+                    out[j % N].imag().calcSquareNorm();
+                    // out[j % N].imag().crop(prec, 1.0, false);
+                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
+                }
+            }
+
+            for (double *p : pointerstodelete) delete[] p;
+            pointerstodelete.clear();
+        }
+    }
+    return out;
+}
+
+/** @brief Assign the given MRA to defaultCompMRA<3>
+ *
+ * @param MRA: pointer that is stored in defaultCompMRA<3>; only the pointer
+ *             is assigned, the MultiResolutionAnalysis object is not copied
+ */
+void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA) {
+    defaultCompMRA<3> = MRA;
+}
+
+/** @brief Element-wise dot products result[i] = dot(Bra[i], Ket[i])
+ *
+ * @param Bra: bra functions; entry i is freed after use when mpi::my_func(i) is false
+ * @param Ket: ket functions; must hold at least Bra.size() entries
+ * @return vector of length Bra.size(), combined over mpi::comm_wrk with allreduce_vector
+ *
+ * Aborts if Ket is shorter than Bra, or if Bra[i] and Ket[i] do not have the
+ * same ownership.
+ */
+ComplexVector dot(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+    int N = Bra.size();
+    // Ket[i] is read for every i < N: refuse a shorter Ket instead of indexing out of range
+    if (Ket.size() < Bra.size()) { MSG_ABORT("Size mismatch"); }
+    ComplexVector result = ComplexVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        // The bra is sent to the owner of the ket
+        if (my_func(Bra[i]) != my_func(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
+        result[i] = dot(Bra[i], Ket[i]);
+        if (not mrcpp::mpi::my_func(i)) Bra[i].free(NUMBER::Total);
+    }
+    mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
+    return result;
+}
+
+/** @brief Compute the Löwdin orthonormalization matrix
+ *
+ * @param Phi: orbitals to orthonormalize
+ * @return the overlap matrix of Phi raised to the power -1/2, i.e. S^(-1/2)
+ */
+ComplexMatrix calc_lowdin_matrix(MPI_CompFuncVector &Phi) {
+    // overlap of Phi with itself, then its inverse square root
+    ComplexMatrix overlap = calc_overlap_matrix(Phi);
+    return math_utils::hermitian_matrix_pow(overlap, -1.0 / 2.0);
+}
+
+/** @brief Compute the overlap matrix S_ij = <BraKet_i|BraKet_j> of one function vector
+ *
+ * @param BraKet: functions used both as bra and as ket
+ * @return N x N matrix with N = BraKet.size(). Only the lower triangle is
+ *         assembled explicitly; the upper triangle is set to its complex conjugate.
+ *
+ * Real and imaginary parts are handled as 2N real trees (index j for the real
+ * part, j + N for the imaginary part). For every node of the union tree the
+ * coefficients of all trees present at that node are gathered into one block,
+ * and the block is multiplied with its own transpose.
+ *
+ * Serial (wrk_size == 1): coefficient pointers are used directly and the node
+ * loop is OpenMP parallel.
+ * MPI: nodes are exchanged through a bank, rank r treats the nodes n with
+ * n % wrk_size == r, and the partial results are combined over comm_wrk.
+ */
+ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
+    // NB: must be spinseparated at this point!
+
+    int N = BraKet.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, N);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
+
+    // 1) make union tree without coefficients
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double> scalefac;
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    int max_ix;                       // largest index value (not used here)
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+                                                     // the orbital given the node index in the reference tree
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+    mrcpp::BankAccount nodesBraKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(BraKet, refTree, nodesBraKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+
+#pragma omp parallel for schedule(dynamic) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+        std::vector<int> orbVec;       // identifies which orbitals use this node
+        // nodes with negative parent index contribute all coefficients, the others only the wavelet part
+        int csize = (parindexVec_ref[n] < 0) ? sizecoeff : sizecoeffW;
+
+        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+        if (serial) {
+            // The maps are shared between the threads and must only be read here:
+            // std::map::operator[] inserts missing keys, which is a data race inside
+            // the parallel loop. Use find()/at() for pure lookups instead.
+            auto orbsIt = node2orbVec.find(node_ix);
+            if (orbsIt == node2orbVec.end() or orbsIt->second.empty()) continue;
+            const std::vector<int> &orbsAtNode = orbsIt->second;
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, orbsAtNode.size());
+            for (int j : orbsAtNode) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j].at(node_ix);
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbVec.push_back(j);
+            }
+            // orbVec is non-empty here because orbsAtNode is non-empty
+            DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+            S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+            for (int i = 0; i < orbVec.size(); i++) {
+                for (int j = 0; j < orbVec.size(); j++) {
+                    // skip pairs whose data.n1[0] tags are both nonzero and different
+                    if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                        BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                        BraKet[orbVec[j] % N].data.n1[0] != 0)
+                        continue;
+                    // several threads may accumulate into the same element of Sreal
+                    double &Srealij = Sreal(orbVec[i], orbVec[j]);
+                    double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                    Srealij += Stempij;
+                }
+            }
+        } else { // MPI case
+            DoubleMatrix coeffBlock(csize, 2 * N);
+            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                            continue;
+                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+    // conjMat[i] is -1 if function i is conjugated, +1 otherwise; set by the
+    // rank for which my_func is true and then shared with all ranks
+    IntVector conjMat = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
+
+    // combine the four real blocks (rr, ri, ir, ii) into the complex matrix
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j <= i; j++) {
+            S.real()(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N);
+            S.imag()(i, j) = conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
+            if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetry
+        }
+    }
+
+    // Assumes linearity: result is sum of all nodes contributions
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+
+/** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
+ *
+ * @param Bra: bra functions (N = Bra.size()); the reference grid is built from these only
+ * @param Ket: ket functions (M = Ket.size())
+ * @return N x M complex overlap matrix
+ *
+ * Real and imaginary parts are handled as real trees (index j for the real
+ * part, j + N resp. j + M for the imaginary part). For every node of the
+ * reference tree the Bra and Ket coefficients present at that node are
+ * gathered into two blocks, and their product is accumulated.
+ *
+ * Serial (wrk_size == 1): coefficient pointers are used directly and the node
+ * loop is OpenMP parallel.
+ * MPI: nodes are exchanged through banks, rank r treats the nodes n with
+ * n % wrk_size == r, and the partial results are combined over comm_wrk.
+ */
+ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+    mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
+
+    MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
+
+    int N = Bra.size();
+    int M = Ket.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, M);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * M); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+
+    // 1) make union tree without coefficients for Bra (supposed smallest)
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk);
+    // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero.
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    std::vector<double> scalefac;
+    int max_ix;
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+    max_ix++;
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVecBra(2 * N);
+    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeBra(2 * N); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    std::vector<std::vector<double *>> coeffVecKet(2 * M);
+    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeKet(2 * M); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    mrcpp::BankAccount nodesBra;
+    mrcpp::BankAccount nodesKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        // TODO? : do not copy coefficients, but use directly the pointers
+        // could OMP parallelize, but is fast anyway
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Bra[j].hasReal()) {
+                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeBra[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecBra[ix].push_back(j);
+                }
+            }
+            if (Bra[j].hasImag()) {
+                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeBra[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecBra[ix].push_back(j + N);
+                }
+            }
+        }
+        for (int j = 0; j < M; j++) {
+            if (Ket[j].hasReal()) {
+                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeKet[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecKet[ix].push_back(j);
+                }
+            }
+            if (Ket[j].hasImag()) {
+                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeKet[j + M][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecKet[ix].push_back(j + M);
+                }
+            }
+        }
+
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(Bra, refTree, nodesBra);
+        save_nodes(Ket, refTree, nodesKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+    // The h2_mag_lda test has been seen to fail sometimes with schedule(dynamic).
+    // NOTE(review): this loop used to call std::map::operator[] on the shared maps,
+    // which inserts missing keys concurrently; that race may have been the cause.
+    // Confirm before switching back to schedule(dynamic).
+#pragma omp parallel for schedule(static) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
+        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
+        // nodes with negative parent index contribute all coefficients, the others only the wavelet part
+        int csize = (parindexVec_ref[n] < 0) ? sizecoeff : sizecoeffW;
+        if (serial) {
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+
+            // The maps are shared between the threads and must only be read here:
+            // std::map::operator[] inserts missing keys (a reference node need not be
+            // present in any Ket function), which is a data race inside the parallel
+            // loop. Use find()/at() for pure lookups instead.
+            auto braIt = node2orbVecBra.find(node_ix);
+            auto ketIt = node2orbVecKet.find(node_ix);
+            // a contribution needs at least one Bra and one Ket function at this node
+            if (braIt == node2orbVecBra.end() or ketIt == node2orbVecKet.end()) continue;
+            const std::vector<int> &braAtNode = braIt->second;
+            const std::vector<int> &ketAtNode = ketIt->second;
+            if (braAtNode.empty() or ketAtNode.empty()) continue;
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlockBra(csize, braAtNode.size());
+            DoubleMatrix coeffBlockKet(csize, ketAtNode.size());
+
+            for (int j : braAtNode) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeBra[j].at(node_ix);
+                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
+                orbVecBra.push_back(j);
+            }
+            for (int j : ketAtNode) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeKet[j].at(node_ix);
+                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
+                orbVecKet.push_back(j);
+            }
+
+            // both orbVecBra and orbVecKet are non-empty here
+            DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+            S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+            for (int i = 0; i < orbVecBra.size(); i++) {
+                for (int j = 0; j < orbVecKet.size(); j++) {
+                    // skip pairs whose data.n1[0] tags are both nonzero and different
+                    if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
+                        Bra[orbVecBra[i] % N].data.n1[0] != 0 and
+                        Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                        continue;
+                    // must ensure that threads are not competing
+                    double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
+                    double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                    Srealij += Stempij;
+                }
+            }
+        } else {
+
+            DoubleMatrix coeffBlockBra(csize, 2 * N);
+            DoubleMatrix coeffBlockKet(csize, 2 * M);
+            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
+            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
+            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
+                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
+                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                for (int i = 0; i < orbVecBra.size(); i++) {
+                    for (int j = 0; j < orbVecKet.size(); j++) {
+                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
+                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
+                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                            continue;
+                        Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    // conjMatBra[i] / conjMatKet[i] is -1 if the function is conjugated, +1 otherwise;
+    // set by the rank for which my_func is true and then shared with all ranks
+    IntVector conjMatBra = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(Bra[i])) continue;
+        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
+    IntVector conjMatKet = IntVector::Zero(M);
+    for (int i = 0; i < M; i++) {
+        if (!mrcpp::mpi::my_func(Ket[i])) continue;
+        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
+
+    // combine the four real blocks (rr, ri, ir, ii) into the complex matrix
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            S.real()(i, j) = Sreal(i, j) + conjMatBra[i] * conjMatKet[j] * Sreal(i + N, j + M);
+            S.imag()(i, j) = conjMatKet[j] * Sreal(i, j + M) - conjMatBra[i] * Sreal(i + N, j);
+        }
+    }
+
+    // 4) collect results from all MPI. Linearity: result is sum of all node contributions
+
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+
+/** @brief Compute the overlap matrix of the absolute values of the functions, S_ij = <|phi_i| | |phi_j|>
+ *  @param[in] BraKet: input functions; absolute values are taken coefficient-wise on the union grid. Returns a symmetric N x N matrix.
+ */
+DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
+    int N = BraKet.size();
+    DoubleMatrix S = DoubleMatrix::Zero(N, N);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
+
+    // 1) make union tree without coefficients
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();        // all coefficients of a node
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); // wavelet part only
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double> scalefac;
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    int max_ix;                       // largest index value (not used here)
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+                                                     // the orbital given the node index in the reference tree
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+    mrcpp::BankAccount nodesBraKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j + N (imaginary block) from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(BraKet, refTree, nodesBraKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+
+    int ibank = 0;
+#pragma omp parallel for schedule(dynamic) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        int csize;
+        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+        std::vector<int> orbVec;       // identifies which orbitals use this node
+        if (serial and node2orbVec.count(node_ix) == 0) continue; // count(): never insert into the shared map inside the parallel loop
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+        // In the serial case we copy the coefficients into coeffBlock. In the MPI case coeffBlock is provided by the bank
+        if (serial) {
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, node2orbVec.at(node_ix).size());
+            for (int j : node2orbVec.at(node_ix)) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j].at(node_ix); // read-only lookup, entry was stored in step 2
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbVec.push_back(j);
+            }
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock = coeffBlock.cwiseAbs();
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                            continue; // different non-zero n1 values: no contribution
+                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
+                        double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                        Srealij += Stempij;
+                    }
+                }
+            }
+        } else { // MPI case
+            DoubleMatrix coeffBlock(csize, 2 * N);
+            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                coeffBlock = coeffBlock.cwiseAbs();
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                            continue; // different non-zero n1 values: no contribution
+                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    IntVector conjMat = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(i)) continue;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
+
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j <= i; j++) {
+            S(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N) + conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
+            S(j, i) = S(i, j);
+        }
+    }
+
+    // Assumes linearity: result is sum of all nodes contributions
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+    return S;
+}
+
+/** @brief Orthogonalize the functions in Bra against all orbitals in Ket
+ *  Bra[j] += rotatedKet[j], built from Ket with rmat(i,j) = -conj(S(j,i)) / ||Ket[i]||^2, S = calc_overlap_matrix(Bra, Ket)
+ */
+void orthogonalize(double prec, MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+    // TODO: generalize for cases where Ket functions are not orthogonal to each other?
+    ComplexMatrix S = calc_overlap_matrix(Bra, Ket);
+    int N = Bra.size();
+    int M = Ket.size();
+    DoubleVector Ketnorms = DoubleVector::Zero(M);
+    for (int i = 0; i < M; i++) {
+        if (mpi::my_func(Ket[i])) Ketnorms(i) = Ket[i].squaredNorm(); // each rank fills in only the functions it owns
+    }
+    mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
+    ComplexMatrix rmat = ComplexMatrix::Zero(M, N);
+    for (int j = 0; j < N; j++) {
+        for (int i = 0; i < M; i++) {
+            rmat(i, j) = 0.0 - S.conjugate()(j, i) / Ketnorms(i);
+        }
+    }
+    MPI_CompFuncVector rotatedKet(N);
+    rotate(Ket, rmat, rotatedKet, prec / M);
+    for (int j = 0; j < N; j++) {
+        if (mpi::my_func(Bra[j])) Bra[j].add(1.0, rotatedKet[j]); // qualified like the call above; my_func is defined as mpi::my_func
+    }
+}
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 8c196b145..8ba8088ff 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -24,7 +24,7 @@ struct CompFunctionData {
     // additional data that describe each component (defined by user):
     // occupancy, quantum number, norm, etc.
     //Note: defined with fixed size to ease copying and MPI send
-    int n1[4]{0,0,0,0};
+    int n1[4]{0,0,0,0}; // 0: neutral. values 1 and 2 are orthogonal to each other (product = 0)
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
     int n4[4]{0,0,0,0};
@@ -37,17 +37,19 @@ struct CompFunctionData {
 };
 
 
-template <int D, typename T> class CompFunction {
+template <int D> class CompFunction {
 public:
-    CompFunction();
     CompFunction(MultiResolutionAnalysis<D> &mra);
-    CompFunction(const CompFunction<D, T> &compfunc);
-    CompFunction(CompFunction<D, T> && compfunc);
-
-    ComplexFunction *CPXfct; // temporary solution
+    CompFunction();
+    CompFunction(int n1);
+    CompFunction(int n1, bool share);
+    CompFunction(const CompFunction<D> &compfunc);
+    CompFunction(CompFunction<D> && compfunc);
+    //    ComplexFunction *CPXfct; // temporary solution
 
 
-    FunctionTree<D, T> *Comp[4];
+    FunctionTree<D, double> *CompD[4];
+    FunctionTree<D, ComplexDouble> *CompC[4];
 
     std::string name;
 
@@ -59,54 +61,90 @@ template <int D, typename T> class CompFunction {
     int& iscomplex = data.iscomplex; // T=DoubleComplex
     int* Nchunks = data.Nchunks; // number of chunks of each component tree
     // ComplexFunctions are only defined for D=3
-    template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
-    CompFunction(ComplexFunction cplxfunc);
-    template <int D_ = 3, typename std::enable_if<D_ == 3, int>::type = 0>
-    operator ComplexFunction() const;
-    CompFunction<D, T> &operator=(const CompFunction<D, T> &func);
+    // template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
+     //CompFunction(ComplexFunction cplxfunc);
+    // template <int D_ = 3, typename std::enable_if<D_ == 3, int>::type = 0>
+     //operator ComplexFunction() const;
+    CompFunction<D> &operator=(const CompFunction<D> &compfunc);
     // CompFunction destructor
     ~CompFunction() {
         for (int i = 0; i < Ncomp; i++) {
-            delete Comp[i];
+            delete CompD[i];
+            delete CompC[i];
         }
     }
 
     double norm() const;
     double squaredNorm() const;
     void alloc(int i);
-    void add(T c, CompFunction<D, T> inp);
+    void setReal(FunctionTree<D, double> *tree, int i = 0);
+    void setRank(int i) {rank = i;};
+    int getRank() {return rank;};
+    void add(ComplexDouble c, CompFunction<D> inp);
 
     int crop(double prec);
-    void rescale(T c);
+    void rescale(ComplexDouble c);
+    void free();
+    int getSizeNodes() const;
+    int getNNodes() const;
 
     //NB: All tbelow should be revised. Now only for backwards compatibility to ComplexFunction class
     bool hasReal()  const {return isreal;}
     bool hasImag()  const {return iscomplex;}
     bool isShared() const {return data.shared;}
+    bool conjugate() const {return data.conj;}
 
-    FunctionTree<D, T> &real() {return *Comp[0];}
-    FunctionTree<D, T> &imag() {return *Comp[0];}
-    void free(int type) {delete Comp[0]; Comp[0] = nullptr;}
+    FunctionTree<D, double> &real() {return *CompD[0];}
+    FunctionTree<D, double> &imag() {return *CompD[0];} //does not make sense
+    const FunctionTree<D, double> &real() const {return *CompD[0];}
+    const FunctionTree<D, double> &imag() const {return *CompD[0];} //does not make sense
+    void free(int type) {delete CompD[0]; CompD[0] = nullptr; delete CompC[0]; CompC[0] = nullptr;}
     void flushFuncData();
 };
 
-template <int D, typename T = double> using CompFunctionVector = std::vector<CompFunction<D, T> *>;
+template <int D> using CompFunctionVector = std::vector<CompFunction<D> *>;
 
-namespace compfunc {
-template <int D, typename T>
-void deep_copy(CompFunction<D, T> *out, const CompFunction<D, T> &inp);
-template <int D, typename T>
-void add(CompFunction<D, T> &out, T a, CompFunction<D, T> inp_a, T b, CompFunction<D, T> inp_b, double prec);
-template <int D, typename T>
-void linear_combination(CompFunction<D, T> &out, const std::vector<T> &c, std::vector<CompFunction<D, T>> &inp, double prec);
-template <int D, typename T>
-void multiply(CompFunction<D, T> &out, CompFunction<D, T> inp_a, CompFunction<D, T> inp_b, double prec, bool absPrec, bool useMaxNorms);
+template <int D>
+void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp);
+template <int D>
+void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp);
+template <int D>
+void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec);
+template <int D>
+void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec);
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms);
 template <int D, typename T>
-void multiply(CompFunction<D, T> &out, CompFunction<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
 template <int D, typename T>
-void multiply(CompFunction<D, T> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+template <int D>
+ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
 template <int D, typename T>
-T dot(CompFunction<D, T> bra, CompFunction<D, T> ket);
+void project(CompFunction<D> &out, std::function<T(const Coord<D> &r)> f, double prec);
+template <int D>
+void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec);
+template <int D>
+void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
+
+class MPI_CompFuncVector : public std::vector<CompFunction<3>> {
+public:
+    MPI_CompFuncVector(int N = 0);
+    MultiResolutionAnalysis<3> *vecMRA;
+    void distribute();
+};
+
+void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
+void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector &Psi, double prec = -1.0);
+void save_nodes(MPI_CompFuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
+MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
+void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
+ComplexVector dot(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
+ComplexMatrix calc_lowdin_matrix(MPI_CompFuncVector &Phi);
+ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet);
+ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
+DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet);
+void orthogonalize(double prec, MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
+
 
-} // namespace compfunc
 } // namespace mrcpp
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
index f7f6a11d9..63d855727 100644
--- a/src/utils/ComplexFunction.cpp
+++ b/src/utils/ComplexFunction.cpp
@@ -1,3 +1,4 @@
+#include "ComplexFunction.h"
 #include "Bank.h"
 #include "Printer.h"
 #include "Timer.h"
@@ -7,62 +8,12 @@
 #include "treebuilders/project.h"
 #include "trees/FunctionNode.h"
 #include "treebuilders/add.h"
-#include "ComplexFunction.h"
-#include "CompFunction.h"
 
 using mrcpp::Timer;
 
 namespace mrcpp {
 
 MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
-  /*    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
-    ComplexFunction::ComplexFunction(CompFunction<3, T> cfunc)
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(false))
-        , rank(cfunc.rank) {
-    setSpin(cfunc.data.n1[0]);
-    setOcc(cfunc.data.n2[0]);
-    setReal(cfunc.Comp[0]);
-}
-       template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
-     ComplexFunction::ComplexFunction( CompFunction<3, T> cfunc) {
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(false))
-        , rank(cfunc.rank) {
-    setSpin(cfunc.data.n1[0]);
-    setOcc(cfunc.data.n2[0]);
-    setReal(cfunc.Comp[0]->Real());
-    setImag(cfunc.Comp[0]->Imag());
-    }*/
-
-   //  template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
-    ComplexFunction::ComplexFunction(CompFunction<3, double>& cfunc)
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(false))
-        , rank(cfunc.rank) {
-    setSpin(cfunc.data.n1[0]);
-    setOcc(cfunc.data.n2[0]);
-    setReal(cfunc.Comp[0]);
-}
-    //    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
- //     ComplexFunction::ComplexFunction(CompFunction<3, double> && cfunc)
-//          : funcMRA(defaultMRA)
-//          , func_ptr(std::make_shared<TreePtr>(false))
-//          , rank(cfunc.rank) {
-//      setSpin(cfunc.data.n1[0]);
-//      setOcc(cfunc.data.n2[0]);
-//      setReal(cfunc.Comp[0]);
-//  }
-/*  template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
-     ComplexFunction::ComplexFunction(const CompFunction<3, T>& cfunc) {
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(false))
-        , rank(cfunc.rank) {
-    setSpin(cfunc.data.n1[0]);
-    setOcc(cfunc.data.n2[0]);
-    setReal(cfunc.Comp[0]->Real());
-    setImag(cfunc.Comp[0]->Imag());
-    }*/
 
 ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
         : funcMRA(defaultMRA)
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index 2a998617c..c43d3475c 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -21,9 +21,8 @@ class MPI_FuncVector;
 
 namespace mrcpp {
 
-template <int D, typename T> class CompFunction;
 class BankAccount;
-template <int D, typename T> class FunctionTree;
+  template <int D, typename T> class FunctionTree;
 template <int D> class MultiResolutionAnalysis;
 
 using ComplexDouble = std::complex<double>;
@@ -111,23 +110,8 @@ class TreePtr final {
 
 class ComplexFunction {
 public:
-    //  template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
-  //ComplexFunction(CompFunction<3, double> cfunc) ;
-   //   template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
-      ComplexFunction(CompFunction<3, double>& cfunc);
-  //    template <typename T, typename = std::enable_if_t<std::is_same<T, double>::value>>
-  //  ComplexFunction(CompFunction<3, double>&& cfunc);
-    /* template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
-  ComplexFunction( CompFunction<3, T> cfunc);
-  template <typename T, typename = std::enable_if_t<std::is_same<T, ComplexDouble>::value>>
-  ComplexFunction(const  CompFunction<3, T> &cfunc) ;
-     ComplexFunction(CompFunction<3, double> cfunc);
-    ComplexFunction(CompFunction<3,ComplexDouble> cfunc);
-    ComplexFunction(const CompFunction<3, double> &cfunc);
-    ComplexFunction(const CompFunction<3, ComplexDouble> &cfunc);*/
     ComplexFunction(std::shared_ptr<TreePtr> funcptr);
     ComplexFunction(const ComplexFunction &func);
-    ComplexFunction(ComplexFunction && func);
     ComplexFunction(int spin = 0, int occ = -1, int rank = -1, bool share = false);
     ComplexFunction &operator=(const ComplexFunction &func);
     ComplexFunction paramCopy() const;
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 89ce66baa..8d3fa3dfb 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -266,10 +266,20 @@ bool mpi::my_orb(ComplexFunction orbj) {
     return my_orb(orbj.getRank());
 }
 
+/** @brief Test if function belongs to this MPI rank */
+bool mpi::my_func(int j) {
+    return ((j) % mpi::wrk_size == mpi::wrk_rank) ? true : false;
+}
+
+/** @brief Test if function belongs to this MPI rank */
+bool mpi::my_func(CompFunction<3> func) {
+    return my_func(func.rank);
+}
+
 /** @brief Free all function pointers not belonging to this MPI rank */
-void mpi::free_foreign(MPI_FuncVector &Phi) {
-    for (ComplexFunction &i : Phi) {
-        if (not mpi::my_orb(i)) i.free(NUMBER::Total);
+void mpi::free_foreign(MPI_CompFuncVector &Phi) {
+    for (CompFunction<3> &i : Phi) {
+        if (not mpi::my_func(i)) i.alloc(0);
     }
 }
 
@@ -355,30 +365,31 @@ void mpi::recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
 }
 
 // send a component function with MPI
-template <typename T>
-void mpi::send_function(CompFunction<3, T> &func, int dst, int tag, MPI_Comm comm) {
+void mpi::send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     for (int i = 0; i < func.data.Ncomp; i++) {
         //make sure that Nchunks is up to date
-        func.Nchunks[i] = func.Comp[i]->getNChunks();
+        if (func.isreal) func.Nchunks[i] = func.CompD[i]->getNChunks();
+        else func.Nchunks[i] = func.CompC[i]->getNChunks();
     }
     MPI_Send(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, dst, 0, comm);
     for (int i = 0; i < func.data.Ncomp; i++) {
-        mrcpp::send_tree(*func.Comp[i], dst, tag, comm, func.Nchunks[i]);
+        if (func.isreal) mrcpp::send_tree(*func.CompD[i], dst, tag, comm, func.Nchunks[i]);
+        else mrcpp::send_tree(*func.CompC[i], dst, tag, comm, func.Nchunks[i]);
     }
 #endif
 }
 
 // receive a component function with MPI
-template <typename T>
-void mpi::recv_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm) {
+void mpi::recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
     int func_ncomp_in = func.Ncomp;
     MPI_Recv(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status);
     for (int i = 0; i < func.data.Ncomp; i++) {
         if (func_ncomp_in <= i) func.alloc(i);
-        mrcpp::recv_tree(*func.Comp[i], src, tag, comm, func.Nchunks[i]);
+        if (func.isreal) mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks[i]);
+        else  mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks[i]);
     }
 #endif
 }
@@ -395,12 +406,12 @@ void mpi::share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
 
 
 /** Update a shared function after it has been changed by one of the MPI ranks. */
-template <typename T>
-    void mpi::share_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm) {
+void mpi::share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
 #ifdef MRCPP_HAS_MPI
         for (int comp = 0; comp < func.Ncomp; comp++) {
-            mrcpp::share_tree(*func.Comp[comp], src, tag, comm);
+            if (func.isreal) mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
+            else  mrcpp::share_tree(*func.CompC[comp], src, tag, comm);
 #endif
         }
     }
@@ -448,9 +459,8 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 #endif
 }
 
-template <typename T>
 /** @brief Add all mpi function into rank zero */
-void mpi::reduce_function(double prec, CompFunction<3,T> &func, MPI_Comm comm) {
+void mpi::reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -469,8 +479,7 @@ void mpi::reduce_function(double prec, CompFunction<3,T> &func, MPI_Comm comm) {
             // receive
             int src = comm_rank + fac;
             if (src < comm_size) {
-                MultiResolutionAnalysis<3> mra(func.Comp[0]->getMRA());
-                CompFunction<3,T> func_i(mra);
+                CompFunction<3> func_i;
                 int tag = 3333 + src;
                 mpi::recv_function(func_i, src, tag, comm);
                 func.add(1.0, func_i); // add in place using union grid
@@ -594,6 +603,44 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Co
 }
 
 
+/** @brief make union tree without coeff and send to all
+ *  Real trees
+ */
+void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+    /* 1) make union grid of own orbitals
+       2) make union grid with others orbitals (sent to rank zero)
+       3) rank zero broadcast func to everybody
+     */
+
+    int N = Phi.size();
+    for (int j = 0; j < N; j++) {
+        if (not mpi::my_orb(j)) continue;
+        tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
+    }
+    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+}
+
+
+/** @brief make union tree without coeff and send to all
+ *  Complex trees
+ */
+void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+    /* 1) make union grid of own orbitals
+       2) make union grid with others orbitals (sent to rank zero)
+       3) rank zero broadcast func to everybody
+     */
+
+    int N = Phi.size();
+    for (int j = 0; j < N; j++) {
+        if (not mpi::my_orb(j)) continue;
+        tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
+    }
+    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+}
+
+
 /** @brief make union tree without coeff and send to all
  *  Include both real and imaginary parts
  */
@@ -644,9 +691,8 @@ void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 #endif
 }
 
-template <typename T>
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_function(CompFunction<3, T> &func, MPI_Comm comm) {
+void mpi::broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -740,15 +786,5 @@ void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MP
     MPI_Barrier(comm);
 #endif
 }
-    template void mpi::reduce_function(double prec, CompFunction<3, double> &func, MPI_Comm comm);
-    template void mpi::reduce_function(double prec, CompFunction<3, ComplexDouble> &func, MPI_Comm comm);
-    template void mpi::broadcast_function(CompFunction<3, double> &func, MPI_Comm comm);
-    template void mpi::broadcast_function(CompFunction<3, ComplexDouble> &func, MPI_Comm comm);
-    template void mpi::send_function(CompFunction<3, double> &func, int dst, int tag, MPI_Comm comm);
-    template void mpi::send_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
-    template void mpi::recv_function(CompFunction<3, double> &func, int dst, int tag, MPI_Comm comm);
-    template void mpi::recv_function(CompFunction<3, ComplexDouble> &func, int dst, int tag, MPI_Comm comm);
-    template void mpi::share_function(CompFunction<3, double> &func, int src, int tag, MPI_Comm comm);
-    template void mpi::share_function(CompFunction<3, ComplexDouble> &func, int src, int tag, MPI_Comm comm);
 
 } // namespace mrcpp
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 170d5b972..811dcedb6 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -42,26 +42,23 @@ bool grand_master();
 bool share_master();
 bool my_orb(int j);
 bool my_orb(ComplexFunction orbj);
+bool my_func(int j);
+bool my_func(CompFunction<3> func);
 
 // bool my_unique_orb(const Orbital &orb);
-void free_foreign(MPI_FuncVector &Phi);
+void free_foreign(MPI_CompFuncVector &Phi);
 
 void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
-template <typename T>
-void send_function(CompFunction<3, T> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
-template <typename T>
-void recv_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
+void send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
+void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
 void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
-template <typename T>
-void share_function(CompFunction<3, T> &func, int src, int tag, MPI_Comm comm);
+void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm);
 
 void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm);
 void broadcast_function(ComplexFunction &func, MPI_Comm comm);
-template <typename T>
-void reduce_function(double prec, CompFunction<3, T> &func, MPI_Comm comm);
-template <typename T>
-void broadcast_function(CompFunction<3, T> &func, MPI_Comm comm);
+void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm);
+void broadcast_function(CompFunction<3> &func, MPI_Comm comm);
 
 void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
@@ -70,6 +67,9 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm c
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
 void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
+
 void allreduce_vector(IntVector &vec, MPI_Comm comm);
 void allreduce_vector(DoubleVector &vec, MPI_Comm comm);
 void allreduce_vector(ComplexVector &vec, MPI_Comm comm);

From 47471f895e21addbcda4fbecba8e9053ce2998e7 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 29 Jul 2024 09:48:26 +0200
Subject: [PATCH 14/38] moved ComplexFunction.h include from parallel.cpp to Bank.h

---
 src/utils/Bank.h       | 1 +
 src/utils/parallel.cpp | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index dc52791b3..b4d8b3c76 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include "ComplexFunction.h"
 #include "CompFunction.h"
 #include "parallel.h"
 #include "trees/NodeIndex.h"
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 8d3fa3dfb..fd16d5f58 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -5,8 +5,6 @@
 #include <thread>
 
 #include "Bank.h"
-#include "ComplexFunction.h"
-#include "CompFunction.h"
 #include "omp_utils.h"
 #include "parallel.h"
 #include "trees/FunctionTree.h"

From aa6f2549c791b49fb0a3028da5c47c029d232c3f Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 29 Jul 2024 09:56:57 +0200
Subject: [PATCH 15/38] removed the mpi:: in parallel.cpp

---
 src/utils/parallel.cpp | 202 ++++++++++++++++++++---------------------
 1 file changed, 101 insertions(+), 101 deletions(-)

diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index fd16d5f58..03f10d3fd 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -65,89 +65,88 @@ MPI_Comm comm_share;
 MPI_Comm comm_sh_group;
 MPI_Comm comm_bank;
 
-} // namespace mpi
 
 int id_shift; // to ensure that nodes, orbitals and functions do not collide
 
 extern int metadata_block[3]; // can add more metadata in future
 extern int const size_metadata = 3;
 
-void mpi::initialize() {
+void initialize() {
     Eigen::setNbThreads(1);
     mrcpp_set_dynamic(0);
 
 #ifdef MRCPP_HAS_MPI
     MPI_Init(nullptr, nullptr);
-    MPI_Comm_size(MPI_COMM_WORLD, &mpi::world_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpi::world_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 
     // divide the world into groups
     // each group has its own group communicator definition
 
     // define independent group of MPI processes, that are not part of comm_wrk
     // for now the new group does not include comm_share
-    mpi::comm_bank = MPI_COMM_WORLD; // clients and master
+    comm_bank = MPI_COMM_WORLD; // clients and master
     MPI_Comm comm_remainder;         // clients only
 
     // set bank_size automatically if not defined by user
-    if (mpi::world_size < 2) {
-        mpi::bank_size = 0;
-    } else if (mpi::bank_size < 0) {
-        mpi::bank_size = max(mpi::world_size / 3, 1);
+    if (world_size < 2) {
+        bank_size = 0;
+    } else if (bank_size < 0) {
+        bank_size = max(world_size / 3, 1);
     }
-    if (mpi::world_size - mpi::bank_size < 1) MSG_ABORT("No MPI ranks left for working!");
-    if (mpi::bank_size < 1 and mpi::world_size > 1) MSG_ABORT("Bank size must be at least one when using MPI!");
+    if (world_size - bank_size < 1) MSG_ABORT("No MPI ranks left for working!");
+    if (bank_size < 1 and world_size > 1) MSG_ABORT("Bank size must be at least one when using MPI!");
 
-    mpi::bankmaster.resize(mpi::bank_size);
-    for (int i = 0; i < mpi::bank_size; i++) {
-        mpi::bankmaster[i] = mpi::world_size - i - 1; // rank of the bankmasters
+    bankmaster.resize(bank_size);
+    for (int i = 0; i < bank_size; i++) {
+        bankmaster[i] = world_size - i - 1; // rank of the bankmasters
     }
-    if (mpi::world_rank < mpi::world_size - mpi::bank_size) {
+    if (world_rank < world_size - bank_size) {
         // everything which is left
-        mpi::is_bank = 0;
-        mpi::is_centralbank = 0;
-        mpi::is_bankclient = 1;
+        is_bank = 0;
+        is_centralbank = 0;
+        is_bankclient = 1;
     } else {
         // special group of centralbankmasters
-        mpi::is_bank = 1;
-        mpi::is_centralbank = 1;
-        mpi::is_bankclient = 0;
-        if (mpi::world_rank == mpi::world_size - mpi::bank_size) mpi::is_bankmaster = 1;
+        is_bank = 1;
+        is_centralbank = 1;
+        is_bankclient = 0;
+        if (world_rank == world_size - bank_size) is_bankmaster = 1;
     }
-    MPI_Comm_split(MPI_COMM_WORLD, mpi::is_bankclient, mpi::world_rank, &comm_remainder);
+    MPI_Comm_split(MPI_COMM_WORLD, is_bankclient, world_rank, &comm_remainder);
 
     // split world into groups that can share memory
-    MPI_Comm_split_type(comm_remainder, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &mpi::comm_share);
+    MPI_Comm_split_type(comm_remainder, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share);
 
-    MPI_Comm_rank(mpi::comm_share, &mpi::share_rank);
-    MPI_Comm_size(mpi::comm_share, &mpi::share_size);
+    MPI_Comm_rank(comm_share, &share_rank);
+    MPI_Comm_size(comm_share, &share_size);
 
     // define a rank of the group
-    MPI_Comm_split(comm_remainder, mpi::share_rank, mpi::world_rank, &mpi::comm_sh_group);
+    MPI_Comm_split(comm_remainder, share_rank, world_rank, &comm_sh_group);
     // mpiShRank is color (same color->in same group)
     // MPI_worldrank is key (orders rank within the groups)
 
     // we define a new orbital rank, so that the orbitals within
     // a shared memory group, have consecutive ranks
-    MPI_Comm_rank(mpi::comm_sh_group, &mpi::sh_group_rank);
+    MPI_Comm_rank(comm_sh_group, &sh_group_rank);
 
-    mpi::wrk_rank = mpi::share_rank + mpi::sh_group_rank * mpi::world_size;
-    MPI_Comm_split(comm_remainder, 0, mpi::wrk_rank, &mpi::comm_wrk);
+    wrk_rank = share_rank + sh_group_rank * world_size;
+    MPI_Comm_split(comm_remainder, 0, wrk_rank, &comm_wrk);
     // 0 is color (same color->in same group)
     // mpiOrbRank is key (orders rank in the group)
 
-    MPI_Comm_rank(mpi::comm_wrk, &mpi::wrk_rank);
-    MPI_Comm_size(mpi::comm_wrk, &mpi::wrk_size);
+    MPI_Comm_rank(comm_wrk, &wrk_rank);
+    MPI_Comm_size(comm_wrk, &wrk_size);
 
     // if bank_size is large enough, we reserve one as "task manager"
-    mpi::tot_bank_size = mpi::bank_size;
-    if (mpi::bank_size <= 2 and mpi::bank_size > 0) {
+    tot_bank_size = bank_size;
+    if (bank_size <= 2 and bank_size > 0) {
         // use the first bank as task manager
-        mpi::task_bank = mpi::bankmaster[0];
-    } else if (mpi::bank_size > 1) {
+        task_bank = bankmaster[0];
+    } else if (bank_size > 1) {
         // reserve one bank for task management only
-        mpi::bank_size--;
-        mpi::task_bank = mpi::bankmaster[mpi::bank_size]; // the last rank is reserved as task manager
+        bank_size--;
+        task_bank = bankmaster[bank_size]; // the last rank is reserved as task manager
     }
 
     // determine the maximum value alowed for mpi tags
@@ -213,21 +212,21 @@ void mpi::initialize() {
     omp::n_threads = nthreads;
     mrcpp::set_max_threads(nthreads);
 
-    if (mpi::is_bank) {
+    if (is_bank) {
         // bank is open until end of program
-        if (mpi::is_centralbank) { dataBank.open(); }
-        mpi::finalize();
+        if (is_centralbank) { dataBank.open(); }
+        finalize();
         exit(EXIT_SUCCESS);
     }
 #else
-    mpi::bank_size = 0;
+    bank_size = 0;
     mrcpp::set_max_threads(omp::n_threads);
 #endif
 }
 
-void mpi::finalize() {
+void finalize() {
 #ifdef MRCPP_HAS_MPI
-    if (mpi::bank_size > 0 and mpi::grand_master()) {
+    if (bank_size > 0 and grand_master()) {
         println(4, " max data in bank " << dataBank.get_maxtotalsize() << " MB ");
         dataBank.close();
     }
@@ -236,7 +235,7 @@ void mpi::finalize() {
 #endif
 }
 
-void mpi::barrier(MPI_Comm comm) {
+void barrier(MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     MPI_Barrier(comm);
 #endif
@@ -246,43 +245,43 @@ void mpi::barrier(MPI_Comm comm) {
  * Orbital related MPI functions *
  *********************************/
 
-bool mpi::grand_master() {
-    return (mpi::world_rank == 0 and is_bankclient) ? true : false;
+bool grand_master() {
+    return (world_rank == 0 and is_bankclient) ? true : false;
 }
 
-bool mpi::share_master() {
-    return (mpi::share_rank == 0) ? true : false;
+bool share_master() {
+    return (share_rank == 0) ? true : false;
 }
 
 /** @brief Test if orbital belongs to this MPI rank (or is common)*/
-bool mpi::my_orb(int j) {
-    return ((j) % mpi::wrk_size == mpi::wrk_rank) ? true : false;
+bool my_orb(int j) {
+    return ((j) % wrk_size == wrk_rank) ? true : false;
 }
 
 /** @brief Test if orbital belongs to this MPI rank (or is common)*/
-bool mpi::my_orb(ComplexFunction orbj) {
+bool my_orb(ComplexFunction orbj) {
     return my_orb(orbj.getRank());
 }
 
 /** @brief Test if function belongs to this MPI rank */
-bool mpi::my_func(int j) {
-    return ((j) % mpi::wrk_size == mpi::wrk_rank) ? true : false;
+bool my_func(int j) {
+    return ((j) % wrk_size == wrk_rank) ? true : false;
 }
 
 /** @brief Test if function belongs to this MPI rank */
-bool mpi::my_func(CompFunction<3> func) {
+bool my_func(CompFunction<3> func) {
     return my_func(func.rank);
 }
 
 /** @brief Free all function pointers not belonging to this MPI rank */
-void mpi::free_foreign(MPI_CompFuncVector &Phi) {
+void free_foreign(MPI_CompFuncVector &Phi) {
     for (CompFunction<3> &i : Phi) {
-        if (not mpi::my_func(i)) i.alloc(0);
+        if (not my_func(i)) i.alloc(0);
     }
 }
 
 /** @brief Add up each entry of the vector with contributions from all MPI ranks */
-void mpi::allreduce_vector(IntVector &vec, MPI_Comm comm) {
+void allreduce_vector(IntVector &vec, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = vec.size();
     MPI_Allreduce(MPI_IN_PLACE, vec.data(), N, MPI_INT, MPI_SUM, comm);
@@ -290,7 +289,7 @@ void mpi::allreduce_vector(IntVector &vec, MPI_Comm comm) {
 }
 
 /** @brief Add up each entry of the vector with contributions from all MPI ranks */
-void mpi::allreduce_vector(DoubleVector &vec, MPI_Comm comm) {
+void allreduce_vector(DoubleVector &vec, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = vec.size();
     MPI_Allreduce(MPI_IN_PLACE, vec.data(), N, MPI_DOUBLE, MPI_SUM, comm);
@@ -298,7 +297,7 @@ void mpi::allreduce_vector(DoubleVector &vec, MPI_Comm comm) {
 }
 
 /** @brief Add up each entry of the vector with contributions from all MPI ranks */
-void mpi::allreduce_vector(ComplexVector &vec, MPI_Comm comm) {
+void allreduce_vector(ComplexVector &vec, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = vec.size();
     MPI_Allreduce(MPI_IN_PLACE, vec.data(), N, MPI_C_DOUBLE_COMPLEX, MPI_SUM, comm);
@@ -306,7 +305,7 @@ void mpi::allreduce_vector(ComplexVector &vec, MPI_Comm comm) {
 }
 
 /** @brief Add up each entry of the matrix with contributions from all MPI ranks */
-void mpi::allreduce_matrix(IntMatrix &mat, MPI_Comm comm) {
+void allreduce_matrix(IntMatrix &mat, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = mat.size();
     MPI_Allreduce(MPI_IN_PLACE, mat.data(), N, MPI_INT, MPI_SUM, comm);
@@ -314,7 +313,7 @@ void mpi::allreduce_matrix(IntMatrix &mat, MPI_Comm comm) {
 }
 
 /** @brief Add up each entry of the matrix with contributions from all MPI ranks */
-void mpi::allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) {
+void allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = mat.size();
     MPI_Allreduce(MPI_IN_PLACE, mat.data(), N, MPI_DOUBLE, MPI_SUM, comm);
@@ -322,7 +321,7 @@ void mpi::allreduce_matrix(DoubleMatrix &mat, MPI_Comm comm) {
 }
 
 /** @brief Add up each entry of the matrix with contributions from all MPI ranks */
-void mpi::allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
+void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     int N = mat.size();
     MPI_Allreduce(MPI_IN_PLACE, mat.data(), N, MPI_C_DOUBLE_COMPLEX, MPI_SUM, comm);
@@ -330,7 +329,7 @@ void mpi::allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
 }
 
 // send a function with MPI
-void mpi::send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm) {
+void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     if (func.isShared()) MSG_WARN("Sending a shared function is not recommended");
     FunctionData &funcinfo = func.getFunctionData();
@@ -341,7 +340,7 @@ void mpi::send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm)
 }
 
 // receive a function with MPI
-void mpi::recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
+void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     if (func.isShared()) MSG_WARN("Receiving a shared function is not recommended");
     MPI_Status status;
@@ -363,7 +362,7 @@ void mpi::recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
 }
 
 // send a component function with MPI
-void mpi::send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
+void send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     for (int i = 0; i < func.data.Ncomp; i++) {
         //make sure that Nchunks is up to date
@@ -379,7 +378,7 @@ void mpi::send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm)
 }
 
 // receive a component function with MPI
-void mpi::recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
+void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
     int func_ncomp_in = func.Ncomp;
@@ -393,7 +392,7 @@ void mpi::recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm)
 }
 
 /** Update a shared function after it has been changed by one of the MPI ranks. */
-void mpi::share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
+void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
 #ifdef MRCPP_HAS_MPI
         if (func.hasReal()) mrcpp::share_tree(func.real(), src, tag, comm);
@@ -404,7 +403,7 @@ void mpi::share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm)
 
 
 /** Update a shared function after it has been changed by one of the MPI ranks. */
-void mpi::share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
+void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
 #ifdef MRCPP_HAS_MPI
         for (int comp = 0; comp < func.Ncomp; comp++) {
@@ -416,7 +415,7 @@ void mpi::share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm)
 }
 
 /** @brief Add all mpi function into rank zero */
-void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
+void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -437,7 +436,7 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
             if (src < comm_size) {
                 ComplexFunction func_i(false);
                 int tag = 3333 + src;
-                mpi::recv_function(func_i, src, tag, comm);
+                recv_function(func_i, src, tag, comm);
                 func.add(1.0, func_i); // add in place using union grid
                 func.crop(prec);
             }
@@ -447,7 +446,7 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
             int dest = comm_rank - fac;
             if (dest >= 0) {
                 int tag = 3333 + comm_rank;
-                mpi::send_function(func, dest, tag, comm);
+                send_function(func, dest, tag, comm);
                 break; // once data is sent we are done
             }
         }
@@ -458,7 +457,7 @@ void mpi::reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
 }
 
 /** @brief Add all mpi function into rank zero */
-void mpi::reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
+void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -479,7 +478,7 @@ void mpi::reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
             if (src < comm_size) {
                 CompFunction<3> func_i;
                 int tag = 3333 + src;
-                mpi::recv_function(func_i, src, tag, comm);
+                recv_function(func_i, src, tag, comm);
                 func.add(1.0, func_i); // add in place using union grid
                 func.crop(prec);
             }
@@ -489,7 +488,7 @@ void mpi::reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
             int dest = comm_rank - fac;
             if (dest >= 0) {
                 int tag = 3333 + comm_rank;
-                mpi::send_function(func, dest, tag, comm);
+                send_function(func, dest, tag, comm);
                 break; // once data is sent we are done
             }
         }
@@ -500,7 +499,7 @@ void mpi::reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -541,7 +540,7 @@ void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm com
 }
 
 /** @brief make union tree and send into rank zero */
-void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -584,7 +583,7 @@ void mpi::reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_C
 /** @brief make union tree without coeff and send to all
  *  Include both real and imaginary parts
  */
-void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -592,19 +591,19 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Co
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
+        if (not my_orb(j)) continue;
         if (Phi[j].hasReal()) tree.appendTreeNoCoeff(Phi[j].real());
         if (Phi[j].hasImag()) tree.appendTreeNoCoeff(Phi[j].imag());
     }
-    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
-    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+    reduce_Tree_noCoeff(tree, comm_wrk);
+    broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
 /** @brief make union tree without coeff and send to all
  *  Real trees
  */
-void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -612,18 +611,18 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Co
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
+        if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
     }
-    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
-    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+    reduce_Tree_noCoeff(tree, comm_wrk);
+    broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
 /** @brief make union tree without coeff and send to all
  *  Complex trees
  */
-void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -631,18 +630,18 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, ve
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
+        if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
-    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
-    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+    reduce_Tree_noCoeff(tree, comm_wrk);
+    broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
 /** @brief make union tree without coeff and send to all
  *  Include both real and imaginary parts
  */
-    void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
+    void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -650,15 +649,15 @@ void mpi::allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, ve
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
+        if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(Phi[j]);
     }
-    mpi::reduce_Tree_noCoeff(tree, mpi::comm_wrk);
-    mpi::broadcast_Tree_noCoeff(tree, mpi::comm_wrk);
+    reduce_Tree_noCoeff(tree, comm_wrk);
+    broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
+void broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -675,13 +674,13 @@ void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
             // receive
             int src = comm_rank - fac;
             int tag = 4334 + comm_rank;
-            mpi::recv_function(func, src, tag, comm);
+            recv_function(func, src, tag, comm);
         }
         if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
             // send
             int dst = comm_rank + fac;
             int tag = 4334 + dst;
-            if (dst < comm_size) mpi::send_function(func, dst, tag, comm);
+            if (dst < comm_size) send_function(func, dst, tag, comm);
         }
         fac /= 2;
     }
@@ -690,7 +689,7 @@ void mpi::broadcast_function(ComplexFunction &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
+void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -707,13 +706,13 @@ void mpi::broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
             // receive
             int src = comm_rank - fac;
             int tag = 4334 + comm_rank;
-            mpi::recv_function(func, src, tag, comm);
+            recv_function(func, src, tag, comm);
         }
         if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
             // send
             int dst = comm_rank + fac;
             int tag = 4334 + dst;
-            if (dst < comm_size) mpi::send_function(func, dst, tag, comm);
+            if (dst < comm_size) send_function(func, dst, tag, comm);
         }
         fac /= 2;
     }
@@ -722,7 +721,7 @@ void mpi::broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -754,7 +753,7 @@ void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -784,5 +783,6 @@ void mpi::broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MP
     MPI_Barrier(comm);
 #endif
 }
+} // namespace mpi
 
 } // namespace mrcpp

From 52a6d52f98960689edfa7d882191d3e9eb47a1d9 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 29 Jul 2024 16:08:29 +0200
Subject: [PATCH 16/38] Function vector with pointers

---
 src/treebuilders/apply.cpp |  49 ++++---
 src/treebuilders/apply.h   |   3 +-
 src/utils/CompFunction.cpp | 273 ++++++++++++++++++++-----------------
 src/utils/CompFunction.h   |  41 +++---
 src/utils/parallel.cpp     |  22 +--
 src/utils/parallel.h       |   9 +-
 6 files changed, 212 insertions(+), 185 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 478dd7f15..79e672fd7 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -460,36 +460,35 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
     return out;
 }
 
-template <int D> CompFunctionVector<D> gradient(DerivativeOperator<D> &oper, CompFunction<D> &inp,ComplexDouble  **metric) {
-    CompFunctionVector<D> out;
-    for (int d = 0; d < D; d++) {
-        CompFunction<D> *grad_d = new CompFunction<D>();
-        for (int icomp = 0; icomp < 4; icomp++){
-            if (inp.Comp[icomp]!=nullptr) {
-                for (int ocomp = 0; ocomp < 4; ocomp++){
-                    if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                        if (inp.isreal) {
-                            grad_d->isreal = 1;
-                            grad_d->iscomplex = 0;
-                            grad_d->CompD[ocomp] = new FunctionTree<D, double>(inp.getMRA());
-                            apply(grad_d->CompD[ocomp], oper, *inp.CompD[icomp], d);
-                            if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                                grad_d->CompD[ocomp]->rescale(metric[icomp][ocomp]);
-                            }
-                        } else {
-                            grad_d->isreal = 0;
-                            grad_d->iscomplex = 1;
-                            grad_d->CompC[ocomp] = new FunctionTree<D, ComplexDouble>(inp.getMRA());
-                            apply(grad_d->CompC[ocomp], oper, *inp.CompC[icomp], d);
-                            if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                                grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                            }
+std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  **metric) {
+    std::vector<CompFunction<3>*> out;
+    for (int d = 0; d < 3; d++) {
+        CompFunction<3> *grad_d = new CompFunction<3>();
+        for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+            for (int ocomp = 0; ocomp < 4; ocomp++){
+                if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
+                    grad_d->Ncomp=ocomp;
+                    if (inp.isreal) {
+                        grad_d->isreal = 1;
+                        grad_d->iscomplex = 0;
+                        grad_d->CompD[ocomp] = new FunctionTree<3, double>(inp.CompD[0]->getMRA());
+                        apply(*(grad_d->CompD[ocomp]), oper, *inp.CompD[icomp], d);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real());
+                        }
+                    } else {
+                        grad_d->isreal = 0;
+                        grad_d->iscomplex = 1;
+                        grad_d->CompC[ocomp] = new FunctionTree<3, ComplexDouble>(inp.CompC[0]->getMRA());
+                        apply(*(grad_d->CompC[ocomp]), oper, *inp.CompC[icomp], d);
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                            grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]);
                         }
                     }
                 }
             }
         }
-        out.oush_back(grad_d);
+        out.push_back(grad_d);
     }
     return out;
 }
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index d66ada61d..a839dba2a 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -50,7 +50,8 @@ template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOpe
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp);
 template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble **metric);
 template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp);
-template <int D> CompFunctionVector<D> gradient(DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric);
+// template <int D>
+std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  **metric);
 // clang-format on
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 5be3003fd..3c0dc3276 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -201,6 +201,28 @@ int CompFunction<D>::getNNodes() const {
     return nNodes;
 }
 
+template <int D>
+FunctionTree<D, double> &CompFunction<D>::real(int i) {
+    if (!isreal) MSG_ABORT("not real function");
+    return *CompD[i];
+}
+template <int D> //NB: should return CompC in the future
+FunctionTree<D, double>  &CompFunction<D>::imag(int i) {
+    if (!iscomplex) MSG_ABORT("not complex function");
+    return *CompD[i];
+}
+
+template <int D>
+const FunctionTree<D, double> &CompFunction<D>::real(int i) const {
+    if (!isreal) MSG_ABORT("not real function");
+    return *CompD[i];
+}
+template <int D> //NB: should return CompC in the future
+const FunctionTree<D, double> &CompFunction<D>::imag(int i) const {
+    if (!iscomplex) MSG_ABORT("not complex function");
+    return *CompD[i];
+}
+
  /* for backwards compatibility */
 template <int D>
 void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
@@ -394,7 +416,7 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
     for (int comp = 0; comp < inp_a[0].Ncomp; comp++) {
         if (inp_a.isreal and inp_b.isreal) {
             delete out.CompD[comp];
-            FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0].getMRA());
+            FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA());
             double coef = 1.0;
             if (need_to_multiply) {
                 if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
@@ -412,7 +434,7 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
             out.CompD[comp] = tree;
         } else {
             delete out.CompC[comp];
-            FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0].getMRA());
+            FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0]->getMRA());
             ComplexDouble coef = 1.0;
             if (need_to_multiply) {
                 if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
@@ -519,16 +541,16 @@ void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, d
     mpi::share_function(out, 0, 132231, mpi::comm_share);
  }
 
-// MPI_CompFuncVector
+// CompFunctionVector
 
 
-MPI_CompFuncVector::MPI_CompFuncVector(int N)
-    : std::vector<CompFunction<3>>(N) {
-    for (int i = 0; i < N; i++) (*this)[i].rank = i;
+CompFunctionVector::CompFunctionVector(int N):
+    std::vector<CompFunction<3>*>(N) {
+    for (int i = 0; i < N; i++) (*this)[i]->rank = i;
     vecMRA = defaultCompMRA<3>;
 }
-void MPI_CompFuncVector::distribute() {
-    for (int i = 0; i < this->size(); i++) (*this)[i].rank = i;
+void CompFunctionVector::distribute() {
+    for (int i = 0; i < this->size(); i++) (*this)[i]->rank = i;
 }
 
 
@@ -540,7 +562,7 @@ void MPI_CompFuncVector::distribute() {
  * Phi input functions, Psi output functions
  *
  */
-void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector &Psi, double prec) {
+void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
 
     // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
     // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
@@ -579,8 +601,8 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
     IntVector PsihasReIm = IntVector::Zero(2);
     for (int j = 0; j < N; j++) {
         if (!mpi::my_func(j)) continue;
-        PsihasReIm[0] = (Phi[j].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[j].hasImag()) ? 1 : 0;
+        PsihasReIm[0] = (Phi[j]->hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[j]->hasImag()) ? 1 : 0;
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
     if (not PsihasReIm[0] and not PsihasReIm[1]) {
@@ -592,8 +614,8 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
 
     for (int j = 0; j < M; j++) {
         if (!mpi::my_func(j)) continue;
-        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
-        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
+        if (not makeReal and Psi[j]->hasReal()) Psi[j]->free(NUMBER::Real);
+        if (not makeImag and Psi[j]->hasImag()) Psi[j]->free(NUMBER::Imag);
     }
 
     if (not makeReal and not makeImag) { return; }
@@ -606,7 +628,7 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
     IntVector conjMat = IntVector::Zero(Neff);
     for (int j = 0; j < Neff; j++) {
         if (!mpi::my_func(j % N)) continue;
-        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
+        conjMat[j] = (Phi[j % N]->conjugate()) ? -1 : 1;
     }
     mpi::allreduce_vector(conjMat, mpi::comm_wrk);
 
@@ -653,8 +675,8 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j]->hasReal()) {
+                Phi[j]->real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j]) {
@@ -663,8 +685,8 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j]->hasImag()) {
+                Phi[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j + N]) {
@@ -867,13 +889,13 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
         for (int j = 0; j < Meff; j++) {
             if (coeffpVec[j].size()==0) continue;
             if (j < M) {
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+                if (!Psi[j]->hasReal()) Psi[j]->alloc(0);
+                Psi[j]->real().clear();
+                Psi[j]->real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
             } else {
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+                if (!Psi[j % M]->hasImag()) Psi[j % M]->alloc(0);
+                Psi[j % M]->imag().clear();
+                Psi[j % M]->imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
             }
         }
 
@@ -904,14 +926,14 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
             }
             if (j < M) {
                 // Real part
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+                if (!Psi[j]->hasReal()) Psi[j]->alloc(0);
+                Psi[j]->real().clear();
+                Psi[j]->real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             } else {
                 // Imag part
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+                if (!Psi[j % M]->hasImag()) Psi[j % M]->alloc(0);
+                Psi[j % M]->imag().clear();
+                Psi[j % M]->imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             }
             for (double *p : pointerstodelete) delete[] p;
             pointerstodelete.clear();
@@ -920,7 +942,7 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector
 }
 
 
-void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, double prec) {
+void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec) { // in-place variant: forwards to the four-argument rotate with Phi as both input and output
     rotate(Phi, U, Phi, prec);
     return;
 }
@@ -928,7 +950,7 @@ void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, double prec) {
 /** @brief Save all nodes in bank; identify them using serialIx from refTree
  * shift is a shift applied in the id
  */
-void save_nodes(MPI_CompFuncVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
+void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
     int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
     int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
     int max_nNodes = refTree.getNNodes();
@@ -941,8 +963,8 @@ void save_nodes(MPI_CompFuncVector &Phi, FunctionTree<3> &refTree, BankAccount &
     for (int j = 0; j < N; j++) {
         if (not mpi::my_func(j)) continue;
         // make vector with all coef address and their index in the union grid
-        if (Phi[j].hasReal()) {
-            Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        if (Phi[j]->hasReal()) {
+            Phi[j]->real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
             // except for the root nodes, only wavelets are sent
@@ -958,8 +980,8 @@ void save_nodes(MPI_CompFuncVector &Phi, FunctionTree<3> &refTree, BankAccount &
             }
         }
         // Imaginary parts are considered as orbitals with an orbid shifted by N
-        if (Phi[j].hasImag()) {
-            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        if (Phi[j]->hasImag()) {
+            Phi[j]->imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
             for (int i = 0; i < max_n; i++) {
@@ -986,7 +1008,7 @@ void save_nodes(MPI_CompFuncVector &Phi, FunctionTree<3> &refTree, BankAccount &
  * in parallel using a local representation.
  * Input trees are extended by one scale at most.
  */
-MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) {
+CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) {
 
     int N = Phi.size();
     const int D = 3;
@@ -998,9 +1020,9 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
     for (int i = 0; i < N; i++) {
         if (!mpi::my_func(i)) continue;
         int irefine = 0;
-        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
+        while (Phi[i]->hasReal() and irefine < nrefine and refine_grid(Phi[i]->real(), f) > 0) irefine++;
         irefine = 0;
-        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
+        while (Phi[i]->hasImag() and irefine < nrefine and refine_grid(Phi[i]->imag(), f) > 0) irefine++;
     }
 
     // 1b) make union tree without coefficients
@@ -1015,12 +1037,12 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
     IntVector PsihasReIm = IntVector::Zero(2);
     for (int i = 0; i < N; i++) {
         if (!mpi::my_func(i)) continue;
-        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
+        PsihasReIm[0] = (Phi[i]->hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[i]->hasImag()) ? 1 : 0;
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    MPI_CompFuncVector out(N);
-    MPI_CompFuncVector outtest(N);
+    CompFunctionVector out(N);
+    CompFunctionVector outtest(N);
     if (not PsihasReIm[0] and not PsihasReIm[1]) {
         return out; // do nothing
     }
@@ -1056,8 +1078,8 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j]->hasReal()) {
+                Phi[j]->real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j]) {
@@ -1066,8 +1088,8 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j]->hasImag()) {
+                Phi[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j + N]) {
@@ -1245,21 +1267,21 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < Neff; j++) {
             if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                if (Phi[j]->hasReal()) {
+                    out[j]->alloc(0);
+                    out[j]->real().clear();
+                    out[j]->real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
                     // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
+                    out[j]->real().mwTransform(BottomUp);
+                    out[j]->real().calcSquareNorm();
                 }
             } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    out[j].imag().mwTransform(BottomUp);
-                    out[j].imag().calcSquareNorm();
+                if (Phi[j % N]->hasImag()) { // this branch handles j >= N: the imaginary part of function j % N
+                    out[j % N]->alloc(0);
+                    out[j % N]->imag().clear();
+                    out[j % N]->imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    out[j % N]->imag().mwTransform(BottomUp); // was out[j]: out is built with N entries, so j >= N indexed past the end
+                    out[j % N]->imag().calcSquareNorm(); // same index as the tree built just above
                 }
             }
         }
@@ -1287,26 +1309,26 @@ MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f
                 }
             }
             if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                if (Phi[j]->hasReal()) {
+                    out[j]->alloc(0);
+                    out[j]->real().clear();
+                    out[j]->real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
                     // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
-                    out[j].real().resetEndNodeTable();
-                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
-                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
+                    out[j]->real().mwTransform(BottomUp);
+                    out[j]->real().calcSquareNorm();
+                    out[j]->real().resetEndNodeTable();
+                    // out[j]->real().crop(prec, 1.0, false); //bad convergence if out is cropped
+                    if (nrefine > 0) Phi[j]->real().crop(prec, 1.0, false); // restablishes original Phi
                 }
             } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    out[j % N].imag().mwTransform(BottomUp);
-                    out[j % N].imag().calcSquareNorm();
-                    // out[j % N].imag().crop(prec, 1.0, false);
-                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
+                if (Phi[j % N]->hasImag()) {
+                    out[j % N]->alloc(0);
+                    out[j % N]->imag().clear();
+                    out[j % N]->imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    out[j % N]->imag().mwTransform(BottomUp);
+                    out[j % N]->imag().calcSquareNorm();
+                    // out[j % N]->imag().crop(prec, 1.0, false);
+                    if (nrefine > 0) Phi[j % N]->imag().crop(prec, 1.0, false);
                 }
             }
 
@@ -1321,14 +1343,14 @@ void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA) {
     defaultCompMRA<3> = MRA;
 }
 
-ComplexVector dot(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) {
     int N = Bra.size();
     ComplexVector result = ComplexVector::Zero(N);
     for (int i = 0; i < N; i++) {
         // The bra is sent to the owner of the ket
         if (my_func(Bra[i]) != my_func(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
-        result[i] = dot(Bra[i], Ket[i]);
-        if (not mrcpp::mpi::my_func(i)) Bra[i].free(NUMBER::Total);
+        result[i] = dot(*Bra[i], *Ket[i]);
+        if (not mrcpp::mpi::my_func(i)) Bra[i]->free();
     }
     mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
     return result;
@@ -1340,7 +1362,7 @@ ComplexVector dot(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
  *
  * Computes the inverse square root of the orbital overlap matrix S^(-1/2)
  */
-ComplexMatrix calc_lowdin_matrix(MPI_CompFuncVector &Phi) {
+ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi) {
     ComplexMatrix S_tilde = calc_overlap_matrix(Phi);
     ComplexMatrix S_m12 = math_utils::hermitian_matrix_pow(S_tilde, -1.0 / 2.0);
     return S_m12;
@@ -1354,7 +1376,7 @@ ComplexMatrix calc_lowdin_matrix(MPI_CompFuncVector &Phi) {
  * MPI: Rank distribution of output vector is the same as input vector
  *
  */
-ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     // NB: must be spinseparated at this point!
 
     int N = BraKet.size();
@@ -1396,8 +1418,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j]->hasReal()) {
+                BraKet[j]->real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1406,8 +1428,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j]->hasImag()) {
+                BraKet[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1453,9 +1475,9 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
+                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N]->data.n1[0] != 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -1474,9 +1496,9 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
+                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N]->data.n1[0] != 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -1487,7 +1509,7 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
     IntVector conjMat = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+        conjMat[i] = (BraKet[i]->conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
@@ -1508,7 +1530,7 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet) {
 /** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
  *
  */
-ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) {
     mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
 
     MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
@@ -1561,8 +1583,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Bra[j].hasReal()) {
-                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Bra[j]->hasReal()) {
+                Bra[j]->real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1571,8 +1593,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
                     node2orbVecBra[ix].push_back(j);
                 }
             }
-            if (Bra[j].hasImag()) {
-                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Bra[j]->hasImag()) {
+                Bra[j]->imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1583,8 +1605,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
             }
         }
         for (int j = 0; j < M; j++) {
-            if (Ket[j].hasReal()) {
-                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Ket[j]->hasReal()) {
+                Ket[j]->real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1593,8 +1615,8 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
                     node2orbVecKet[ix].push_back(j);
                 }
             }
-            if (Ket[j].hasImag()) {
-                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Ket[j]->hasImag()) {
+                Ket[j]->imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1651,9 +1673,9 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
-                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                        if (Bra[orbVecBra[i] % N]->data.n1[0] != Ket[orbVecKet[j] % M]->data.n1[0] and
+                            Bra[orbVecBra[i] % N]->data.n1[0] != 0 and
+                            Ket[orbVecKet[j] % M]->data.n1[0] != 0)
                             continue;
                         // must ensure that threads are not competing
                         double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
@@ -1679,9 +1701,9 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
-                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                        if (Bra[orbVecBra[i] % N]->data.n1[0] != Ket[orbVecKet[j] % M]->data.n1[0] and
+                            Bra[orbVecBra[i] % N]->data.n1[0] != 0 and
+                            Ket[orbVecKet[j] % M]->data.n1[0] != 0)
                             continue;
                         Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
@@ -1693,13 +1715,13 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
     IntVector conjMatBra = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(Bra[i])) continue;
-        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
+        conjMatBra[i] = (Bra[i]->conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
     IntVector conjMatKet = IntVector::Zero(M);
     for (int i = 0; i < M; i++) {
         if (!mrcpp::mpi::my_func(Ket[i])) continue;
-        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
+        conjMatKet[i] = (Ket[i]->conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
 
@@ -1720,7 +1742,7 @@ ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &K
 /** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
  *
  */
-DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
+DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
     int N = BraKet.size();
     DoubleMatrix S = DoubleMatrix::Zero(N, N);
     DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
@@ -1760,8 +1782,8 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j]->hasReal()) {
+                BraKet[j]->real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1770,8 +1792,8 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j]->hasImag()) {
+                BraKet[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1817,9 +1839,9 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
+                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
+                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N]->data.n1[0]!= 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -1839,9 +1861,9 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
+                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
+                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N]->data.n1[0]!= 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -1853,7 +1875,7 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
     IntVector conjMat = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(i)) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+        conjMat[i] = (BraKet[i]->conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
@@ -1872,14 +1894,14 @@ DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet) {
 /** @brief Orthogonalize the functions in Bra against all orbitals in Ket
  *
  */
-void orthogonalize(double prec, MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket) {
+void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket) {
     // TODO: generalize for cases where Ket functions are not orthogonal to each other?
     ComplexMatrix S = calc_overlap_matrix(Bra, Ket);
     int N = Bra.size();
     int M = Ket.size();
     DoubleVector Ketnorms = DoubleVector::Zero(M);
     for (int i = 0; i < M; i++) {
-        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i].squaredNorm();
+        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i]->squaredNorm();
     }
     mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
     ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
@@ -1888,11 +1910,12 @@ void orthogonalize(double prec, MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket
             rmat(i,j) = 0.0 - S.conjugate()(j,i)/Ketnorms(i);
         }
     }
-    MPI_CompFuncVector rotatedKet(N);
+    CompFunctionVector rotatedKet(N);
     rotate(Ket, rmat, rotatedKet, prec / M);
     for (int j = 0; j < N; j++) {
-        if(my_func(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
+        if(my_func(Bra[j]))Bra[j]->add(1.0,*rotatedKet[j]);
     }
 }
+template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket) ;
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 8ba8088ff..a9d71f722 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -45,8 +45,7 @@ template <int D> class CompFunction {
     CompFunction(int n1, bool share);
     CompFunction(const CompFunction<D> &compfunc);
     CompFunction(CompFunction<D> && compfunc);
-    //    ComplexFunction *CPXfct; // temporary solution
-
+    CompFunction<D> &operator=(const CompFunction<D> &compfunc);
 
     FunctionTree<D, double> *CompD[4];
     FunctionTree<D, ComplexDouble> *CompC[4];
@@ -65,7 +64,6 @@ template <int D> class CompFunction {
      //CompFunction(ComplexFunction cplxfunc);
     // template <int D_ = 3, typename std::enable_if<D_ == 3, int>::type = 0>
      //operator ComplexFunction() const;
-    CompFunction<D> &operator=(const CompFunction<D> &compfunc);
     // CompFunction destructor
     ~CompFunction() {
         for (int i = 0; i < Ncomp; i++) {
@@ -79,7 +77,7 @@ template <int D> class CompFunction {
     void alloc(int i);
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setRank(int i) {rank = i;};
-    int getRank() {return rank;};
+    int getRank() const {return rank;}; // value last stored by setRank(); returns plain int because a top-level const on a by-value return has no effect
     void add(ComplexDouble c, CompFunction<D> inp);
 
     int crop(double prec);
@@ -89,21 +87,20 @@ template <int D> class CompFunction {
     int getNNodes() const;
 
     //NB: All tbelow should be revised. Now only for backwards compatibility to ComplexFunction class
+
     bool hasReal()  const {return isreal;}
     bool hasImag()  const {return iscomplex;}
     bool isShared() const {return data.shared;}
     bool conjugate() const {return data.conj;}
 
-    FunctionTree<D, double> &real() {return *CompD[0];}
-    FunctionTree<D, double> &imag() {return *CompD[0];} //does not make sense
-    const FunctionTree<D, double> &real() const {return *CompD[0];}
-    const FunctionTree<D, double> &imag() const {return *CompD[0];} //does not make sense
+    FunctionTree<D, double> &real(int i = 0);
+    FunctionTree<D, double> &imag(int i = 0); //does not make sense now
+    const FunctionTree<D, double> &real(int i = 0) const;
+    const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
     void free(int type) {delete CompD[0]; CompD[0] = nullptr; delete CompC[0]; CompC[0] = nullptr;}
     void flushFuncData();
 };
 
-template <int D> using CompFunctionVector = std::vector<CompFunction<D> *>;
-
 template <int D>
 void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp);
 template <int D>
@@ -127,24 +124,24 @@ void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double p
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
 
-class MPI_CompFuncVector : public std::vector<CompFunction<3>> {
+class CompFunctionVector : public std::vector<CompFunction<3>*> {
 public:
-    MPI_CompFuncVector(int N = 0);
+    CompFunctionVector(int N = 0);
     MultiResolutionAnalysis<3> *vecMRA;
     void distribute();
 };
 
-void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
-void rotate(MPI_CompFuncVector &Phi, const ComplexMatrix &U, MPI_CompFuncVector &Psi, double prec = -1.0);
-void save_nodes(MPI_CompFuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
-MPI_CompFuncVector multiply(MPI_CompFuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
+void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec = -1.0);
+void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
+void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
+CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
 void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
-ComplexVector dot(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
-ComplexMatrix calc_lowdin_matrix(MPI_CompFuncVector &Phi);
-ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &BraKet);
-ComplexMatrix calc_overlap_matrix(MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
-DoubleMatrix calc_norm_overlap_matrix(MPI_CompFuncVector &BraKet);
-void orthogonalize(double prec, MPI_CompFuncVector &Bra, MPI_CompFuncVector &Ket);
+ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket);
+ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi);
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet);
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket);
+DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet);
+void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket);
 
 
 } // namespace mrcpp
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 03f10d3fd..49206cfa9 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -269,14 +269,20 @@ bool my_func(int j) {
 }
 
 /** @brief Test if function belongs to this MPI rank */
-bool my_func(CompFunction<3> func) {
+bool my_func(const CompFunction<3>& func) {
     return my_func(func.rank);
 }
 
+
+/** @brief Test if the function pointed to by func belongs to this MPI rank (pointer overload; func is dereferenced unconditionally, so it must not be null) */
+bool my_func(CompFunction<3> *func) {
+    return my_func(func->rank);
+}
+
 /** @brief Free all function pointers not belonging to this MPI rank */
-void free_foreign(MPI_CompFuncVector &Phi) {
-    for (CompFunction<3> &i : Phi) {
-        if (not my_func(i)) i.alloc(0);
+void free_foreign(CompFunctionVector &Phi) {
+    for (CompFunction<3>* &i : Phi) {
+        if (not my_func(i)) i->alloc(0);
     }
 }
 
@@ -603,7 +609,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Complex
 /** @brief make union tree without coeff and send to all
  *  Real trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>*> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -612,7 +618,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
+        tree.appendTreeNoCoeff(*Phi[j]->CompD[0]);
     }
     reduce_Tree_noCoeff(tree, comm_wrk);
     broadcast_Tree_noCoeff(tree, comm_wrk);
@@ -622,7 +628,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
 /** @brief make union tree without coeff and send to all
  *  Complex trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>*> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -631,7 +637,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
+        tree.appendTreeNoCoeff(*Phi[j]->CompC[0]);
     }
     reduce_Tree_noCoeff(tree, comm_wrk);
     broadcast_Tree_noCoeff(tree, comm_wrk);
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 811dcedb6..d81145871 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -43,10 +43,11 @@ bool share_master();
 bool my_orb(int j);
 bool my_orb(ComplexFunction orbj);
 bool my_func(int j);
-bool my_func(CompFunction<3> func);
+bool my_func(const CompFunction<3>& func);
+bool my_func(CompFunction<3> *func);
 
 // bool my_unique_orb(const Orbital &orb);
-void free_foreign(MPI_CompFuncVector &Phi);
+void free_foreign(CompFunctionVector &Phi);
 
 void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
@@ -67,8 +68,8 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm c
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
 void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);
 
 void allreduce_vector(IntVector &vec, MPI_Comm comm);
 void allreduce_vector(DoubleVector &vec, MPI_Comm comm);

From 09715896591c6bf95c1a4ae09215d3ce7f38bef2 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 29 Jul 2024 16:24:44 +0200
Subject: [PATCH 17/38] explicit real and complex reduce and broadcast

---
 src/trees/FunctionTree.cpp |  4 ++--
 src/utils/parallel.cpp     | 24 ++++++++++++------------
 src/utils/parallel.h       |  8 ++++----
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 46ef40897..e16202169 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -811,7 +811,7 @@ template <> int FunctionTree<3, double>::saveNodesAndRmCoeff() {
         }
     }
     this->nodeAllocator_p->deallocAllCoeff();
-    mpi::broadcast_Tree_noCoeff(*this, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff_real(*this, mpi::comm_wrk);
     this->isLocal = true;
     assert(this->NodeIndex2serialIx.size() == getNNodes());
     return this->NodeIndex2serialIx.size();
@@ -834,7 +834,7 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
         }
     }
     this->nodeAllocator_p->deallocAllCoeff();
-    mpi::broadcast_Tree_noCoeff(*this, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff_complex(*this, mpi::comm_wrk);
     this->isLocal = true;
     assert(this->NodeIndex2serialIx.size() == getNNodes());
     return this->NodeIndex2serialIx.size();
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 49206cfa9..38bd08859 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -505,7 +505,7 @@ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -546,7 +546,7 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -601,8 +601,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Complex
         if (Phi[j].hasReal()) tree.appendTreeNoCoeff(Phi[j].real());
         if (Phi[j].hasImag()) tree.appendTreeNoCoeff(Phi[j].imag());
     }
-    reduce_Tree_noCoeff(tree, comm_wrk);
-    broadcast_Tree_noCoeff(tree, comm_wrk);
+    reduce_Tree_noCoeff_real(tree, comm_wrk);
+    broadcast_Tree_noCoeff_real(tree, comm_wrk);
 }
 
 
@@ -620,8 +620,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
         if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(*Phi[j]->CompD[0]);
     }
-    reduce_Tree_noCoeff(tree, comm_wrk);
-    broadcast_Tree_noCoeff(tree, comm_wrk);
+    reduce_Tree_noCoeff_real(tree, comm_wrk);
+    broadcast_Tree_noCoeff_real(tree, comm_wrk);
 }
 
 
@@ -639,8 +639,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
         if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(*Phi[j]->CompC[0]);
     }
-    reduce_Tree_noCoeff(tree, comm_wrk);
-    broadcast_Tree_noCoeff(tree, comm_wrk);
+    reduce_Tree_noCoeff_complex(tree, comm_wrk);
+    broadcast_Tree_noCoeff_complex(tree, comm_wrk);
 }
 
 
@@ -658,8 +658,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
         if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(Phi[j]);
     }
-    reduce_Tree_noCoeff(tree, comm_wrk);
-    broadcast_Tree_noCoeff(tree, comm_wrk);
+    reduce_Tree_noCoeff_complex(tree, comm_wrk);
+    broadcast_Tree_noCoeff_complex(tree, comm_wrk);
 }
 
 /** @brief Distribute rank zero function to all ranks */
@@ -727,7 +727,7 @@ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -759,7 +759,7 @@ void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm)
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index d81145871..32275073f 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -61,12 +61,12 @@ void broadcast_function(ComplexFunction &func, MPI_Comm comm);
 void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm);
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm);
 
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+void broadcast_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+void broadcast_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);

From 6e6e881a8fd04c0b3a2fdd288862640f5d8005f6 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Thu, 1 Aug 2024 16:32:49 +0200
Subject: [PATCH 18/38] all functions upgraded to component functions

---
 src/treebuilders/apply.cpp    |  83 ++++++-
 src/treebuilders/apply.h      |  16 +-
 src/treebuilders/grid.cpp     |  21 ++
 src/treebuilders/grid.h       |   2 +
 src/treebuilders/multiply.cpp |  15 +-
 src/trees/FunctionNode.cpp    |   7 +-
 src/trees/FunctionTree.cpp    |  26 ++-
 src/trees/FunctionTree.h      |   3 +-
 src/trees/MWTree.h            |   1 -
 src/utils/CompFunction.cpp    | 419 ++++++++++++++++++++++------------
 src/utils/CompFunction.h      |  35 ++-
 src/utils/parallel.cpp        |  38 +--
 src/utils/parallel.h          |  14 +-
 13 files changed, 470 insertions(+), 210 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 79e672fd7..e33fc8c85 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -116,7 +116,18 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
  * no coefs).
  *
  */
-template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
+template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
+
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int icomp = 0; icomp < inp.Ncomp; icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -250,7 +261,17 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
-template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble **metric, int maxIter, bool absPrec) {
+template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int icomp = 0; icomp < inp.Ncomp; icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -296,7 +317,17 @@ template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T
     apply_on_unit_cell<D>(false, prec, out, oper, inp, maxIter, absPrec);
 }
 
-template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
+template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int icomp = 0; icomp < 4; icomp++){
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
@@ -345,7 +376,17 @@ template <int D, typename T> void apply_near_field(double prec, FunctionTree<D,
 }
 
 
-template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter, bool absPrec) {
+template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int icomp = 0; icomp < 4; icomp++){
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
@@ -418,7 +459,17 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
     print::separator(10, ' ');
 }
 
-template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int dir) {
+template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int icomp = 0; icomp < inp.Ncomp; icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -460,8 +511,18 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
     return out;
 }
 
-std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  **metric) {
+std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  metric[4][4]) {
     std::vector<CompFunction<3>*> out;
+    ComplexDouble defaultMetric[4][4];
+    for (int i=0; i<4; i++){
+        for (int j=0; j<4; j++){
+            if (i==j) defaultMetric[i][j] = 1.0;
+            else defaultMetric[i][j] = 0.0;
+        }
+    }
+    if (metric == nullptr) {
+        metric = defaultMetric;
+    }
     for (int d = 0; d < 3; d++) {
         CompFunction<3> *grad_d = new CompFunction<3>();
         for (int icomp = 0; icomp < inp.Ncomp; icomp++){
@@ -527,7 +588,7 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
     clear(tmp_vec, true);
 }
 
-template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble **metric) {
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble metric[4][4]) {
     MSG_ABORT("not implemented");
 }
 
@@ -536,16 +597,16 @@ template <int D, typename T> void divergence(FunctionTree<D, T> &out, Derivative
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     divergence(out, oper, inp_vec);
 }
-template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble **metric) {
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble metric[4][4]) {
     MSG_ABORT("not implemented");
 }
 
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
-template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, CompFunction<1> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
-template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, CompFunction<2> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
-template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, CompFunction<3> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, const CompFunction<1> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
+template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, const CompFunction<2> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
+template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, const CompFunction<3> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
 template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
 template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index a839dba2a..452b17f72 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -36,22 +36,22 @@ template <int D> class DerivativeOperator;
 template <int D> class ConvolutionOperator;
 
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter = -1, bool absPrec = false);
-template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_near_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int maxIter = -1, bool absPrec = false);
+template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTree<D, T> &inp, int dir = -1);
-template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, ComplexDouble **metric, int dir = -1);
+template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir = -1, ComplexDouble metric[4][4] = nullptr);
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> &inp);
-template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble **metric);
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, FunctionTreeVector<D, T> *inp, ComplexDouble metric[4][4] = nullptr); // metric is optional, as for the other CompFunction overloads declared in this header
 template <int D, typename T> void divergence(FunctionTree<D, T> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> &inp);
-template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble **metric);
+template <int D, typename T> void divergence(CompFunction<D> &out, DerivativeOperator<D> &oper, std::vector<FunctionTree<D, T> *> *inp, ComplexDouble metric[4][4] = nullptr);
 template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperator<D> &oper, FunctionTree<D, T> &inp);
 // template <int D>
-std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  **metric);
+std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  metric[4][4] = nullptr);
 // clang-format on
 
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index 86f71f41c..e7d83e719 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -224,6 +224,27 @@ template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTre
     build_grid(out, inp);
 }
 
+
+/** @brief Build empty grid that is identical to another MW grid for every component
+ *
+ * @param[out] out: Output to be built; it is freed, takes over `inp.data` and is re-allocated with `inp.Ncomp` components
+ * @param[in] inp: Input whose component grids are reproduced in `out`
+ *
+ * @note The difference from the corresponding `build_grid` function is that
+ * this will first clear the grid of the `out` function, while `build_grid`
+ * will _extend_ the existing grid.
+ * @note CompD/CompC hold tree pointers, so they are dereferenced before being handed to the reference-taking `build_grid`.
+ */
+template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
+    out.free();
+    out.data = inp.data;
+    out.alloc(inp.Ncomp);
+    for (int i = 0; i < inp.Ncomp; i++) {
+        if (inp.isreal) build_grid(*out.CompD[i], *inp.CompD[i]);
+        if (inp.iscomplex) build_grid(*out.CompC[i], *inp.CompC[i]);
+    }
+}
+
 /** @brief Clear the MW coefficients of a function representation
  *
  * @param[in,out] out: Output function to be cleared
diff --git a/src/treebuilders/grid.h b/src/treebuilders/grid.h
index 42f54aa0a..1d7021f8b 100644
--- a/src/treebuilders/grid.h
+++ b/src/treebuilders/grid.h
@@ -28,6 +28,7 @@
 #include "functions/RepresentableFunction.h"
 #include "trees/FunctionTree.h"
 #include "trees/FunctionTreeVector.h"
+#include "utils/CompFunction.h"
 
 namespace mrcpp {
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, int scales);
@@ -38,6 +39,7 @@ template <int D, typename T> void build_grid(FunctionTree<D, T> &out, FunctionTr
 template <int D, typename T> void build_grid(FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter = -1);
 template <int D, typename T> void copy_func(FunctionTree<D, T> &out, FunctionTree<D, T> &inp);
 template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTree<D, T> &inp);
+template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp);
 template <int D, typename T> void clear_grid(FunctionTree<D, T> &out);
 template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, int scales);
 template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, double prec, bool absPrec = false);
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index 6cbf58b72..b95f030d4 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -82,6 +82,19 @@ void multiply(double prec,
     tmp_vec.push_back({1.0, &inp_b});
     multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms);
 }
+    /*
+template <int D> void multiply(double prec,
+                               FunctionTree<D, ComplexDouble> &out,
+                               ComplexDouble c,
+                               FunctionTree<D, ComplexDouble> &inp_a,
+                               FunctionTree<D, double> &inp_b,
+                               int maxIter = -1,
+                               bool absPrec = false,
+                               bool useMaxNorms = false) {
+    // we rather make a copy with ComplexDouble type only
+    FunctionTree<D, ComplexDouble>* inp_b_CPLX = inp_b.CopyTreeToComplex();
+    multiply(prec, out,c,inp_a,inp_b_CPLX,maxIter,absPrec,useMaxNorms);
+    } */
 
 /** @brief Multiplication of several MW function representations, adaptive grid
  *
@@ -371,7 +384,7 @@ template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra, Funct
             if (mwNode == nullptr) MSG_ABORT("Trees must have same grid");
             node.getAbsCoefs(valA);
             mwNode->getAbsCoefs(valB);
-            for (int i = 0; i < ncoef; i++) result += std::abs(valA[i] * valB[i]);
+            for (int i = 0; i < ncoef; i++) result += std::norm(valA[i] * valB[i]);
         } else {
             // approximate by product of node norms
             int rIdx = ket.getRootBox().getBoxIndex(idx);
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index 0749ff37e..39555c5a9 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -231,14 +231,17 @@ template <int D, typename T> void FunctionNode<D, T>::setValues(const Matrix<T,
 
 /** get coefficients corresponding to absolute value of function
  *
- * Leaves the original coefficients unchanged. */
+ * Leaves the original coefficients unchanged.
+ * Note that we must use T and not double, even if the norms are double, because
+ * the transforms expect T types.
+ */
 template <int D, typename T> void FunctionNode<D, T>::getAbsCoefs(T *absCoefs) {
     T *coefsTmp = this->coefs;
     for (int i = 0; i < this->n_coefs; i++) absCoefs[i] = coefsTmp[i]; // copy
     this->coefs = absCoefs;                                            // swap coefs
     this->mwTransform(Reconstruction);
     this->cvTransform(Forward);
-    for (int i = 0; i < this->n_coefs; i++) this->coefs[i] = std::abs(this->coefs[i]);
+    for (int i = 0; i < this->n_coefs; i++) this->coefs[i] = std::norm(this->coefs[i]);
     this->cvTransform(Backward);
     this->mwTransform(Compression);
     this->coefs = coefsTmp; // restore original array (same address)
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index e16202169..4a922b5bc 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -787,6 +787,7 @@ template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D
     }
 }
 
+
 template <int D, typename T> void FunctionTree<D, T>::deleteGenerated() {
     for (int n = 0; n < this->getNEndNodes(); n++) this->getEndMWNode(n).deleteGenerated();
 }
@@ -811,7 +812,7 @@ template <> int FunctionTree<3, double>::saveNodesAndRmCoeff() {
         }
     }
     this->nodeAllocator_p->deallocAllCoeff();
-    mpi::broadcast_Tree_noCoeff_real(*this, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff(*this, mpi::comm_wrk);
     this->isLocal = true;
     assert(this->NodeIndex2serialIx.size() == getNNodes());
     return this->NodeIndex2serialIx.size();
@@ -834,7 +835,7 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
         }
     }
     this->nodeAllocator_p->deallocAllCoeff();
-    mpi::broadcast_Tree_noCoeff_complex(*this, mpi::comm_wrk);
+    mpi::broadcast_Tree_noCoeff(*this, mpi::comm_wrk);
     this->isLocal = true;
     assert(this->NodeIndex2serialIx.size() == getNNodes());
     return this->NodeIndex2serialIx.size();
@@ -896,6 +897,27 @@ template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag()
 }
 
 
+/** Build a ComplexDouble tree from this tree: the coefficient chunks are copied element by element (double -> ComplexDouble). Returns a tree allocated with `new`. NOTE(review): the node chunks are not copied yet, see the TODO lines below. */
+template <int D, typename T> FunctionTree<D, ComplexDouble>* FunctionTree<D, T>::CopyTreeToComplex() {
+    FunctionTree<D, ComplexDouble>* outTree = new FunctionTree<D, ComplexDouble> (this->getMRA());
+    int nChunks=getNChunks();
+    outTree->getNodeAllocator().init(nChunks, true); //also allocate coefficients
+    int Ncoefperchunk = outTree->getNodeAllocator().getCoefChunkSize()/sizeof(ComplexDouble); // presumably getCoefChunkSize() is in bytes - TODO confirm
+    for (int iChunk = 0; iChunk < nChunks; iChunk++) {
+        //MWNode<D, double> * inNode = inTree.getNodeAllocator().getNodeChunk(iChunk); //TODO
+        //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree.getNodeAllocator().getNodeChunk(iChunk);//TODO
+        ComplexDouble* Ccoefs;
+        Ccoefs = outTree->getNodeAllocator().getCoefChunk(iChunk);
+        auto InCoefs = this->getNodeAllocator().getCoefChunk(iChunk); // can be type double* or ComplexDouble*
+        for (int i = 0; i < Ncoefperchunk; i++) {
+            Ccoefs[i] = InCoefs[i]; // element-wise assignment into the ComplexDouble chunk
+        }
+    }
+    outTree->getNodeAllocator().reassemble();
+    return outTree; // raw pointer to the tree created with new above
+}
+
+
 template class FunctionTree<1, double>;
 template class FunctionTree<2, double>;
 template class FunctionTree<3, double>;
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 4c9e632c6..1c71dcfc6 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -111,10 +111,11 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
                          std::vector<MWNode<D, T> *> *refNodes = nullptr);
     void makeTreefromCoeff(MWTree<D, T> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode = "adaptive");
     void appendTreeNoCoeff(MWTree<D, T> &inTree);
-
+    void CopyTree(FunctionTree<D, double> &inTree);
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
     void deep_copy(FunctionTree<D, T> *out);
+    FunctionTree<D, ComplexDouble>* CopyTreeToComplex();
     FunctionTree<D, double> *Real();
     FunctionTree<D, double> *Imag();
 protected:
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index c2f231ccf..c2124bda2 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -183,5 +183,4 @@ class BankAccount;
 
     virtual std::ostream &print(std::ostream &o) const;
 };
-
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 3c0dc3276..c2592b1e0 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -69,6 +69,13 @@ namespace mrcpp {
 
   }
 
+/*
+ * Empty functions (no components defined)
+ */
+  template <int D>
+  CompFunction<D>::CompFunction(const CompFunctionData<D>& indata)
+  { data = indata;}
+
 /** @brief Copy constructor
  *
  * Shallow copy: meta data is copied along with the component pointers,
@@ -105,32 +112,79 @@ namespace mrcpp {
       return *this;
   }
 
-//    template <int D>
-//    template <int D_, typename std::enable_if<D_ == 3, int>::type>
-//    CompFunction<D>::operator ComplexFunction() const {
- //       return ComplexFunction(*this); // const conversion
- //   }
-    //    template <int D, typename T>
-   //    template <int D_, typename std::enable_if<D_ == 3, int>::type>
-   //    CompFunction<D, T>::operator ComplexFunction() {
-   //        return ComplexFunction(std::move(*this)); // non-const conversion
-   //    }
-  //
+/** @brief Parameter copy
+ *
+ * Returns a new function constructed from this function's `data` only.
+ */
+template <int D>
+CompFunction<D> CompFunction<D>::paramCopy() const {
+    return CompFunction<D>(data);
+}
 
- //   template CompFunction<3>::operator ComplexFunction() const;
 
-    template <int D>
-    void CompFunction<D>::flushFuncData() {
-      for (int i = 0; i < Ncomp; i++) {
-          if (isreal) {
-              Nchunks[i] = CompD[i]->getNChunksUsed();
-          } else {
-              Nchunks[i] = CompC[i]->getNChunksUsed();
-          }
-      }
-      for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
+template <int D>
+void CompFunction<D>::flushMRAData() {
+    const auto &box = defaultCompMRA<3>->getWorldBox();
+    data.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
+    data.order = defaultCompMRA<3>->getOrder();
+    data.depth = defaultCompMRA<3>->getMaxDepth();
+    data.scale = box.getScale();
+    data.boxes[0] = box.size(0);
+    data.boxes[1] = box.size(1);
+    data.boxes[2] = box.size(2);
+    data.corner[0] = box.getCornerIndex().getTranslation(0);
+    data.corner[1] = box.getCornerIndex().getTranslation(1);
+    data.corner[2] = box.getCornerIndex().getTranslation(2);
+}
+
+template <int D>
+void CompFunction<D>::flushFuncData() {
+    if (D == 3) flushMRAData();
+    for (int i = 0; i < Ncomp; i++) {
+        if (isreal) {
+            Nchunks[i] = CompD[i]->getNChunksUsed();
+        } else {
+            Nchunks[i] = CompC[i]->getNChunksUsed();
+        }
     }
-  template <int D>
+    for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
+}
+/** @brief Return a new CompFunctionData holding the default-MRA parameters and the chunk count of every component */
+template <int D>
+CompFunctionData<D> CompFunction<D>::getFuncData() const {
+    CompFunctionData<D> outdata; // NOTE(review): only the fields assigned below are filled in here
+    const auto &box = defaultCompMRA<3>->getWorldBox();
+    outdata.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
+    outdata.order = defaultCompMRA<3>->getOrder();
+    outdata.depth = defaultCompMRA<3>->getMaxDepth();
+    outdata.scale = box.getScale();
+    outdata.boxes[0] = box.size(0);
+    outdata.boxes[1] = box.size(1);
+    outdata.boxes[2] = box.size(2);
+    outdata.corner[0] = box.getCornerIndex().getTranslation(0);
+    outdata.corner[1] = box.getCornerIndex().getTranslation(1);
+    outdata.corner[2] = box.getCornerIndex().getTranslation(2);
+    for (int i = 0; i < Ncomp; i++) {
+        if (isreal) {
+            outdata.Nchunks[i] = CompD[i]->getNChunksUsed();
+        } else {
+            outdata.Nchunks[i] = CompC[i]->getNChunksUsed();
+        }
+    }
+    for (int i = Ncomp; i < 4; i++) outdata.Nchunks[i] = 0; // zero the unused slots of the returned copy, not of this object
+    return outdata;
+}
+
+/** @brief Integral of the first component (index 0) only; the other components are not included */
+template <int D>
+ComplexDouble CompFunction<D>::integrate() const {
+    ComplexDouble integral;
+    if (isreal) integral = CompD[0]->integrate(); // real tree of component 0
+    else integral = CompC[0]->integrate(); // complex tree of component 0
+    return integral;
+}
+
+    template <int D>
   double CompFunction<D>::norm() const {
      double norm = squaredNorm();
      for (int i = 0; i < Ncomp; i++) {
@@ -156,28 +210,34 @@ namespace mrcpp {
      return norm;
   }
   template <int D>
-  void CompFunction<D>::alloc(int i) {
+  void CompFunction<D>::alloc(int ialloc) { // (re)allocate components 0..ialloc and drop any higher ones
       if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      if (CompD[i] != nullptr) delete CompD[i];
-      if (CompC[i] != nullptr) delete CompC[i];
-      if (isreal) {
-          CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
-      } else {
-          CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>);
+      if (isreal == 0 and iscomplex == 0)  MSG_ABORT("Function must be defined either real or complex");
+      for (int i = 0; i < ialloc + 1; i++) {
+          delete CompD[i];
+          delete CompC[i];
+          CompD[i] = nullptr; // never keep a pointer to a deleted tree
+          CompC[i] = nullptr;
+          if (isreal) CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
+          if (iscomplex) CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>);
+          // keep Ncomp as an upper bound for the clean-up loop below
+          // (the final component count is set after that loop)
+          Ncomp = std::max(Ncomp, i + 1);
       }
-      Ncomp = std::max(Ncomp, i + 1);
+      for (int i = ialloc + 1; i < Ncomp; i++) {
+          //delete possible remaining components
+          delete CompD[i]; CompD[i] = nullptr;
+          delete CompC[i]; CompC[i] = nullptr;
+      }
+      Ncomp = ialloc + 1; // only components 0..ialloc exist from here on
  }
 
 template <int D>
 void CompFunction<D>::free() {
     //TODO: shared memory handling
     for (int i = 0; i < Ncomp; i++) {
-        if (CompD[i]!= nullptr) {
-            delete CompD[i];
-        }
-        if (CompC[i]!= nullptr) {
-            delete CompC[i];
-        }
+        delete CompD[i]; CompD[i] = nullptr; // reset so a later alloc()/free() cannot delete the same tree twice
+        delete CompC[i]; CompC[i] = nullptr; // (deleting a null pointer is a no-op)
     }
 }
 
@@ -201,6 +261,13 @@ int CompFunction<D>::getNNodes() const {
     return nNodes;
 }
 
+template <int D>
+CompFunction<D> CompFunction<D>::dagger() {
+        CompFunction<D> out(*this); // Returns shallow copy
+        out.data.conj = not(this->data.conj);
+        return out;
+}
+
 template <int D>
 FunctionTree<D, double> &CompFunction<D>::real(int i) {
     if (!isreal) MSG_ABORT("not real function");
@@ -226,11 +293,11 @@ const FunctionTree<D, double> &CompFunction<D>::imag(int i) const {
  /* for backwards compatibility */
 template <int D>
 void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
+      isreal = 1;
       if (CompD[i] != nullptr) delete CompD[i];
       if (iscomplex) MSG_ERROR("cannot write real tree into complex function");
       CompD[i] = tree;
       if (tree != nullptr) {
-          isreal = 1;
           Ncomp = std::max(Ncomp, i + 1);
       } else {Ncomp = std::min(Ncomp, i);}
 }
@@ -329,10 +396,10 @@ template class CompFunction<3>;
       out.data = inp.data;
       for (int i = 0; i < inp.Ncomp; i++) {
           if (inp.isreal) {
-              delete out.CompD[i];
+              out.CompD[i] = nullptr;
               inp.CompD[i]->deep_copy(out.CompD[i]);
           } else {
-              delete out.CompC[i];
+              out.CompC[i] = nullptr;
               inp.CompC[i]->deep_copy(out.CompC[i]);
           }
       }
@@ -412,12 +479,20 @@ template <int D>
  */
 template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms) {
+    multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms);
+}
+
+
+/** @brief out = inp_a * inp_b
+ *
+ */
+template <int D>
+void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms) {
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
     for (int comp = 0; comp < inp_a[0].Ncomp; comp++) {
         if (inp_a.isreal and inp_b.isreal) {
             delete out.CompD[comp];
             FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA());
-            double coef = 1.0;
             if (need_to_multiply) {
                 if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
                 if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
@@ -428,11 +503,25 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
                     mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0);
                 } else {
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], -1, absPrec, useMaxNorms);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms);
                 }
             }
             out.CompD[comp] = tree;
         } else {
+            // if one of the input is real, we simply make a new complex copy of it
+            bool inp_aisReal = inp_a.isreal;
+            bool inp_bisReal = inp_b.isreal;
+            if(inp_aisReal) {
+                inp_a.CompC[comp] = inp_a.CompD[comp]->CopyTreeToComplex();
+                inp_a.iscomplex = true;
+                inp_a.isreal = false;
+            }
+            if(inp_bisReal) {
+                inp_b.CompC[comp] = inp_b.CompD[comp]->CopyTreeToComplex();
+                inp_b.iscomplex = true;
+                inp_b.isreal = false;
+            }
+
             delete out.CompC[comp];
             FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0]->getMRA());
             ComplexDouble coef = 1.0;
@@ -446,10 +535,21 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
                     mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0);
                 } else {
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], -1, absPrec, useMaxNorms);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms);
                 }
             }
             out.CompC[comp] = tree;
+            // restore original tree
+            if(inp_aisReal) {
+                delete inp_a.CompC[comp];
+                inp_a.iscomplex = false;
+                inp_a.isreal = true;
+            }
+            if(inp_bisReal) {
+                delete inp_b.CompC[comp];
+                inp_b.iscomplex = false;
+                inp_b.isreal = true;
+            }
         }
     }
     mpi::share_function(out, 0, 9911, mpi::comm_share);
@@ -499,6 +599,25 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
 }
 
 
+/** @brief Compute  <bra|ket> = int |bra^\dag(r)| * |ket(r)| dr.
+ *
+ *  sum of components
+ */
+template <int D>
+double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
+    double dotprod = 0.0;
+    if (bra.data.conj or ket.data.conj) MSG_ERROR("dot with conjugaison not implemented");
+    for (int comp = 0; comp < bra.Ncomp; comp++) {
+          if (bra.isreal and ket.isreal) {
+              dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
+          } else {
+              dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
+          }
+    }
+    return dotprod;
+}
+
+
 template <int D, typename T>
 void project(CompFunction<D> &out, std::function<double(const Coord<D> &r)> f, double prec) {
 if (std::is_same<T, double>::value) {
@@ -545,12 +664,12 @@ void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, d
 
 
 CompFunctionVector::CompFunctionVector(int N):
-    std::vector<CompFunction<3>*>(N) {
-    for (int i = 0; i < N; i++) (*this)[i]->rank = i;
+    std::vector<CompFunction<3>>(N) {
+    for (int i = 0; i < N; i++) (*this)[i].rank = i;
     vecMRA = defaultCompMRA<3>;
 }
 void CompFunctionVector::distribute() {
-    for (int i = 0; i < this->size(); i++) (*this)[i]->rank = i;
+    for (int i = 0; i < this->size(); i++) (*this)[i].rank = i;
 }
 
 
@@ -601,8 +720,8 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     IntVector PsihasReIm = IntVector::Zero(2);
     for (int j = 0; j < N; j++) {
         if (!mpi::my_func(j)) continue;
-        PsihasReIm[0] = (Phi[j]->hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[j]->hasImag()) ? 1 : 0;
+        PsihasReIm[0] = (Phi[j].hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[j].hasImag()) ? 1 : 0;
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
     if (not PsihasReIm[0] and not PsihasReIm[1]) {
@@ -614,8 +733,8 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
     for (int j = 0; j < M; j++) {
         if (!mpi::my_func(j)) continue;
-        if (not makeReal and Psi[j]->hasReal()) Psi[j]->free(NUMBER::Real);
-        if (not makeImag and Psi[j]->hasImag()) Psi[j]->free(NUMBER::Imag);
+        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
+        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
     }
 
     if (not makeReal and not makeImag) { return; }
@@ -628,7 +747,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     IntVector conjMat = IntVector::Zero(Neff);
     for (int j = 0; j < Neff; j++) {
         if (!mpi::my_func(j % N)) continue;
-        conjMat[j] = (Phi[j % N]->conjugate()) ? -1 : 1;
+        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
     }
     mpi::allreduce_vector(conjMat, mpi::comm_wrk);
 
@@ -675,8 +794,8 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j]->hasReal()) {
-                Phi[j]->real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j]) {
@@ -685,8 +804,8 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (Phi[j]->hasImag()) {
-                Phi[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j + N]) {
@@ -889,13 +1008,13 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         for (int j = 0; j < Meff; j++) {
             if (coeffpVec[j].size()==0) continue;
             if (j < M) {
-                if (!Psi[j]->hasReal()) Psi[j]->alloc(0);
-                Psi[j]->real().clear();
-                Psi[j]->real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+                if (!Psi[j].hasReal()) Psi[j].alloc(0);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
             } else {
-                if (!Psi[j % M]->hasImag()) Psi[j % M]->alloc(0);
-                Psi[j % M]->imag().clear();
-                Psi[j % M]->imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(0);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
             }
         }
 
@@ -926,14 +1045,14 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             }
             if (j < M) {
                 // Real part
-                if (!Psi[j]->hasReal()) Psi[j]->alloc(0);
-                Psi[j]->real().clear();
-                Psi[j]->real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+                if (!Psi[j].hasReal()) Psi[j].alloc(0);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             } else {
                 // Imag part
-                if (!Psi[j % M]->hasImag()) Psi[j % M]->alloc(0);
-                Psi[j % M]->imag().clear();
-                Psi[j % M]->imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(0);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             }
             for (double *p : pointerstodelete) delete[] p;
             pointerstodelete.clear();
@@ -963,8 +1082,8 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
     for (int j = 0; j < N; j++) {
         if (not mpi::my_func(j)) continue;
         // make vector with all coef address and their index in the union grid
-        if (Phi[j]->hasReal()) {
-            Phi[j]->real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        if (Phi[j].hasReal()) {
+            Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
             // except for the root nodes, only wavelets are sent
@@ -980,8 +1099,8 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
             }
         }
         // Imaginary parts are considered as orbitals with an orbid shifted by N
-        if (Phi[j]->hasImag()) {
-            Phi[j]->imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        if (Phi[j].hasImag()) {
+            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
             for (int i = 0; i < max_n; i++) {
@@ -1020,9 +1139,9 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     for (int i = 0; i < N; i++) {
         if (!mpi::my_func(i)) continue;
         int irefine = 0;
-        while (Phi[i]->hasReal() and irefine < nrefine and refine_grid(Phi[i]->real(), f) > 0) irefine++;
+        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
         irefine = 0;
-        while (Phi[i]->hasImag() and irefine < nrefine and refine_grid(Phi[i]->imag(), f) > 0) irefine++;
+        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
     }
 
     // 1b) make union tree without coefficients
@@ -1037,8 +1156,8 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     IntVector PsihasReIm = IntVector::Zero(2);
     for (int i = 0; i < N; i++) {
         if (!mpi::my_func(i)) continue;
-        PsihasReIm[0] = (Phi[i]->hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[i]->hasImag()) ? 1 : 0;
+        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
     CompFunctionVector out(N);
@@ -1078,8 +1197,8 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j]->hasReal()) {
-                Phi[j]->real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j]) {
@@ -1088,8 +1207,8 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (Phi[j]->hasImag()) {
-                Phi[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec[j + N]) {
@@ -1267,21 +1386,21 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < Neff; j++) {
             if (j < N) {
-                if (Phi[j]->hasReal()) {
-                    out[j]->alloc(0);
-                    out[j]->real().clear();
-                    out[j]->real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(0);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
                     // 6) reconstruct trees from end nodes
-                    out[j]->real().mwTransform(BottomUp);
-                    out[j]->real().calcSquareNorm();
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
                 }
             } else {
-                if (Phi[j % N]->hasImag()) {
-                    out[j % N]->alloc(0);
-                    out[j % N]->imag().clear();
-                    out[j % N]->imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    out[j]->imag().mwTransform(BottomUp);
-                    out[j]->imag().calcSquareNorm();
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(0);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    out[j % N].imag().mwTransform(BottomUp); // j >= N in this branch: wrap the index like the lines above
+                    out[j % N].imag().calcSquareNorm();
                 }
             }
         }
@@ -1309,26 +1428,26 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                 }
             }
             if (j < N) {
-                if (Phi[j]->hasReal()) {
-                    out[j]->alloc(0);
-                    out[j]->real().clear();
-                    out[j]->real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(0);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
                     // 6) reconstruct trees from end nodes
-                    out[j]->real().mwTransform(BottomUp);
-                    out[j]->real().calcSquareNorm();
-                    out[j]->real().resetEndNodeTable();
-                    // out[j]->real().crop(prec, 1.0, false); //bad convergence if out is cropped
-                    if (nrefine > 0) Phi[j]->real().crop(prec, 1.0, false); // restablishes original Phi
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
+                    out[j].real().resetEndNodeTable();
+                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
+                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
                 }
             } else {
-                if (Phi[j % N]->hasImag()) {
-                    out[j % N]->alloc(0);
-                    out[j % N]->imag().clear();
-                    out[j % N]->imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    out[j % N]->imag().mwTransform(BottomUp);
-                    out[j % N]->imag().calcSquareNorm();
-                    // out[j % N]->imag().crop(prec, 1.0, false);
-                    if (nrefine > 0) Phi[j % N]->imag().crop(prec, 1.0, false);
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(0);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    out[j % N].imag().mwTransform(BottomUp);
+                    out[j % N].imag().calcSquareNorm();
+                    // out[j % N].imag().crop(prec, 1.0, false);
+                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
                 }
             }
 
@@ -1349,8 +1468,8 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket) {
     for (int i = 0; i < N; i++) {
         // The bra is sent to the owner of the ket
         if (my_func(Bra[i]) != my_func(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
-        result[i] = dot(*Bra[i], *Ket[i]);
-        if (not mrcpp::mpi::my_func(i)) Bra[i]->free();
+        result[i] = dot(Bra[i], Ket[i]);
+        if (not mrcpp::mpi::my_func(i)) Bra[i].free();
     }
     mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
     return result;
@@ -1418,8 +1537,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j]->hasReal()) {
-                BraKet[j]->real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1428,8 +1547,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (BraKet[j]->hasImag()) {
-                BraKet[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1475,9 +1594,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
-                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N]->data.n1[0] != 0)
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0] != 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -1496,9 +1615,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
-                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N]->data.n1[0] != 0)
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0] != 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -1509,7 +1628,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     IntVector conjMat = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i]->conjugate()) ? -1 : 1;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
@@ -1583,8 +1702,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Bra[j]->hasReal()) {
-                Bra[j]->real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Bra[j].hasReal()) {
+                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1593,8 +1712,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                     node2orbVecBra[ix].push_back(j);
                 }
             }
-            if (Bra[j]->hasImag()) {
-                Bra[j]->imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Bra[j].hasImag()) {
+                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1605,8 +1724,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
             }
         }
         for (int j = 0; j < M; j++) {
-            if (Ket[j]->hasReal()) {
-                Ket[j]->real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Ket[j].hasReal()) {
+                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1615,8 +1734,8 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                     node2orbVecKet[ix].push_back(j);
                 }
             }
-            if (Ket[j]->hasImag()) {
-                Ket[j]->imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (Ket[j].hasImag()) {
+                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1673,9 +1792,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N]->data.n1[0] != Ket[orbVecKet[j] % M]->data.n1[0] and
-                            Bra[orbVecBra[i] % N]->data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M]->data.n1[0] != 0)
+                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
+                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
+                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
                             continue;
                         // must ensure that threads are not competing
                         double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
@@ -1701,9 +1820,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N]->data.n1[0] != Ket[orbVecKet[j] % M]->data.n1[0] and
-                            Bra[orbVecBra[i] % N]->data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M]->data.n1[0] != 0)
+                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
+                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
+                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
                             continue;
                         Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
@@ -1715,13 +1834,13 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     IntVector conjMatBra = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(Bra[i])) continue;
-        conjMatBra[i] = (Bra[i]->conjugate()) ? -1 : 1;
+        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
     IntVector conjMatKet = IntVector::Zero(M);
     for (int i = 0; i < M; i++) {
         if (!mrcpp::mpi::my_func(Ket[i])) continue;
-        conjMatKet[i] = (Ket[i]->conjugate()) ? -1 : 1;
+        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
 
@@ -1782,8 +1901,8 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j]->hasReal()) {
-                BraKet[j]->real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1792,8 +1911,8 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                     node2orbVec[ix].push_back(j);
                 }
             }
-            if (BraKet[j]->hasImag()) {
-                BraKet[j]->imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
                 // make a map that gives j from indexVec
                 int orb_node_ix = 0;
                 for (int ix : indexVec) {
@@ -1839,9 +1958,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
-                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N]->data.n1[0]!= 0)
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -1861,9 +1980,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N]->data.n1[0] != BraKet[orbVec[j] % N]->data.n1[0] and
-                            BraKet[orbVec[i] % N]->data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N]->data.n1[0]!= 0)
+                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
+                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
+                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -1875,7 +1994,7 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
     IntVector conjMat = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
         if (!mrcpp::mpi::my_func(i)) continue;
-        conjMat[i] = (BraKet[i]->conjugate()) ? -1 : 1;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
     }
     mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
@@ -1901,7 +2020,7 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
     int M = Ket.size();
     DoubleVector Ketnorms = DoubleVector::Zero(M);
     for (int i = 0; i < M; i++) {
-        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i]->squaredNorm();
+        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i].squaredNorm();
     }
     mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
     ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
@@ -1913,7 +2032,7 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
     CompFunctionVector rotatedKet(N);
     rotate(Ket, rmat, rotatedKet, prec / M);
     for (int j = 0; j < N; j++) {
-        if(my_func(Bra[j]))Bra[j]->add(1.0,*rotatedKet[j]);
+        if(my_func(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
     }
 }
 template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket) ;
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index a9d71f722..df4bf7267 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -16,8 +16,8 @@ struct CompFunctionData {
     int conj{0}; // conjugate of all components
     int CompFn1{0};
     int CompFn2{0};
-    int isreal{1}; // T=double
-    int iscomplex{0}; // T=DoubleComplex
+    int isreal{0}; // trees are defined for T=double
+    int iscomplex{0}; // trees are defined for T=DoubleComplex
     double CompFd1{0.0};
     double CompFd2{0.0};
     double CompFd3{0.0};
@@ -31,6 +31,14 @@ struct CompFunctionData {
     double d1[4]{0.0,0.0,0.0,0.0};
     double d2[4]{0.0,0.0,0.0,0.0};
     double d3[4]{0.0,0.0,0.0,0.0};
+    // used for storage on disk
+    int type{0};
+    int order{1};
+    int scale{0};
+    int depth{0};
+    int boxes[3] = {0, 0, 0};
+    int corner[3] = {0, 0, 0};
+
     // used internally
     int shared{0};
     int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
@@ -43,6 +51,7 @@ template <int D> class CompFunction {
     CompFunction();
     CompFunction(int n1);
     CompFunction(int n1, bool share);
+    CompFunction(const CompFunctionData<D>& indata);
     CompFunction(const CompFunction<D> &compfunc);
     CompFunction(CompFunction<D> && compfunc);
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
@@ -72,6 +81,8 @@ template <int D> class CompFunction {
         }
     }
 
+    CompFunction paramCopy() const;
+    ComplexDouble integrate() const;
     double norm() const;
     double squaredNorm() const;
     void alloc(int i);
@@ -85,20 +96,22 @@ template <int D> class CompFunction {
     void free();
     int getSizeNodes() const;
     int getNNodes() const;
+    void flushMRAData();
+    void flushFuncData();
+    CompFunctionData<D> getFuncData() const;
 
-    //NB: All tbelow should be revised. Now only for backwards compatibility to ComplexFunction class
+    //NB: All below should be revised. Now only for backwards compatibility to ComplexFunction class
 
+    void free(int type) {free();}
     bool hasReal()  const {return isreal;}
     bool hasImag()  const {return iscomplex;}
     bool isShared() const {return data.shared;}
     bool conjugate() const {return data.conj;}
-
+    CompFunction<D> dagger();
     FunctionTree<D, double> &real(int i = 0);
     FunctionTree<D, double> &imag(int i = 0); //does not make sense now
     const FunctionTree<D, double> &real(int i = 0) const;
     const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
-    void free(int type) {delete CompD[0]; CompD[0] = nullptr; delete CompC[0]; CompC[0] = nullptr;}
-    void flushFuncData();
 };
 
 template <int D>
@@ -110,13 +123,19 @@ void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDo
 template <int D>
 void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec);
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms);
+void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false);
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false);
 template <int D, typename T>
 void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
 template <int D, typename T>
 void multiply(CompFunction<D> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
+template <int D>
+double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket);
 template <int D, typename T>
 void project(CompFunction<D> &out, std::function<T(const Coord<D> &r)> f, double prec);
 template <int D>
@@ -124,7 +143,7 @@ void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double p
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
 
-class CompFunctionVector : public std::vector<CompFunction<3>*> {
+class CompFunctionVector : public std::vector<CompFunction<3>> {
 public:
     CompFunctionVector(int N = 0);
     MultiResolutionAnalysis<3> *vecMRA;
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 38bd08859..d08185b4a 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -281,8 +281,8 @@ bool my_func(CompFunction<3> *func) {
 
 /** @brief Free all function pointers not belonging to this MPI rank */
 void free_foreign(CompFunctionVector &Phi) {
-    for (CompFunction<3>* &i : Phi) {
-        if (not my_func(i)) i->alloc(0);
+    for (CompFunction<3> &i : Phi) {
+        if (not my_func(i)) i.alloc(0);
     }
 }
 
@@ -368,7 +368,7 @@ void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
 }
 
 // send a component function with MPI
-void send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
+void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     for (int i = 0; i < func.data.Ncomp; i++) {
         //make sure that Nchunks is up to date
@@ -505,7 +505,7 @@ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -546,7 +546,7 @@ void reduce_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm com
 }
 
 /** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -601,15 +601,15 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Complex
         if (Phi[j].hasReal()) tree.appendTreeNoCoeff(Phi[j].real());
         if (Phi[j].hasImag()) tree.appendTreeNoCoeff(Phi[j].imag());
     }
-    reduce_Tree_noCoeff_real(tree, comm_wrk);
-    broadcast_Tree_noCoeff_real(tree, comm_wrk);
+    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
+    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
 /** @brief make union tree without coeff and send to all
  *  Real trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>*> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -618,17 +618,17 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j]->CompD[0]);
+        tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
     }
-    reduce_Tree_noCoeff_real(tree, comm_wrk);
-    broadcast_Tree_noCoeff_real(tree, comm_wrk);
+    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
+    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
 /** @brief make union tree without coeff and send to all
  *  Complex trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>*> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -637,10 +637,10 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j]->CompC[0]);
+        tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
-    reduce_Tree_noCoeff_complex(tree, comm_wrk);
-    broadcast_Tree_noCoeff_complex(tree, comm_wrk);
+    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
+    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 
@@ -658,8 +658,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
         if (not my_orb(j)) continue;
         tree.appendTreeNoCoeff(Phi[j]);
     }
-    reduce_Tree_noCoeff_complex(tree, comm_wrk);
-    broadcast_Tree_noCoeff_complex(tree, comm_wrk);
+    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
+    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
 }
 
 /** @brief Distribute rank zero function to all ranks */
@@ -727,7 +727,7 @@ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -759,7 +759,7 @@ void broadcast_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 32275073f..6277c0c24 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -51,7 +51,7 @@ void free_foreign(CompFunctionVector &Phi);
 
 void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
-void send_function(CompFunction<3> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
+void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
 void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
 void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm);
@@ -61,15 +61,15 @@ void broadcast_function(ComplexFunction &func, MPI_Comm comm);
 void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm);
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm);
 
-void reduce_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff_real(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
-void reduce_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff_complex(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>*> &Phi, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
 
 void allreduce_vector(IntVector &vec, MPI_Comm comm);
 void allreduce_vector(DoubleVector &vec, MPI_Comm comm);

From da6e5e90f798c6b4d4be874e58ccf296fced3a47 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Thu, 1 Aug 2024 16:48:49 +0200
Subject: [PATCH 19/38] added #ifdef MRCPP_HAS_MPI in parallel.cpp

---
 src/utils/parallel.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index d08185b4a..8059b357b 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -590,6 +590,7 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm c
  *  Include both real and imaginary parts
  */
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -603,6 +604,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Complex
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
+#endif
 }
 
 
@@ -610,6 +612,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<Complex
  *  Real trees
  */
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -622,6 +625,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
+#endif
 }
 
 
@@ -629,6 +633,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
  *  Complex trees
  */
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -641,6 +646,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
+#endif
 }
 
 
@@ -648,6 +654,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
  *  Include both real and imaginary parts
  */
     void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
+#ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
        3) rank zero broadcast func to everybody
@@ -660,6 +667,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
+#endif
 }
 
 /** @brief Distribute rank zero function to all ranks */
@@ -790,5 +798,4 @@ void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Com
 #endif
 }
 } // namespace mpi
-
 } // namespace mrcpp

From 6ffb77f3b280c7624fd7133a71f2855641c17739 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Thu, 1 Aug 2024 17:10:59 +0200
Subject: [PATCH 20/38] moved #endif of the MRCPP_HAS_MPI guard out of the loop body in parallel.cpp

---
 src/utils/parallel.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 8059b357b..34a334c50 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -415,8 +415,8 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
         for (int comp = 0; comp < func.Ncomp; comp++) {
             if (func.isreal) mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
             else  mrcpp::share_tree(*func.CompC[comp], src, tag, comm);
-#endif
         }
+#endif
     }
 }
 

From 36ee92bbe5f08c3be8c3db3aa142184dd259eaa4 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 5 Aug 2024 15:32:12 +0200
Subject: [PATCH 21/38] compiles together with UseComponent branch of mrchem

---
 src/treebuilders/apply.cpp    |    8 +-
 src/treebuilders/grid.cpp     |    8 +-
 src/utils/Bank.cpp            |    2 +-
 src/utils/Bank.h              |    1 -
 src/utils/CMakeLists.txt      |    3 -
 src/utils/CompFunction.cpp    |  113 +-
 src/utils/CompFunction.h      |   32 +-
 src/utils/ComplexFunction.cpp | 2016 ---------------------------------
 src/utils/ComplexFunction.h   |  199 ----
 src/utils/parallel.cpp        |  145 ---
 src/utils/parallel.h          |    8 -
 11 files changed, 113 insertions(+), 2422 deletions(-)
 delete mode 100644 src/utils/ComplexFunction.cpp
 delete mode 100644 src/utils/ComplexFunction.h

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index e33fc8c85..47cffa711 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -473,16 +473,18 @@ template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, C
     for (int icomp = 0; icomp < inp.Ncomp; icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal) {
+                if (inp.isreal and std::imag(metric[icomp][ocomp]) < MachinePrec) {
                     apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
+                        out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
                     }
+                    out.isreal = 1;
                 } else {
                     apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
                     }
+                    out.iscomplex = 1;
                 }
             }
         }
@@ -657,4 +659,6 @@ template FunctionTreeVector<1, ComplexDouble> gradient<1>(DerivativeOperator<1>
 template FunctionTreeVector<2, ComplexDouble> gradient<2>(DerivativeOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp);
 template FunctionTreeVector<3, ComplexDouble> gradient<3>(DerivativeOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp);
 
+template void apply(CompFunction<3> &out, DerivativeOperator<3> &oper, CompFunction<3> &inp, int dir = -1, ComplexDouble metric[4][4] = nullptr);
+
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index e7d83e719..277ab4d8a 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -240,8 +240,8 @@ template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
     out.data = inp.data;
     out.alloc(inp.Ncomp);
     for (int i = 0; i < inp.Ncomp; i++) {
-        if (inp.isreal) build_grid(out.CompD[i], inp.CompD[i]);
-        if (inp.iscomplex) build_grid(out.CompC[i], inp.CompC[i]);
+        if (inp.isreal) build_grid(*out.CompD[i], *inp.CompD[i]);
+        if (inp.iscomplex) build_grid(*out.CompC[i], *inp.CompC[i]);
     }
 }
 
@@ -345,6 +345,10 @@ template <int D, typename T> int refine_grid(FunctionTree<D, T> &out, const Repr
     return nSplit;
 }
 
+template void copy_grid(CompFunction<1> &out, CompFunction<1> &inp);
+template void copy_grid(CompFunction<2> &out, CompFunction<2> &inp);
+template void copy_grid(CompFunction<3> &out, CompFunction<3> &inp);
+
 template void build_grid<1, double>(FunctionTree<1, double> &out, int scales);
 template void build_grid<2, double>(FunctionTree<2, double> &out, int scales);
 template void build_grid<3, double>(FunctionTree<3, double> &out, int scales);
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index f924b971b..20bcd49b0 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -479,7 +479,7 @@ void Bank::remove_account(int account) {
     }
     std::vector<deposit> &deposits = *get_deposits[account];
     for (int ix = 1; ix < deposits.size(); ix++) {
-       if (deposits[ix].orb != nullptr) deposits[ix].orb->free(NUMBER::Total);
+       if (deposits[ix].orb != nullptr) deposits[ix].orb->free();
        if (deposits[ix].hasdata) {
            currentsize[account] -= deposits[ix].datasize / 128;
            totcurrentsize -= deposits[ix].datasize / 128;
diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index b4d8b3c76..dc52791b3 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -1,6 +1,5 @@
 #pragma once
 
-#include "ComplexFunction.h"
 #include "CompFunction.h"
 #include "parallel.h"
 #include "trees/NodeIndex.h"
diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt
index 2087a6d35..c3238abec 100644
--- a/src/utils/CMakeLists.txt
+++ b/src/utils/CMakeLists.txt
@@ -11,8 +11,6 @@ target_sources(mrcpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tree_utils.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Bank.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/parallel.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/CompFunction.cpp
   )
 
@@ -30,7 +28,6 @@ list(APPEND ${_dirname}_h
   ${CMAKE_CURRENT_SOURCE_DIR}/tree_utils.h
   ${CMAKE_CURRENT_SOURCE_DIR}/Bank.h
   ${CMAKE_CURRENT_SOURCE_DIR}/parallel.h
-  ${CMAKE_CURRENT_SOURCE_DIR}/ComplexFunction.h
   ${CMAKE_CURRENT_SOURCE_DIR}/CompFunction.h
   )
 
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index c2592b1e0..1e6ff066d 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -271,24 +271,39 @@ CompFunction<D> CompFunction<D>::dagger() {
 template <int D>
 FunctionTree<D, double> &CompFunction<D>::real(int i) {
     if (!isreal) MSG_ABORT("not real function");
+    if (CompD[i] == nullptr) alloc(i);
     return *CompD[i];
 }
 template <int D> //NB: should return CompC in the future
 FunctionTree<D, double>  &CompFunction<D>::imag(int i) {
+    MSG_ABORT("Must choose real or complex");
     if (!iscomplex) MSG_ABORT("not complex function");
     return *CompD[i];
 }
 
+template <int D>
+FunctionTree<D, ComplexDouble>  &CompFunction<D>::complex(int i) {
+    if (!iscomplex) MSG_ABORT("not marked as a complex function");
+    if (CompC[i] == nullptr) alloc(i);
+    return *CompC[i];
+}
+
 template <int D>
 const FunctionTree<D, double> &CompFunction<D>::real(int i) const {
     if (!isreal) MSG_ABORT("not real function");
     return *CompD[i];
 }
-template <int D> //NB: should return CompC in the future
+template <int D> // NB: always aborts; callers must use real() or complex() instead
 const FunctionTree<D, double> &CompFunction<D>::imag(int i) const {
+    MSG_ABORT("Must choose real or complex");
     if (!iscomplex) MSG_ABORT("not complex function");
     return *CompD[i];
 }
+template <int D>
+const FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) const {
+    if (!iscomplex) MSG_ABORT("not marked as a complex function");
+    return *CompC[i];
+}
 
  /* for backwards compatibility */
 template <int D>
@@ -441,7 +456,7 @@ template <int D>
             if (need_to_add) {
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
-                        build_grid(out.CompD[comp], fvec);
+                        build_grid(*out.CompD[comp], fvec);
                         mrcpp::add(prec, *out.CompD[comp], fvec, 0);
                     } else {
                         mrcpp::add(prec, *out.CompD[comp], fvec);
@@ -455,12 +470,12 @@ template <int D>
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
                 if (inp[i].data.conj) MSG_ERROR("conjugaison not implemented");
-                fvec.push_back(std::make_tuple(c[i], inp[i].CompD[comp]));
+                fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp]));
             }
             if (need_to_add) {
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
-                        build_grid(out.CompC[comp], fvec);
+                        build_grid(*out.CompC[comp], fvec);
                         mrcpp::add(prec, *out.CompC[comp], fvec, 0);
                     } else {
                         mrcpp::add(prec, *out.CompC[comp], fvec);
@@ -489,7 +504,7 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
 template <int D>
 void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms) {
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-    for (int comp = 0; comp < inp_a[0].Ncomp; comp++) {
+    for (int comp = 0; comp < inp_a.Ncomp; comp++) {
         if (inp_a.isreal and inp_b.isreal) {
             delete out.CompD[comp];
             FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA());
@@ -498,8 +513,8 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                 if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
                 if (prec < 0.0) {
                     // Union grid
-                    build_grid(*tree, inp_a.CompD[comp]);
-                    build_grid(*tree, inp_b.CompD[comp]);
+                    build_grid(*tree, *inp_a.CompD[comp]);
+                    build_grid(*tree, *inp_b.CompD[comp]);
                     mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0);
                 } else {
                     // Adaptive grid
@@ -530,8 +545,8 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                 if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
                 if (prec < 0.0) {
                     // Union grid
-                    build_grid(*tree, inp_a.CompC[comp]);
-                    build_grid(*tree, inp_b.CompC[comp]);
+                    build_grid(*tree, *inp_a.CompC[comp]);
+                    build_grid(*tree, *inp_b.CompC[comp]);
                     mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0);
                 } else {
                     // Adaptive grid
@@ -558,19 +573,47 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
 
 /** @brief out = inp_a * f
  *
- *  each component is multiplied
+ *  Only component 0 is multiplied; multi-component or non-real input is flagged as not implemented
  */
-template <int D, typename T>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
-    MSG_ERROR("Not implemented");
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine) {
+    if (inp_a.Ncomp > 1) MSG_ERROR("Not implemented");
+    if (inp_a.isreal != 1) MSG_ERROR("Not implemented");
+    multiply(out, *inp_a.CompD[0], f, prec, nrefine);
 }
 
 /** @brief out = inp_a * f
  *
+ *  Only component 0 is multiplied; multi-component or non-complex input is flagged as not implemented
  */
-template <int D, typename T>
-void multiply(CompFunction<D>, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine) {
-    MSG_ERROR("Not implemented");
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine) {
+    if (inp_a.Ncomp > 1) MSG_ERROR("Not implemented");
+    if (inp_a.iscomplex != 1) MSG_ERROR("Not implemented");
+    multiply(out, *inp_a.CompC[0], f, prec, nrefine);
+
+}
+
+/** @brief out = inp_a * f
+ *
+ */
+template <int D>
+void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine) {
+    CompFunction<D> func_a(1);
+    func_a.isreal = 1;
+    func_a.iscomplex = 0;
+    func_a.CompD[0] = &inp_a;
+    multiply(out, func_a, f, prec, nrefine);
+    func_a.CompD[0] = nullptr;
+}
+template <int D>
+void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine) {
+    CompFunction<D> func_a(1);
+    func_a.isreal = 0;
+    func_a.iscomplex = 1;
+    func_a.CompC[0] = &inp_a;
+    multiply(out, func_a, f, prec, nrefine);
+    func_a.CompC[0] = nullptr;
 }
 
 
@@ -617,37 +660,32 @@ double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
     return dotprod;
 }
 
-
-template <int D, typename T>
-void project(CompFunction<D> &out, std::function<double(const Coord<D> &r)> f, double prec) {
-if (std::is_same<T, double>::value) {
+void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.isreal = 1;
     out.iscomplex = 0;
     if(out.Ncomp < 1) out.alloc(0);
-    if (need_to_project) mrcpp::project<D, double>(prec, out.CompD[0], f);
+    if (need_to_project) mrcpp::project<3>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
-}
 
-template <int D, typename T>
-void project(CompFunction<D> &out, std::function<ComplexDouble(const Coord<D> &r)> f, double prec) {
-if (std::is_same<T, ComplexDouble>::value) {
+// Complex-valued counterpart of the overload above; not a template, fixed to D = 3
+void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r)> f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.isreal = 0;
     out.iscomplex = 1;
     if(out.Ncomp < 1) out.alloc(0);
-    if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, out.CompC[0], f);
+    if (need_to_project) mrcpp::project<3>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
- }
+
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.isreal = 1;
     out.iscomplex = 0;
     if(out.Ncomp < 1) out.alloc(0);
-    if (need_to_project) mrcpp::project<D, double>(prec, out.CompD[0], f);
+    if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
 }
 template <int D>
@@ -656,7 +694,7 @@ void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, d
     out.isreal = 0;
     out.iscomplex = 1;
     if(out.Ncomp < 1) out.alloc(0);
-    if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, out.CompC[0], f);
+    if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
  }
 
@@ -733,8 +771,8 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
     for (int j = 0; j < M; j++) {
         if (!mpi::my_func(j)) continue;
-        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
-        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
+        if (not makeReal and Psi[j].hasReal()) Psi[j].free();
+        if (not makeImag and Psi[j].hasImag()) Psi[j].free();
     }
 
     if (not makeReal and not makeImag) { return; }
@@ -2035,6 +2073,17 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
         if(my_func(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
     }
 }
-template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket) ;
+template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket);
+template void project(CompFunction<3>& out, RepresentableFunction<3, double>& f, double prec);
+template void project(CompFunction<3>& out, RepresentableFunction<3, ComplexDouble>& f, double prec);
+template void multiply(CompFunction<3> &out, CompFunction<3> inp_a, CompFunction<3> inp_b, double prec, bool absPrec, bool useMaxNorms);
+template void multiply(CompFunction<3>& out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double>& f, double prec, int nrefine = 0);
+template void multiply(CompFunction<3>& out, FunctionTree<3, ComplexDouble> &inp_a, RepresentableFunction<3, ComplexDouble>& f, double prec, int nrefine = 0);
+template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
+template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, ComplexDouble> &f, double prec, int nrefine = 0);
+template void deep_copy(CompFunction<3>* out, const CompFunction<3> &inp);
+template void deep_copy(CompFunction<3>& out, const CompFunction<3> &inp);
+template void add(CompFunction<3> &out, ComplexDouble a, CompFunction<3> inp_a, ComplexDouble b, CompFunction<3> inp_b, double prec);
+template double node_norm_dot(CompFunction<3> bra, CompFunction<3> ket);
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index df4bf7267..2c30dc3c2 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include "trees/FunctionTree.h"
-#include "ComplexFunction.h"
 
 using namespace Eigen;
 
@@ -13,7 +12,7 @@ struct CompFunctionData {
     // occupancy, quantum number, norm, etc.
     int Ncomp{1}; // number of components defined
     int rank{-1}; // rank (index) if part of a vector
-    int conj{0}; // conjugate of all components
+    int conj{0}; // soft conjugate (all components)
     int CompFn1{0};
     int CompFn2{0};
     int isreal{0}; // trees are defined for T=double
@@ -65,6 +64,7 @@ template <int D> class CompFunction {
     CompFunctionData<D> data;
     int& Ncomp = data.Ncomp; //number of components defined
     int& rank = data.rank; // rank (index) if part of a vector
+    int& conj = data.conj; // soft conjugate
     int& isreal = data.isreal; // T=double
     int& iscomplex = data.iscomplex; // T=DoubleComplex
     int* Nchunks = data.Nchunks; // number of chunks of each component tree
@@ -85,7 +85,7 @@ template <int D> class CompFunction {
     ComplexDouble integrate() const;
     double norm() const;
     double squaredNorm() const;
-    void alloc(int i);
+    void alloc(int i = 0);
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setRank(int i) {rank = i;};
     const int getRank() const {return rank;};
@@ -99,6 +99,10 @@ template <int D> class CompFunction {
     void flushMRAData();
     void flushFuncData();
     CompFunctionData<D> getFuncData() const;
+    FunctionTree<D, double> &real(int i = 0);
+    FunctionTree<D, ComplexDouble> &complex(int i = 0);
+    const FunctionTree<D, double> &real(int i = 0) const;
+    const FunctionTree<D, ComplexDouble> &complex(int i = 0) const;
 
     //NB: All below should be revised. Now only for backwards compatibility to ComplexFunction class
 
@@ -108,9 +112,7 @@ template <int D> class CompFunction {
     bool isShared() const {return data.shared;}
     bool conjugate() const {return data.conj;}
     CompFunction<D> dagger();
-    FunctionTree<D, double> &real(int i = 0);
     FunctionTree<D, double> &imag(int i = 0); //does not make sense now
-    const FunctionTree<D, double> &real(int i = 0) const;
     const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
 };
 
@@ -123,21 +125,25 @@ void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDo
 template <int D>
 void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec);
 template <int D>
-void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false);
-template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
 template <int D>
+void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false);
+template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false);
-template <int D, typename T>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
-template <int D, typename T>
-void multiply(CompFunction<D> &out, FunctionTree<D, T> &inp_a, RepresentableFunction<D, T> &f, double prec, int nrefine = 0);
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0);
+template <int D>
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0);
+template <int D>
+void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0);
+template <int D>
+void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0);
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
 template <int D>
 double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket);
-template <int D, typename T>
-void project(CompFunction<D> &out, std::function<T(const Coord<D> &r)> f, double prec);
+void project(CompFunction<3> &out, std::function<double(const Coord<3> &r)> f, double prec);
+void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r)> f, double prec);
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec);
 template <int D>
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
deleted file mode 100644
index 63d855727..000000000
--- a/src/utils/ComplexFunction.cpp
+++ /dev/null
@@ -1,2016 +0,0 @@
-#include "ComplexFunction.h"
-#include "Bank.h"
-#include "Printer.h"
-#include "Timer.h"
-#include "parallel.h"
-#include "treebuilders/grid.h"
-#include "treebuilders/multiply.h"
-#include "treebuilders/project.h"
-#include "trees/FunctionNode.h"
-#include "treebuilders/add.h"
-
-using mrcpp::Timer;
-
-namespace mrcpp {
-
-MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
-
-ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
-        : funcMRA(defaultMRA)
-        , func_ptr(funcptr) {}
-
-ComplexFunction::ComplexFunction(const ComplexFunction &func)
-        : funcMRA(func.funcMRA)
-        , conj(func.conj)
-        , func_ptr(func.func_ptr)
-        , rank(func.rank) {}
-
-ComplexFunction &ComplexFunction::operator=(const ComplexFunction &func) {
-    if (this != &func) {
-        this->conj = func.conj;
-        this->func_ptr = func.func_ptr;
-        this->funcMRA = func.funcMRA;
-        this->rank = func.rank;
-    }
-    return *this;
-}
-
-/** @brief Constructor
- *
- * @param spin: electron spin (SPIN::Alpha/Beta/Paired)
- * @param occ: occupation
- * @param rank: MPI ownership (-1 means all MPI ranks)
- *
- * Initializes the mrcpp::ComplexFunction with NULL pointers for both real and imaginary part.
- */
-ComplexFunction::ComplexFunction(int spin, int occ, int rank, bool share)
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(share))
-        , rank(rank) {
-    this->getFunctionData().spin = spin;
-    this->getFunctionData().occ = occ;
-    if (this->spin() < 0) INVALID_ARG_ABORT;
-    if (this->occ() < 0) {
-        if (this->spin() == SPIN::Paired) this->getFunctionData().occ = 2;
-        if (this->spin() == SPIN::Alpha) this->getFunctionData().occ = 1;
-        if (this->spin() == SPIN::Beta) this->getFunctionData().occ = 1;
-    }
-}
-
-/** @brief Parameter copy
- *
- * Returns a new ComplexFunction with the same spin, occupation and rank_id as *this.
- */
-ComplexFunction ComplexFunction::paramCopy() const {
-    return ComplexFunction(this->spin(), this->occ(), this->getRank());
-}
-
-MPI_FuncVector::MPI_FuncVector(int N)
-        : std::vector<ComplexFunction>(N) {
-    for (int i = 0; i < N; i++) (*this)[i].setRank(i);
-    vecMRA = defaultMRA;
-}
-void MPI_FuncVector::distribute() {
-    for (int i = 0; i < this->size(); i++) (*this)[i].setRank(i);
-}
-
-/** @brief Returns the orbital meta data
- *
- * Tree sizes (nChunks) are flushed before return.
- */
-FunctionData &ComplexFunction::getFunctionData() {
-    this->func_ptr->flushFuncData();
-    return this->func_ptr->func_data;
-}
-
-ComplexFunction ComplexFunction::dagger() {
-    ComplexFunction out(*this);
-    out.conj = not(this->conj);
-    return out; // Return shallow copy
-}
-
-void ComplexFunction::setReal(FunctionTree<3> *tree) {
-    if (isShared()) MSG_ABORT("Cannot set in shared function");
-    this->func_ptr->re = tree;
-}
-
-void ComplexFunction::setImag(FunctionTree<3> *tree) {
-    if (isShared()) MSG_ABORT("Cannot set in shared function");
-    this->func_ptr->im = tree;
-}
-
-void ComplexFunction::alloc(int type, MultiResolutionAnalysis<3> *mra) {
-    if (mra == nullptr) mra = funcMRA;
-    if (mra == nullptr) MSG_ABORT("Invalid argument");
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) MSG_ABORT("Real part already allocated");
-        this->func_ptr->re = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_re);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) MSG_ABORT("Imaginary part already allocated");
-        this->func_ptr->im = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_im);
-    }
-}
-
-void ComplexFunction::free(int type) {
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) delete this->func_ptr->re;
-        this->func_ptr->re = nullptr;
-        if (this->func_ptr->shared_mem_re) this->func_ptr->shared_mem_re->clear();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) delete this->func_ptr->im;
-        this->func_ptr->im = nullptr;
-        if (this->func_ptr->shared_mem_im) this->func_ptr->shared_mem_im->clear();
-    }
-}
-
-int ComplexFunction::getSizeNodes(int type) const {
-    int size_mb = 0; // Memory size in kB
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) size_mb += real().getSizeNodes();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) size_mb += imag().getSizeNodes();
-    }
-    return size_mb;
-}
-
-int ComplexFunction::getNNodes(int type) const {
-    int nNodes = 0;
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) nNodes += real().getNNodes();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) nNodes += imag().getNNodes();
-    }
-    return nNodes;
-}
-
-int ComplexFunction::crop(double prec) {
-    if (prec < 0.0) return 0;
-    bool need_to_crop = not(isShared()) or mpi::share_master();
-    int nChunksremoved = 0;
-    if (need_to_crop) {
-        if (hasReal()) nChunksremoved = real().crop(prec, 1.0, false);
-        if (hasImag()) nChunksremoved += imag().crop(prec, 1.0, false);
-    }
-    mpi::share_function(*this, 0, 7744, mpi::comm_share);
-    return nChunksremoved;
-}
-
-ComplexDouble ComplexFunction::integrate() const {
-    double int_r = 0.0;
-    double int_i = 0.0;
-    if (hasReal()) int_r = real().integrate();
-    if (hasImag()) int_i = imag().integrate();
-    return ComplexDouble(int_r, int_i);
-}
-
-/** @brief Returns the norm of the orbital */
-double ComplexFunction::norm() const {
-    double norm = squaredNorm();
-    if (norm > 0.0) norm = std::sqrt(norm);
-    return norm;
-}
-
-/** @brief Returns the squared norm of the orbital */
-double ComplexFunction::squaredNorm() const {
-    double sq_r = -1.0;
-    double sq_i = -1.0;
-    if (hasReal()) sq_r = real().getSquareNorm();
-    if (hasImag()) sq_i = imag().getSquareNorm();
-
-    double sq_norm = 0.0;
-    if (sq_r < 0.0 and sq_i < 0.0) {
-        sq_norm = -1.0;
-    } else {
-        if (sq_r >= 0.0) sq_norm += sq_r;
-        if (sq_i >= 0.0) sq_norm += sq_i;
-    }
-    return sq_norm;
-}
-
-/** @brief In place addition.
- *
- * Output is extended to union grid.
- *
- */
-void ComplexFunction::add(ComplexDouble c, ComplexFunction inp) {
-    double thrs = MachineZero;
-    bool cHasReal = (std::abs(c.real()) > thrs);
-    bool cHasImag = (std::abs(c.imag()) > thrs);
-    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
-    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
-
-    ComplexFunction &out = *this;
-    bool clearReal(false), clearImag(false);
-    if (outNeedsReal and not(out.hasReal())) {
-        out.alloc(NUMBER::Real);
-        clearReal = true;
-    }
-
-    if (outNeedsImag and not(out.hasImag())) {
-        out.alloc(NUMBER::Imag);
-        clearImag = true;
-    }
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (clearReal) out.real().setZero();
-        if (clearImag) out.imag().setZero();
-        if (cHasReal and inp.hasReal()) {
-            while (refine_grid(out.real(), inp.real())) {}
-            out.real().add(c.real(), inp.real());
-        }
-        if (cHasReal and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.imag(), inp.imag())) {}
-            out.imag().add(conj * c.real(), inp.imag());
-        }
-        if (cHasImag and inp.hasReal()) {
-            while (refine_grid(out.imag(), inp.real())) {}
-            out.imag().add(c.imag(), inp.real());
-        }
-        if (cHasImag and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.real(), inp.imag())) {}
-            out.real().add(-1.0 * conj * c.imag(), inp.imag());
-        }
-    }
-    mpi::share_function(out, 0, 9911, mpi::comm_share);
-}
-
-/** @brief In place addition of absolute values.
- *
- * Output is extended to union grid.
- *
- */
-void ComplexFunction::absadd(ComplexDouble c, ComplexFunction inp) {
-    double thrs = MachineZero;
-    bool cHasReal = (std::abs(c.real()) > thrs);
-    bool cHasImag = (std::abs(c.imag()) > thrs);
-    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
-    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
-
-    ComplexFunction &out = *this;
-    bool clearReal(false), clearImag(false);
-    if (outNeedsReal and not(out.hasReal())) {
-        out.alloc(NUMBER::Real);
-        clearReal = true;
-    }
-
-    if (outNeedsImag and not(out.hasImag())) {
-        out.alloc(NUMBER::Imag);
-        clearImag = true;
-    }
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (clearReal) out.real().setZero();
-        if (clearImag) out.imag().setZero();
-        if (cHasReal and inp.hasReal()) {
-            while (refine_grid(out.real(), inp.real())) {}
-            out.real().absadd(c.real(), inp.real());
-        }
-        if (cHasReal and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.imag(), inp.imag())) {}
-            out.imag().absadd(conj * c.real(), inp.imag());
-        }
-        if (cHasImag and inp.hasReal()) {
-            while (refine_grid(out.imag(), inp.real())) {}
-            out.imag().absadd(c.imag(), inp.real());
-        }
-        if (cHasImag and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.real(), inp.imag())) {}
-            out.real().absadd(-1.0 * conj * c.imag(), inp.imag());
-        }
-    }
-    mpi::share_function(out, 0, 9912, mpi::comm_share);
-}
-
-/** @brief In place multiply with real scalar. Fully in-place.*/
-void ComplexFunction::rescale(double c) {
-    bool need_to_rescale = not(isShared()) or mpi::share_master();
-    if (need_to_rescale) {
-        if (hasReal()) real().rescale(c);
-        if (hasImag()) imag().rescale(c);
-    }
-    mpi::share_function(*this, 0, 5543, mpi::comm_share);
-}
-
-/** @brief In place multiply with complex scalar. Involves a deep copy.*/
-void ComplexFunction::rescale(ComplexDouble c) {
-    ComplexFunction &out = *this;
-    ComplexFunction tmp(spin(), occ(), rank, isShared());
-    cplxfunc::deep_copy(tmp, out);
-    out.free(NUMBER::Total);
-    out.add(c, tmp);
-}
-
-/** @brief Returns a character representing the spin (a/b/p) */
-char ComplexFunction::printSpin() const {
-    char sp = 'u';
-    if (this->spin() == SPIN::Paired) sp = 'p';
-    if (this->spin() == SPIN::Alpha) sp = 'a';
-    if (this->spin() == SPIN::Beta) sp = 'b';
-    return sp;
-}
-
-void cplxfunc::SetdefaultMRA(MultiResolutionAnalysis<3> *MRA) {
-    defaultMRA = MRA;
-}
-
-/** @brief Compute <bra|ket> = int bra^\dag(r) * ket(r) dr.
- *
- *  Notice that the <bra| position is already complex conjugated.
- *
- */
-ComplexDouble cplxfunc::dot(ComplexFunction bra, ComplexFunction ket) {
-    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
-    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::dot(bra.real(), ket.real());
-    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::dot(bra.real(), ket.imag());
-    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::dot(bra.imag(), ket.real());
-    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::dot(bra.imag(), ket.imag());
-
-    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
-    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
-
-    double real_part = rr + bra_conj * ket_conj * ii;
-    double imag_part = ket_conj * ri - bra_conj * ir;
-    return ComplexDouble(real_part, imag_part);
-}
-
-/** @brief Compute <bra|ket> = int |bra^\dag(r)| * |ket(r)| dr.
- *
- */
-ComplexDouble cplxfunc::node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact) {
-    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
-    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::node_norm_dot(bra.real(), ket.real(), exact);
-    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::node_norm_dot(bra.real(), ket.imag(), exact);
-    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::node_norm_dot(bra.imag(), ket.real(), exact);
-    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::node_norm_dot(bra.imag(), ket.imag(), exact);
-
-    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
-    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
-
-    double real_part = rr + bra_conj * ket_conj * ii;
-    double imag_part = ket_conj * ri - bra_conj * ir;
-    return ComplexDouble(real_part, imag_part);
-}
-
-/** @brief Deep copy
- *
- * Returns a new function which is a full blueprint copy of the input function.
- * This is achieved by building a new grid for the real and imaginary parts and
- * copying.
- */
-void cplxfunc::deep_copy(ComplexFunction &out, ComplexFunction &inp) {
-    bool need_to_copy = not(out.isShared()) or mpi::share_master();
-    out.funcMRA = inp.funcMRA;
-    out.setRank(inp.getRank());
-    if (inp.hasReal()) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_copy) {
-            copy_grid(out.real(), inp.real());
-            copy_func(out.real(), inp.real());
-        }
-    }
-    if (inp.hasImag()) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_copy) {
-            copy_grid(out.imag(), inp.imag());
-            copy_func(out.imag(), inp.imag());
-            if (out.conjugate()) out.imag().rescale(-1.0);
-        }
-    }
-    mpi::share_function(out, 0, 1324, mpi::comm_share);
-}
-
-void cplxfunc::project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec) {
-    bool need_to_project = not(out.isShared()) or mpi::share_master();
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
-    }
-    mpi::share_function(out, 0, 123123, mpi::comm_share);
-}
-
-void cplxfunc::project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec) {
-    bool need_to_project = not(out.isShared()) or mpi::share_master();
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_project) build_grid(out.real(), f);
-        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_project) build_grid(out.imag(), f);
-        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
-    }
-    mpi::share_function(out, 0, 132231, mpi::comm_share);
-}
-
-/** @brief out = a*inp_a + b*inp_b
- *
- * Recast into linear_combination.
- *
- */
-void cplxfunc::add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec) {
-    ComplexVector coefs(2);
-    coefs(0) = a;
-    coefs(1) = b;
-
-    std::vector<ComplexFunction> funcs; // NB: not a ComplexFunctionVector, because not run in parallel!
-    funcs.push_back(inp_a);
-    funcs.push_back(inp_b);
-
-    cplxfunc::linear_combination(out, coefs, funcs, prec);
-}
-
-/** @brief out = inp_a * inp_b
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    multiply_real(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
-    multiply_imag(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
-}
-
-/** @brief out = inp_a * f
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
-    // uses the mpifuncvec multiply
-    MPI_FuncVector mpi_funcvec_a;
-    mpi_funcvec_a.push_back(inp_a);
-    MPI_FuncVector mpi_funcvec_out;
-    mpi_funcvec_out = mpifuncvec::multiply(mpi_funcvec_a, f, prec, nullptr, nrefine, true);
-    out = mpi_funcvec_out[0];
-}
-
-/** @brief out = inp_a * f
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, FunctionTree<3> &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
-    ComplexFunction cplxfunc_a;
-    cplxfunc_a.setReal(&inp_a);
-    cplxfunc::multiply(out, cplxfunc_a, f, prec, nrefine);
-    cplxfunc_a.setReal(nullptr); // otherwise inp_a is deleted by cplxfunc_a destructor
-}
-
-/** @brief out = c_0*inp_0 + c_1*inp_1 + ... + c_N*inp_N
- *
- */
-void cplxfunc::linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec) {
-    FunctionTreeVector<3> rvec;
-    FunctionTreeVector<3> ivec;
-
-    double thrs = MachineZero;
-    for (int i = 0; i < inp.size(); i++) {
-        double sign = (inp[i].conjugate()) ? -1.0 : 1.0;
-
-        bool cHasReal = (std::abs(c[i].real()) > thrs);
-        bool cHasImag = (std::abs(c[i].imag()) > thrs);
-
-        if (cHasReal and inp[i].hasReal()) rvec.push_back(std::make_tuple(c[i].real(), &inp[i].real()));
-        if (cHasImag and inp[i].hasImag()) rvec.push_back(std::make_tuple(-sign * c[i].imag(), &inp[i].imag()));
-
-        if (cHasImag and inp[i].hasReal()) ivec.push_back(std::make_tuple(c[i].imag(), &inp[i].real()));
-        if (cHasReal and inp[i].hasImag()) ivec.push_back(std::make_tuple(sign * c[i].real(), &inp[i].imag()));
-    }
-
-    if (rvec.size() > 0 and not out.hasReal()) out.alloc(NUMBER::Real);
-    if (ivec.size() > 0 and not out.hasImag()) out.alloc(NUMBER::Imag);
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (rvec.size() > 0) {
-            if (prec < 0.0) {
-                build_grid(out.real(), rvec);
-                mrcpp::add(prec, out.real(), rvec, 0);
-            } else {
-                mrcpp::add(prec, out.real(), rvec);
-            }
-        } else if (out.hasReal()) {
-            out.real().setZero();
-        }
-        if (ivec.size() > 0) {
-            if (prec < 0.0) {
-                build_grid(out.imag(), ivec);
-                mrcpp::add(prec, out.imag(), ivec, 0);
-            } else {
-                mrcpp::add(prec, out.imag(), ivec);
-            }
-        } else if (out.hasImag()) {
-            out.imag().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9911, mpi::comm_share);
-}
-
-/** @brief out = Re(inp_a * inp_b)
- *
- */
-void cplxfunc::multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
-    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
-
-    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-
-    FunctionTreeVector<3> vec;
-    if (inp_a.hasReal() and inp_b.hasReal()) {
-        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
-        if (need_to_multiply) {
-            double coef = 1.0;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.real());
-                build_grid(*tree, inp_b.real());
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-    if (inp_a.hasImag() and inp_b.hasImag()) {
-        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
-        if (need_to_multiply) {
-            double coef = -1.0 * conj_a * conj_b;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.imag());
-                build_grid(*tree, inp_b.imag());
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-
-    if (vec.size() > 0) {
-        if (out.hasReal()) {
-            if (need_to_multiply) out.real().clear();
-        } else {
-            // All sharing procs must allocate
-            out.alloc(NUMBER::Real);
-        }
-    }
-
-    if (need_to_multiply) {
-        if (vec.size() == 1) {
-            FunctionTree<3> &func_0 = get_func(vec, 0);
-            copy_grid(out.real(), func_0);
-            copy_func(out.real(), func_0);
-            clear(vec, true);
-        } else if (vec.size() == 2) {
-            build_grid(out.real(), vec);
-            mrcpp::add(prec, out.real(), vec, 0);
-            clear(vec, true);
-        } else if (out.hasReal()) {
-            out.real().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9191, mpi::comm_share);
-}
-
-/** @brief out = Im(inp_a * inp_b)
- *
- */
-void cplxfunc::multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
-    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
-    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-
-    FunctionTreeVector<3> vec;
-    if (inp_a.hasReal() and inp_b.hasImag()) {
-        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
-        if (need_to_multiply) {
-            double coef = conj_b;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.real());
-                build_grid(*tree, inp_b.imag());
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-    if (inp_a.hasImag() and inp_b.hasReal()) {
-        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
-        if (need_to_multiply) {
-            double coef = conj_a;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.imag());
-                build_grid(*tree, inp_b.real());
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-
-    if (vec.size() > 0) {
-        if (out.hasImag()) {
-            if (need_to_multiply) out.imag().clear();
-        } else {
-            // All sharing procs must allocate
-            out.alloc(NUMBER::Imag);
-        }
-    }
-
-    if (need_to_multiply) {
-        if (vec.size() == 1) {
-            FunctionTree<3> &func_0 = get_func(vec, 0);
-            copy_grid(out.imag(), func_0);
-            copy_func(out.imag(), func_0);
-            clear(vec, true);
-        } else if (vec.size() == 2) {
-            build_grid(out.imag(), vec);
-            mrcpp::add(prec, out.imag(), vec, 0);
-            clear(vec, true);
-        } else if (out.hasImag()) {
-            out.imag().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9292, mpi::comm_share);
-}
-
-namespace mpifuncvec {
-
-
-/** @brief Make a linear combination of functions
- *
- * Uses "local" representation: treats one node at a time.
- * For each node, all functions are transformed simultaneously
- * by a dense matrix multiplication.
- * Phi input functions, Psi output functions
- *
- */
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec) {
-
-    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
-    // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
-    // MPI version does not use OMP yet, Serial version uses OMP
-    // size of input is N, size of output is M
-    int N = Phi.size();
-    int M = Psi.size();
-    if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
-    if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
-
-    // 1) make union tree without coefficients
-    FunctionTree<3> refTree(*Phi.vecMRA);
-    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    std::vector<double> scalefac_ref;
-    std::vector<double *> coeffVec_ref; // not used!
-    std::vector<int> indexVec_ref;      // serialIx of the nodes
-    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
-    int max_ix;
-    // get a list of all nodes in union tree, identified by their serialIx indices
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-   // 2) We work with real numbers only. Make real blocks for U matrix
-    bool UhasReal = false;
-    bool UhasImag = false;
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            if (std::abs(U(i, j).real()) > 10*MachineZero) UhasReal = true;
-            if (std::abs(U(i, j).imag()) > 10*MachineZero) UhasImag = true;
-        }
-    }
-
-    IntVector PsihasReIm = IntVector::Zero(2);
-    for (int j = 0; j < N; j++) {
-        if (!mpi::my_orb(j)) continue;
-        PsihasReIm[0] = (Phi[j].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[j].hasImag()) ? 1 : 0;
-    }
-    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    if (not PsihasReIm[0] and not PsihasReIm[1]) {
-        return; // do nothing
-    }
-
-    bool makeReal = (UhasReal and PsihasReIm[0]) or (UhasImag and PsihasReIm[1]);
-    bool makeImag = (UhasReal and PsihasReIm[1]) or (UhasImag and PsihasReIm[0]);
-
-    for (int j = 0; j < M; j++) {
-        if (!mpi::my_orb(j)) continue;
-        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
-        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
-    }
-
-    if (not makeReal and not makeImag) { return; }
-
-    int Neff = N;               // effective number of input orbitals
-    int Meff = M;               // effective number of output orbitals
-    if (makeImag) Neff = 2 * N; // Imag and Real treated independently. We always use real part of U
-    if (makeImag) Meff = 2 * M; // Imag and Real treated independently. We always use real part of U
-
-    IntVector conjMat = IntVector::Zero(Neff);
-    for (int j = 0; j < Neff; j++) {
-        if (!mpi::my_orb(j % N)) continue;
-        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
-    }
-    mpi::allreduce_vector(conjMat, mpi::comm_wrk);
-
-    // we make a real matrix = U,  but organized as one or four real blocks
-    // out_r = U_rr*in_r - U_ir*in_i*conjMat
-    // out_i = U_ri*in_r - U_ii*in_i*conjMat
-    // the first index of U is the one used on input Phi
-    DoubleMatrix Ureal(Neff, Meff); // four blocks, for rr ri ir ii
-    for (int j = 0; j < Neff; j++) {
-        for (int i = 0; i < Meff; i++) {
-            double sign = 1.0;
-            if (j < N and i < M) {
-                // real U applied on real Phi
-                Ureal(j, i) = U.real()(j % N, i % M);
-            } else if (j >= N and i >= M) {
-                // real U applied on imag Phi
-                Ureal(j, i) = conjMat[j] * U.real()(j % N, i % M);
-            } else if (j < N and i >= M) {
-                // imag U applied on real Phi
-                Ureal(j, i) = U.imag()(j % N, i % M);
-            } else {
-                // imag U applied on imag Phi
-                Ureal(j, i) = -1.0 * conjMat[j] * U.imag()(j % N, i % M);
-            }
-        }
-    }
-
-    // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
-    BankAccount nodesPhi;             // to put the original nodes
-    BankAccount nodesRotated;         // to put the rotated nodes
-
-    // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
-    if (serial) {
-
-        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<double> scalefac;
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j + N]) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-
-        // send own nodes to bank, identifying them through the serialIx of refTree
-        mpifuncvec::save_nodes(Phi, refTree, nodesPhi);
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-    }
-
-    // 4) rotate all the nodes
-    IntMatrix split_serial;                             // in the serial case all split are stored in one array
-    std::vector<std::vector<double *>> coeffpVec(Meff); // to put pointers to the rotated coefficient for each orbital in serial case
-    std::vector<std::map<int, int>> ix2coef(Meff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
-    // j indices are for unrotated orbitals, i indices are for rotated orbitals
-    if (serial) {
-        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
-        split_serial.resize(Meff, max_n); // not use in the MPI case
-        for (int n = 0; n < max_n; n++) {
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-            ix2coef_ref[node_ix] = n;
-            for (int i = 0; i < Meff; i++) split_serial(i, n) = 1;
-        }
-
-        std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
-
-        // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
-        // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
-        // n is finished.
-#pragma omp parallel for schedule(dynamic)
-        for (int n = 0; n < max_n; n++) {
-            int csize;
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-            // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n
-            std::vector<int> orbjVec; // to remember which orbital correspond to each orbVec.size();
-            if (node2orbVec[node_ix].size() <= 0) continue;
-            csize = sizecoeffW;
-            if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff
-
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbjVec.push_back(j);
-            }
-
-            // 4b) make a list of rotated orbitals needed for this node
-            // OMP must wait until parent is ready
-            while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) {
-#pragma omp flush
-            };
-
-            std::vector<int> orbiVec;
-            for (int i = 0; i < Meff; i++) { // loop over all rotated orbitals
-                if (not makeReal and i < M) continue;
-                if (not makeImag and i >= M) continue;
-                if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
-                orbiVec.push_back(i);
-            }
-
-            // 4c) rotate this node
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
-            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = Ureal(orbjVec[j], orbiVec[i]); }
-            }
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
-            // HERE IT HAPPENS!
-            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
-
-            // 4d) store and make rotated node pointers
-            // for now we allocate in buffer, in future could be directly allocated in the final trees
-            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
-            // make all norms:
-            for (int i = 0; i < orbiVec.size(); i++) {
-                // check if parent must be split
-                if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) {
-                    // mark this node for this orbital for later split
-#pragma omp critical
-                    {
-                        ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size();
-                        coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers
-                    }
-                    // check norms for split
-                    double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
-                    int kstart = 0;
-                    if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
-                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
-                    if (thres < wnorm or prec < 0)
-                        split_serial(orbiVec[i], n) = 1;
-                    else
-                        split_serial(orbiVec[i], n) = 0;
-                } else {
-                    ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used
-                    split_serial(orbiVec[i], n) = 0;          // do not split if parent does not need to be split
-                }
-            }
-            nodeReady[n] = 1;
-#pragma omp critical
-            {
-                // this ensures that rotatedCoeff is not deleted, when getting out of scope
-                rotatedCoeffVec.push_back(std::move(rotatedCoeff));
-            }
-        }
-    } else { // MPI case
-
-        // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
-        std::vector<double> split(Meff, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibilty with bank
-        std::vector<double> needsplit(Meff, 1.0); // which orbitals need splitting
-        BankAccount nodeSplits;
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-
-        DoubleMatrix coeffBlock(sizecoeff, Neff);
-        max_ix++; // largest node index + 1. to store rotated orbitals with different id
-        TaskManager tasks(max_n);
-        for (int nn = 0; nn < max_n; nn++) {
-            int n = tasks.next_task();
-            if (n < 0) break;
-            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
-            // 4a) make list of orbitals that should split the parent node, i.e. include this node
-            int parentid = parindexVec_ref[n];
-            if (parentid == -1) {
-                // root node, split if output needed
-                for (int i = 0; i < M; i++) {
-                    if (makeReal)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
-                }
-                for (int i = N; i < Meff; i++) {
-                    if (makeImag)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
-                }
-                csize = sizecoeff;
-            } else {
-                // note that it will wait until data is available
-                nodeSplits.get_data(parentid, Meff, split.data());
-                csize = sizecoeffW;
-            }
-            std::vector<int> orbiVec;
-            std::vector<int> orbjVec;
-            for (int i = 0; i < Meff; i++) {  // loop over rotated orbitals
-                if (split[i] < 0.0) continue; // parent node has too small wavelets
-                orbiVec.push_back(i);
-            }
-
-            // 4b) rotate this node
-            DoubleMatrix coeffBlock(csize, Neff); // largest possible used size
-            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
-            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
-
-            // chunk of U, with reorganized indices and separate blocks for real and imag:
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
-
-            for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
-                    Un(j, i) = Ureal(orbjVec[j], orbiVec[i]);
-                }
-            }
-
-            // HERE IT HAPPENS
-            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
-
-            // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while
-            // in cache).
-            for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals
-                needsplit[orbiVec[i]] = -1.0;          // default, do not split
-                // check if this node/orbital needs further refinement
-                double wnorm = 0.0;
-                int kwstart = csize - sizecoeffW; // do not include scaling
-                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
-                if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
-                nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
-            }
-            nodeSplits.put_data(indexVec_ref[n], Meff, needsplit.data());
-        }
-        mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
-    }
-
-    // 5) reconstruct trees using rotated nodes.
-
-    // only serial case can use OMP, because MPI cannot be used by threads
-    if (serial) {
-        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
-        // operation is writing the coefficient into the tree)
-
-#pragma omp parallel for schedule(static)
-        for (int j = 0; j < Meff; j++) {
-            if (coeffpVec[j].size()==0) continue;
-            if (j < M) {
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-            } else {
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-            }
-        }
-
-    } else { // MPI case
-
-        for (int j = 0; j < Meff; j++) {
-            if (not mpi::my_orb(j % M)) continue;
-            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
-            std::vector<double *> coeffpVec; //
-            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
-            int ix = 0;
-            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
-            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
-                std::vector<int> nodeidVec;
-                double *dataVec; // will be allocated by bank
-                nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
-                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
-                int shift = 0;
-                for (int n = 0; n < nodeidVec.size(); n++) {
-                    assert(nodeidVec[n] - max_ix >= 0);                // unrotated nodes have been deleted
-                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
-                    ix2coef[nodeidVec[n] - max_ix] = ix++;
-                    csize = sizecoeffW;
-                    if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff;
-                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
-                    shift += csize;
-                }
-            }
-            if (j < M) {
-                // Real part
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            } else {
-                // Imag part
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            }
-            for (double *p : pointerstodelete) delete[] p;
-            pointerstodelete.clear();
-        }
-    }
-}
-
-
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec) {
-    rotate(Phi, U, Phi, prec);
-    return;
-}
-
-/** @brief Save all nodes in bank; identify them using serialIx from refTree
- * shift is a shift applied in the id
- */
-void save_nodes(MPI_FuncVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    int max_nNodes = refTree.getNNodes();
-    std::vector<double *> coeffVec;
-    std::vector<double> scalefac;
-    std::vector<int> indexVec;    // SerialIx of the node in refOrb
-    std::vector<int> parindexVec; // SerialIx of the parent node
-    int N = Phi.size();
-    int max_ix;
-    for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
-        // make vector with all coef address and their index in the union grid
-        if (Phi[j].hasReal()) {
-            Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
-            int max_n = indexVec.size();
-            // send node coefs from Phi[j] to bank
-            // except for the root nodes, only wavelets are sent
-            for (int i = 0; i < max_n; i++) {
-                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
-                int csize = sizecoeffW;
-                if (parindexVec[i] < 0) csize = sizecoeff;
-                if (sizes > 0) { // fixed size
-                    account.put_nodedata(j, indexVec[i], sizes, coeffVec[i]);
-                } else {
-                    account.put_nodedata(j, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
-                }
-            }
-        }
-        // Imaginary parts are considered as orbitals with an orbid shifted by N
-        if (Phi[j].hasImag()) {
-            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
-            int max_n = indexVec.size();
-            // send node coefs from Phi[j] to bank
-            for (int i = 0; i < max_n; i++) {
-                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
-                // NB: the identifier (indexVec[i]) must be shifted for not colliding with the nodes from the real part
-                int csize = sizecoeffW;
-                if (parindexVec[i] < 0) csize = sizecoeff;
-                if (sizes > 0) { // fixed size
-                    account.put_nodedata(j + N, indexVec[i], sizes, coeffVec[i]);
-                } else {
-                    account.put_nodedata(j + N, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
-                }
-            }
-        }
-    }
-}
-
-/** @brief Multiply all orbitals with a function
- *
- * @param Phi: orbitals to multiply
- * @param f  : function to multiply
- *
- * Computes the product of each orbital with a function
- * in parallel using a local representation.
- * Input trees are extended by one scale at most.
- */
-MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec, ComplexFunction *Func, int nrefine, bool all) {
-
-    int N = Phi.size();
-    const int D = 3;
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
-
-    // 1a) extend grid where f is large (around nuclei)
-    // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree?
-
-    for (int i = 0; i < N; i++) {
-        if (!mpi::my_orb(i)) continue;
-        int irefine = 0;
-        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
-        irefine = 0;
-        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
-    }
-
-    // 1b) make union tree without coefficients
-    FunctionTree<D> refTree(*Phi.vecMRA);
-    // refine_grid(refTree, f); //to test
-    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
-
-    int kp1 = refTree.getKp1();
-    int kp1_d = refTree.getKp1_d();
-    int nCoefs = refTree.getTDim() * kp1_d;
-
-    IntVector PsihasReIm = IntVector::Zero(2);
-    for (int i = 0; i < N; i++) {
-        if (!mpi::my_orb(i)) continue;
-        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
-    }
-    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    MPI_FuncVector out(N);
-    MPI_FuncVector outtest(N);
-    if (not PsihasReIm[0] and not PsihasReIm[1]) {
-        return out; // do nothing
-    }
-
-    int Neff = N;
-    if (PsihasReIm[1]) Neff = 2 * N; // Imag and Real treated independently. We always treat real part of Psi
-
-    std::vector<double> scalefac_ref;
-    std::vector<double *> coeffVec_ref; // not used!
-    std::vector<int> indexVec_ref;      // serialIx of the nodes
-    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
-    std::vector<MWNode<D> *> refNodes;  // pointers to nodes
-    int max_ix;
-    // get a list of all nodes in union tree, identified by their serialIx indices
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes);
-    int max_n = indexVec_ref.size();
-    std::map<int, int> ix2n; // for a given serialIx, give index in vectors
-    for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn;
-
-    // 2a) send own nodes to bank, identifying them through the serialIx of refTree
-    BankAccount nodesPhi;        // to put the original nodes
-    BankAccount nodesMultiplied; // to put the multiplied nodes
-
-    // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
-    if (serial) {
-        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<double> scalefac;
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j + N]) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else {
-        mpifuncvec::save_nodes(Phi, refTree, nodesPhi, nCoefs);
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-    }
-
-    // 2b) save Func in bank and remove its coefficients
-    if (Func != nullptr and !serial) {
-        // put Func in local representation if not already done
-        if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); }
-    }
-
-    // 3) mutiply for each node
-    std::vector<std::vector<double *>> coeffpVec(Neff); // to put pointers to the multiplied coefficient for each orbital in serial case
-    std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
-    std::vector<std::map<int, int>> ix2coef(Neff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    DoubleVector NODEP = DoubleVector::Zero(nCoefs);
-    DoubleVector NODEF = DoubleVector::Zero(nCoefs);
-
-    if (serial) {
-#pragma omp parallel for schedule(dynamic)
-        for (int n = 0; n < max_n; n++) {
-            MWNode<D> node(*(refNodes[n]), false);
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-
-            // 3a) make values for f at this node
-            // 3a1) get coordinates of quadrature points for this node
-            Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs);
-            double fval[nCoefs];
-            Coord<D> r;
-            double *originalCoef = nullptr;
-            MWNode<3> *Fnode = nullptr;
-            if (Func == nullptr) {
-                node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-                for (int j = 0; j < nCoefs; j++) {
-                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                    fval[j] = f.evalf(r);
-                }
-            } else {
-                Fnode = Func->real().findNode(node.getNodeIndex());
-                if (Fnode == nullptr) {
-                    node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-                    for (int j = 0; j < nCoefs; j++) {
-                        for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                        fval[j] = f.evalf(r);
-                    }
-                } else {
-                    originalCoef = Fnode->getCoefs();
-                    for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j];
-                    Fnode->attachCoefs(fval); // note that each thread has its own copy
-                    Fnode->mwTransform(Reconstruction);
-                    Fnode->cvTransform(Forward);
-                }
-            }
-            DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size());
-            int i = 0;
-            // 3b) fetch all orbitals at this node
-            std::vector<int> orbjVec;            // to remember which orbital correspond to each orbVec.size();
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                orbjVec.push_back(j);
-                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k];
-                // 3c) transform to grid
-                node.attachCoefs(&(multipliedCoeff(0, i)));
-                node.mwTransform(Reconstruction);
-                node.cvTransform(Forward);
-                // 3d) multiply
-                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication?
-                // 3e) transform back to mw
-                node.cvTransform(Backward);
-                node.mwTransform(Compression);
-                i++;
-            }
-            if (Func != nullptr and originalCoef != nullptr) {
-                // restablish original values
-                Fnode->attachCoefs(originalCoef);
-            }
-
-            // 3f) save multiplied nodes
-            for (int i = 0; i < orbjVec.size(); i++) {
-#pragma omp critical
-                {
-                    ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size();
-                    coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers
-                }
-            }
-#pragma omp critical
-            {
-                // this ensures that multipliedCoeff is not deleted, when getting out of scope
-                multipliedCoeffVec.push_back(std::move(multipliedCoeff));
-            }
-            node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
-        }
-    } else {
-        // MPI
-        int count1 = 0;
-        int count2 = 0;
-        TaskManager tasks(max_n);
-        for (int nn = 0; nn < max_n; nn++) {
-            int n = tasks.next_task();
-            if (n < 0) break;
-            MWNode<D> node(*(refNodes[n]), false);
-            // 3a) make values for f
-            // 3a1) get coordinates of quadrature points for this node
-            Eigen::MatrixXd pts;           // Eigen::Zero(D, nCoefs);
-            node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-            double fval[nCoefs];
-            Coord<D> r;
-            MWNode<D> Fnode(*(refNodes[n]), false);
-            if (Func == nullptr) {
-                for (int j = 0; j < nCoefs; j++) {
-                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                    fval[j] = f.evalf(r);
-                }
-            } else {
-                int nIdx = Func->real().getIx(node.getNodeIndex());
-                count1++;
-                if (nIdx < 0) {
-                    // use the function f instead of Func
-                    count2++;
-                    for (int j = 0; j < nCoefs; j++) {
-                        for (int d = 0; d < D; d++) r[d] = pts(d, j);
-                        fval[j] = f.evalf(r);
-                    }
-                } else {
-                    Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank
-                    Fnode.attachCoefs(fval);
-                    Fnode.mwTransform(Reconstruction);
-                    Fnode.cvTransform(Forward);
-                }
-            }
-
-            // 3b) fetch all orbitals at this node
-            DoubleMatrix coeffBlock(nCoefs, Neff); // largest possible used size
-            std::vector<int> orbjVec;
-            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
-            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
-            DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size());
-            // 3c) transform to grid
-            for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ?
-                // TODO: select only nodes that are end nodes?
-                node.attachCoefs(coeffBlock.col(j).data());
-                node.mwTransform(Reconstruction);
-                node.cvTransform(Forward);
-                // 3d) multiply
-                double *coefs = node.getCoefs();
-                for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i];
-                // 3e) transform back to mw
-                node.cvTransform(Backward);
-                node.mwTransform(Compression);
-                // 3f) save multiplied nodes
-                nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs);
-            }
-            node.attachCoefs(nullptr);  // to avoid deletion of valid multipliedCoeff by destructor
-            Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
-        }
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 5) reconstruct trees using multiplied nodes.
-
-    // only serial case can use OMP, because MPI cannot be used by threads
-    if (serial) {
-        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
-        // operation is writing the coefficient into the tree)
-
-#pragma omp parallel for schedule(static)
-        for (int j = 0; j < Neff; j++) {
-            if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
-                }
-            } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    out[j].imag().mwTransform(BottomUp);
-                    out[j].imag().calcSquareNorm();
-                }
-            }
-        }
-    } else {
-        for (int j = 0; j < Neff; j++) {
-            if (not mpi::my_orb(j % N) and not all) continue;
-            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
-            std::vector<double *> coeffpVec; //
-            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx in refTree
-            int ix = 0;
-            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
-
-            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
-                std::vector<int> nodeidVec;
-                double *dataVec; // will be allocated by bank
-                nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank);
-                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
-                int shift = 0;
-                for (int n = 0; n < nodeidVec.size(); n++) {
-                    assert(nodeidVec[n] - max_ix >= 0);                // unmultiplied nodes have been deleted
-                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
-                    ix2coef[nodeidVec[n] - max_ix] = ix++;
-                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
-                    shift += nCoefs;
-                }
-            }
-            if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
-                    out[j].real().resetEndNodeTable();
-                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
-                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
-                }
-            } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    out[j % N].imag().mwTransform(BottomUp);
-                    out[j % N].imag().calcSquareNorm();
-                    // out[j % N].imag().crop(prec, 1.0, false);
-                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
-                }
-            }
-
-            for (double *p : pointerstodelete) delete[] p;
-            pointerstodelete.clear();
-        }
-    }
-    return out;
-}
-
-ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    int N = Bra.size();
-    ComplexVector result = ComplexVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        // The bra is sent to the owner of the ket
-        if (my_orb(Bra[i]) != my_orb(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
-        result[i] = cplxfunc::dot(Bra[i], Ket[i]);
-        if (not mrcpp::mpi::my_orb(i)) Bra[i].free(NUMBER::Total);
-    }
-    mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
-    return result;
-}
-
-/** @brief Compute Löwdin orthonormalization matrix
- *
- * @param Phi: orbitals to orthonomalize
- *
- * Computes the inverse square root of the orbital overlap matrix S^(-1/2)
- */
-ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi) {
-    ComplexMatrix S_tilde = mpifuncvec::calc_overlap_matrix(Phi);
-    ComplexMatrix S_m12 = math_utils::hermitian_matrix_pow(S_tilde, -1.0 / 2.0);
-    return S_m12;
-}
-
-/** @brief Orbital transformation out_j = sum_i inp_i*U_ij
- *
- * NOTE: OrbitalVector is considered a ROW vector, so rotation
- *       means matrix multiplication from the right
- *
- * MPI: Rank distribution of output vector is the same as input vector
- *
- */
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet) {
-    // NB: must be spinseparated at this point!
-
-    int N = BraKet.size();
-    ComplexMatrix S = ComplexMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
-
-    // 1) make union tree without coefficients
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double> scalefac;
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    int max_ix;                       // largest index value (not used here)
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-    mrcpp::BankAccount nodesBraKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(BraKet, refTree, nodesBraKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-
-    int ibank = 0;
-#pragma omp parallel for schedule(dynamic) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j <= i; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N);
-            S.imag()(i, j) = conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
-            if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri
-        }
-    }
-
-    // Assumes linearity: result is sum of all nodes contributions
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-
-    return S;
-}
-
-/** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
- *
- */
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
-
-    MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
-
-    int N = Bra.size();
-    int M = Ket.size();
-    ComplexMatrix S = ComplexMatrix::Zero(N, M);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * M); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-
-    // 1) make union tree without coefficients for Bra (supposed smallest)
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk);
-    // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero.
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    std::vector<double> scalefac;
-    int max_ix;
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-    max_ix++;
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVecBra(2 * N);
-    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeBra(2 * N); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
-    std::vector<std::vector<double *>> coeffVecKet(2 * M);
-    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeKet(2 * M); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
-    mrcpp::BankAccount nodesBra;
-    mrcpp::BankAccount nodesKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        // TODO? : do not copy coefficients, but use directly the pointers
-        // could OMP parallelize, but is fast anyway
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Bra[j].hasReal()) {
-                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j);
-                }
-            }
-            if (Bra[j].hasImag()) {
-                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j + N);
-                }
-            }
-        }
-        for (int j = 0; j < M; j++) {
-            if (Ket[j].hasReal()) {
-                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j);
-                }
-            }
-            if (Ket[j].hasImag()) {
-                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j + M][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j + M);
-                }
-            }
-        }
-
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(Bra, refTree, nodesBra);
-        save_nodes(Ket, refTree, nodesKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-    int totsiz = 0;
-    int totget = 0;
-    int mxtotsiz = 0;
-    int ibank = 0;
-    //For some unknown reason the h2_mag_lda test sometimes fails when schedule(dynamic) is chosen
-#pragma omp parallel for schedule(static) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
-        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        if (serial) {
-            int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
-            DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
-            if (parindexVec_ref[n] < 0) shift = 0;
-
-            for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeBra[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
-                orbVecBra.push_back(j);
-            }
-            for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeKet[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
-                orbVecKet.push_back(j);
-            }
-
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
-                            continue;
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
-                            continue;
-                        // must ensure that threads are not competing
-                        double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else {
-
-            DoubleMatrix coeffBlockBra(csize, 2 * N);
-            DoubleMatrix coeffBlockKet(csize, 2 * M);
-            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
-            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
-            totsiz += orbVecBra.size() * orbVecKet.size();
-            mxtotsiz += N * M;
-            totget += orbVecBra.size() + orbVecKet.size();
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
-                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
-                            continue;
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-
-    IntVector conjMatBra = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(Bra[i])) continue;
-        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
-    IntVector conjMatKet = IntVector::Zero(M);
-    for (int i = 0; i < M; i++) {
-        if (!mrcpp::mpi::my_orb(Ket[i])) continue;
-        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMatBra[i] * conjMatKet[j] * Sreal(i + N, j + M);
-            S.imag()(i, j) = conjMatKet[j] * Sreal(i, j + M) - conjMatBra[i] * Sreal(i + N, j);
-        }
-    }
-
-    // 4) collect results from all MPI. Linearity: result is sum of all node contributions
-
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-
-    return S;
-}
-
-/** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
- *
- */
-DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet) {
-    int N = BraKet.size();
-    DoubleMatrix S = DoubleMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
-
-    // 1) make union tree without coefficients
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double> scalefac;
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    int max_ix;                       // largest index value (not used here)
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-    mrcpp::BankAccount nodesBraKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(BraKet, refTree, nodesBraKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-
-    int ibank = 0;
-#pragma omp parallel for schedule(dynamic) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock = coeffBlock.cwiseAbs();
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                coeffBlock = coeffBlock.cwiseAbs();
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(i)) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j <= i; j++) {
-            S(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N) + conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
-            S(j, i) = S(i, j);
-        }
-    }
-
-    // Assumes linearity: result is sum of all nodes contributions
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-    return S;
-}
-
-/** @brief Orthogonalize the functions in Bra against all orbitals in Ket
- *
- */
-void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    // TODO: generalize for cases where Ket functions are not orthogonal to each other?
-    ComplexMatrix S = mpifuncvec::calc_overlap_matrix(Bra, Ket);
-    int N = Bra.size();
-    int M = Ket.size();
-    DoubleVector Ketnorms = DoubleVector::Zero(M);
-    for (int i = 0; i < M; i++) {
-        if (mpi::my_orb(Ket[i])) Ketnorms(i)  = Ket[i].squaredNorm();
-    }
-    mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
-    ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
-    for (int j = 0; j < N; j++) {
-        for (int i = 0; i < M; i++) {
-            rmat(i,j) = 0.0 - S.conjugate()(j,i)/Ketnorms(i);
-        }
-    }
-    MPI_FuncVector rotatedKet(N);
-    mpifuncvec::rotate(Ket, rmat, rotatedKet, prec / M);
-    for (int j = 0; j < N; j++) {
-        if(my_orb(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
-    }
-}
-} // namespace mpifuncvec
-} // namespace mrcpp
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
deleted file mode 100644
index c43d3475c..000000000
--- a/src/utils/ComplexFunction.h
+++ /dev/null
@@ -1,199 +0,0 @@
-#pragma once
-
-#include "functions/RepresentableFunction.h"
-#include "math_utils.h"
-#include "mpi_utils.h"
-#include "trees/FunctionTree.h"
-#include "trees/MultiResolutionAnalysis.h"
-#include <Eigen/Core>
-
-using namespace Eigen;
-
-using IntVector = Eigen::VectorXi;
-using DoubleVector = Eigen::VectorXd;
-using ComplexVector = Eigen::VectorXcd;
-
-using IntMatrix = Eigen::MatrixXi;
-using DoubleMatrix = Eigen::MatrixXd;
-using ComplexMatrix = Eigen::MatrixXcd;
-
-class MPI_FuncVector;
-
-namespace mrcpp {
-
-class BankAccount;
-  template <int D, typename T> class FunctionTree;
-template <int D> class MultiResolutionAnalysis;
-
-using ComplexDouble = std::complex<double>;
-namespace NUMBER {
-enum type { Total, Real, Imag };
-}
-namespace SPIN {
-enum type { Paired, Alpha, Beta };
-}
-
-struct FunctionData {
-    int type{0};
-    int order{1};
-    int scale{0};
-    int depth{0};
-    int boxes[3] = {0, 0, 0};
-    int corner[3] = {0, 0, 0};
-    int real_size{0};
-    int imag_size{0};
-    bool is_shared{false};
-    int spin{0};
-    int occ{0};
-};
-
-class TreePtr final {
-public:
-    explicit TreePtr(bool share)
-            : shared_mem_re(nullptr)
-            , shared_mem_im(nullptr)
-            , re(nullptr)
-            , im(nullptr) {
-        this->func_data.is_shared = share;
-        if (this->func_data.is_shared and mpi::share_size > 1) {
-            // Memory size in MB defined in input. Virtual memory, does not cost anything if not used.
-#ifdef MRCPP_HAS_MPI
-            this->shared_mem_re = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
-            this->shared_mem_im = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
-#endif
-        }
-    }
-
-    ~TreePtr() {
-        if (this->shared_mem_re != nullptr) delete this->shared_mem_re;
-        if (this->shared_mem_im != nullptr) delete this->shared_mem_im;
-        if (this->re != nullptr) delete this->re;
-        if (this->im != nullptr) delete this->im;
-    }
-
-    friend class ComplexFunction;
-
-private:
-    FunctionData func_data;
-    mrcpp::SharedMemory<double> *shared_mem_re;
-    mrcpp::SharedMemory<double> *shared_mem_im;
-    mrcpp::FunctionTree<3, double> *re; ///< Real part of function
-    mrcpp::FunctionTree<3, double> *im; ///< Imaginary part of function
-
-    void flushFuncData() {
-        this->func_data.real_size = 0;
-        this->func_data.imag_size = 0;
-        if (this->re != nullptr) {
-            this->func_data.real_size = this->re->getNChunksUsed();
-            flushMRAData(this->re->getMRA());
-        }
-        if (this->im != nullptr) {
-            this->func_data.imag_size = this->im->getNChunksUsed();
-            flushMRAData(this->im->getMRA());
-        }
-    }
-
-    void flushMRAData(const mrcpp::MultiResolutionAnalysis<3> &mra) {
-        const auto &box = mra.getWorldBox();
-        this->func_data.type = mra.getScalingBasis().getScalingType();
-        this->func_data.order = mra.getOrder();
-        this->func_data.depth = mra.getMaxDepth();
-        this->func_data.scale = box.getScale();
-        this->func_data.boxes[0] = box.size(0);
-        this->func_data.boxes[1] = box.size(1);
-        this->func_data.boxes[2] = box.size(2);
-        this->func_data.corner[0] = box.getCornerIndex().getTranslation(0);
-        this->func_data.corner[1] = box.getCornerIndex().getTranslation(1);
-        this->func_data.corner[2] = box.getCornerIndex().getTranslation(2);
-    }
-};
-
-class ComplexFunction {
-public:
-    ComplexFunction(std::shared_ptr<TreePtr> funcptr);
-    ComplexFunction(const ComplexFunction &func);
-    ComplexFunction(int spin = 0, int occ = -1, int rank = -1, bool share = false);
-    ComplexFunction &operator=(const ComplexFunction &func);
-    ComplexFunction paramCopy() const;
-    bool isShared() const { return this->func_ptr->func_data.is_shared; }
-    bool hasReal() const { return (this->func_ptr->re == nullptr) ? false : true; }
-    bool hasImag() const { return (this->func_ptr->im == nullptr) ? false : true; }
-    FunctionData &getFunctionData();
-    int occ() const { return this->func_ptr->func_data.occ; }
-    int spin() const { return this->func_ptr->func_data.spin; }
-    FunctionTree<3, double> &real() { return *this->func_ptr->re; }
-    FunctionTree<3, double> &imag() { return *this->func_ptr->im; }
-    const FunctionTree<3, double> &real() const { return *this->func_ptr->re; }
-    const FunctionTree<3, double> &imag() const { return *this->func_ptr->im; }
-    void release() { this->func_ptr.reset(); }
-    bool conjugate() const { return this->conj; }
-    MultiResolutionAnalysis<3> *funcMRA = nullptr;
-    int getRank() const { return rank; }
-    void setRank(int rank) { (*this).rank = rank; }
-    void setOcc(int occ) { this->getFunctionData().occ = occ; }
-    void setSpin(int spin) { this->getFunctionData().spin = spin; }
-    ComplexFunction dagger();
-    virtual ~ComplexFunction() = default;
-
-    void alloc(int type, mrcpp::MultiResolutionAnalysis<3> *mra = nullptr);
-    void free(int type);
-
-    int getSizeNodes(int type) const;
-    int getNNodes(int type) const;
-
-    void setReal(mrcpp::FunctionTree<3, double> *tree);
-    void setImag(mrcpp::FunctionTree<3, double> *tree);
-
-    double norm() const;
-    double squaredNorm() const;
-    ComplexDouble integrate() const;
-
-    int crop(double prec);
-    void rescale(double c);
-    void rescale(ComplexDouble c);
-    void add(ComplexDouble c, ComplexFunction inp);
-    void absadd(ComplexDouble c, ComplexFunction inp);
-    char printSpin() const;
-
-protected:
-    bool conj{false};
-    std::shared_ptr<mrcpp::TreePtr> func_ptr;
-    int rank = -1; // index in vector
-};
-
-namespace cplxfunc {
-void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
-ComplexDouble dot(ComplexFunction bra, ComplexFunction ket);
-ComplexDouble node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact);
-void deep_copy(ComplexFunction &out, ComplexFunction &inp);
-void add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec);
-void project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec);
-void project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec);
-void multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
-void multiply(ComplexFunction &out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
-void linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec);
-} // namespace cplxfunc
-
-class MPI_FuncVector : public std::vector<ComplexFunction> {
-public:
-    MPI_FuncVector(int N = 0);
-    MultiResolutionAnalysis<3> *vecMRA;
-    void distribute();
-};
-
-namespace mpifuncvec {
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec = -1.0);
-void save_nodes(MPI_FuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
-MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
-ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi);
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet);
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet);
-void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-} // namespace mpifuncvec
-} // namespace mrcpp
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 34a334c50..7387ffdbc 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -258,11 +258,6 @@ bool my_orb(int j) {
     return ((j) % wrk_size == wrk_rank) ? true : false;
 }
 
-/** @brief Test if orbital belongs to this MPI rank (or is common)*/
-bool my_orb(ComplexFunction orbj) {
-    return my_orb(orbj.getRank());
-}
-
 /** @brief Test if function belongs to this MPI rank */
 bool my_func(int j) {
     return ((j) % wrk_size == wrk_rank) ? true : false;
@@ -334,39 +329,6 @@ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
 #endif
 }
 
-// send a function with MPI
-void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm) {
-#ifdef MRCPP_HAS_MPI
-    if (func.isShared()) MSG_WARN("Sending a shared function is not recommended");
-    FunctionData &funcinfo = func.getFunctionData();
-    MPI_Send(&funcinfo, sizeof(FunctionData), MPI_BYTE, dst, 0, comm);
-    if (func.hasReal()) mrcpp::send_tree(func.real(), dst, tag, comm, funcinfo.real_size);
-    if (func.hasImag()) mrcpp::send_tree(func.imag(), dst, tag + 10000, comm, funcinfo.imag_size);
-#endif
-}
-
-// receive a function with MPI
-void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
-#ifdef MRCPP_HAS_MPI
-    if (func.isShared()) MSG_WARN("Receiving a shared function is not recommended");
-    MPI_Status status;
-
-    FunctionData &funcinfo = func.getFunctionData();
-    MPI_Recv(&funcinfo, sizeof(FunctionData), MPI_BYTE, src, 0, comm, &status);
-    if (funcinfo.real_size > 0) {
-        // We must have a tree defined for receiving nodes. Define one:
-        if (not func.hasReal()) func.alloc(NUMBER::Real);
-        mrcpp::recv_tree(func.real(), src, tag, comm, funcinfo.real_size);
-    }
-
-    if (funcinfo.imag_size > 0) {
-        // We must have a tree defined for receiving nodes. Define one:
-        if (not func.hasImag()) func.alloc(NUMBER::Imag);
-        mrcpp::recv_tree(func.imag(), src, tag + 10000, comm, funcinfo.imag_size);
-    }
-#endif
-}
-
 // send a component function with MPI
 void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
@@ -397,17 +359,6 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
 #endif
 }
 
-/** Update a shared function after it has been changed by one of the MPI ranks. */
-void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm) {
-    if (func.isShared()) {
-#ifdef MRCPP_HAS_MPI
-        if (func.hasReal()) mrcpp::share_tree(func.real(), src, tag, comm);
-        if (func.hasImag()) mrcpp::share_tree(func.imag(), src, 2 * tag, comm);
-#endif
-    }
-}
-
-
 /** Update a shared function after it has been changed by one of the MPI ranks. */
 void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
@@ -420,48 +371,6 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     }
 }
 
-/** @brief Add all mpi function into rank zero */
-void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm) {
-/* 1) Each odd rank send to the left rank
-   2) All odd ranks are "deleted" (can exit routine)
-   3) new "effective" ranks are defined within the non-deleted ranks
-      effective rank = rank/fac , where fac are powers of 2
-   4) repeat
- */
-#ifdef MRCPP_HAS_MPI
-    int comm_size, comm_rank;
-    MPI_Comm_rank(comm, &comm_rank);
-    MPI_Comm_size(comm, &comm_size);
-    if (comm_size == 1) return;
-
-    int fac = 1; // powers of 2
-    while (fac < comm_size) {
-        if ((comm_rank / fac) % 2 == 0) {
-            // receive
-            int src = comm_rank + fac;
-            if (src < comm_size) {
-                ComplexFunction func_i(false);
-                int tag = 3333 + src;
-                recv_function(func_i, src, tag, comm);
-                func.add(1.0, func_i); // add in place using union grid
-                func.crop(prec);
-            }
-        }
-        if ((comm_rank / fac) % 2 == 1) {
-            // send
-            int dest = comm_rank - fac;
-            if (dest >= 0) {
-                int tag = 3333 + comm_rank;
-                send_function(func, dest, tag, comm);
-                break; // once data is sent we are done
-            }
-        }
-        fac *= 2;
-    }
-    MPI_Barrier(comm);
-#endif
-}
-
 /** @brief Add all mpi function into rank zero */
 void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
@@ -586,28 +495,6 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm c
 #endif
 }
 
-/** @brief make union tree without coeff and send to all
- *  Include both real and imaginary parts
- */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<ComplexFunction> &Phi, MPI_Comm comm) {
-#ifdef MRCPP_HAS_MPI
-    /* 1) make union grid of own orbitals
-       2) make union grid with others orbitals (sent to rank zero)
-       3) rank zero broadcast func to everybody
-     */
-
-    int N = Phi.size();
-    for (int j = 0; j < N; j++) {
-        if (not my_orb(j)) continue;
-        if (Phi[j].hasReal()) tree.appendTreeNoCoeff(Phi[j].real());
-        if (Phi[j].hasImag()) tree.appendTreeNoCoeff(Phi[j].imag());
-    }
-    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
-    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
-#endif
-}
-
-
 /** @brief make union tree without coeff and send to all
  *  Real trees
  */
@@ -670,38 +557,6 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
 #endif
 }
 
-/** @brief Distribute rank zero function to all ranks */
-void broadcast_function(ComplexFunction &func, MPI_Comm comm) {
-/* use same strategy as a reduce, but in reverse order */
-#ifdef MRCPP_HAS_MPI
-    int comm_size, comm_rank;
-    MPI_Comm_rank(comm, &comm_rank);
-    MPI_Comm_size(comm, &comm_size);
-    if (comm_size == 1) return;
-
-    int fac = 1; // powers of 2
-    while (fac < comm_size) fac *= 2;
-    fac /= 2;
-
-    while (fac > 0) {
-        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) {
-            // receive
-            int src = comm_rank - fac;
-            int tag = 4334 + comm_rank;
-            recv_function(func, src, tag, comm);
-        }
-        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
-            // send
-            int dst = comm_rank + fac;
-            int tag = 4334 + dst;
-            if (dst < comm_size) send_function(func, dst, tag, comm);
-        }
-        fac /= 2;
-    }
-    MPI_Barrier(comm);
-#endif
-}
-
 /** @brief Distribute rank zero function to all ranks */
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 6277c0c24..a176fe7ab 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -2,7 +2,6 @@
 
 #include <Eigen/Core>
 
-#include "ComplexFunction.h"
 #include "CompFunction.h"
 #include "mpi_utils.h"
 #include "trees/MultiResolutionAnalysis.h"
@@ -41,7 +40,6 @@ void barrier(MPI_Comm comm);
 bool grand_master();
 bool share_master();
 bool my_orb(int j);
-bool my_orb(ComplexFunction orbj);
 bool my_func(int j);
 bool my_func(const CompFunction<3>& func);
 bool my_func(CompFunction<3> *func);
@@ -49,20 +47,14 @@ bool my_func(CompFunction<3> *func);
 // bool my_unique_orb(const Orbital &orb);
 void free_foreign(CompFunctionVector &Phi);
 
-void send_function(ComplexFunction &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
-void recv_function(ComplexFunction &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
 void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm = mpi::comm_wrk);
 void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm = mpi::comm_wrk);
-void share_function(ComplexFunction &func, int src, int tag, MPI_Comm comm);
 void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm);
 
-void reduce_function(double prec, ComplexFunction &func, MPI_Comm comm);
-void broadcast_function(ComplexFunction &func, MPI_Comm comm);
 void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm);
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm);
 
 void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<ComplexFunction> &Phi, MPI_Comm comm);
 void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
 void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
 void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);

From 0df20bca9c03eb894462ce923155c7033b4fdbcd Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 5 Aug 2024 15:38:44 +0200
Subject: [PATCH 22/38] removed ComplexFunction from api

---
 api/MWFunctions | 1 -
 1 file changed, 1 deletion(-)

diff --git a/api/MWFunctions b/api/MWFunctions
index 13a53214a..24c562364 100644
--- a/api/MWFunctions
+++ b/api/MWFunctions
@@ -27,7 +27,6 @@
 #include "trees/BoundingBox.h"
 #include "trees/FunctionTree.h"
 #include "trees/FunctionTreeVector.h"
-#include "utils/ComplexFunction.h"
 #include "utils/CompFunction.h"
 
 #include "core/InterpolatingBasis.h"

From 1e33df76f139933f23440c7389a4547119b836d1 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 6 Aug 2024 16:24:37 +0200
Subject: [PATCH 23/38] shared CompFunctions trees

---
 src/utils/CompFunction.cpp    |   39 +-
 src/utils/CompFunction.h      |   53 +-
 src/utils/ComplexFunction.cpp | 2016 +++++++++++++++++++++++++++++++++
 src/utils/ComplexFunction.h   |  199 ++++
 4 files changed, 2274 insertions(+), 33 deletions(-)
 create mode 100644 src/utils/ComplexFunction.cpp
 create mode 100644 src/utils/ComplexFunction.h

diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 1e6ff066d..8384e93af 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -18,15 +18,15 @@ namespace mrcpp {
   CompFunction<D>::CompFunction(MultiResolutionAnalysis<D> &mra)
   { defaultCompMRA<D> = &mra;
     data.Ncomp = 0;
-    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    func_ptr = std::make_shared<TreePtr<D>>(false);
+
     for (int i = 0; i < 4; i++) CompC[i] = nullptr;
   }
 
   template <int D>
   CompFunction<D>::CompFunction()
   { data.Ncomp = 0;
-    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
-    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+      func_ptr = std::make_shared<TreePtr<D>>(false);
   }
 
 /*
@@ -35,6 +35,7 @@ namespace mrcpp {
   template <int D>
   CompFunction<D>::CompFunction(int n1)
   { data.Ncomp = 0;
+      func_ptr = std::make_shared<TreePtr<D>>(false);
       data.n1[0] = n1;
       data.n2[0] = -1;
       data.n3[0] = 0;
@@ -42,11 +43,6 @@ namespace mrcpp {
       isreal = 1;
       iscomplex = 0;
       data.shared = false;
-      //      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      //CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
-    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
-    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-
   }
 
 /*
@@ -55,6 +51,7 @@ namespace mrcpp {
   template <int D>
   CompFunction<D>::CompFunction(int n1, bool share)
   { data.Ncomp = 0;
+      func_ptr = std::make_shared<TreePtr<D>>(share);
       data.n1[0] = n1;
       data.n2[0] = -1;
       data.n3[0] = 0;
@@ -62,11 +59,6 @@ namespace mrcpp {
       isreal = 1;
       iscomplex = 0;
       data.shared = share;
-     if (share) MSG_ABORT("Not yet implemented");
-      //CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
-    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
-    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-
   }
 
 /*
@@ -74,7 +66,9 @@ namespace mrcpp {
  */
   template <int D>
   CompFunction<D>::CompFunction(const CompFunctionData<D>& indata)
-  { data = indata;}
+  { data = indata;
+      func_ptr = std::make_shared<TreePtr<D>>(share);
+  }
 
 /** @brief Copy constructor
  *
@@ -84,8 +78,7 @@ namespace mrcpp {
   template <int D>
   CompFunction<D>::CompFunction(const CompFunction<D> &compfunc) {
       data = compfunc.data;
-    for (int i = 0; i < 4; i++) CompD[i] = compfunc.CompD[i];
-    for (int i = 0; i < 4; i++) CompC[i] = compfunc.CompC[i];
+      func_ptr = compfunc.func_ptr;
   }
 
 /** @brief Copy constructor
@@ -96,18 +89,14 @@ namespace mrcpp {
   template <int D>
   CompFunction<D>::CompFunction(CompFunction<D> && compfunc) {
       data = compfunc.data;
-      for (int i = 0; i < 4; i++) CompD[i]=compfunc.CompD[i];
-      for (int i = 0; i < 4; i++) CompC[i]=compfunc.CompC[i];
+      func_ptr = compfunc.func_ptr;
   }
 
   template <int D>
   CompFunction<D> &CompFunction<D>::operator=(const CompFunction<D> &compfunc) {
       if (this != &compfunc) {
-          this->data = compfunc.data;
-          for (int i = 0; i < Ncomp; i++) {
-              CompD[i] = compfunc.CompD[i];
-              CompC[i] = compfunc.CompC[i];
-          }
+          data = compfunc.data;
+          func_ptr = compfunc.func_ptr;
       }
       return *this;
   }
@@ -217,10 +206,10 @@ ComplexDouble CompFunction<D>::integrate() const {
           delete CompD[i];
           delete CompC[i];
           if (isreal) {
-              CompD[i] = new FunctionTree<D, double> (*defaultCompMRA<D>);
+              CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
           }
           if (iscomplex) {
-              CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>);
+              CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
           }
           Ncomp = std::max(Ncomp, i + 1);
       }
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 2c30dc3c2..0588d8fd0 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "trees/FunctionTree.h"
+#include "mpi_utils.h"
 
 using namespace Eigen;
 
@@ -43,6 +44,41 @@ struct CompFunctionData {
     int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
 };
 
+template <int D> class TreePtr final {
+public:
+    /** Owns up to four real and four complex component trees, plus optional shared-memory blocks. */
+    explicit TreePtr(bool share)
+            : is_shared(share) {
+        for (int i = 0; i < 4; i++) real[i] = nullptr;
+        for (int i = 0; i < 4; i++) cplx[i] = nullptr;
+        if (is_shared and mpi::share_size > 1) {
+            // Memory size in MB defined in input. Virtual memory, does not cost anything if not used.
+#ifdef MRCPP_HAS_MPI
+            this->shared_mem_real = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
+            this->shared_mem_cplx = new mrcpp::SharedMemory<ComplexDouble>(mpi::comm_share, mpi::shared_memory_size);
+#endif
+        }
+    }
+    // Non-copyable: the raw pointers below are deleted in the destructor, so a copy would double-delete.
+    TreePtr(const TreePtr &) = delete;
+    TreePtr &operator=(const TreePtr &) = delete;
+    ~TreePtr() {
+        delete this->shared_mem_real; // delete on nullptr is a no-op, no check needed
+        delete this->shared_mem_cplx;
+        for (int i = 0; i < 4; i++) {
+            delete this->real[i];
+            delete this->cplx[i];
+        }
+    }
+    bool is_shared = false;
+    friend class CompFunction<D>;
+protected:
+    FunctionTree<D, double> *real[4]; // Real function
+    FunctionTree<D, ComplexDouble> *cplx[4]; // Complex function
+    SharedMemory<double> *shared_mem_real = nullptr;
+    SharedMemory<ComplexDouble> *shared_mem_cplx = nullptr;
+};
+
 
 template <int D> class CompFunction {
 public:
@@ -54,9 +90,10 @@ template <int D> class CompFunction {
     CompFunction(const CompFunction<D> &compfunc);
     CompFunction(CompFunction<D> && compfunc);
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
+    virtual ~CompFunction() = default;
 
-    FunctionTree<D, double> *CompD[4];
-    FunctionTree<D, ComplexDouble> *CompC[4];
+    FunctionTree<D, double>* (&CompD)[4] = func_ptr->real; // alias so that CompD can be used instead of func_ptr->real. NOTE(review): func_ptr is declared after this member and is only assigned inside the constructor bodies, so this reference is bound before func_ptr is valid and is never rebound when func_ptr is reassigned -- confirm and fix
+    FunctionTree<D, ComplexDouble>* (&CompC)[4] = func_ptr->cplx; // alias for func_ptr->cplx; NOTE(review): same binding-order concern as CompD
 
     std::string name;
 
@@ -67,19 +104,15 @@ template <int D> class CompFunction {
     int& conj = data.conj; // soft conjugate
     int& isreal = data.isreal; // T=double
     int& iscomplex = data.iscomplex; // T=DoubleComplex
+    int& share = data.shared;
     int* Nchunks = data.Nchunks; // number of chunks of each component tree
+
     // ComplexFunctions are only defined for D=3
     // template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
      //CompFunction(ComplexFunction cplxfunc);
     // template <int D_ = 3, typename std::enable_if<D_ == 3, int>::type = 0>
      //operator ComplexFunction() const;
     // CompFunction destructor
-    ~CompFunction() {
-        for (int i = 0; i < Ncomp; i++) {
-            delete CompD[i];
-            delete CompC[i];
-        }
-    }
 
     CompFunction paramCopy() const;
     ComplexDouble integrate() const;
@@ -114,6 +147,10 @@ template <int D> class CompFunction {
     CompFunction<D> dagger();
     FunctionTree<D, double> &imag(int i = 0); //does not make sense now
     const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
+
+protected:
+    std::shared_ptr<mrcpp::TreePtr<D>> func_ptr;
+
 };
 
 template <int D>
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
new file mode 100644
index 000000000..63d855727
--- /dev/null
+++ b/src/utils/ComplexFunction.cpp
@@ -0,0 +1,2016 @@
+#include "ComplexFunction.h"
+#include "Bank.h"
+#include "Printer.h"
+#include "Timer.h"
+#include "parallel.h"
+#include "treebuilders/grid.h"
+#include "treebuilders/multiply.h"
+#include "treebuilders/project.h"
+#include "trees/FunctionNode.h"
+#include "treebuilders/add.h"
+
+using mrcpp::Timer;
+
+namespace mrcpp {
+
+MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
+
+ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
+        : funcMRA(defaultMRA)
+        , func_ptr(funcptr) {}
+
+ComplexFunction::ComplexFunction(const ComplexFunction &func)
+        : funcMRA(func.funcMRA)
+        , conj(func.conj)
+        , func_ptr(func.func_ptr)
+        , rank(func.rank) {}
+
+ComplexFunction &ComplexFunction::operator=(const ComplexFunction &func) {
+    if (this != &func) {
+        this->conj = func.conj;
+        this->func_ptr = func.func_ptr;
+        this->funcMRA = func.funcMRA;
+        this->rank = func.rank;
+    }
+    return *this;
+}
+
+/** @brief Constructor
+ *
+ * @param spin: electron spin (SPIN::Alpha/Beta/Paired); a negative value triggers INVALID_ARG_ABORT
+ * @param occ: occupation; if negative it is derived from the spin (2 for Paired, 1 for Alpha or Beta)
+ * @param rank: MPI ownership (-1 means all MPI ranks)
+ * @param share: forwarded to the TreePtr constructor that backs this function
+ * Initializes the mrcpp::ComplexFunction with NULL pointers for both real and imaginary part; the MRA is the global defaultMRA.
+ */
+ComplexFunction::ComplexFunction(int spin, int occ, int rank, bool share)
+        : funcMRA(defaultMRA)
+        , func_ptr(std::make_shared<TreePtr>(share))
+        , rank(rank) {
+    this->getFunctionData().spin = spin;
+    this->getFunctionData().occ = occ;
+    if (this->spin() < 0) INVALID_ARG_ABORT;
+    if (this->occ() < 0) {
+        if (this->spin() == SPIN::Paired) this->getFunctionData().occ = 2;
+        if (this->spin() == SPIN::Alpha) this->getFunctionData().occ = 1;
+        if (this->spin() == SPIN::Beta) this->getFunctionData().occ = 1;
+    }
+}
+
+/** @brief Parameter copy
+ *
+ * Returns a new ComplexFunction with the same spin, occupation and rank_id as *this; the share flag is not passed on, so the constructor's default applies. No trees are copied.
+ */
+ComplexFunction ComplexFunction::paramCopy() const {
+    return ComplexFunction(this->spin(), this->occ(), this->getRank());
+}
+
+MPI_FuncVector::MPI_FuncVector(int N)
+        : std::vector<ComplexFunction>(N) {
+    for (int i = 0; i < N; i++) (*this)[i].setRank(i);
+    vecMRA = defaultMRA;
+}
+void MPI_FuncVector::distribute() {
+    for (int i = 0; i < this->size(); i++) (*this)[i].setRank(i);
+}
+
+/** @brief Returns the orbital meta data
+ *
+ * Tree sizes (nChunks) are flushed before return.
+ */
+FunctionData &ComplexFunction::getFunctionData() {
+    this->func_ptr->flushFuncData();
+    return this->func_ptr->func_data;
+}
+
+ComplexFunction ComplexFunction::dagger() {
+    ComplexFunction out(*this);
+    out.conj = not(this->conj);
+    return out; // Return shallow copy
+}
+
+void ComplexFunction::setReal(FunctionTree<3> *tree) {
+    if (isShared()) MSG_ABORT("Cannot set in shared function");
+    this->func_ptr->re = tree;
+}
+
+void ComplexFunction::setImag(FunctionTree<3> *tree) {
+    if (isShared()) MSG_ABORT("Cannot set in shared function");
+    this->func_ptr->im = tree;
+}
+
+void ComplexFunction::alloc(int type, MultiResolutionAnalysis<3> *mra) {
+    if (mra == nullptr) mra = funcMRA;
+    if (mra == nullptr) MSG_ABORT("Invalid argument");
+    if (type == NUMBER::Real or type == NUMBER::Total) {
+        if (hasReal()) MSG_ABORT("Real part already allocated");
+        this->func_ptr->re = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_re);
+    }
+    if (type == NUMBER::Imag or type == NUMBER::Total) {
+        if (hasImag()) MSG_ABORT("Imaginary part already allocated");
+        this->func_ptr->im = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_im);
+    }
+}
+
+void ComplexFunction::free(int type) {
+    if (type == NUMBER::Real or type == NUMBER::Total) {
+        if (hasReal()) delete this->func_ptr->re;
+        this->func_ptr->re = nullptr;
+        if (this->func_ptr->shared_mem_re) this->func_ptr->shared_mem_re->clear();
+    }
+    if (type == NUMBER::Imag or type == NUMBER::Total) {
+        if (hasImag()) delete this->func_ptr->im;
+        this->func_ptr->im = nullptr;
+        if (this->func_ptr->shared_mem_im) this->func_ptr->shared_mem_im->clear();
+    }
+}
+
+/** @brief Sum of getSizeNodes() over the selected parts (NUMBER::Real, NUMBER::Imag or NUMBER::Total).
+ *  Parts that are not allocated contribute zero. */
+int ComplexFunction::getSizeNodes(int type) const {
+    const bool wantReal = (type == NUMBER::Real or type == NUMBER::Total);
+    const bool wantImag = (type == NUMBER::Imag or type == NUMBER::Total);
+    int size_kb = 0; // Memory size in kB
+    if (wantReal and hasReal()) size_kb += real().getSizeNodes();
+    if (wantImag and hasImag()) size_kb += imag().getSizeNodes();
+    return size_kb;
+}
+
+/** @brief Sum of getNNodes() over the selected parts (NUMBER::Real, NUMBER::Imag or NUMBER::Total).
+ *  Parts that are not allocated contribute zero. */
+int ComplexFunction::getNNodes(int type) const {
+    const bool wantReal = (type == NUMBER::Real or type == NUMBER::Total);
+    const bool wantImag = (type == NUMBER::Imag or type == NUMBER::Total);
+    int total = 0;
+    if (wantReal and hasReal()) total += real().getNNodes();
+    if (wantImag and hasImag()) total += imag().getNNodes();
+    return total;
+}
+
+int ComplexFunction::crop(double prec) {
+    if (prec < 0.0) return 0;
+    bool need_to_crop = not(isShared()) or mpi::share_master();
+    int nChunksremoved = 0;
+    if (need_to_crop) {
+        if (hasReal()) nChunksremoved = real().crop(prec, 1.0, false);
+        if (hasImag()) nChunksremoved += imag().crop(prec, 1.0, false);
+    }
+    mpi::share_function(*this, 0, 7744, mpi::comm_share);
+    return nChunksremoved;
+}
+
+/** Integral of the function: real part from real(), imaginary part from imag(); a missing part counts as 0. */
+ComplexDouble ComplexFunction::integrate() const {
+    const double re = hasReal() ? real().integrate() : 0.0;
+    const double im = hasImag() ? imag().integrate() : 0.0;
+    // NOTE(review): the conjugate flag is not consulted here.
+    return ComplexDouble(re, im);
+}
+
+/** @brief Norm of the orbital; a non-positive squaredNorm() (e.g. -1.0) is passed through unchanged */
+double ComplexFunction::norm() const {
+    const double sq = squaredNorm();
+    if (not(sq > 0.0)) return sq;
+    return std::sqrt(sq);
+}
+
+/** @brief Squared norm: sum of the non-negative squared norms of the real and imaginary parts.
+ *
+ * A part that is missing, or whose tree reports a negative squared norm, is left out of the sum.
+ * Returns -1.0 when neither part contributes.
+ */
+double ComplexFunction::squaredNorm() const {
+    const double reSq = hasReal() ? real().getSquareNorm() : -1.0;
+    const double imSq = hasImag() ? imag().getSquareNorm() : -1.0;
+
+    // Both parts negative: nothing to sum.
+    if (reSq < 0.0 and imSq < 0.0) return -1.0;
+    double total = 0.0;
+    if (reSq >= 0.0) total += reSq;
+    if (imSq >= 0.0) total += imSq;
+    return total;
+}
+
+/** @brief In place addition.
+ *
+ * Output is extended to union grid.
+ *
+ */
+void ComplexFunction::add(ComplexDouble c, ComplexFunction inp) {
+    double thrs = MachineZero;
+    bool cHasReal = (std::abs(c.real()) > thrs);
+    bool cHasImag = (std::abs(c.imag()) > thrs);
+    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
+    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
+
+    ComplexFunction &out = *this;
+    bool clearReal(false), clearImag(false);
+    if (outNeedsReal and not(out.hasReal())) {
+        out.alloc(NUMBER::Real);
+        clearReal = true;
+    }
+
+    if (outNeedsImag and not(out.hasImag())) {
+        out.alloc(NUMBER::Imag);
+        clearImag = true;
+    }
+
+    bool need_to_add = not(out.isShared()) or mpi::share_master();
+    if (need_to_add) {
+        if (clearReal) out.real().setZero();
+        if (clearImag) out.imag().setZero();
+        if (cHasReal and inp.hasReal()) {
+            while (refine_grid(out.real(), inp.real())) {}
+            out.real().add(c.real(), inp.real());
+        }
+        if (cHasReal and inp.hasImag()) {
+            double conj = (inp.conjugate()) ? -1.0 : 1.0;
+            while (refine_grid(out.imag(), inp.imag())) {}
+            out.imag().add(conj * c.real(), inp.imag());
+        }
+        if (cHasImag and inp.hasReal()) {
+            while (refine_grid(out.imag(), inp.real())) {}
+            out.imag().add(c.imag(), inp.real());
+        }
+        if (cHasImag and inp.hasImag()) {
+            double conj = (inp.conjugate()) ? -1.0 : 1.0;
+            while (refine_grid(out.real(), inp.imag())) {}
+            out.real().add(-1.0 * conj * c.imag(), inp.imag());
+        }
+    }
+    mpi::share_function(out, 0, 9911, mpi::comm_share);
+}
+
+/** @brief In place addition of absolute values.
+ *
+ * Output is extended to union grid.
+ *
+ */
+void ComplexFunction::absadd(ComplexDouble c, ComplexFunction inp) {
+    double thrs = MachineZero;
+    bool cHasReal = (std::abs(c.real()) > thrs);
+    bool cHasImag = (std::abs(c.imag()) > thrs);
+    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
+    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
+
+    ComplexFunction &out = *this;
+    bool clearReal(false), clearImag(false);
+    if (outNeedsReal and not(out.hasReal())) {
+        out.alloc(NUMBER::Real);
+        clearReal = true;
+    }
+
+    if (outNeedsImag and not(out.hasImag())) {
+        out.alloc(NUMBER::Imag);
+        clearImag = true;
+    }
+
+    bool need_to_add = not(out.isShared()) or mpi::share_master();
+    if (need_to_add) {
+        if (clearReal) out.real().setZero();
+        if (clearImag) out.imag().setZero();
+        if (cHasReal and inp.hasReal()) {
+            while (refine_grid(out.real(), inp.real())) {}
+            out.real().absadd(c.real(), inp.real());
+        }
+        if (cHasReal and inp.hasImag()) {
+            double conj = (inp.conjugate()) ? -1.0 : 1.0;
+            while (refine_grid(out.imag(), inp.imag())) {}
+            out.imag().absadd(conj * c.real(), inp.imag());
+        }
+        if (cHasImag and inp.hasReal()) {
+            while (refine_grid(out.imag(), inp.real())) {}
+            out.imag().absadd(c.imag(), inp.real());
+        }
+        if (cHasImag and inp.hasImag()) {
+            double conj = (inp.conjugate()) ? -1.0 : 1.0;
+            while (refine_grid(out.real(), inp.imag())) {}
+            out.real().absadd(-1.0 * conj * c.imag(), inp.imag());
+        }
+    }
+    mpi::share_function(out, 0, 9912, mpi::comm_share);
+}
+
+/** @brief In place multiply with real scalar. Fully in-place.*/
+void ComplexFunction::rescale(double c) {
+    bool need_to_rescale = not(isShared()) or mpi::share_master();
+    if (need_to_rescale) {
+        if (hasReal()) real().rescale(c);
+        if (hasImag()) imag().rescale(c);
+    }
+    mpi::share_function(*this, 0, 5543, mpi::comm_share);
+}
+
+/** @brief In place multiply with complex scalar. Involves a deep copy.*/
+void ComplexFunction::rescale(ComplexDouble c) {
+    ComplexFunction &out = *this;
+    ComplexFunction tmp(spin(), occ(), rank, isShared());
+    cplxfunc::deep_copy(tmp, out);
+    out.free(NUMBER::Total);
+    out.add(c, tmp);
+}
+
+/** @brief Returns a character representing the spin: 'p' (Paired), 'a' (Alpha), 'b' (Beta), or 'u' if none match */
+char ComplexFunction::printSpin() const {
+    char sp = 'u';
+    if (this->spin() == SPIN::Paired) sp = 'p';
+    if (this->spin() == SPIN::Alpha) sp = 'a';
+    if (this->spin() == SPIN::Beta) sp = 'b';
+    return sp;
+}
+
+void cplxfunc::SetdefaultMRA(MultiResolutionAnalysis<3> *MRA) {
+    defaultMRA = MRA;
+}
+
+/** @brief Compute <bra|ket> = int bra^\dag(r) * ket(r) dr.
+ *
+ *  Notice that the <bra| position is already complex conjugated.
+ *
+ */
+ComplexDouble cplxfunc::dot(ComplexFunction bra, ComplexFunction ket) {
+    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
+    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::dot(bra.real(), ket.real());
+    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::dot(bra.real(), ket.imag());
+    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::dot(bra.imag(), ket.real());
+    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::dot(bra.imag(), ket.imag());
+
+    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
+    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
+
+    double real_part = rr + bra_conj * ket_conj * ii;
+    double imag_part = ket_conj * ri - bra_conj * ir;
+    return ComplexDouble(real_part, imag_part);
+}
+
+/** @brief Compute <bra|ket> = int |bra^\dag(r)| * |ket(r)| dr.
+ *
+ */
+ComplexDouble cplxfunc::node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact) {
+    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
+    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::node_norm_dot(bra.real(), ket.real(), exact);
+    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::node_norm_dot(bra.real(), ket.imag(), exact);
+    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::node_norm_dot(bra.imag(), ket.real(), exact);
+    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::node_norm_dot(bra.imag(), ket.imag(), exact);
+
+    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
+    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
+
+    double real_part = rr + bra_conj * ket_conj * ii;
+    double imag_part = ket_conj * ri - bra_conj * ir;
+    return ComplexDouble(real_part, imag_part);
+}
+
+/** @brief Deep copy of inp into out
+ *
+ * Fills out (nothing is returned): out takes inp's MRA and rank, missing parts are allocated,
+ * and grid and function are copied part by part. If out is shared, only the share master
+ * copies; share_function is then called on out. A conjugated out gets its imaginary part negated.
+ */
+void cplxfunc::deep_copy(ComplexFunction &out, ComplexFunction &inp) {
+    bool need_to_copy = not(out.isShared()) or mpi::share_master();
+    out.funcMRA = inp.funcMRA;
+    out.setRank(inp.getRank());
+    if (inp.hasReal()) {
+        if (not out.hasReal()) out.alloc(NUMBER::Real);
+        if (need_to_copy) {
+            copy_grid(out.real(), inp.real());
+            copy_func(out.real(), inp.real());
+        }
+    }
+    if (inp.hasImag()) {
+        if (not out.hasImag()) out.alloc(NUMBER::Imag);
+        if (need_to_copy) {
+            copy_grid(out.imag(), inp.imag());
+            copy_func(out.imag(), inp.imag());
+            if (out.conjugate()) out.imag().rescale(-1.0);
+        }
+    }
+    mpi::share_function(out, 0, 1324, mpi::comm_share);
+}
+
+void cplxfunc::project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec) {
+    bool need_to_project = not(out.isShared()) or mpi::share_master();
+    if (type == NUMBER::Real or type == NUMBER::Total) {
+        if (not out.hasReal()) out.alloc(NUMBER::Real);
+        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
+    }
+    if (type == NUMBER::Imag or type == NUMBER::Total) {
+        if (not out.hasImag()) out.alloc(NUMBER::Imag);
+        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
+    }
+    mpi::share_function(out, 0, 123123, mpi::comm_share);
+}
+
+void cplxfunc::project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec) {
+    bool need_to_project = not(out.isShared()) or mpi::share_master();
+    if (type == NUMBER::Real or type == NUMBER::Total) {
+        if (not out.hasReal()) out.alloc(NUMBER::Real);
+        if (need_to_project) build_grid(out.real(), f);
+        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
+    }
+    if (type == NUMBER::Imag or type == NUMBER::Total) {
+        if (not out.hasImag()) out.alloc(NUMBER::Imag);
+        if (need_to_project) build_grid(out.imag(), f);
+        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
+    }
+    mpi::share_function(out, 0, 132231, mpi::comm_share);
+}
+
+/** @brief out = a*inp_a + b*inp_b
+ *
+ * Recast into linear_combination.
+ *
+ */
+void cplxfunc::add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec) {
+    ComplexVector coefs(2);
+    coefs(0) = a;
+    coefs(1) = b;
+
+    std::vector<ComplexFunction> funcs; // NB: not a ComplexFunctionVector, because not run in parallel!
+    funcs.push_back(inp_a);
+    funcs.push_back(inp_b);
+
+    cplxfunc::linear_combination(out, coefs, funcs, prec);
+}
+
+/** @brief out = inp_a * inp_b
+ *
+ */
+void cplxfunc::multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
+    multiply_real(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
+    multiply_imag(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
+}
+
+/** @brief out = inp_a * f
+ *
+ */
+void cplxfunc::multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
+    // uses the mpifuncvec multiply
+    MPI_FuncVector mpi_funcvec_a;
+    mpi_funcvec_a.push_back(inp_a);
+    MPI_FuncVector mpi_funcvec_out;
+    mpi_funcvec_out = mpifuncvec::multiply(mpi_funcvec_a, f, prec, nullptr, nrefine, true);
+    out = mpi_funcvec_out[0];
+}
+
+/** @brief out = inp_a * f
+ *
+ */
+void cplxfunc::multiply(ComplexFunction &out, FunctionTree<3> &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
+    ComplexFunction cplxfunc_a;
+    cplxfunc_a.setReal(&inp_a);
+    cplxfunc::multiply(out, cplxfunc_a, f, prec, nrefine);
+    cplxfunc_a.setReal(nullptr); // otherwise inp_a is deleted by cplxfunc_a destructor
+}
+
+/** @brief out = c_0*inp_0 + c_1*inp_1 + ... + c_N*inp_N
+ *
+ */
+void cplxfunc::linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec) {
+    FunctionTreeVector<3> rvec;
+    FunctionTreeVector<3> ivec;
+
+    double thrs = MachineZero;
+    for (int i = 0; i < inp.size(); i++) {
+        double sign = (inp[i].conjugate()) ? -1.0 : 1.0;
+
+        bool cHasReal = (std::abs(c[i].real()) > thrs);
+        bool cHasImag = (std::abs(c[i].imag()) > thrs);
+
+        if (cHasReal and inp[i].hasReal()) rvec.push_back(std::make_tuple(c[i].real(), &inp[i].real()));
+        if (cHasImag and inp[i].hasImag()) rvec.push_back(std::make_tuple(-sign * c[i].imag(), &inp[i].imag()));
+
+        if (cHasImag and inp[i].hasReal()) ivec.push_back(std::make_tuple(c[i].imag(), &inp[i].real()));
+        if (cHasReal and inp[i].hasImag()) ivec.push_back(std::make_tuple(sign * c[i].real(), &inp[i].imag()));
+    }
+
+    if (rvec.size() > 0 and not out.hasReal()) out.alloc(NUMBER::Real);
+    if (ivec.size() > 0 and not out.hasImag()) out.alloc(NUMBER::Imag);
+
+    bool need_to_add = not(out.isShared()) or mpi::share_master();
+    if (need_to_add) {
+        if (rvec.size() > 0) {
+            if (prec < 0.0) {
+                build_grid(out.real(), rvec);
+                mrcpp::add(prec, out.real(), rvec, 0);
+            } else {
+                mrcpp::add(prec, out.real(), rvec);
+            }
+        } else if (out.hasReal()) {
+            out.real().setZero();
+        }
+        if (ivec.size() > 0) {
+            if (prec < 0.0) {
+                build_grid(out.imag(), ivec);
+                mrcpp::add(prec, out.imag(), ivec, 0);
+            } else {
+                mrcpp::add(prec, out.imag(), ivec);
+            }
+        } else if (out.hasImag()) {
+            out.imag().setZero();
+        }
+    }
+    mpi::share_function(out, 0, 9911, mpi::comm_share);
+}
+
+/** @brief out = Re(inp_a * inp_b)
+ *  prec < 0 multiplies on the union grid, otherwise adaptively; conjugate flags of the inputs are honoured.
+ */
+void cplxfunc::multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
+    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
+    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
+
+    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
+
+    FunctionTreeVector<3> vec;
+    if (inp_a.hasReal() and inp_b.hasReal()) {
+        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
+        if (need_to_multiply) {
+            double coef = 1.0;
+            if (prec < 0.0) {
+                // Union grid
+                build_grid(*tree, inp_a.real());
+                build_grid(*tree, inp_b.real());
+                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), 0);
+            } else {
+                // Adaptive grid
+                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), -1, absPrec, useMaxNorms);
+            }
+        }
+        vec.push_back(std::make_tuple(1.0, tree));
+    }
+    if (inp_a.hasImag() and inp_b.hasImag()) {
+        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
+        if (need_to_multiply) {
+            double coef = -1.0 * conj_a * conj_b;
+            if (prec < 0.0) {
+                // Union grid
+                build_grid(*tree, inp_a.imag());
+                build_grid(*tree, inp_b.imag());
+                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), 0);
+            } else {
+                // Adaptive grid
+                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), -1, absPrec, useMaxNorms);
+            }
+        }
+        vec.push_back(std::make_tuple(1.0, tree));
+    }
+
+    if (vec.size() > 0) {
+        if (out.hasReal()) {
+            if (need_to_multiply) out.real().clear();
+        } else {
+            // All sharing procs must allocate
+            out.alloc(NUMBER::Real);
+        }
+    }
+
+    if (need_to_multiply) {
+        if (vec.size() == 1) {
+            FunctionTree<3> &func_0 = get_func(vec, 0);
+            copy_grid(out.real(), func_0);
+            copy_func(out.real(), func_0);
+        } else if (vec.size() == 2) {
+            build_grid(out.real(), vec);
+            mrcpp::add(prec, out.real(), vec, 0);
+        } else if (out.hasReal()) {
+            out.real().setZero();
+        }
+    }
+    // The temporaries are allocated on every rank, so free them on every rank (not only where we multiplied).
+    clear(vec, true);
+    mpi::share_function(out, 0, 9191, mpi::comm_share);
+}
+
+/** @brief out = Im(inp_a * inp_b), taking the conjugate flags of the inputs into account
+ * prec < 0 multiplies on the union grid of the two factors, otherwise the product grid is built adaptively.
+ */
+void cplxfunc::multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
+    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
+    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
+    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
+
+    FunctionTreeVector<3> vec;
+    if (inp_a.hasReal() and inp_b.hasImag()) {
+        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
+        if (need_to_multiply) {
+            double coef = conj_b;
+            if (prec < 0.0) {
+                // Union grid
+                build_grid(*tree, inp_a.real());
+                build_grid(*tree, inp_b.imag());
+                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), 0);
+            } else {
+                // Adaptive grid
+                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), -1, absPrec, useMaxNorms);
+            }
+        }
+        vec.push_back(std::make_tuple(1.0, tree));
+    }
+    if (inp_a.hasImag() and inp_b.hasReal()) {
+        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
+        if (need_to_multiply) {
+            double coef = conj_a;
+            if (prec < 0.0) {
+                // Union grid
+                build_grid(*tree, inp_a.imag());
+                build_grid(*tree, inp_b.real());
+                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), 0);
+            } else {
+                // Adaptive grid
+                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), -1, absPrec, useMaxNorms);
+            }
+        }
+        vec.push_back(std::make_tuple(1.0, tree));
+    }
+
+    if (vec.size() > 0) {
+        if (out.hasImag()) {
+            if (need_to_multiply) out.imag().clear();
+        } else {
+            // All sharing procs must allocate
+            out.alloc(NUMBER::Imag);
+        }
+    }
+
+    if (need_to_multiply) {
+        if (vec.size() == 1) {
+            FunctionTree<3> &func_0 = get_func(vec, 0);
+            copy_grid(out.imag(), func_0);
+            copy_func(out.imag(), func_0);
+        } else if (vec.size() == 2) {
+            build_grid(out.imag(), vec);
+            mrcpp::add(prec, out.imag(), vec, 0);
+        } else if (out.hasImag()) {
+            out.imag().setZero();
+        }
+    }
+    // The temporary trees are allocated on every process, also on those that skip the multiplication: free them everywhere
+    clear(vec, true);
+    mpi::share_function(out, 0, 9292, mpi::comm_share);
+}
+
+namespace mpifuncvec {
+
+
+/** @brief Make a linear combination of functions: Psi_i = sum_j Phi_j * U(j, i)
+ *
+ * @param Phi: input functions (N of them), U: complex coefficients with at least N rows and M columns
+ * @param Psi: output functions (M of them), may be the same vector as Phi
+ * @param prec: threshold on the wavelet norm of the rotated nodes; prec < 0 keeps all nodes of the union grid
+ *
+ * Uses "local" representation: one node at a time, all functions are transformed by a dense matrix multiplication.
+ */
+void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec) {
+
+    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
+    // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
+    // MPI version does not use OMP yet, Serial version uses OMP
+    // size of input is N, size of output is M
+    int N = Phi.size();
+    int M = Psi.size();
+    if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
+    if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
+
+    // 1) make union tree without coefficients
+    FunctionTree<3> refTree(*Phi.vecMRA);
+    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+    std::vector<double> scalefac_ref;
+    std::vector<double *> coeffVec_ref; // not used!
+    std::vector<int> indexVec_ref;      // serialIx of the nodes
+    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
+    int max_ix;
+    // get a list of all nodes in union tree, identified by their serialIx indices
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // 2) We work with real numbers only. Make real blocks for U matrix
+    bool UhasReal = false;
+    bool UhasImag = false;
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            if (std::abs(U(i, j).real()) > 10*MachineZero) UhasReal = true;
+            if (std::abs(U(i, j).imag()) > 10*MachineZero) UhasImag = true;
+        }
+    }
+
+    IntVector PsihasReIm = IntVector::Zero(2); // [0]/[1] nonzero if any input function has a real/imag part
+    for (int j = 0; j < N; j++) {
+        if (!mpi::my_orb(j)) continue;
+        if (Phi[j].hasReal()) PsihasReIm[0] = 1; // accumulate: one owned function with a real part is enough
+        if (Phi[j].hasImag()) PsihasReIm[1] = 1; // accumulate: one owned function with an imag part is enough
+    }
+    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
+    if (not PsihasReIm[0] and not PsihasReIm[1]) {
+        return; // do nothing
+    }
+
+    bool makeReal = (UhasReal and PsihasReIm[0]) or (UhasImag and PsihasReIm[1]);
+    bool makeImag = (UhasReal and PsihasReIm[1]) or (UhasImag and PsihasReIm[0]);
+
+    for (int j = 0; j < M; j++) {
+        if (!mpi::my_orb(j)) continue;
+        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
+        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
+    }
+
+    if (not makeReal and not makeImag) { return; }
+
+    int Neff = N;               // effective number of input orbitals
+    int Meff = M;               // effective number of output orbitals
+    if (makeImag) Neff = 2 * N; // Imag and Real treated independently. We always use real part of U
+    if (makeImag) Meff = 2 * M; // Imag and Real treated independently. We always use real part of U
+
+    IntVector conjMat = IntVector::Zero(Neff);
+    for (int j = 0; j < Neff; j++) {
+        if (!mpi::my_orb(j % N)) continue;
+        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
+    }
+    mpi::allreduce_vector(conjMat, mpi::comm_wrk);
+
+    // we make a real matrix = U,  but organized as one or four real blocks
+    // out_r = U_rr*in_r - U_ir*in_i*conjMat
+    // out_i = U_ri*in_r - U_ii*in_i*conjMat
+    // the first index of U is the one used on input Phi
+    DoubleMatrix Ureal(Neff, Meff); // four blocks, for rr ri ir ii
+    for (int j = 0; j < Neff; j++) {
+        for (int i = 0; i < Meff; i++) {
+            double sign = 1.0;
+            if (j < N and i < M) {
+                // real U applied on real Phi
+                Ureal(j, i) = U.real()(j % N, i % M);
+            } else if (j >= N and i >= M) {
+                // real U applied on imag Phi
+                Ureal(j, i) = conjMat[j] * U.real()(j % N, i % M);
+            } else if (j < N and i >= M) {
+                // imag U applied on real Phi
+                Ureal(j, i) = U.imag()(j % N, i % M);
+            } else {
+                // imag U applied on imag Phi
+                Ureal(j, i) = -1.0 * conjMat[j] * U.imag()(j % N, i % M);
+            }
+        }
+    }
+
+    // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
+    BankAccount nodesPhi;             // to put the original nodes
+    BankAccount nodesRotated;         // to put the rotated nodes
+
+    // used for serial only:
+    std::vector<std::vector<double *>> coeffVec(Neff);
+    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+                                                    // orbital given the node index in the reference tree
+    if (serial) {
+
+        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<double> scalefac;
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j]) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j + N]) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+
+        // send own nodes to bank, identifying them through the serialIx of refTree
+        mpifuncvec::save_nodes(Phi, refTree, nodesPhi);
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+    }
+
+    // 4) rotate all the nodes
+    IntMatrix split_serial;                             // in the serial case all split are stored in one array
+    std::vector<std::vector<double *>> coeffpVec(Meff); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::map<int, int>> ix2coef(Meff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    int csize;                                          // size of the current coefficients (different for roots and branches)
+    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+    // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    if (serial) {
+        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
+        split_serial.resize(Meff, max_n); // not used in the MPI case
+        for (int n = 0; n < max_n; n++) {
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            ix2coef_ref[node_ix] = n;
+            for (int i = 0; i < Meff; i++) split_serial(i, n) = 1;
+        }
+
+        std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
+
+        // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
+        // NB: the n must be traversed approximately in right order: Thread n may have to wait until some other preceding
+        // n is finished.
+#pragma omp parallel for schedule(dynamic)
+        for (int n = 0; n < max_n; n++) {
+            int csize;
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n
+            std::vector<int> orbjVec; // to remember which orbital correspond to each orbVec.size();
+            if (node2orbVec[node_ix].size() <= 0) continue;
+            csize = sizecoeffW;
+            if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbjVec.push_back(j);
+            }
+
+            // 4b) make a list of rotated orbitals needed for this node
+            // OMP must wait until parent is ready
+            while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) {
+#pragma omp flush
+            };
+
+            std::vector<int> orbiVec;
+            for (int i = 0; i < Meff; i++) { // loop over all rotated orbitals
+                if (not makeReal and i < M) continue;
+                if (not makeImag and i >= M) continue;
+                if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4c) rotate this node
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
+            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = Ureal(orbjVec[j], orbiVec[i]); }
+            }
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+            // HERE IT HAPPENS!
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 4d) store and make rotated node pointers
+            // for now we allocate in buffer, in future could be directly allocated in the final trees
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // make all norms:
+            for (int i = 0; i < orbiVec.size(); i++) {
+                // check if parent must be split
+                if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) {
+                    // mark this node for this orbital for later split
+#pragma omp critical
+                    {
+                        ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size();
+                        coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers
+                    }
+                    // check norms for split
+                    double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
+                    int kstart = 0;
+                    if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
+                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
+                    if (thres < wnorm or prec < 0)
+                        split_serial(orbiVec[i], n) = 1;
+                    else
+                        split_serial(orbiVec[i], n) = 0;
+                } else {
+                    ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used
+                    split_serial(orbiVec[i], n) = 0;          // do not split if parent does not need to be split
+                }
+            }
+            nodeReady[n] = 1;
+#pragma omp critical
+            {
+                // this ensures that rotatedCoeff is not deleted, when getting out of scope
+                rotatedCoeffVec.push_back(std::move(rotatedCoeff));
+            }
+        }
+    } else { // MPI case
+
+        // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
+        std::vector<double> split(Meff, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibilty with bank
+        std::vector<double> needsplit(Meff, 1.0); // which orbitals need splitting
+        BankAccount nodeSplits;
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+
+        // NB: the coefficient block is allocated for each node inside the loop, with the size matching the node type
+        max_ix++; // largest node index + 1. to store rotated orbitals with different id
+        TaskManager tasks(max_n);
+        for (int nn = 0; nn < max_n; nn++) {
+            int n = tasks.next_task();
+            if (n < 0) break;
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // 4a) make list of orbitals that should split the parent node, i.e. include this node
+            int parentid = parindexVec_ref[n];
+            if (parentid == -1) {
+                // root node, split if output needed
+                for (int i = 0; i < M; i++) {
+                    if (makeReal)
+                        split[i] = 1.0;
+                    else
+                        split[i] = -1.0;
+                }
+                for (int i = M; i < Meff; i++) { // imag parts of the rotated orbitals start at M (split has Meff entries)
+                    if (makeImag)
+                        split[i] = 1.0;
+                    else
+                        split[i] = -1.0;
+                }
+                csize = sizecoeff;
+            } else {
+                // note that it will wait until data is available
+                nodeSplits.get_data(parentid, Meff, split.data());
+                csize = sizecoeffW;
+            }
+            std::vector<int> orbiVec;
+            std::vector<int> orbjVec;
+            for (int i = 0; i < Meff; i++) {  // loop over rotated orbitals
+                if (split[i] < 0.0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4b) rotate this node
+            DoubleMatrix coeffBlock(csize, Neff); // largest possible used size
+            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
+            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
+
+            // chunk of U, with reorganized indices and separate blocks for real and imag:
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+
+            for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
+                    Un(j, i) = Ureal(orbjVec[j], orbiVec[i]);
+                }
+            }
+
+            // HERE IT HAPPENS
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 4c) find which orbitals need to further refine this node, and store rotated node (after each other while
+            // in cache).
+            for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals
+                needsplit[orbiVec[i]] = -1.0;          // default, do not split
+                // check if this node/orbital needs further refinement
+                double wnorm = 0.0;
+                int kwstart = csize - sizecoeffW; // do not include scaling
+                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
+                if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
+                nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
+            }
+            nodeSplits.put_data(indexVec_ref[n], Meff, needsplit.data());
+        }
+        mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
+    }
+
+    // 5) reconstruct trees using rotated nodes.
+
+    // only serial case can use OMP, because MPI cannot be used by threads
+    if (serial) {
+        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
+        // operation is writing the coefficient into the tree)
+
+#pragma omp parallel for schedule(static)
+        for (int j = 0; j < Meff; j++) {
+            if (coeffpVec[j].size()==0) continue;
+            if (j < M) {
+                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+            } else {
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+            }
+        }
+
+    } else { // MPI case
+
+        for (int j = 0; j < Meff; j++) {
+            if (not mpi::my_orb(j % M)) continue;
+            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
+            std::vector<double *> coeffpVec; //
+            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
+            int ix = 0;
+            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
+                std::vector<int> nodeidVec;
+                double *dataVec; // will be allocated by bank
+                nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
+                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
+                int shift = 0;
+                for (int n = 0; n < nodeidVec.size(); n++) {
+                    assert(nodeidVec[n] - max_ix >= 0);                // unrotated nodes have been deleted
+                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
+                    ix2coef[nodeidVec[n] - max_ix] = ix++;
+                    csize = sizecoeffW;
+                    if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff;
+                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
+                    shift += csize;
+                }
+            }
+            if (j < M) {
+                // Real part
+                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
+                Psi[j].real().clear();
+                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+            } else {
+                // Imag part
+                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
+                Psi[j % M].imag().clear();
+                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+            }
+            for (double *p : pointerstodelete) delete[] p;
+            pointerstodelete.clear();
+        }
+    }
+}
+
+
+// In-place variant: Phi is used both as input and as output of the rotation with U
+void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec) {
+    rotate(Phi, U, Phi, prec);
+}
+
+/** @brief Save all nodes in bank; identify them using serialIx from refTree
+ *
+ * @param Phi: functions to save; only the ones owned by this MPI rank are treated
+ * @param refTree: reference (union) tree, its serialIx are used as node identifiers
+ * @param account: bank account receiving the node coefficients
+ * @param sizes: if > 0, number of coefficients sent for every node, counted from the start of the node.
+ *               Otherwise only the wavelet coefficients are sent, except for root nodes where the
+ *               scaling coefficients are sent too.
+ *
+ * The real part of Phi[j] is stored with orbital id j, the imaginary part with orbital id j + N,
+ * where N is the number of functions in Phi. Both parts use the same (unshifted) node identifiers,
+ * so it is the orbital id that keeps real and imaginary nodes apart.
+ */
+void save_nodes(MPI_FuncVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
+    const int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    const int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+    const int N = Phi.size();
+    // work arrays, reused for every tree (filled by makeCoeffVector)
+    std::vector<double *> coeffVec;
+    std::vector<double> scalefac;
+    std::vector<int> indexVec;    // SerialIx of the node in refTree, negative if the node is not in refTree
+    std::vector<int> parindexVec; // SerialIx of the parent node, negative for root nodes
+    int max_ix;
+
+    // Sends the nodes of one tree (the real or the imaginary part of a function) to the bank,
+    // stored under the orbital id orb_id
+    auto send_tree = [&](FunctionTree<3> &tree, int orb_id) {
+        // make vector with all coef address and their index in the union grid
+        tree.makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        const int max_n = indexVec.size();
+        for (int i = 0; i < max_n; i++) {
+            if (indexVec[i] < 0) continue; // nodes that are not in refTree
+            if (sizes > 0) {               // fixed size
+                account.put_nodedata(orb_id, indexVec[i], sizes, coeffVec[i]);
+            } else {
+                // except for the root nodes, only wavelets are sent:
+                // the first (sizecoeff - csize) coefficients of the node are skipped
+                const int csize = (parindexVec[i] < 0) ? sizecoeff : sizecoeffW;
+                account.put_nodedata(orb_id, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
+            }
+        }
+    };
+
+    for (int j = 0; j < N; j++) {
+        if (not mpi::my_orb(j)) continue;
+        // Real parts keep the orbital id
+        if (Phi[j].hasReal()) send_tree(Phi[j].real(), j);
+        // Imaginary parts are considered as orbitals with an orbid shifted by N
+        if (Phi[j].hasImag()) send_tree(Phi[j].imag(), j + N);
+    }
+}
+
+/** @brief Multiply all orbitals with a function
+ *
+ * @param Phi: orbitals to multiply
+ * @param f  : function to multiply
+ *
+ * Computes the product of each orbital with a function
+ * in parallel using a local representation.
+ * Input trees are extended by one scale at most.
+ */
+MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec, ComplexFunction *Func, int nrefine, bool all) {
+
+    int N = Phi.size();
+    const int D = 3;
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
+
+    // 1a) extend grid where f is large (around nuclei)
+    // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree?
+
+    for (int i = 0; i < N; i++) {
+        if (!mpi::my_orb(i)) continue;
+        int irefine = 0;
+        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
+        irefine = 0;
+        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
+    }
+
+    // 1b) make union tree without coefficients
+    FunctionTree<D> refTree(*Phi.vecMRA);
+    // refine_grid(refTree, f); //to test
+    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
+
+    int kp1 = refTree.getKp1();
+    int kp1_d = refTree.getKp1_d();
+    int nCoefs = refTree.getTDim() * kp1_d;
+
+    IntVector PsihasReIm = IntVector::Zero(2);
+    for (int i = 0; i < N; i++) {
+        if (!mpi::my_orb(i)) continue;
+        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
+        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
+    }
+    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
+    MPI_FuncVector out(N);
+    MPI_FuncVector outtest(N);
+    if (not PsihasReIm[0] and not PsihasReIm[1]) {
+        return out; // do nothing
+    }
+
+    int Neff = N;
+    if (PsihasReIm[1]) Neff = 2 * N; // Imag and Real treated independently. We always treat real part of Psi
+
+    std::vector<double> scalefac_ref;
+    std::vector<double *> coeffVec_ref; // not used!
+    std::vector<int> indexVec_ref;      // serialIx of the nodes
+    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
+    std::vector<MWNode<D> *> refNodes;  // pointers to nodes
+    int max_ix;
+    // get a list of all nodes in union tree, identified by their serialIx indices
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes);
+    int max_n = indexVec_ref.size();
+    std::map<int, int> ix2n; // for a given serialIx, give index in vectors
+    for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn;
+
+    // 2a) send own nodes to bank, identifying them through the serialIx of refTree
+    BankAccount nodesPhi;        // to put the original nodes
+    BankAccount nodesMultiplied; // to put the multiplied nodes
+
+    // used for serial only:
+    std::vector<std::vector<double *>> coeffVec(Neff);
+    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+                                                    // orbital given the node index in the reference tree
+    if (serial) {
+        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<double> scalefac;
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Phi[j].hasReal()) {
+                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j]) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (Phi[j].hasImag()) {
+                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec[j + N]) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else {
+        mpifuncvec::save_nodes(Phi, refTree, nodesPhi, nCoefs);
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+    }
+
+    // 2b) save Func in bank and remove its coefficients
+    if (Func != nullptr and !serial) {
+        // put Func in local representation if not already done
+        if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); }
+    }
+
+    // 3) mutiply for each node
+    std::vector<std::vector<double *>> coeffpVec(Neff); // to put pointers to the multiplied coefficient for each orbital in serial case
+    std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
+    std::vector<std::map<int, int>> ix2coef(Neff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    DoubleVector NODEP = DoubleVector::Zero(nCoefs);
+    DoubleVector NODEF = DoubleVector::Zero(nCoefs);
+
+    if (serial) {
+#pragma omp parallel for schedule(dynamic)
+        for (int n = 0; n < max_n; n++) {
+            MWNode<D> node(*(refNodes[n]), false);
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+
+            // 3a) make values for f at this node
+            // 3a1) get coordinates of quadrature points for this node
+            Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs);
+            double fval[nCoefs];
+            Coord<D> r;
+            double *originalCoef = nullptr;
+            MWNode<3> *Fnode = nullptr;
+            if (Func == nullptr) {
+                node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+                for (int j = 0; j < nCoefs; j++) {
+                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                    fval[j] = f.evalf(r);
+                }
+            } else {
+                Fnode = Func->real().findNode(node.getNodeIndex());
+                if (Fnode == nullptr) {
+                    node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+                    for (int j = 0; j < nCoefs; j++) {
+                        for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                        fval[j] = f.evalf(r);
+                    }
+                } else {
+                    originalCoef = Fnode->getCoefs();
+                    for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j];
+                    Fnode->attachCoefs(fval); // note that each thread has its own copy
+                    Fnode->mwTransform(Reconstruction);
+                    Fnode->cvTransform(Forward);
+                }
+            }
+            DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size());
+            int i = 0;
+            // 3b) fetch all orbitals at this node
+            std::vector<int> orbjVec;            // to remember which orbital correspond to each orbVec.size();
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                orbjVec.push_back(j);
+                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k];
+                // 3c) transform to grid
+                node.attachCoefs(&(multipliedCoeff(0, i)));
+                node.mwTransform(Reconstruction);
+                node.cvTransform(Forward);
+                // 3d) multiply
+                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication?
+                // 3e) transform back to mw
+                node.cvTransform(Backward);
+                node.mwTransform(Compression);
+                i++;
+            }
+            if (Func != nullptr and originalCoef != nullptr) {
+                // restablish original values
+                Fnode->attachCoefs(originalCoef);
+            }
+
+            // 3f) save multiplied nodes
+            for (int i = 0; i < orbjVec.size(); i++) {
+#pragma omp critical
+                {
+                    ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size();
+                    coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers
+                }
+            }
+#pragma omp critical
+            {
+                // this ensures that multipliedCoeff is not deleted, when getting out of scope
+                multipliedCoeffVec.push_back(std::move(multipliedCoeff));
+            }
+            node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
+        }
+    } else {
+        // MPI
+        int count1 = 0;
+        int count2 = 0;
+        TaskManager tasks(max_n);
+        for (int nn = 0; nn < max_n; nn++) {
+            int n = tasks.next_task();
+            if (n < 0) break;
+            MWNode<D> node(*(refNodes[n]), false);
+            // 3a) make values for f
+            // 3a1) get coordinates of quadrature points for this node
+            Eigen::MatrixXd pts;           // Eigen::Zero(D, nCoefs);
+            node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
+            double fval[nCoefs];
+            Coord<D> r;
+            MWNode<D> Fnode(*(refNodes[n]), false);
+            if (Func == nullptr) {
+                for (int j = 0; j < nCoefs; j++) {
+                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
+                    fval[j] = f.evalf(r);
+                }
+            } else {
+                int nIdx = Func->real().getIx(node.getNodeIndex());
+                count1++;
+                if (nIdx < 0) {
+                    // use the function f instead of Func
+                    count2++;
+                    for (int j = 0; j < nCoefs; j++) {
+                        for (int d = 0; d < D; d++) r[d] = pts(d, j);
+                        fval[j] = f.evalf(r);
+                    }
+                } else {
+                    Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank
+                    Fnode.attachCoefs(fval);
+                    Fnode.mwTransform(Reconstruction);
+                    Fnode.cvTransform(Forward);
+                }
+            }
+
+            // 3b) fetch all orbitals at this node
+            DoubleMatrix coeffBlock(nCoefs, Neff); // largest possible used size
+            std::vector<int> orbjVec;
+            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
+            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
+            DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size());
+            // 3c) transform to grid
+            for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ?
+                // TODO: select only nodes that are end nodes?
+                node.attachCoefs(coeffBlock.col(j).data());
+                node.mwTransform(Reconstruction);
+                node.cvTransform(Forward);
+                // 3d) multiply
+                double *coefs = node.getCoefs();
+                for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i];
+                // 3e) transform back to mw
+                node.cvTransform(Backward);
+                node.mwTransform(Compression);
+                // 3f) save multiplied nodes
+                nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs);
+            }
+            node.attachCoefs(nullptr);  // to avoid deletion of valid multipliedCoeff by destructor
+            Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
+        }
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 5) reconstruct trees using multiplied nodes.
+
+    // only serial case can use OMP, because MPI cannot be used by threads
+    if (serial) {
+        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
+        // operation is writing the coefficient into the tree)
+
+#pragma omp parallel for schedule(static)
+        for (int j = 0; j < Neff; j++) {
+            if (j < N) {
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(NUMBER::Real);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    // 6) reconstruct trees from end nodes
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
+                }
+            } else {
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(NUMBER::Imag);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                    out[j].imag().mwTransform(BottomUp);
+                    out[j].imag().calcSquareNorm();
+                }
+            }
+        }
+    } else {
+        for (int j = 0; j < Neff; j++) {
+            if (not mpi::my_orb(j % N) and not all) continue;
+            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
+            std::vector<double *> coeffpVec; //
+            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx in refTree
+            int ix = 0;
+            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+
+            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
+                std::vector<int> nodeidVec;
+                double *dataVec; // will be allocated by bank
+                nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank);
+                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
+                int shift = 0;
+                for (int n = 0; n < nodeidVec.size(); n++) {
+                    assert(nodeidVec[n] - max_ix >= 0);                // unmultiplied nodes have been deleted
+                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
+                    ix2coef[nodeidVec[n] - max_ix] = ix++;
+                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
+                    shift += nCoefs;
+                }
+            }
+            if (j < N) {
+                if (Phi[j].hasReal()) {
+                    out[j].alloc(NUMBER::Real);
+                    out[j].real().clear();
+                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    // 6) reconstruct trees from end nodes
+                    out[j].real().mwTransform(BottomUp);
+                    out[j].real().calcSquareNorm();
+                    out[j].real().resetEndNodeTable();
+                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
+                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
+                }
+            } else {
+                if (Phi[j % N].hasImag()) {
+                    out[j % N].alloc(NUMBER::Imag);
+                    out[j % N].imag().clear();
+                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    out[j % N].imag().mwTransform(BottomUp);
+                    out[j % N].imag().calcSquareNorm();
+                    // out[j % N].imag().crop(prec, 1.0, false);
+                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
+                }
+            }
+
+            for (double *p : pointerstodelete) delete[] p;
+            pointerstodelete.clear();
+        }
+    }
+    return out;
+}
+
+/** @brief Element-wise dot products result_i = <Bra_i|Ket_i>
+ *
+ * @param Bra: bra functions, N = Bra.size()
+ * @param Ket: ket functions; at least N are required, entries beyond N are ignored
+ * @returns vector of length N, all-reduced over mpi::comm_wrk
+ *
+ * MSG_ABORT is raised if Ket is shorter than Bra, or if Bra[i] and Ket[i] do
+ * not have the same ownership.
+ *
+ * MPI: Bra[i] is freed on the ranks for which my_orb(i) is false.
+ */
+ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
+    // Ket[i] is read for every i < Bra.size(): a shorter Ket would be indexed out of bounds
+    if (Ket.size() < Bra.size()) MSG_ABORT("Size mismatch: Ket is shorter than Bra");
+
+    int N = Bra.size();
+    ComplexVector result = ComplexVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        // no communication is done here: both functions of a pair must have the same ownership
+        if (my_orb(Bra[i]) != my_orb(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
+        result[i] = cplxfunc::dot(Bra[i], Ket[i]);
+        if (not mrcpp::mpi::my_orb(i)) Bra[i].free(NUMBER::Total);
+    }
+    // combine the per-rank contributions into the full vector on every rank
+    mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
+    return result;
+}
+
+/** @brief Löwdin orthonormalization matrix S^(-1/2)
+ *
+ * @param Phi: functions to orthonormalize
+ * @returns the inverse square root of the overlap matrix of Phi
+ *
+ * The overlap matrix is obtained from mpifuncvec::calc_overlap_matrix(Phi) and
+ * raised to the power -1/2 with math_utils::hermitian_matrix_pow.
+ */
+ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi) {
+    const double exponent = -1.0 / 2.0;
+    ComplexMatrix overlap = mpifuncvec::calc_overlap_matrix(Phi);
+    return math_utils::hermitian_matrix_pow(overlap, exponent);
+}
+
+/** @brief Compute the overlap matrix S_ij = <BraKet_i|BraKet_j> of a set of functions with itself
+ *
+ * @param BraKet: functions used both as bra and as ket, N = BraKet.size()
+ * @returns the N x N complex overlap matrix
+ *
+ * The real and imaginary parts of the functions are handled as 2N real
+ * functions. For every node of the union grid the coefficient dot products
+ * between all functions present at that node are accumulated into a real
+ * 2N x 2N matrix (blocks rr, ri, ir, ii). Pairs with one Alpha and one Beta
+ * spin are skipped. The blocks are finally combined into the complex matrix
+ * using the conjugate() flag of each function; only the lower triangle is
+ * combined explicitly, the upper triangle is filled by conjugation.
+ *
+ * Serial (mpi::wrk_size == 1): coefficients are accessed through pointers
+ * collected with makeCoeffVector and the node loop is OpenMP parallel.
+ * MPI: coefficients are exchanged through a BankAccount, the nodes are
+ * distributed round-robin over the ranks and the result is all-reduced.
+ */
+ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet) {
+    // NB: must be spinseparated at this point!
+
+    int N = BraKet.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, N);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
+
+    // 1) make union tree without coefficients
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();        // all coefficients of a node
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); // all but the first getKp1_d() coefficients
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double> scalefac;
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    int max_ix;                       // largest index value (not used here)
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+                                                     // the orbital given the node index in the reference tree
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+    mrcpp::BankAccount nodesBraKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        // Real parts are stored at index j, imaginary parts at index j + N.
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(BraKet, refTree, nodesBraKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+
+#pragma omp parallel for schedule(dynamic) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; // round-robin distribution of nodes over ranks
+        int csize;
+        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+        std::vector<int> orbVec;       // identifies which orbitals use this node
+        // nodes without parent (negative parent index) use all coefficients, the others skip the first part
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+
+        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+        if (serial) {
+            // Only read-only lookups (find/at) are allowed here: std::map::operator[] inserts
+            // missing keys, which is a data race when done concurrently inside this parallel loop.
+            const auto nodeOrbsIt = node2orbVec.find(node_ix);
+            if (nodeOrbsIt == node2orbVec.end() or nodeOrbsIt->second.empty()) continue; // no orbital uses this node
+            const std::vector<int> &nodeOrbs = nodeOrbsIt->second;
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, nodeOrbs.size());
+            for (int j : nodeOrbs) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j].at(node_ix); // key was stored together with the node2orbVec entry
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbVec.push_back(j);
+            }
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
+                            continue;
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
+                            continue;
+                        // several threads may accumulate into the same element
+                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
+                        double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                        Srealij += Stempij;
+                    }
+                }
+            }
+        } else { // MPI case
+            DoubleMatrix coeffBlock(csize, 2 * N); // largest possible used size
+            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size()); // keep only used part
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
+                            continue;
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
+                            continue;
+                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    // conjMat[i] is -1 if function i is flagged as conjugate, +1 otherwise; set by the owner and shared with all ranks
+    IntVector conjMat = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_orb(BraKet[i])) continue;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
+
+    // combine the four real blocks into the complex matrix
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j <= i; j++) {
+            S.real()(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N);
+            S.imag()(i, j) = conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
+            if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact Hermitian symmetry
+        }
+    }
+
+    // Assumes linearity: result is sum of all nodes contributions
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+
+/** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
+ *
+ * @param Bra: bra functions, N = Bra.size()
+ * @param Ket: ket functions, M = Ket.size()
+ * @returns the N x M complex overlap matrix
+ *
+ * The reference grid is the union grid of the Bra functions only: a node
+ * that is present in Ket but not in Bra gives no contribution. The real and
+ * imaginary parts are handled as 2N (resp. 2M) real functions whose
+ * node-by-node coefficient dot products are accumulated into a real
+ * 2N x 2M matrix (blocks rr, ri, ir, ii). Pairs with one Alpha and one Beta
+ * spin are skipped. The blocks are finally combined into the complex matrix
+ * using the conjugate() flag of each function.
+ *
+ * Serial (mpi::wrk_size == 1): coefficients are accessed through pointers
+ * collected with makeCoeffVector and the node loop is OpenMP parallel.
+ * MPI: coefficients are exchanged through BankAccounts, the nodes are
+ * distributed round-robin over the ranks and the result is all-reduced.
+ */
+ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
+    mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
+
+    MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
+
+    int N = Bra.size();
+    int M = Ket.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, M);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * M); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+
+    // 1) make union tree without coefficients for Bra (supposed smallest)
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk);
+    // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero.
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();        // all coefficients of a node
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d(); // all but the first getKp1_d() coefficients
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    std::vector<double> scalefac;
+    int max_ix;
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+    max_ix++;
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVecBra(2 * N);
+    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeBra(2 * N); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    std::vector<std::vector<double *>> coeffVecKet(2 * M);
+    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeKet(2 * M); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    mrcpp::BankAccount nodesBra;
+    mrcpp::BankAccount nodesKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        // Real parts are stored at index j, imaginary parts at index j + N (Bra) or j + M (Ket).
+        // TODO? : do not copy coefficients, but use directly the pointers
+        // could OMP parallelize, but is fast anyway
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (Bra[j].hasReal()) {
+                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeBra[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecBra[ix].push_back(j);
+                }
+            }
+            if (Bra[j].hasImag()) {
+                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeBra[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecBra[ix].push_back(j + N);
+                }
+            }
+        }
+        for (int j = 0; j < M; j++) {
+            if (Ket[j].hasReal()) {
+                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeKet[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecKet[ix].push_back(j);
+                }
+            }
+            if (Ket[j].hasImag()) {
+                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2nodeKet[j + M][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVecKet[ix].push_back(j + M);
+                }
+            }
+        }
+
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(Bra, refTree, nodesBra);
+        save_nodes(Ket, refTree, nodesKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+    // For some unknown reason the h2_mag_lda test sometimes fails when schedule(dynamic) is chosen
+    // NOTE(review): the serial branch used to call std::map::operator[] inside this loop, which can insert
+    // keys concurrently (data race); that may have been the cause - confirm before switching the schedule.
+#pragma omp parallel for schedule(static) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue; // round-robin distribution of nodes over ranks
+        int csize;
+        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
+        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
+        // nodes without parent (negative parent index) use all coefficients, the others skip the first part
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+        if (serial) {
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+
+            // Only read-only lookups (find/at) are allowed here: std::map::operator[] inserts
+            // missing keys, which is a data race when done concurrently inside this parallel loop.
+            // A node used by no Bra or by no Ket orbital gives no contribution and is skipped.
+            const auto braOrbsIt = node2orbVecBra.find(node_ix);
+            const auto ketOrbsIt = node2orbVecKet.find(node_ix);
+            if (braOrbsIt == node2orbVecBra.end() or ketOrbsIt == node2orbVecKet.end()) continue;
+            const std::vector<int> &braOrbs = braOrbsIt->second;
+            const std::vector<int> &ketOrbs = ketOrbsIt->second;
+            if (braOrbs.empty() or ketOrbs.empty()) continue;
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            DoubleMatrix coeffBlockBra(csize, braOrbs.size());
+            DoubleMatrix coeffBlockKet(csize, ketOrbs.size());
+            if (parindexVec_ref[n] < 0) shift = 0;
+
+            for (int j : braOrbs) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeBra[j].at(node_ix); // key was stored together with the node2orbVecBra entry
+                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
+                orbVecBra.push_back(j);
+            }
+            for (int j : ketOrbs) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeKet[j].at(node_ix); // key was stored together with the node2orbVecKet entry
+                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
+                orbVecKet.push_back(j);
+            }
+
+            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                for (int i = 0; i < orbVecBra.size(); i++) {
+                    for (int j = 0; j < orbVecKet.size(); j++) {
+                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
+                            continue;
+                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
+                            continue;
+                        // must ensure that threads are not competing
+                        double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
+                        double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                        Srealij += Stempij;
+                    }
+                }
+            }
+        } else {
+
+            DoubleMatrix coeffBlockBra(csize, 2 * N); // largest possible used size
+            DoubleMatrix coeffBlockKet(csize, 2 * M); // largest possible used size
+            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
+            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
+            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size()); // keep only used part
+                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size()); // keep only used part
+                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                for (int i = 0; i < orbVecBra.size(); i++) {
+                    for (int j = 0; j < orbVecKet.size(); j++) {
+                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
+                            continue;
+                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
+                            continue;
+                        Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    // conjMat*[i] is -1 if function i is flagged as conjugate, +1 otherwise; set by the owner and shared with all ranks
+    IntVector conjMatBra = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_orb(Bra[i])) continue;
+        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
+    IntVector conjMatKet = IntVector::Zero(M);
+    for (int i = 0; i < M; i++) {
+        if (!mrcpp::mpi::my_orb(Ket[i])) continue;
+        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
+
+    // combine the four real blocks into the complex matrix
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            S.real()(i, j) = Sreal(i, j) + conjMatBra[i] * conjMatKet[j] * Sreal(i + N, j + M);
+            S.imag()(i, j) = conjMatKet[j] * Sreal(i, j + M) - conjMatBra[i] * Sreal(i + N, j);
+        }
+    }
+
+    // 4) collect results from all MPI. Linearity: result is sum of all node contributions
+
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+
+/** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
+ *
+ */
+DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet) {
+    int N = BraKet.size();
+    DoubleMatrix S = DoubleMatrix::Zero(N, N);
+    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
+
+    // 1) make union tree without coefficients
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double> scalefac;
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    int max_ix;                       // largest index value (not used here)
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+                                                     // the orbital given the node index in the reference tree
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+    mrcpp::BankAccount nodesBraKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            if (BraKet[j].hasReal()) {
+                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+                }
+            }
+            if (BraKet[j].hasImag()) {
+                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
+                // make a map that gives j from indexVec
+                int orb_node_ix = 0;
+                for (int ix : indexVec) {
+                    orb2node[j + N][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j + N);
+                }
+            }
+        }
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(BraKet, refTree, nodesBraKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+
+    int ibank = 0;
+#pragma omp parallel for schedule(dynamic) if (serial)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        int csize;
+        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+        std::vector<int> orbVec;       // identifies which orbitals use this node
+        if (serial and node2orbVec[node_ix].size() <= 0) continue;
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+        if (serial) {
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbVec.push_back(j);
+            }
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock = coeffBlock.cwiseAbs();
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
+                            continue;
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
+                            continue;
+                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
+                        double &Stempij = S_temp(i, j);
+#pragma omp atomic
+                        Srealij += Stempij;
+                    }
+                }
+            }
+        } else { // MPI case
+            DoubleMatrix coeffBlock(csize, 2 * N);
+            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+            if (orbVec.size() > 0) {
+                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                coeffBlock = coeffBlock.cwiseAbs();
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
+                            continue;
+                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
+                            continue;
+                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
+                }
+            }
+        }
+    }
+
+    IntVector conjMat = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_orb(i)) continue;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
+
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j <= i; j++) {
+            S(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N) + conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
+            S(j, i) = S(i, j);
+        }
+    }
+
+    // Assumes linearity: result is sum of all nodes contributions
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+    return S;
+}
+
+/** @brief Orthogonalize the functions in Bra against all orbitals in Ket
+ *
+ * With S = calc_overlap_matrix(Bra, Ket), Ket is rotated by rmat(i,j) = -conj(S(j,i)) / Ket[i].squaredNorm()
+ * using precision prec / Ket.size(); rotatedKet[j] is then added to every Bra[j] that passes mpi::my_orb.
+ */
+void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
+    // TODO: generalize for cases where Ket functions are not orthogonal to each other?
+    const int N = Bra.size();
+    const int M = Ket.size();
+    if (N == 0 or M == 0) return; // nothing to project out; also keeps prec / M well defined
+    ComplexMatrix S = mpifuncvec::calc_overlap_matrix(Bra, Ket);
+    DoubleVector Ketnorms = DoubleVector::Zero(M);
+    for (int i = 0; i < M; i++) {
+        if (mpi::my_orb(Ket[i])) Ketnorms(i) = Ket[i].squaredNorm();
+    }
+    mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
+    ComplexMatrix rmat = ComplexMatrix::Zero(M, N);
+    for (int j = 0; j < N; j++) {
+        for (int i = 0; i < M; i++) rmat(i, j) = 0.0 - S.conjugate()(j, i) / Ketnorms(i);
+    }
+    MPI_FuncVector rotatedKet(N);
+    mpifuncvec::rotate(Ket, rmat, rotatedKet, prec / M);
+    for (int j = 0; j < N; j++) {
+        if (mpi::my_orb(Bra[j])) Bra[j].add(1.0, rotatedKet[j]);
+    }
+}
+} // namespace mpifuncvec
+} // namespace mrcpp
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
new file mode 100644
index 000000000..c43d3475c
--- /dev/null
+++ b/src/utils/ComplexFunction.h
@@ -0,0 +1,199 @@
+#pragma once
+
+#include "functions/RepresentableFunction.h"
+#include "math_utils.h"
+#include "mpi_utils.h"
+#include "trees/FunctionTree.h"
+#include "trees/MultiResolutionAnalysis.h"
+#include <Eigen/Core>
+
+using namespace Eigen;
+
+using IntVector = Eigen::VectorXi;
+using DoubleVector = Eigen::VectorXd;
+using ComplexVector = Eigen::VectorXcd;
+
+using IntMatrix = Eigen::MatrixXi;
+using DoubleMatrix = Eigen::MatrixXd;
+using ComplexMatrix = Eigen::MatrixXcd;
+
+namespace mrcpp {
+
+// Forward declarations. MPI_FuncVector is declared inside mrcpp so that it names the class defined below.
+class BankAccount;
+class MPI_FuncVector;
+template <int D, typename T> class FunctionTree;
+template <int D> class MultiResolutionAnalysis;
+
+using ComplexDouble = std::complex<double>;
+namespace NUMBER {
+enum type { Total, Real, Imag };
+}
+namespace SPIN {
+enum type { Paired, Alpha, Beta };
+}
+
+struct FunctionData { // plain-data summary, filled by TreePtr::flushFuncData/flushMRAData and the ComplexFunction setters
+    int type{0}; ///< mra.getScalingBasis().getScalingType()
+    int order{1}; ///< mra.getOrder()
+    int scale{0}; ///< getScale() of the MRA world box
+    int depth{0}; ///< mra.getMaxDepth()
+    int boxes[3] = {0, 0, 0}; ///< world box size(d) for d = 0, 1, 2
+    int corner[3] = {0, 0, 0}; ///< world box corner index translation for d = 0, 1, 2
+    int real_size{0}; ///< getNChunksUsed() of the real tree, 0 when there is none
+    int imag_size{0}; ///< getNChunksUsed() of the imaginary tree, 0 when there is none
+    bool is_shared{false}; ///< the `share` flag passed to the TreePtr constructor
+    int spin{0}; ///< written by ComplexFunction::setSpin (presumably a SPIN::type value -- TODO confirm)
+    int occ{0}; ///< written by ComplexFunction::setOcc
+};
+
+/** Holder of the real and imaginary trees, the optional shared-memory blocks and the
+ *  FunctionData summary that ComplexFunction (a friend) reads and writes. */
+class TreePtr final {
+public:
+    explicit TreePtr(bool share) {
+        this->func_data.is_shared = share;
+        if (this->func_data.is_shared and mpi::share_size > 1) {
+            // Memory size in MB defined in input. Virtual memory, does not cost anything if not used.
+#ifdef MRCPP_HAS_MPI
+            this->shared_mem_re = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
+            this->shared_mem_im = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
+#endif
+        }
+    }
+
+    // The members below are owning raw pointers, so a copy would delete them twice.
+    TreePtr(const TreePtr &) = delete;
+    TreePtr &operator=(const TreePtr &) = delete;
+
+    ~TreePtr() {
+        // delete on a null pointer is a no-op, so no explicit checks are needed
+        delete this->shared_mem_re;
+        delete this->shared_mem_im;
+        delete this->re;
+        delete this->im;
+    }
+
+    friend class ComplexFunction;
+
+private:
+    FunctionData func_data;
+    mrcpp::SharedMemory<double> *shared_mem_re{nullptr};
+    mrcpp::SharedMemory<double> *shared_mem_im{nullptr};
+    mrcpp::FunctionTree<3, double> *re{nullptr}; ///< Real part of function
+    mrcpp::FunctionTree<3, double> *im{nullptr}; ///< Imaginary part of function
+
+    /// Refresh func_data: chunk counts of the trees (0 when a tree is absent) and the MRA parameters.
+    void flushFuncData() {
+        this->func_data.real_size = (this->re != nullptr) ? this->re->getNChunksUsed() : 0;
+        this->func_data.imag_size = (this->im != nullptr) ? this->im->getNChunksUsed() : 0;
+        // the imaginary tree is flushed last, so its MRA data is what remains when both parts exist
+        if (this->re != nullptr) flushMRAData(this->re->getMRA());
+        if (this->im != nullptr) flushMRAData(this->im->getMRA());
+    }
+
+    /// Copy scaling type, order, max depth and world-box scale/sizes/corner of mra into func_data.
+    void flushMRAData(const mrcpp::MultiResolutionAnalysis<3> &mra) {
+        const auto &box = mra.getWorldBox();
+        this->func_data.type = mra.getScalingBasis().getScalingType();
+        this->func_data.order = mra.getOrder();
+        this->func_data.depth = mra.getMaxDepth();
+        this->func_data.scale = box.getScale();
+        this->func_data.boxes[0] = box.size(0);
+        this->func_data.boxes[1] = box.size(1);
+        this->func_data.boxes[2] = box.size(2);
+        this->func_data.corner[0] = box.getCornerIndex().getTranslation(0);
+        this->func_data.corner[1] = box.getCornerIndex().getTranslation(1);
+        this->func_data.corner[2] = box.getCornerIndex().getTranslation(2);
+    }
+};
+
+class ComplexFunction { // handle around a shared TreePtr (real/imaginary trees + FunctionData) plus a conj flag and a rank
+public:
+    ComplexFunction(std::shared_ptr<TreePtr> funcptr);
+    ComplexFunction(const ComplexFunction &func);
+    ComplexFunction(int spin = 0, int occ = -1, int rank = -1, bool share = false);
+    ComplexFunction &operator=(const ComplexFunction &func);
+    ComplexFunction paramCopy() const;
+    bool isShared() const { return this->func_ptr->func_data.is_shared; }
+    bool hasReal() const { return (this->func_ptr->re == nullptr) ? false : true; } // true when a real tree is set
+    bool hasImag() const { return (this->func_ptr->im == nullptr) ? false : true; } // true when an imaginary tree is set
+    FunctionData &getFunctionData();
+    int occ() const { return this->func_ptr->func_data.occ; }
+    int spin() const { return this->func_ptr->func_data.spin; }
+    FunctionTree<3, double> &real() { return *this->func_ptr->re; } // no null check: call hasReal() first
+    FunctionTree<3, double> &imag() { return *this->func_ptr->im; } // no null check: call hasImag() first
+    const FunctionTree<3, double> &real() const { return *this->func_ptr->re; } // no null check: call hasReal() first
+    const FunctionTree<3, double> &imag() const { return *this->func_ptr->im; } // no null check: call hasImag() first
+    void release() { this->func_ptr.reset(); } // drops this handle's reference; the inline accessors dereference func_ptr and must not be used afterwards
+    bool conjugate() const { return this->conj; }
+    MultiResolutionAnalysis<3> *funcMRA = nullptr; // public data member, null by default; NOTE(review): presumably the MRA used by alloc() -- confirm
+    int getRank() const { return rank; }
+    void setRank(int rank) { (*this).rank = rank; } // (*this). is needed because the parameter shadows the member
+    void setOcc(int occ) { this->getFunctionData().occ = occ; }
+    void setSpin(int spin) { this->getFunctionData().spin = spin; }
+    ComplexFunction dagger();
+    virtual ~ComplexFunction() = default;
+
+    void alloc(int type, mrcpp::MultiResolutionAnalysis<3> *mra = nullptr);
+    void free(int type);
+
+    int getSizeNodes(int type) const;
+    int getNNodes(int type) const;
+
+    void setReal(mrcpp::FunctionTree<3, double> *tree);
+    void setImag(mrcpp::FunctionTree<3, double> *tree);
+
+    double norm() const;
+    double squaredNorm() const;
+    ComplexDouble integrate() const;
+
+    int crop(double prec);
+    void rescale(double c);
+    void rescale(ComplexDouble c);
+    void add(ComplexDouble c, ComplexFunction inp);
+    void absadd(ComplexDouble c, ComplexFunction inp);
+    char printSpin() const;
+
+protected:
+    bool conj{false}; // value returned by conjugate()
+    std::shared_ptr<mrcpp::TreePtr> func_ptr; // holder of the trees and FunctionData; null after release()
+    int rank = -1; // index in vector
+};
+
+namespace cplxfunc {
+void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
+ComplexDouble dot(ComplexFunction bra, ComplexFunction ket);
+ComplexDouble node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact);
+void deep_copy(ComplexFunction &out, ComplexFunction &inp);
+void add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec);
+void project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec);
+void project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec);
+void multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
+void multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
+void multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
+void multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
+void multiply(ComplexFunction &out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
+void linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec);
+} // namespace cplxfunc
+
+class MPI_FuncVector : public std::vector<ComplexFunction> { // NOTE: std::vector has no virtual destructor; never delete through a base pointer
+public:
+    MPI_FuncVector(int N = 0);
+    MultiResolutionAnalysis<3> *vecMRA = nullptr; ///< MRA of the stored functions; null until the constructor or a caller assigns it
+    void distribute();
+};
+
+namespace mpifuncvec {
+void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
+void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec = -1.0);
+void save_nodes(MPI_FuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
+MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
+ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
+ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi);
+ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet);
+ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
+DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet);
+void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket);
+} // namespace mpifuncvec
+} // namespace mrcpp

From e95e12860fc620185e5de49c839414615953d2ee Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Wed, 14 Aug 2024 15:02:18 +0200
Subject: [PATCH 24/38] dagger, serial rotate and overlap

---
 src/treebuilders/AdditionCalculator.h       |   38 +-
 src/treebuilders/MultiplicationCalculator.h |   42 +-
 src/treebuilders/SquareCalculator.h         |   42 +-
 src/treebuilders/add.cpp                    |   67 +-
 src/treebuilders/add.h                      |    9 +-
 src/treebuilders/apply.cpp                  |   36 +-
 src/treebuilders/grid.cpp                   |   10 +-
 src/treebuilders/multiply.cpp               |   72 +-
 src/treebuilders/multiply.h                 |   13 +-
 src/treebuilders/project.cpp                |    2 +-
 src/trees/FunctionNode.cpp                  |   29 +-
 src/trees/FunctionTree.cpp                  |   66 +-
 src/trees/FunctionTree.h                    |    9 +-
 src/trees/MWNode.h                          |    2 +
 src/trees/MWTree.h                          |    3 +
 src/utils/Bank.cpp                          |   56 +
 src/utils/Bank.h                            |    3 +
 src/utils/CompFunction.cpp                  | 1212 ++++++++++++-------
 src/utils/CompFunction.h                    |   81 +-
 src/utils/parallel.cpp                      |   38 +-
 20 files changed, 1230 insertions(+), 600 deletions(-)

diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h
index a7804a761..bc3ff5250 100644
--- a/src/treebuilders/AdditionCalculator.h
+++ b/src/treebuilders/AdditionCalculator.h
@@ -32,24 +32,46 @@ namespace mrcpp {
 
 template <int D, typename T> class AdditionCalculator final : public TreeCalculator<D, T> {
 public:
-    AdditionCalculator(const FunctionTreeVector<D, T> &inp)
-            : sum_vec(inp) {}
+    AdditionCalculator(const FunctionTreeVector<D, T> &inp, bool conjugate = false)
+        : sum_vec(inp),
+          conj(conjugate) {} // conj is XORed with each input tree's conjugate() flag in the ComplexDouble calcNode
 
 private:
     FunctionTreeVector<D, T> sum_vec;
+    bool conj;
 
-    void calcNode(MWNode<D, T> &node_o) override {
+    void calcNode(MWNode<D, double> &node_o) {
         node_o.zeroCoefs();
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        T *coefs_o = node_o.getCoefs();
+        double *coefs_o = node_o.getCoefs();
         for (int i = 0; i < this->sum_vec.size(); i++) {
-            T c_i = get_coef(this->sum_vec, i);
-            FunctionTree<D, T> &func_i = get_func(this->sum_vec, i);
+            double c_i = get_coef(this->sum_vec, i);
+            FunctionTree<D, double> &func_i = get_func(this->sum_vec, i);
             // This generates missing nodes
-            const MWNode<D, T> &node_i = func_i.getNode(idx);
-            const T *coefs_i = node_i.getCoefs();
+            const MWNode<D, double> &node_i = func_i.getNode(idx);
+            const double *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
+       }
+        node_o.setHasCoefs();
+        node_o.calcNorms();
+    }
+    void calcNode(MWNode<D, ComplexDouble> &node_o) {
+        node_o.zeroCoefs();
+        const NodeIndex<D> &idx = node_o.getNodeIndex();
+        ComplexDouble *coefs_o = node_o.getCoefs();
+        for (int i = 0; i < this->sum_vec.size(); i++) {
+            ComplexDouble c_i = get_coef(this->sum_vec, i);
+            FunctionTree<D, ComplexDouble> &func_i = get_func(this->sum_vec, i);
+            // This generates missing nodes
+            const MWNode<D, ComplexDouble> &node_i = func_i.getNode(idx);
+            const ComplexDouble *coefs_i = node_i.getCoefs();
+            int n_coefs = node_i.getNCoefs();
+            if (func_i.conjugate() xor conj) {
+               for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); }
+            } else {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
+            }
         }
         node_o.setHasCoefs();
         node_o.calcNorms();
diff --git a/src/treebuilders/MultiplicationCalculator.h b/src/treebuilders/MultiplicationCalculator.h
index 4f82756c2..29c9e2574 100644
--- a/src/treebuilders/MultiplicationCalculator.h
+++ b/src/treebuilders/MultiplicationCalculator.h
@@ -32,24 +32,26 @@ namespace mrcpp {
 
 template <int D, typename T> class MultiplicationCalculator final : public TreeCalculator<D, T> {
 public:
-    MultiplicationCalculator(const FunctionTreeVector<D, T> &inp)
-            : prod_vec(inp) {}
+    MultiplicationCalculator(const FunctionTreeVector<D, T> &inp, bool conjugate = false)
+            : prod_vec(inp),
+              conj(conjugate) {} // conj only affects the first factor (i == 0) in the ComplexDouble calcNode
 
 private:
     FunctionTreeVector<D, T> prod_vec;
+    bool conj;
 
-    void calcNode(MWNode<D, T> &node_o) override {
+    void calcNode(MWNode<D, double> &node_o) {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        T *coefs_o = node_o.getCoefs();
+        double *coefs_o = node_o.getCoefs();
         for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
         for (int i = 0; i < this->prod_vec.size(); i++) {
-            T c_i = get_coef(this->prod_vec, i);
-            FunctionTree<D, T> &func_i = get_func(this->prod_vec, i);
+            double c_i = get_coef(this->prod_vec, i);
+            FunctionTree<D, double> &func_i = get_func(this->prod_vec, i);
             // This generates missing nodes
-            MWNode<D, T> node_i = func_i.getNode(idx); // Copy node
+            MWNode<D, double> node_i = func_i.getNode(idx); // Copy node
             node_i.mwTransform(Reconstruction);
             node_i.cvTransform(Forward);
-            const T *coefs_i = node_i.getCoefs();
+            const double *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
         }
@@ -58,6 +60,30 @@ template <int D, typename T> class MultiplicationCalculator final : public TreeC
         node_o.setHasCoefs();
         node_o.calcNorms();
     }
+    void calcNode(MWNode<D, ComplexDouble> &node_o)  { // complex overload: coefficient-wise product of c_i times the transformed coefficients of every tree in prod_vec
+        const NodeIndex<D> &idx = node_o.getNodeIndex();
+        ComplexDouble *coefs_o = node_o.getCoefs();
+        for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; } // start from 1 so the factors can be multiplied in place
+        for (int i = 0; i < this->prod_vec.size(); i++) {
+            ComplexDouble c_i = get_coef(this->prod_vec, i);
+            FunctionTree<D, ComplexDouble> &func_i = get_func(this->prod_vec, i);
+            // This generates missing nodes
+            MWNode<D, ComplexDouble> node_i = func_i.getNode(idx); // Copy node
+            node_i.mwTransform(Reconstruction);
+            node_i.cvTransform(Forward);
+            const ComplexDouble *coefs_i = node_i.getCoefs();
+            int n_coefs = node_i.getNCoefs();
+           if (func_i.conjugate() xor (conj and i==0)) { // conjugate when exactly one holds: the tree's own flag, or conj on the first factor
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * std::conj(coefs_i[j]); }
+            } else {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
+            }
+        }
+        node_o.cvTransform(Backward); // undo the two transforms applied to the input copies, in reverse order
+        node_o.mwTransform(Compression);
+        node_o.setHasCoefs();
+        node_o.calcNorms();
+    }
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/SquareCalculator.h b/src/treebuilders/SquareCalculator.h
index e56b41cf0..179c97e87 100644
--- a/src/treebuilders/SquareCalculator.h
+++ b/src/treebuilders/SquareCalculator.h
@@ -31,27 +31,57 @@ namespace mrcpp {
 
 template <int D, typename T> class SquareCalculator final : public TreeCalculator<D, T> {
 public:
-    SquareCalculator(FunctionTree<D, T> &inp)
-            : func(&inp) {}
+    SquareCalculator(FunctionTree<D, T> &inp, bool conjugate = false)
+            : func(&inp),
+              conj(conjugate) {} // conj makes the ComplexDouble calcNode conjugate one of the two factors
 
 private:
     FunctionTree<D, T> *func;
+    bool conj;
 
-    void calcNode(MWNode<D, T> &node_o) override {
+    void calcNode(MWNode<D, double> &node_o) {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         int n_coefs = node_o.getNCoefs();
-        T *coefs_o = node_o.getCoefs();
+        double *coefs_o = node_o.getCoefs();
         // This generates missing nodes
-        MWNode<D, T> node_i = func->getNode(idx); // Copy node
+        MWNode<D, double> node_i = func->getNode(idx); // Copy node
         node_i.mwTransform(Reconstruction);
         node_i.cvTransform(Forward);
-        const T *coefs_i = node_i.getCoefs();
+        const double *coefs_i = node_i.getCoefs();
         for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
         node_o.cvTransform(Backward);
         node_o.mwTransform(Compression);
         node_o.setHasCoefs();
         node_o.calcNorms();
     }
+    void calcNode(MWNode<D, ComplexDouble> &node_o) {
+        const NodeIndex<D> &idx = node_o.getNodeIndex();
+        int n_coefs = node_o.getNCoefs();
+        ComplexDouble *coefs_o = node_o.getCoefs();
+        // This generates missing nodes
+        MWNode<D, ComplexDouble> node_i = func->getNode(idx); // Copy node
+        node_i.mwTransform(Reconstruction);
+        node_i.cvTransform(Forward);
+        const ComplexDouble *coefs_i = node_i.getCoefs();
+        // out = a * (conj ? conj(a) : a), where a is coefs_i, conjugated first when the tree is flagged conjugate
+        if (func->conjugate()) {
+            if (conj) {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * coefs_i[j]; }
+            } else {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * std::conj(coefs_i[j]); }
+            }
+        } else {
+            if (conj) {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * std::conj(coefs_i[j]); }
+            } else {
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
+            }
+        }
+        node_o.cvTransform(Backward);
+        node_o.mwTransform(Compression);
+        node_o.setHasCoefs();
+        node_o.calcNorms();
+    }
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp
index 584e61e68..7d7518b25 100644
--- a/src/treebuilders/add.cpp
+++ b/src/treebuilders/add.cpp
@@ -69,11 +69,12 @@ void add(double prec,
          T b,
          FunctionTree<D, T> &inp_b,
          int maxIter,
-         bool absPrec) {
+         bool absPrec,
+         bool conjugate) {
     FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back(std::make_tuple(a, &inp_a));
     tmp_vec.push_back(std::make_tuple(b, &inp_b));
-    add(prec, out, tmp_vec, maxIter, absPrec);
+    add(prec, out, tmp_vec, maxIter, absPrec, conjugate);
 }
 
 /** @brief Addition of several MW function representations, adaptive grid
@@ -98,14 +99,14 @@ void add(double prec,
  * no coefs).
  *
  */
-template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec) {
+    template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec, bool conjugate) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
     TreeBuilder<D, T> builder;
     WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
-    AdditionCalculator<D, T> calculator(inp);
+    AdditionCalculator<D, T> calculator(inp, conjugate);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -126,10 +127,10 @@ template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, Func
     print::separator(10, ' ');
 }
 
-template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter, bool absPrec, bool conjugate) {
     FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
-    add(prec, out, inp_vec, maxIter, absPrec);
+    add(prec, out, inp_vec, maxIter, absPrec, conjugate);
 }
 
 template void add<1, double>(double prec,
@@ -139,7 +140,8 @@ template void add<1, double>(double prec,
                      double b,
                      FunctionTree<1, double> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, double>(double prec,
                      FunctionTree<2, double> &out,
                      double a,
@@ -147,7 +149,8 @@ template void add<2, double>(double prec,
                      double b,
                      FunctionTree<2, double> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, double>(double prec,
                      FunctionTree<3, double> &out,
                      double a,
@@ -155,39 +158,46 @@ template void add<3, double>(double prec,
                      double b,
                      FunctionTree<3, double> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 template void add<1, double>(double prec,
                      FunctionTree<1, double> &out,
                      FunctionTreeVector<1, double> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, double>(double prec,
                      FunctionTree<2, double> &out,
                      FunctionTreeVector<2, double> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, double>(double prec,
                      FunctionTree<3, double> &out,
 		     FunctionTreeVector<3, double> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 template void add<1, double>(double prec,
                      FunctionTree<1, double> &out,
                      std::vector<FunctionTree<1, double> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, double>(double prec,
                      FunctionTree<2, double> &out,
                      std::vector<FunctionTree<2, double> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, double>(double prec,
                      FunctionTree<3, double> &out,
                      std::vector<FunctionTree<3, double> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 
 template void add<1, ComplexDouble>(double prec,
@@ -197,7 +207,8 @@ template void add<1, ComplexDouble>(double prec,
                      ComplexDouble b,
                      FunctionTree<1, ComplexDouble> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, ComplexDouble>(double prec,
                      FunctionTree<2, ComplexDouble> &out,
                      ComplexDouble a,
@@ -205,7 +216,8 @@ template void add<2, ComplexDouble>(double prec,
                      ComplexDouble b,
                      FunctionTree<2, ComplexDouble> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, ComplexDouble>(double prec,
                      FunctionTree<3, ComplexDouble> &out,
                      ComplexDouble a,
@@ -213,38 +225,45 @@ template void add<3, ComplexDouble>(double prec,
                      ComplexDouble b,
                      FunctionTree<3, ComplexDouble> &tree_b,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 template void add<1, ComplexDouble>(double prec,
                      FunctionTree<1, ComplexDouble> &out,
                      FunctionTreeVector<1, ComplexDouble> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, ComplexDouble>(double prec,
                      FunctionTree<2, ComplexDouble> &out,
                      FunctionTreeVector<2, ComplexDouble> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, ComplexDouble>(double prec,
                      FunctionTree<3, ComplexDouble> &out,
                      FunctionTreeVector<3, ComplexDouble> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 template void add<1, ComplexDouble>(double prec,
                      FunctionTree<1, ComplexDouble> &out,
                      std::vector<FunctionTree<1, ComplexDouble> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<2, ComplexDouble>(double prec,
                      FunctionTree<2, ComplexDouble> &out,
                      std::vector<FunctionTree<2, ComplexDouble> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 template void add<3, ComplexDouble>(double prec,
                      FunctionTree<3, ComplexDouble> &out,
                      std::vector<FunctionTree<3, ComplexDouble> *> &inp,
                      int maxIter,
-                     bool absPrec);
+                     bool absPrec,
+                     bool conjugate);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/add.h b/src/treebuilders/add.h
index dae1b366a..a4e35c47b 100644
--- a/src/treebuilders/add.h
+++ b/src/treebuilders/add.h
@@ -35,16 +35,19 @@ template <int D, typename T> void add(double prec,
                           T b,
                           FunctionTree<D, T> &tree_b,
                           int maxIter = -1,
-                          bool absPrec = false);
+                          bool absPrec = false,
+                          bool conjugate = false);
 template <int D, typename T> void add(double prec,
                           FunctionTree<D, T> &out,
                           FunctionTreeVector<D, T> &inp,
                           int maxIter = -1,
-                          bool absPrec = false);
+                          bool absPrec = false,
+                          bool conjugate = false);
 template <int D, typename T> void add(double prec,
                           FunctionTree<D, T> &out,
                           std::vector<FunctionTree<D, T> *> &inp,
                           int maxIter = -1,
-                          bool absPrec = false);
+                          bool absPrec = false,
+                          bool conjugate = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 47cffa711..2ab9b7955 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -128,15 +128,17 @@ template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperat
     if (metric == nullptr) {
         metric = defaultMetric;
     }
-    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal) {
+                if (inp.isreal()) {
+                    if (out.CompD[ocomp] == nullptr) out.alloc(ocomp);
                     apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompD[ocomp]->rescale(metric[icomp][ocomp].real());
                     }
                 } else {
+                    if (out.CompC[ocomp] == nullptr) out.alloc(ocomp);
                     apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
@@ -272,10 +274,10 @@ template <int D, typename T> void apply(double prec, CompFunction<D> &out, Convo
     if (metric == nullptr) {
         metric = defaultMetric;
     }
-    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal) {
+                if (inp.isreal()) {
                     apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], precTrees[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
@@ -332,7 +334,7 @@ template <int D> void apply_far_field(double prec, CompFunction<D> &out, Convolu
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    if (inp.isreal) {
+                    if (inp.isreal()) {
                         apply_on_unit_cell<D>(false, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
                         if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                             out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
@@ -391,7 +393,7 @@ template <int D> void apply_near_field(double prec, CompFunction<D> &out, Convol
         if (inp.Comp[icomp]!=nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++){
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    if (inp.isreal) {
+                    if (inp.isreal()) {
                         apply_on_unit_cell<D>(true, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
                         if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                             out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
@@ -470,21 +472,21 @@ template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, C
     if (metric == nullptr) {
         metric = defaultMetric;
     }
-    for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal and std::imag(metric[icomp][ocomp]) < MachinePrec) {
+                if (inp.isreal() and std::imag(metric[icomp][ocomp]) < MachinePrec) {
                     apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
                     }
-                    out.isreal = 1;
+                    out.func_ptr->isreal = 1;
                 } else {
                     apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
                     }
-                    out.iscomplex = 1;
+                    out.func_ptr->iscomplex = 1;
                 }
             }
         }
@@ -527,21 +529,21 @@ std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction
     }
     for (int d = 0; d < 3; d++) {
         CompFunction<3> *grad_d = new CompFunction<3>();
-        for (int icomp = 0; icomp < inp.Ncomp; icomp++){
+        for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
             for (int ocomp = 0; ocomp < 4; ocomp++){
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    grad_d->Ncomp=ocomp;
-                    if (inp.isreal) {
-                        grad_d->isreal = 1;
-                        grad_d->iscomplex = 0;
+                    grad_d->func_ptr->Ncomp=ocomp;
+                    if (inp.isreal()) {
+                        grad_d->func_ptr->isreal = 1;
+                        grad_d->func_ptr->iscomplex = 0;
                         grad_d->CompD[ocomp] = new FunctionTree<3, double>(inp.CompD[0]->getMRA());
                         apply(*(grad_d->CompD[ocomp]), oper, *inp.CompD[icomp], d);
                         if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                             grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real());
                         }
                     } else {
-                        grad_d->isreal = 0;
-                        grad_d->iscomplex = 1;
+                        grad_d->func_ptr->isreal = 0;
+                        grad_d->func_ptr->iscomplex = 1;
                         grad_d->CompC[ocomp] = new FunctionTree<3, ComplexDouble>(inp.CompC[0]->getMRA());
                         apply(*(grad_d->CompC[ocomp]), oper, *inp.CompC[icomp], d);
                         if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index 277ab4d8a..f5d28779b 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -237,11 +237,11 @@ template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTre
  */
 template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
     out.free();
-    out.data = inp.data;
-    out.alloc(inp.Ncomp);
-    for (int i = 0; i < inp.Ncomp; i++) {
-        if (inp.isreal) build_grid(*out.CompD[i], *inp.CompD[i]);
-        if (inp.iscomplex) build_grid(*out.CompC[i], *inp.CompC[i]);
+    out.func_ptr->data = inp.func_ptr->data;
+    out.alloc(inp.Ncomp());
+    for (int i = 0; i < inp.Ncomp(); i++) {
+        if (inp.isreal()) build_grid(*out.CompD[i], *inp.CompD[i]);
+        if (inp.iscomplex()) build_grid(*out.CompC[i], *inp.CompC[i]);
     }
 }
 
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index b95f030d4..c4bc01edf 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -62,6 +62,7 @@ namespace mrcpp {
  * - Repeat until convergence or `maxIter` is reached
  * - `prec < 0` or `maxIter = 0` means NO refinement
  * - `maxIter < 0` means no bound
+ * - if `conjugate` is true, the complex conjugate of `inp_b` is used (`inp_a` is left as is)
  *
  * @note This algorithm will start at whatever grid is present in the `out`
  * tree when the function is called (this grid should however be EMPTY, e.i.
@@ -76,11 +77,12 @@ void multiply(double prec,
               FunctionTree<D, T> &inp_b,
               int maxIter,
               bool absPrec,
-              bool useMaxNorms) {
+              bool useMaxNorms,
+              bool conjugate ) {
     FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back({c, &inp_a});
     tmp_vec.push_back({1.0, &inp_b});
-    multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms);
+    multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate);
 }
     /*
 template <int D> void multiply(double prec,
@@ -113,6 +115,7 @@ template <int D> void multiply(double prec,
  * - Repeat until convergence or `maxIter` is reached
  * - `prec < 0` or `maxIter = 0` means NO refinement
  * - `maxIter < 0` means no bound
+ * - if `conjugate` is true, every tree in `inp` except the first one is complex conjugated
  *
  * @note This algorithm will start at whatever grid is present in the `out`
  * tree when the function is called (this grid should however be EMPTY, e.i.
@@ -125,13 +128,14 @@ void multiply(double prec,
               FunctionTreeVector<D, T> &inp,
               int maxIter,
               bool absPrec,
-              bool useMaxNorms) {
+              bool useMaxNorms,
+              bool conjugate ) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
     TreeBuilder<D, T> builder;
-    MultiplicationCalculator<D, T> calculator(inp);
+    MultiplicationCalculator<D, T> calculator(inp, conjugate);
 
     if (useMaxNorms) {
         for (int i = 0; i < inp.size(); i++) get_func(inp, i).makeMaxSquareNorms();
@@ -165,10 +169,11 @@ void multiply(double prec,
               std::vector<FunctionTree<D, T> *> &inp,
               int maxIter,
               bool absPrec,
-              bool useMaxNorms) {
+              bool useMaxNorms,
+              bool conjugate ) {
     FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
-    multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms);
+    multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms, conjugate);
 }
 
 /** @brief Out-of-place square of MW function representations, adaptive grid
@@ -192,13 +197,13 @@ void multiply(double prec,
  * no coefs).
  *
  */
-template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter, bool absPrec) {
+template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter, bool absPrec, bool conjugate) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
 
     int maxScale = out.getMRA().getMaxScale();
     TreeBuilder<D, T> builder;
     WaveletAdaptor<D, T> adaptor(prec, maxScale, absPrec);
-    SquareCalculator<D, T> calculator(inp);
+    SquareCalculator<D, T> calculator(inp, conjugate);
 
     builder.build(out, calculator, adaptor, maxIter);
 
@@ -241,6 +246,7 @@ template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, F
 template <int D, typename T>
 void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double p, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
+    if (inp.conjugate()) MSG_ABORT("Not implemented");
 
     int maxScale = out.getMRA().getMaxScale();
     TreeBuilder<D, T> builder;
@@ -298,7 +304,7 @@ void dot(double prec,
         auto *out_d = new FunctionTree<D, T>(out.getMRA());
         build_grid(*out_d, out);
         T One = 1.0;
-        multiply(prec, *out_d, One, tree_a, tree_b, maxIter, absPrec);
+        multiply(prec, *out_d, One, tree_a, tree_b, maxIter, absPrec, false, true); // useMaxNorms = false, conjugate = true (conjugates tree_b)
         tmp_vec.push_back({coef_a * coef_b, out_d});
     }
     build_grid(out, tmp_vec);
@@ -404,7 +410,7 @@ template void multiply<1, double>(double prec,
                           FunctionTree<1, double> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, double>(double prec,
                           FunctionTree<2, double> &out,
                           double c,
@@ -412,7 +418,7 @@ template void multiply<2, double>(double prec,
                           FunctionTree<2, double> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, double>(double prec,
                           FunctionTree<3, double> &out,
                           double c,
@@ -420,43 +426,43 @@ template void multiply<3, double>(double prec,
                           FunctionTree<3, double> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<1, double>(double prec,
                           FunctionTree<1, double> &out,
                           FunctionTreeVector<1, double> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, double>(double prec,
                           FunctionTree<2, double> &out,
                           FunctionTreeVector<2, double> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, double>(double prec,
                           FunctionTree<3, double> &out,
                           FunctionTreeVector<3, double> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<1, double>(double prec,
                           FunctionTree<1, double> &out,
                           std::vector<FunctionTree<1, double> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, double>(double prec,
                           FunctionTree<2, double> &out,
                           std::vector<FunctionTree<2, double> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, double>(double prec,
                           FunctionTree<3, double> &out,
                           std::vector<FunctionTree<3, double> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void power<1, double>(double prec,
                        FunctionTree<1, double> &out,
                        FunctionTree<1, double> &tree,
@@ -479,17 +485,17 @@ template void square<1, double>(double prec,
                         FunctionTree<1, double> &out,
                         FunctionTree<1, double> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void square<2, double>(double prec,
                         FunctionTree<2, double> &out,
                         FunctionTree<2, double> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void square<3, double>(double prec,
                         FunctionTree<3, double> &out,
                         FunctionTree<3, double> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void dot<1, double>(double prec,
                      FunctionTree<1, double> &out,
                      FunctionTreeVector<1, double> &inp_a,
@@ -528,7 +534,7 @@ template void multiply<1, ComplexDouble>(double prec,
                           FunctionTree<1, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, ComplexDouble>(double prec,
                           FunctionTree<2, ComplexDouble> &out,
                           ComplexDouble c,
@@ -536,7 +542,7 @@ template void multiply<2, ComplexDouble>(double prec,
                           FunctionTree<2, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, ComplexDouble>(double prec,
                           FunctionTree<3, ComplexDouble> &out,
                           ComplexDouble c,
@@ -544,43 +550,43 @@ template void multiply<3, ComplexDouble>(double prec,
                           FunctionTree<3, ComplexDouble> &tree_b,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<1, ComplexDouble>(double prec,
                           FunctionTree<1, ComplexDouble> &out,
                           FunctionTreeVector<1, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, ComplexDouble>(double prec,
                           FunctionTree<2, ComplexDouble> &out,
                           FunctionTreeVector<2, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, ComplexDouble>(double prec,
                           FunctionTree<3, ComplexDouble> &out,
                           FunctionTreeVector<3, ComplexDouble> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<1, ComplexDouble>(double prec,
                           FunctionTree<1, ComplexDouble> &out,
                           std::vector<FunctionTree<1, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<2, ComplexDouble>(double prec,
                           FunctionTree<2, ComplexDouble> &out,
                           std::vector<FunctionTree<2, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void multiply<3, ComplexDouble>(double prec,
                           FunctionTree<3, ComplexDouble> &out,
                           std::vector<FunctionTree<3, ComplexDouble> *> &inp,
                           int maxIter,
                           bool absPrec,
-                          bool useMaxNorms);
+                          bool useMaxNorms, bool conjugate);
 template void power<1, ComplexDouble>(double prec,
                        FunctionTree<1, ComplexDouble> &out,
                        FunctionTree<1, ComplexDouble> &tree,
@@ -603,17 +609,17 @@ template void square<1, ComplexDouble>(double prec,
                         FunctionTree<1, ComplexDouble> &out,
                         FunctionTree<1, ComplexDouble> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void square<2, ComplexDouble>(double prec,
                         FunctionTree<2, ComplexDouble> &out,
                         FunctionTree<2, ComplexDouble> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void square<3, ComplexDouble>(double prec,
                         FunctionTree<3, ComplexDouble> &out,
                         FunctionTree<3, ComplexDouble> &tree,
                         int maxIter,
-                        bool absPrec);
+                        bool absPrec, bool conjugate);
 template void dot<1, ComplexDouble>(double prec,
                      FunctionTree<1, ComplexDouble> &out,
                      FunctionTreeVector<1, ComplexDouble> &inp_a,
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index 96a956f3b..5994edfa5 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -52,33 +52,36 @@ template <int D, typename T> void multiply(double prec,
                                FunctionTree<D, T> &inp_b,
                                int maxIter = -1,
                                bool absPrec = false,
-                               bool useMaxNorms = false);
+                               bool useMaxNorms = false,
+                               bool conjugate = false );
 
 template <int D, typename T> void multiply(double prec,
                                FunctionTree<D, T> &out,
                                std::vector<FunctionTree<D, T> *> &inp,
                                int maxIter = -1,
                                bool absPrec = false,
-                               bool useMaxNorms = false);
+                               bool useMaxNorms = false,
+                               bool conjugate = false );
 
 template <int D, typename T> void multiply(double prec,
                                FunctionTree<D, T> &out,
                                FunctionTreeVector<D, T> &inp,
                                int maxIter = -1,
                                bool absPrec = false,
-                               bool useMaxNorms = false);
+                               bool useMaxNorms = false,
+                               bool conjugate = false );
 
 template <int D, typename T> void power(double prec,
                             FunctionTree<D, T> &out,
                             FunctionTree<D, T> &inp,
                             double p,
                             int maxIter = -1,
-                            bool absPrec = false);
+                            bool absPrec = false );
 
 template <int D, typename T> void square(double prec,
                              FunctionTree<D, T> &out,
                              FunctionTree<D, T> &inp,
                              int maxIter = -1,
-                             bool absPrec = false);
+                             bool absPrec = false, bool conjugate = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/project.cpp b/src/treebuilders/project.cpp
index 65d17fd16..95360b901 100644
--- a/src/treebuilders/project.cpp
+++ b/src/treebuilders/project.cpp
@@ -58,6 +58,7 @@ namespace mrcpp {
  */
 template <int D, typename T> void project(double prec, FunctionTree<D, T> &out, std::function<T(const Coord<D> &r)> func, int maxIter, bool absPrec) {
     AnalyticFunction<D, T> inp(func);
+
     mrcpp::project(prec, out, inp, maxIter, absPrec);
 }
 
@@ -82,7 +83,6 @@ template <int D, typename T> void project(double prec, FunctionTree<D, T> &out,
  *
  */
 template <int D, typename T> void project(double prec, FunctionTree<D, T> &out, RepresentableFunction<D, T> &inp, int maxIter, bool absPrec) {
-
     int maxScale = out.getMRA().getMaxScale();
     const auto scaling_factor = out.getMRA().getWorldBox().getScalingFactors();
     TreeBuilder<D, T> builder;
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index 39555c5a9..c7b21a334 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -446,7 +446,20 @@ template <> void FunctionNode<3>::reCompress() {
 
     int size = bra.getKp1_d();
     ComplexDouble result = 0.0;
-    for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
+    // Bra coefficients are conjugated by default; a tree whose conjugate() flag is set reverses this (bra used as is, ket conjugated)
+    if (bra.getMWTree().conjugate()){
+        if (ket.getMWTree().conjugate()){
+            for (int i = 0; i < size; i++) result += a[i] * std::conj(b[i]);
+        } else {
+            for (int i = 0; i < size; i++) result += a[i] * b[i];
+        }
+    } else {
+        if (ket.getMWTree().conjugate()){
+            for (int i = 0; i < size; i++) result += std::conj(a[i]) * std::conj(b[i]);
+        } else {
+            for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
+        }
+    }
     return result;
 }
 
@@ -499,7 +512,19 @@ template <> void FunctionNode<3>::reCompress() {
     int start = bra.getKp1_d();
     int size = (bra.getTDim() - 1) * start;
     ComplexDouble result = 0.0;
-    for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
+    if (bra.getMWTree().conjugate()){
+        if (ket.getMWTree().conjugate()){
+            for (int i = 0; i < size; i++) result += a[start + i] * std::conj(b[start + i]);
+        } else {
+            for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
+        }
+    } else {
+        if (ket.getMWTree().conjugate()){
+            for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * std::conj(b[start + i]);
+        } else {
+            for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
+        }
+    }
     return result;
 }
 
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 4a922b5bc..de11ebc7d 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -624,15 +624,15 @@ void FunctionTree<D, T>::makeCoeffVector(std::vector<T *> &coefs,
                                       std::vector<int> &parent_indices,
                                       std::vector<double> &scalefac,
                                       int &max_index,
-                                      MWTree<D, T> &refTree,
-                                      std::vector<MWNode<D, T> *> *refNodes) {
+                                      MWTree<D, double> &refTree,
+                                      std::vector<MWNode<D, double> *> *refNodes) {
     coefs.clear();
     indices.clear();
     parent_indices.clear();
     max_index = 0;
     int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
     int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    std::vector<MWNode<D, T> *> refstack;  // nodes from refTree
+    std::vector<MWNode<D, double> *> refstack;  // nodes from refTree
     std::vector<MWNode<D, T> *> thisstack; // nodes from this Tree
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
         refstack.push_back(refTree.getRootBox().getNodes()[rIdx]);
@@ -642,7 +642,7 @@ void FunctionTree<D, T>::makeCoeffVector(std::vector<T *> &coefs,
     while (thisstack.size() > stack_p) {
         // refNode and thisNode are the same node in space, but on different trees
         MWNode<D, T> *thisNode = thisstack[stack_p];
-        MWNode<D, T> *refNode = refstack[stack_p++];
+        MWNode<D, double> *refNode = refstack[stack_p++];
         coefs.push_back(thisNode->getCoefs());
         if (refNodes != nullptr) refNodes->push_back(refNode);
         if (refNode != nullptr) {
@@ -672,22 +672,22 @@ void FunctionTree<D, T>::makeCoeffVector(std::vector<T *> &coefs,
  * reference tree and a list of coefficients.
  * It is the reference tree (refTree) which is traversed, but one does not descend
  * into children if the norm of the tree is smaller than absPrec. */
-template <int D, typename T> void FunctionTree<D, T>::makeTreefromCoeff(MWTree<D, T> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode) {
-    std::vector<MWNode<D, T> *> stack;
+template <int D, typename T> void FunctionTree<D, T>::makeTreefromCoeff(MWTree<D, double> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode) {
+    std::vector<MWNode<D, double> *> stack;
     std::map<int, MWNode<D, T> *> ix2node; // gives the nodes in this tree for a given ix
     int sizecoef = (1 << this->getDim()) * this->getKp1_d();
     int sizecoefW = ((1 << this->getDim()) - 1) * this->getKp1_d();
     this->squareNorm = 0.0;
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < refTree.getRootBox().size(); rIdx++) {
-        MWNode<D, T> *refNode = refTree.getRootBox().getNodes()[rIdx];
+        MWNode<D, double> *refNode = refTree.getRootBox().getNodes()[rIdx];
         stack.push_back(refNode);
         int ix = ix2coef[refNode->getSerialIx()];
         ix2node[ix] = this->getRootBox().getNodes()[rIdx];
     }
 
     while (stack.size() > 0) {
-        MWNode<D, T> *refNode = stack.back(); // node in the reference tree refTree
+        MWNode<D, double> *refNode = stack.back(); // node in the reference tree refTree
         stack.pop_back();
         assert(ix2coef.count(refNode->getSerialIx()) > 0);
         int ix = ix2coef[refNode->getSerialIx()];
@@ -748,9 +748,51 @@ template <int D, typename T> void FunctionTree<D, T>::makeTreefromCoeff(MWTree<D
     }
 }
 
+/** Extend this tree, depth first with explicit stacks, so that it has children wherever the real-valued inTree has children.
+ *  Only the node structure of inTree is read (no coefficients), so this tree may be real or complex; its end-node table is rebuilt.
+ */
+template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, double> &inTree) {
+    std::vector<MWNode<D, double> *> instack;   // stack of nodes from inTree
+    std::vector<MWNode<D, T> *> thisstack; // stack of the matching nodes from this tree (pushed and popped in lockstep)
+    this->clearEndNodeTable();
+    for (int rIdx = 0; rIdx < inTree.getRootBox().size(); rIdx++) {
+        instack.push_back(inTree.getRootBox().getNodes()[rIdx]);
+        thisstack.push_back(this->getRootBox().getNodes()[rIdx]);
+    }
+    while (thisstack.size() > 0) {
+        // inNode and thisNode are the same node in space, but on different trees
+        MWNode<D, T> *thisNode = thisstack.back();
+        thisstack.pop_back();
+        MWNode<D, double> *inNode = instack.back();
+        instack.pop_back();
+        if (inNode->getNChildren() > 0) {
+            thisNode->clearIsEndNode();
+            if (thisNode->getNChildren() < inNode->getNChildren()) thisNode->createChildren(false);
+            for (int i = 0; i < inNode->getNChildren(); i++) {
+                instack.push_back(inNode->children[i]);
+                thisstack.push_back(thisNode->children[i]);
+            }
+        } else {
+            // inTree has no children here: register every childless node of this tree at or below thisNode in the end-node table
+            // This could be done more efficiently, if it proves to be time consuming
+            std::vector<MWNode<D, T> *> branchstack; // local stack starting from this branch
+            branchstack.push_back(thisNode);
+            while (branchstack.size() > 0) {
+                MWNode<D, T> *branchNode = branchstack.back();
+                branchstack.pop_back();
+                if (branchNode->getNChildren() > 0) {
+                    for (int i = 0; i < branchNode->getNChildren(); i++) { branchstack.push_back(branchNode->children[i]); }
+                } else
+                    this->endNodeTable.push_back(branchNode);
+            }
+        }
+    }
+}
+
+
 /** Traverse tree using DFS and append same nodes as another tree, without coefficients */
-template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, T> &inTree) {
-    std::vector<MWNode<D, T> *> instack;   // node from inTree
+template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, ComplexDouble> &inTree) {
+    std::vector<MWNode<D, ComplexDouble> *> instack;   // node from inTree
     std::vector<MWNode<D, T> *> thisstack; // node from this Tree
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < inTree.getRootBox().size(); rIdx++) {
@@ -761,7 +803,7 @@ template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D
         // inNode and thisNode are the same node in space, but on different trees
         MWNode<D, T> *thisNode = thisstack.back();
         thisstack.pop_back();
-        MWNode<D, T> *inNode = instack.back();
+        MWNode<D, ComplexDouble> *inNode = instack.back();
         instack.pop_back();
         if (inNode->getNChildren() > 0) {
             thisNode->clearIsEndNode();
@@ -846,8 +888,6 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
  * @details Exact copy without any binding between old and new tree
  */
 template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out){
-    delete out;
-    out = new FunctionTree<D, T> (this->getMRA(), this->getName());
     copy_grid(*out, *this);
     copy_func(*out, *this);
 }
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 1c71dcfc6..110ceb07e 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -107,10 +107,11 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
                          std::vector<int> &parent_indices,
                          std::vector<double> &scalefac,
                          int &max_index,
-                         MWTree<D, T> &refTree,
-                         std::vector<MWNode<D, T> *> *refNodes = nullptr);
-    void makeTreefromCoeff(MWTree<D, T> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode = "adaptive");
-    void appendTreeNoCoeff(MWTree<D, T> &inTree);
+                         MWTree<D, double> &refTree,
+                         std::vector<MWNode<D, double> *> *refNodes = nullptr);
+    void makeTreefromCoeff(MWTree<D, double> &refTree, std::vector<T *> coefpVec, std::map<int, int> &ix2coef, double absPrec, const std::string &mode = "adaptive");
+    void appendTreeNoCoeff(MWTree<D, double> &inTree);
+    void appendTreeNoCoeff(MWTree<D, ComplexDouble> &inTree);
     void CopyTree(FunctionTree<D, double> &inTree);
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h
index ed25762b4..eca15925d 100644
--- a/src/trees/MWNode.h
+++ b/src/trees/MWNode.h
@@ -167,6 +167,8 @@ template <int D, typename T> class MWNode {
     friend class OperatorNode;
     friend class DerivativeCalculator<D, T>;
     bool isComplex = false; //TODO put as one of the flags
+    friend class FunctionTree<D, double>; // required if a ComplexDouble tree access a double node from another tree!
+    friend class FunctionTree<D, ComplexDouble>;
 
 protected:
     MWTree<D, T> *tree{nullptr};    ///< Tree the node belongs to
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index c2124bda2..9bec2c1bc 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -143,6 +143,8 @@ class BankAccount;
     MWNodeVector<D, T> endNodeTable;          ///< Final projected nodes
 
     void getNodeCoeff(NodeIndex<D> nIdx, T *data); // fetch coefficient from a specific node stored in Bank
+    bool conjugate() const { return this->conj; }
+    void setConjugate(bool conjug)  { this->conj = conjug; }
 
     friend std::ostream &operator<<(std::ostream &o, const MWTree<D, T> &tree) { return tree.print(o); }
 
@@ -180,6 +182,7 @@ class BankAccount;
     void decrementNodeCount(int scale);
 
     BankAccount *NodesCoeff = nullptr;
+    bool conj{false};
 
     virtual std::ostream &print(std::ostream &o) const;
 };
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index 20bcd49b0..bc700ce1b 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -863,6 +863,23 @@ int BankAccount::put_nodedata(int id, int nodeid, int size, double *data) {
     return 1;
 }
 
+// Save size complex values in the Bank under identity id, as part of the block with identity nodeid.
+// NB: each complex is sent as two doubles, so the announced size and the send count are both 2*size. Always returns 1.
+int BankAccount::put_nodedata(int id, int nodeid, int size, ComplexDouble *data) {
+#ifdef MRCPP_HAS_MPI
+    // for now the receiving bank is selected from nodeid (nodeid % bank_size)
+    int messages[message_size];
+    messages[0] = SAVE_NODEDATA;
+    messages[1] = account_id;
+    messages[2] = nodeid; // which block
+    messages[3] = id;     // id within block
+    messages[4] = 2*size;   // size of this data, counted in doubles
+    MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank);
+    MPI_Send(data, 2*size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank);
+#endif
+    return 1;
+}
+
 // get data with identity id
 int BankAccount::get_nodedata(int id, int nodeid, int size, double *data, std::vector<int> &idVec) {
 #ifdef MRCPP_HAS_MPI
@@ -880,6 +897,24 @@ int BankAccount::get_nodedata(int id, int nodeid, int size, double *data, std::v
     return 1;
 }
 
+
+// Fetch size complex values with identity id from block nodeid (idVec is not used). Always returns 1.
+int BankAccount::get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector<int> &idVec) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    // size counts ComplexDouble elements; put_nodedata stored them as 2*size doubles, so request and receive 2*size
+    int messages[message_size];
+    messages[0] = GET_NODEDATA;
+    messages[1] = account_id;
+    messages[2] = nodeid;   // which block
+    messages[3] = id;       // id within block.
+    messages[4] = 2 * size; // expected size of data, counted in doubles
+    MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank);
+    MPI_Recv(data, 2 * size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status);
+#endif
+    return 1;
+}
+
 // get all data for nodeid (same nodeid, different orbitals)
 int BankAccount::get_nodeblock(int nodeid, double *data, std::vector<int> &idVec) {
 #ifdef MRCPP_HAS_MPI
@@ -900,6 +935,27 @@ int BankAccount::get_nodeblock(int nodeid, double *data, std::vector<int> &idVec
     return 1;
 }
 
+
+// Get all data stored for nodeid (same nodeid, different orbitals); idVec receives the id of each column. Always returns 1.
+int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector<int> &idVec) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    // the Bank answers with metadata first; ids and data follow only when the reported size is positive
+    int messages[message_size];
+    messages[0] = GET_NODEBLOCK;
+    messages[1] = account_id;
+    messages[2] = nodeid;
+
+    MPI_Send(messages, 3, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank);
+    MPI_Recv(metadata_block, size_metadata, MPI_INT, bankmaster[nodeid % bank_size], 1, comm_bank, &status);
+    idVec.resize(metadata_block[1]);
+    int size = metadata_block[2]; // used below as a count of MPI_DOUBLE, not of ComplexDouble
+    if (size > 0) MPI_Recv(idVec.data(), metadata_block[1], MPI_INT, bankmaster[nodeid % bank_size], 2, comm_bank, &status);
+    if (size > 0) MPI_Recv(data, size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status);
+#endif
+    return 1;
+}
+
 // get all data with identity orbid (same orbital, different nodes)
 int BankAccount::get_orbblock(int orbid, double *&data, std::vector<int> &nodeidVec, int bankstart) {
 #ifdef MRCPP_HAS_MPI
diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index dc52791b3..5ecbe6a7e 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -108,8 +108,11 @@ class BankAccount {
     int get_data(NodeIndex<3> nIdx, int size, double *data);
     int get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data);
     int put_nodedata(int id, int nodeid, int size, double *data);
+    int put_nodedata(int id, int nodeid, int size, ComplexDouble *data);
     int get_nodedata(int id, int nodeid, int size, double *data, std::vector<int> &idVec);
+    int get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector<int> &idVec);
     int get_nodeblock(int nodeid, double *data, std::vector<int> &idVec);
+    int get_nodeblock(int nodeid, ComplexDouble *data, std::vector<int> &idVec);
     int get_orbblock(int orbid, double *&data, std::vector<int> &nodeidVec, int bankstart);
 };
 
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 8384e93af..c6a391947 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -17,16 +17,20 @@ namespace mrcpp {
   template <int D>
   CompFunction<D>::CompFunction(MultiResolutionAnalysis<D> &mra)
   { defaultCompMRA<D> = &mra;
-    data.Ncomp = 0;
     func_ptr = std::make_shared<TreePtr<D>>(false);
-
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
     for (int i = 0; i < 4; i++) CompC[i] = nullptr;
   }
 
   template <int D>
   CompFunction<D>::CompFunction()
-  { data.Ncomp = 0;
-      func_ptr = std::make_shared<TreePtr<D>>(false);
+  { func_ptr = std::make_shared<TreePtr<D>>(false);
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
   }
 
 /*
@@ -34,15 +38,18 @@ namespace mrcpp {
  */
   template <int D>
   CompFunction<D>::CompFunction(int n1)
-  { data.Ncomp = 0;
-      func_ptr = std::make_shared<TreePtr<D>>(false);
-      data.n1[0] = n1;
-      data.n2[0] = -1;
-      data.n3[0] = 0;
-      rank = 0;
-      isreal = 1;
-      iscomplex = 0;
-      data.shared = false;
+  {   func_ptr = std::make_shared<TreePtr<D>>(false);
+      CompD = func_ptr->real;
+      CompC = func_ptr->cplx;
+      for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+      for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+      func_ptr->data.n1[0] = n1;
+      func_ptr->data.n2[0] = -1;
+      func_ptr->data.n3[0] = 0;
+      func_ptr->rank = 0;
+      func_ptr->isreal = 1;
+      func_ptr->iscomplex = 0;
+      func_ptr->data.shared = false;
   }
 
 /*
@@ -50,24 +57,30 @@ namespace mrcpp {
  */
   template <int D>
   CompFunction<D>::CompFunction(int n1, bool share)
-  { data.Ncomp = 0;
-      func_ptr = std::make_shared<TreePtr<D>>(share);
-      data.n1[0] = n1;
-      data.n2[0] = -1;
-      data.n3[0] = 0;
-      rank = 0;
-      isreal = 1;
-      iscomplex = 0;
-      data.shared = share;
+  {   func_ptr = std::make_shared<TreePtr<D>>(share);
+      CompD = func_ptr->real;
+      CompC = func_ptr->cplx;
+      for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+      for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+      func_ptr->data.n1[0] = n1;
+      func_ptr->data.n2[0] = -1;
+      func_ptr->data.n3[0] = 0;
+      func_ptr->rank = 0;
+      func_ptr->isreal = 1;
+      func_ptr->iscomplex = 0;
+      func_ptr->data.shared = share;
   }
 
 /*
- * Empty functions (no components defined)
+ * Empty functions (trees defined but zero)
  */
   template <int D>
   CompFunction<D>::CompFunction(const CompFunctionData<D>& indata)
-  { data = indata;
-      func_ptr = std::make_shared<TreePtr<D>>(share);
+  { func_ptr = std::make_shared<TreePtr<D>>(indata.shared);
+    func_ptr->data = indata;
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+    this->alloc(Ncomp()-1);
   }
 
 /** @brief Copy constructor
@@ -77,8 +90,9 @@ namespace mrcpp {
  */
   template <int D>
   CompFunction<D>::CompFunction(const CompFunction<D> &compfunc) {
-      data = compfunc.data;
       func_ptr = compfunc.func_ptr;
+      CompD = func_ptr->real;
+      CompC = func_ptr->cplx;
   }
 
 /** @brief Copy constructor
@@ -86,18 +100,13 @@ namespace mrcpp {
  * Shallow copy: meta data is copied along with the component pointers,
  * NO transfer of ownership.
  */
-  template <int D>
-  CompFunction<D>::CompFunction(CompFunction<D> && compfunc) {
-      data = compfunc.data;
-      func_ptr = compfunc.func_ptr;
-  }
-
   template <int D>
   CompFunction<D> &CompFunction<D>::operator=(const CompFunction<D> &compfunc) {
       if (this != &compfunc) {
-          data = compfunc.data;
           func_ptr = compfunc.func_ptr;
-      }
+          CompD = func_ptr->real;
+          CompC = func_ptr->cplx;
+     }
       return *this;
   }
 
@@ -107,36 +116,36 @@ template <int D>
  * Returns a copy without defined trees.
  */
 CompFunction<D> CompFunction<D>::paramCopy() const {
-    return CompFunction<D>(data);
+    return CompFunction<D>(func_ptr->data);
 }
 
 
 template <int D>
 void CompFunction<D>::flushMRAData() {
     const auto &box = defaultCompMRA<3>->getWorldBox();
-    data.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
-    data.order = defaultCompMRA<3>->getOrder();
-    data.depth = defaultCompMRA<3>->getMaxDepth();
-    data.scale = box.getScale();
-    data.boxes[0] = box.size(0);
-    data.boxes[1] = box.size(1);
-    data.boxes[2] = box.size(2);
-    data.corner[0] = box.getCornerIndex().getTranslation(0);
-    data.corner[1] = box.getCornerIndex().getTranslation(1);
-    data.corner[2] = box.getCornerIndex().getTranslation(2);
+    func_ptr->data.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
+    func_ptr->data.order = defaultCompMRA<3>->getOrder();
+    func_ptr->data.depth = defaultCompMRA<3>->getMaxDepth();
+    func_ptr->data.scale = box.getScale();
+    func_ptr->data.boxes[0] = box.size(0);
+    func_ptr->data.boxes[1] = box.size(1);
+    func_ptr->data.boxes[2] = box.size(2);
+    func_ptr->data.corner[0] = box.getCornerIndex().getTranslation(0);
+    func_ptr->data.corner[1] = box.getCornerIndex().getTranslation(1);
+    func_ptr->data.corner[2] = box.getCornerIndex().getTranslation(2);
 }
 
 template <int D>
 void CompFunction<D>::flushFuncData() {
     if (D == 3) flushMRAData();
-    for (int i = 0; i < Ncomp; i++) {
-        if (isreal) {
-            Nchunks[i] = CompD[i]->getNChunksUsed();
+    for (int i = 0; i < Ncomp(); i++) {
+        if (isreal()) {
+            func_ptr->Nchunks[i] = CompD[i]->getNChunksUsed();
         } else {
-            Nchunks[i] = CompC[i]->getNChunksUsed();
+            func_ptr->Nchunks[i] = CompC[i]->getNChunksUsed();
         }
     }
-    for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
+    for (int i = Ncomp(); i < 4; i++) func_ptr->Nchunks[i] = 0;
 }
 
 template <int D>
@@ -153,14 +162,14 @@ CompFunctionData<D> CompFunction<D>::getFuncData() const {
     outdata.corner[0] = box.getCornerIndex().getTranslation(0);
     outdata.corner[1] = box.getCornerIndex().getTranslation(1);
     outdata.corner[2] = box.getCornerIndex().getTranslation(2);
-    for (int i = 0; i < Ncomp; i++) {
-        if (isreal) {
+    for (int i = 0; i < Ncomp(); i++) {
+        if (isreal()) {
             outdata.Nchunks[i] = CompD[i]->getNChunksUsed();
         } else {
             outdata.Nchunks[i] = CompC[i]->getNChunksUsed();
         }
     }
-    for (int i = Ncomp; i < 4; i++) Nchunks[i] = 0;
+    for (int i = Ncomp(); i < 4; i++) outdata.Nchunks[i] = 0;
     return outdata;
 }
 
@@ -168,52 +177,49 @@ CompFunctionData<D> CompFunction<D>::getFuncData() const {
 template <int D>
 ComplexDouble CompFunction<D>::integrate() const {
     ComplexDouble integral;
-    if (isreal) integral = CompD[0]->integrate();
+    if (isreal()) integral = CompD[0]->integrate();
     else integral = CompC[0]->integrate();
     return integral;
 }
 
-    template <int D>
-  double CompFunction<D>::norm() const {
-     double norm = squaredNorm();
-     for (int i = 0; i < Ncomp; i++) {
-          if (isreal) {
-              norm += CompD[i]->getSquareNorm();
-          } else {
-              norm += CompC[i]->getSquareNorm();
-          }
-     }
-     if (norm > 0.0) norm = std::sqrt(norm);
-     return norm;
-  }
-  template <int D>
-  double CompFunction<D>::squaredNorm() const {
-     double norm = squaredNorm();
-     for (int i = 0; i < Ncomp; i++) {
-          if (isreal) {
-              norm += CompD[i]->getSquareNorm();
-          } else {
-              norm += CompC[i]->getSquareNorm();
-          }
-     }
-     return norm;
-  }
-  template <int D>
-  void CompFunction<D>::alloc(int ialloc) {
+template <int D>
+double CompFunction<D>::norm() const {
+    double norm = squaredNorm();
+    if (norm > 0.0) norm = std::sqrt(norm);
+    return norm;
+}
+template <int D>
+double CompFunction<D>::squaredNorm() const {
+    double norm = 0.0;
+    for (int i = 0; i < Ncomp(); i++) {
+        if (isreal() and CompD[i]!= nullptr) {
+            norm += CompD[i]->getSquareNorm();
+        } else if (iscomplex() and CompC[i]!= nullptr) {
+            norm += CompC[i]->getSquareNorm();
+        }
+    }
+    return norm;
+}
+
+//  Allocate empty trees. The tree must be defined as real or complex already.
+//  Allocates all the ialloc+1 trees, with indices 0,...ialloc
+//  ialloc is the largest index allocated. ialloc=0 allocates one tree.
+template <int D>
+void CompFunction<D>::alloc(int ialloc) {
       if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      if (isreal == 0 and iscomplex == 0)  MSG_ABORT("Function must be defined either real or complex");
+      if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
       for (int i = 0; i < ialloc + 1; i++) {
           delete CompD[i];
           delete CompC[i];
-          if (isreal) {
+          if (isreal()) {
               CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
           }
-          if (iscomplex) {
+          if (iscomplex()) {
               CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
           }
-          Ncomp = std::max(Ncomp, i + 1);
+          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
       }
-      for (int i = ialloc + 1; i < Ncomp; i++) {
+      for (int i = ialloc + 1; i < Ncomp(); i++) {
           //delete possible remaining components
           delete CompD[i];
           delete CompC[i];
@@ -224,7 +230,7 @@ ComplexDouble CompFunction<D>::integrate() const {
 template <int D>
 void CompFunction<D>::free() {
     //TODO: shared memory handling
-    for (int i = 0; i < Ncomp; i++) {
+    for (int i = 0; i < Ncomp(); i++) {
         delete CompD[i];
         delete CompC[i];
     }
@@ -233,7 +239,7 @@ void CompFunction<D>::free() {
 template <int D>
 int CompFunction<D>::getSizeNodes() const {
     int size_mb = 0; // Memory size in kB
-    for (int i = 0; i < Ncomp; i++) {
+    for (int i = 0; i < Ncomp(); i++) {
         if (CompD[i]!= nullptr) size_mb +=CompD[i]->getSizeNodes();
         if (CompC[i]!= nullptr) size_mb +=CompC[i]->getSizeNodes();
     }
@@ -243,79 +249,83 @@ int CompFunction<D>::getSizeNodes() const {
 template <int D>
 int CompFunction<D>::getNNodes() const {
     int nNodes = 0;
-     for (int i = 0; i < Ncomp; i++) {
+     for (int i = 0; i < Ncomp(); i++) {
         if (CompD[i]!= nullptr) nNodes +=CompD[i]->getSizeNodes();
         if (CompC[i]!= nullptr) nNodes +=CompC[i]->getSizeNodes();
     }
     return nNodes;
 }
 
+/** @brief Soft complex conjugate
+ *
+ * Will use complex conjugate in operations (add, multiply etc.)
+ * Does change the state (conj flag), but does not actively change all coefficients.
+ */
 template <int D>
-CompFunction<D> CompFunction<D>::dagger() {
-        CompFunction<D> out(*this); // Returns shallow copy
-        out.data.conj = not(this->data.conj);
-        return out;
+void CompFunction<D>::dagger() {
+    func_ptr->data.conj = not(func_ptr->data.conj);
+    for (int i = 0; i < Ncomp(); i++) {
+        if (CompC[i]!= nullptr) CompC[i]->setConjugate(func_ptr->data.conj);
+    }
 }
 
 template <int D>
 FunctionTree<D, double> &CompFunction<D>::real(int i) {
-    if (!isreal) MSG_ABORT("not real function");
+    if (!isreal()) MSG_ABORT("not real function");
     if (CompD[i] == nullptr) alloc(i);
     return *CompD[i];
 }
 template <int D> //NB: should return CompC in the future
 FunctionTree<D, double>  &CompFunction<D>::imag(int i) {
     MSG_ABORT("Must choose real or complex");
-    if (!iscomplex) MSG_ABORT("not complex function");
+    if (!iscomplex()) MSG_ABORT("not complex function");
     return *CompD[i];
 }
 
 template <int D>
 FunctionTree<D, ComplexDouble>  &CompFunction<D>::complex(int i) {
-    if (!iscomplex) MSG_ABORT("not marked as a complex function");
+    if (!iscomplex()) MSG_ABORT("not marked as a complex function");
     if (CompC[i] == nullptr) alloc(i);
     return *CompC[i];
 }
 
 template <int D>
 const FunctionTree<D, double> &CompFunction<D>::real(int i) const {
-    if (!isreal) MSG_ABORT("not real function");
+    if (!isreal()) MSG_ABORT("not real function");
     return *CompD[i];
 }
 template <int D> //NB: should use complex or real
 const FunctionTree<D, double> &CompFunction<D>::imag(int i) const {
     MSG_ABORT("Must choose real or complex");
-    if (!iscomplex) MSG_ABORT("not complex function");
+    if (!iscomplex()) MSG_ABORT("not complex function");
     return *CompD[i];
 }
 template <int D>
 const FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) const {
-    if (!iscomplex) MSG_ABORT("not marked as a complex function");
+    if (!iscomplex()) MSG_ABORT("not marked as a complex function");
     return *CompC[i];
 }
 
  /* for backwards compatibility */
 template <int D>
 void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
-      isreal = 1;
+      func_ptr->isreal = 1;
       if (CompD[i] != nullptr) delete CompD[i];
-      if (iscomplex) MSG_ERROR("cannot write real tree into complex function");
       CompD[i] = tree;
       if (tree != nullptr) {
-          Ncomp = std::max(Ncomp, i + 1);
-      } else {Ncomp = std::min(Ncomp, i);}
+          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+      } else {func_ptr->Ncomp = std::min(Ncomp(), i);}
 }
-    /*
+
 template <int D>
-void CompFunction<D>::set(FunctionTree<D, ComplexDouble> *tree, int i) {
-      if (CompC[i] != nullptr) delete CompD[i];
-      if (isreal) MSG_ERROR("cannot write comlex tree into complex function");
+void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
+      func_ptr->iscomplex = 1;
+      if (CompC[i] != nullptr) delete CompC[i];
       CompC[i] = tree;
       if (tree != nullptr) {
-          iscomplex = 1;
-          Ncomp = std::max(Ncomp, i + 1);
-      } else {Ncomp = std::min(Ncomp, i);}
-      } */
+          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+      } else {func_ptr->Ncomp = std::min(Ncomp(), i);}
+}
 
 /** @brief In place addition.
  *
@@ -324,9 +334,19 @@ void CompFunction<D>::set(FunctionTree<D, ComplexDouble> *tree, int i) {
  */
 template <int D>
 void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
-    for (int i = 0; i < inp.Ncomp; i++) {
-        if (i >= Ncomp) alloc(i);
-        if (isreal) {
+    if (Ncomp()<inp.Ncomp()){
+        func_ptr->data = inp.func_ptr->data;
+        alloc(inp.Ncomp()-1);
+        for (int i = 0; i < inp.Ncomp(); i++) {
+            if (inp.isreal()) {
+                CompD[i]->setZero();
+            } else {
+                CompC[i]->setZero();
+            }
+        }
+    }
+    for (int i = 0; i < inp.Ncomp(); i++) {
+        if (inp.isreal()) {
             CompD[i]->add_inplace(c.real(),*inp.CompD[i]);
         } else {
             CompC[i]->add_inplace(c,*inp.CompC[i]);
@@ -339,8 +359,8 @@ template <int D>
 int CompFunction<D>::crop(double prec) {
     if (prec < 0.0) return 0;
     int nChunksremoved = 0;
-    for (int i = 0; i < Ncomp; i++) {
-        if (isreal) {
+    for (int i = 0; i < Ncomp(); i++) {
+        if (isreal()) {
             nChunksremoved += CompD[i]->crop(prec, 1.0, false);
         } else {
             nChunksremoved += CompC[i]->crop(prec, 1.0, false);
@@ -354,8 +374,8 @@ template <int D>
 void CompFunction<D>::rescale(ComplexDouble c) {
     bool need_to_rescale = not(isShared()) or mpi::share_master();
     if (need_to_rescale) {
-        for (int i = 0; i < Ncomp; i++) {
-            if (isreal) {
+        for (int i = 0; i < Ncomp(); i++) {
+            if (isreal()) {
                 CompD[i]->rescale(c.real());
             } else {
                 CompC[i]->rescale(c);
@@ -378,13 +398,12 @@ template class CompFunction<3>;
  */
   template <int D>
   void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp) {
-      out->data = inp.data;
-      for (int i = 0; i < inp.Ncomp; i++) {
-          if (inp.isreal) {
-              delete out->CompD[i];
+      out->func_ptr->data = inp.func_ptr->data;
+      out->alloc(inp.Ncomp()-1);
+      for (int i = 0; i < inp.Ncomp(); i++) {
+          if (inp.isreal()) {
               inp.CompD[i]->deep_copy(out->CompD[i]);
           } else {
-              delete out->CompC[i];
               inp.CompC[i]->deep_copy(out->CompC[i]);
           }
       }
@@ -393,17 +412,16 @@ template class CompFunction<3>;
 
 /** @brief Deep copy
  *
- * Deep copy: meta data is copied along with the content of each component.
+ * Deep copy: the metadata (func_ptr->data) is copied along with the content of each component.
  */
   template <int D>
   void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp) {
-      out.data = inp.data;
-      for (int i = 0; i < inp.Ncomp; i++) {
-          if (inp.isreal) {
-              out.CompD[i] = nullptr;
+      out.func_ptr->data = inp.func_ptr->data;
+      out.alloc(inp.Ncomp()-1);
+      for (int i = 0; i < inp.Ncomp(); i++) {
+          if (inp.isreal()) {
               inp.CompD[i]->deep_copy(out.CompD[i]);
           } else {
-              out.CompC[i] = nullptr;
               inp.CompC[i]->deep_copy(out.CompC[i]);
           }
       }
@@ -415,7 +433,7 @@ template class CompFunction<3>;
  *
  */
 template <int D>
-void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec) {
+void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate) {
     std::vector<ComplexDouble> coefs(2);
     coefs[0] = a;
     coefs[1] = b;
@@ -424,7 +442,7 @@ void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDo
     funcs.push_back(inp_a);
     funcs.push_back(inp_b);
 
-    linear_combination(out, coefs, funcs, prec);
+    linear_combination(out, coefs, funcs, prec, conjugate);
 }
 
 /** @brief out = c_0*inp_0 + c_1*inp_1 + ... + c_N*inp_N
@@ -432,11 +450,21 @@ void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDo
  * OMP parallel, but not MPI parallel
  */
 template <int D>
-    void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec) {
+    void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate) {
     double thrs = MachineZero;
     bool need_to_add = not(out.isShared()) or mpi::share_master();
-    for (int comp = 0; comp < inp[0].Ncomp; comp++) {
-        if (inp[0].isreal) {
+    out.func_ptr->data = inp[0].func_ptr->data;
+    out.func_ptr->iscomplex = 0;
+    out.func_ptr->isreal = 1;
+    for (int i = 0; i < inp.size(); i++) {
+        if(inp[i].iscomplex()){
+            out.func_ptr->iscomplex = 1;
+            out.func_ptr->isreal = 0;
+        }
+    }
+    out = inp[0].paramCopy();
+    for (int comp = 0; comp < inp[0].Ncomp(); comp++) {
+        if (inp[0].isreal()) {
             FunctionTreeVector<D, double> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
@@ -450,28 +478,27 @@ template <int D>
                     } else {
                         mrcpp::add(prec, *out.CompD[comp], fvec);
                     }
+                } else if (out.isreal()) {
+                    out.CompD[comp]->setZero();
                 }
-            } else if (out.hasReal()) {
-                out.CompD[comp]->setZero();
             }
         } else {
             FunctionTreeVector<D, ComplexDouble> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
-                if (inp[i].data.conj) MSG_ERROR("conjugaison not implemented");
                 fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp]));
             }
             if (need_to_add) {
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
-                        build_grid(*out.CompC[comp], fvec);
-                        mrcpp::add(prec, *out.CompC[comp], fvec, 0);
+                       build_grid(*out.CompC[comp], fvec);
+                       mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
                     } else {
-                        mrcpp::add(prec, *out.CompC[comp], fvec);
+                        mrcpp::add(prec, *out.CompC[comp], fvec, -1, false, conjugate);
                     }
+                } else if (out.iscomplex()) {
+                    out.CompC[comp]->setZero();
                 }
-            } else if (out.hasReal()) {
-                out.CompC[comp]->setZero();
             }
         }
         mpi::share_function(out, 0, 9911, mpi::comm_share);
@@ -482,8 +509,8 @@ template <int D>
  *
  */
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms);
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate) {
+    multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms, conjugate);
 }
 
 
@@ -491,68 +518,67 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
  *
  */
 template <int D>
-void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms) {
+void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-    for (int comp = 0; comp < inp_a.Ncomp; comp++) {
-        if (inp_a.isreal and inp_b.isreal) {
+    out.func_ptr->data = inp_a.func_ptr->data;
+    for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
+        if (inp_a.isreal() and inp_b.isreal()) {
             delete out.CompD[comp];
             FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA());
             if (need_to_multiply) {
-                if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
-                if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
                 if (prec < 0.0) {
                     // Union grid
                     build_grid(*tree, *inp_a.CompD[comp]);
                     build_grid(*tree, *inp_b.CompD[comp]);
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate);
                 } else {
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate);
                 }
             }
             out.CompD[comp] = tree;
         } else {
+            out.func_ptr->iscomplex = 1;
+            out.func_ptr->isreal = 0;
             // if one of the input is real, we simply make a new complex copy of it
-            bool inp_aisReal = inp_a.isreal;
-            bool inp_bisReal = inp_b.isreal;
+            bool inp_aisReal = inp_a.isreal();
+            bool inp_bisReal = inp_b.isreal();
             if(inp_aisReal) {
                 inp_a.CompC[comp] = inp_a.CompD[comp]->CopyTreeToComplex();
-                inp_a.iscomplex = true;
-                inp_a.isreal = false;
+                inp_a.func_ptr->iscomplex = true;
+                inp_a.func_ptr->isreal = false;
             }
             if(inp_bisReal) {
                 inp_b.CompC[comp] = inp_b.CompD[comp]->CopyTreeToComplex();
-                inp_b.iscomplex = true;
-                inp_b.isreal = false;
+                inp_b.func_ptr->iscomplex = true;
+                inp_b.func_ptr->isreal = false;
             }
 
             delete out.CompC[comp];
             FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0]->getMRA());
             ComplexDouble coef = 1.0;
             if (need_to_multiply) {
-                if (out.iscomplex and inp_a.data.conj) MSG_ERROR("conjugaison not implemented");
-                if (out.iscomplex and inp_b.data.conj) MSG_ERROR("conjugaison not implemented");
                 if (prec < 0.0) {
                     // Union grid
                     build_grid(*tree, *inp_a.CompC[comp]);
                     build_grid(*tree, *inp_b.CompC[comp]);
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
                 } else {
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms);
+                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
                 }
             }
             out.CompC[comp] = tree;
             // restore original tree
             if(inp_aisReal) {
                 delete inp_a.CompC[comp];
-                inp_a.iscomplex = false;
-                inp_a.isreal = true;
+                inp_a.func_ptr->iscomplex = false;
+                inp_a.func_ptr->isreal = true;
             }
             if(inp_bisReal) {
                 delete inp_b.CompC[comp];
-                inp_b.iscomplex = false;
-                inp_b.isreal = true;
+                inp_b.func_ptr->iscomplex = false;
+                inp_b.func_ptr->isreal = true;
             }
         }
     }
@@ -565,10 +591,10 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
  *  Only one component is multiplied
  */
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine) {
-    if (inp_a.Ncomp > 1) MSG_ERROR("Not implemented");
-    if (inp_a.isreal != 1) MSG_ERROR("Not implemented");
-    multiply(out, *inp_a.CompD[0], f, prec, nrefine);
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
+    if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
+    if (inp_a.isreal() != 1) MSG_ERROR("Not implemented");
+    multiply(out, *inp_a.CompD[0], f, prec, nrefine, conjugate);
 }
 
 /** @brief out = inp_a * f
@@ -576,10 +602,10 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  *  Only one component is multiplied
  */
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine) {
-    if (inp_a.Ncomp > 1) MSG_ERROR("Not implemented");
-    if (inp_a.iscomplex != 1) MSG_ERROR("Not implemented");
-    multiply(out, *inp_a.CompC[0], f, prec, nrefine);
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
+    if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
+    if (inp_a.iscomplex() != 1) MSG_ERROR("Not implemented");
+    multiply(out, *inp_a.CompC[0], f, prec, nrefine, conjugate);
 
 }
 
@@ -587,21 +613,21 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  *
  */
 template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine) {
+void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
     CompFunction<D> func_a(1);
-    func_a.isreal = 1;
-    func_a.iscomplex = 0;
+    func_a.func_ptr->isreal = 1;
+    func_a.func_ptr->iscomplex = 0;
     func_a.CompD[0] = &inp_a;
-    multiply(out, func_a, f, prec, nrefine);
+    multiply(out, func_a, f, prec, nrefine, conjugate);
     func_a.CompD[0] = nullptr;
 }
 template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine) {
+void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
     CompFunction<D> func_a(1);
-    func_a.isreal = 0;
-    func_a.iscomplex = 1;
+    func_a.func_ptr->isreal = 0;
+    func_a.func_ptr->iscomplex = 1;
     func_a.CompC[0] = &inp_a;
-    multiply(out, func_a, f, prec, nrefine);
+    multiply(out, func_a, f, prec, nrefine, conjugate);
     func_a.CompC[0] = nullptr;
 }
 
@@ -615,15 +641,14 @@ void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, Repre
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
     ComplexDouble dotprod = 0.0;
-    if (bra.data.conj or ket.data.conj) MSG_ERROR("dot with conjugaison not implemented");
-    for (int comp = 0; comp < bra.Ncomp; comp++) {
-          if (bra.isreal and ket.isreal) {
+    for (int comp = 0; comp < bra.Ncomp(); comp++) {
+          if (bra.isreal() and ket.isreal()) {
               dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]);
           } else {
               dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
           }
     }
-    if (bra.isreal and ket.isreal) {
+    if (bra.isreal() and ket.isreal()) {
         return dotprod.real();
     } else {
         return dotprod;
@@ -638,9 +663,8 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
 template <int D>
 double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
     double dotprod = 0.0;
-    if (bra.data.conj or ket.data.conj) MSG_ERROR("dot with conjugaison not implemented");
-    for (int comp = 0; comp < bra.Ncomp; comp++) {
-          if (bra.isreal and ket.isreal) {
+    for (int comp = 0; comp < bra.Ncomp(); comp++) {
+          if (bra.isreal() and ket.isreal()) {
               dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
           } else {
               dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
@@ -651,9 +675,9 @@ double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
 
 void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
-    out.isreal = 1;
-    out.iscomplex = 0;
-    if(out.Ncomp < 1) out.alloc(0);
+    out.func_ptr->isreal = 1;
+    out.func_ptr->iscomplex = 0;
+    if(out.Ncomp() < 1) out.alloc(0);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
@@ -661,9 +685,9 @@ void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, d
 // template <int D, typename T>
 void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r)> f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
-    out.isreal = 0;
-    out.iscomplex = 1;
-    if(out.Ncomp < 1) out.alloc(0);
+    out.func_ptr->isreal = 0;
+    out.func_ptr->iscomplex = 1;
+    if(out.Ncomp() < 1) out.alloc(0);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
@@ -671,18 +695,18 @@ void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
-    out.isreal = 1;
-    out.iscomplex = 0;
-    if(out.Ncomp < 1) out.alloc(0);
+    out.func_ptr->isreal = 1;
+    out.func_ptr->iscomplex = 0;
+    if(out.Ncomp() < 1) out.alloc(0);
     if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
 }
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
-    out.isreal = 0;
-    out.iscomplex = 1;
-    if(out.Ncomp < 1) out.alloc(0);
+    out.func_ptr->isreal = 0;
+    out.func_ptr->iscomplex = 1;
+    if(out.Ncomp() < 1) out.alloc(0);
     if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
  }
@@ -692,11 +716,11 @@ void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, d
 
 CompFunctionVector::CompFunctionVector(int N):
     std::vector<CompFunction<3>>(N) {
-    for (int i = 0; i < N; i++) (*this)[i].rank = i;
+    for (int i = 0; i < N; i++) (*this)[i].func_ptr->rank = i;
     vecMRA = defaultCompMRA<3>;
 }
 void CompFunctionVector::distribute() {
-    for (int i = 0; i < this->size(); i++) (*this)[i].rank = i;
+    for (int i = 0; i < this->size(); i++) (*this)[i].func_ptr->rank = i;
 }
 
 
@@ -710,6 +734,11 @@ void CompFunctionVector::distribute() {
  */
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
 
+    if (Phi[0].iscomplex() ){
+        rotate_cplx(Phi, U, Psi, prec);
+        return;
+    }
+
     // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
     // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
     // MPI version does not use OMP yet, Serial version uses OMP
@@ -719,7 +748,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
     if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
 
-    // 1) make union tree without coefficients
+    // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff)
     FunctionTree<3> refTree(*Phi.vecMRA);
     mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
 
@@ -733,73 +762,9 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     // get a list of all nodes in union tree, identified by their serialIx indices
     refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
     int max_n = indexVec_ref.size();
-
-   // 2) We work with real numbers only. Make real blocks for U matrix
-    bool UhasReal = false;
-    bool UhasImag = false;
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            if (std::abs(U(i, j).real()) > 10*MachineZero) UhasReal = true;
-            if (std::abs(U(i, j).imag()) > 10*MachineZero) UhasImag = true;
-        }
-    }
-
-    IntVector PsihasReIm = IntVector::Zero(2);
-    for (int j = 0; j < N; j++) {
-        if (!mpi::my_func(j)) continue;
-        PsihasReIm[0] = (Phi[j].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[j].hasImag()) ? 1 : 0;
-    }
-    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    if (not PsihasReIm[0] and not PsihasReIm[1]) {
-        return; // do nothing
-    }
-
-    bool makeReal = (UhasReal and PsihasReIm[0]) or (UhasImag and PsihasReIm[1]);
-    bool makeImag = (UhasReal and PsihasReIm[1]) or (UhasImag and PsihasReIm[0]);
-
-    for (int j = 0; j < M; j++) {
-        if (!mpi::my_func(j)) continue;
-        if (not makeReal and Psi[j].hasReal()) Psi[j].free();
-        if (not makeImag and Psi[j].hasImag()) Psi[j].free();
-    }
-
-    if (not makeReal and not makeImag) { return; }
-
-    int Neff = N;               // effective number of input orbitals
-    int Meff = M;               // effective number of output orbitals
-    if (makeImag) Neff = 2 * N; // Imag and Real treated independently. We always use real part of U
-    if (makeImag) Meff = 2 * M; // Imag and Real treated independently. We always use real part of U
-
-    IntVector conjMat = IntVector::Zero(Neff);
-    for (int j = 0; j < Neff; j++) {
-        if (!mpi::my_func(j % N)) continue;
-        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
-    }
-    mpi::allreduce_vector(conjMat, mpi::comm_wrk);
-
-    // we make a real matrix = U,  but organized as one or four real blocks
-    // out_r = U_rr*in_r - U_ir*in_i*conjMat
-    // out_i = U_ri*in_r - U_ii*in_i*conjMat
-    // the first index of U is the one used on input Phi
-    DoubleMatrix Ureal(Neff, Meff); // four blocks, for rr ri ir ii
-    for (int j = 0; j < Neff; j++) {
-        for (int i = 0; i < Meff; i++) {
-            double sign = 1.0;
-            if (j < N and i < M) {
-                // real U applied on real Phi
-                Ureal(j, i) = U.real()(j % N, i % M);
-            } else if (j >= N and i >= M) {
-                // real U applied on imag Phi
-                Ureal(j, i) = conjMat[j] * U.real()(j % N, i % M);
-            } else if (j < N and i >= M) {
-                // imag U applied on real Phi
-                Ureal(j, i) = U.imag()(j % N, i % M);
-            } else {
-                // imag U applied on imag Phi
-                Ureal(j, i) = -1.0 * conjMat[j] * U.imag()(j % N, i % M);
-            }
-        }
+    for (int i = 0; i < M; i++) {
+        Psi[i].func_ptr->data.isreal = 1;
+        Psi[i].func_ptr->data.iscomplex = 0;
     }
 
     // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
@@ -809,10 +774,10 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     BankAccount nodesRotated;         // to put the rotated nodes
 
     // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::vector<std::vector<double *>> coeffVec(N);
+    std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
     std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
                                                     // orbital given the node index in the reference tree
     if (serial) {
 
@@ -821,29 +786,16 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j + N]) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
+            Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec[j]) {
+                orb2node[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVec[ix].push_back(j);
             }
         }
     } else { // MPI case
-
         // send own nodes to bank, identifying them through the serialIx of refTree
         save_nodes(Phi, refTree, nodesPhi);
         mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
@@ -851,18 +803,18 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
     // 4) rotate all the nodes
     IntMatrix split_serial;                             // in the serial case all split are stored in one array
-    std::vector<std::vector<double *>> coeffpVec(Meff); // to put pointers to the rotated coefficient for each orbital in serial case
-    std::vector<std::map<int, int>> ix2coef(Meff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    std::vector<std::vector<double *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
     int csize;                                          // size of the current coefficients (different for roots and branches)
     std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
     // j indices are for unrotated orbitals, i indices are for rotated orbitals
     if (serial) {
         std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
-        split_serial.resize(Meff, max_n); // not use in the MPI case
+        split_serial.resize(M, max_n); // not used in the MPI case
         for (int n = 0; n < max_n; n++) {
             int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
             ix2coef_ref[node_ix] = n;
-            for (int i = 0; i < Meff; i++) split_serial(i, n) = 1;
+            for (int i = 0; i < M; i++) split_serial(i, n) = 1;
         }
 
         std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
@@ -896,9 +848,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             };
 
             std::vector<int> orbiVec;
-            for (int i = 0; i < Meff; i++) { // loop over all rotated orbitals
-                if (not makeReal and i < M) continue;
-                if (not makeImag and i >= M) continue;
+            for (int i = 0; i < M; i++) { // loop over all rotated orbitals
                 if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
@@ -906,7 +856,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             // 4c) rotate this node
             DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
             for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = Ureal(orbjVec[j], orbiVec[i]); }
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); }
             }
             DoubleMatrix rotatedCoeff(csize, orbiVec.size());
             // HERE IT HAPPENS!
@@ -949,12 +899,12 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     } else { // MPI case
 
         // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
-        std::vector<double> split(Meff, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibilty with bank
-        std::vector<double> needsplit(Meff, 1.0); // which orbitals need splitting
+        std::vector<double> split(M, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibility with bank
+        std::vector<double> needsplit(M, 1.0); // which orbitals need splitting
         BankAccount nodeSplits;
         mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
 
-        DoubleMatrix coeffBlock(sizecoeff, Neff);
+        DoubleMatrix coeffBlock(sizecoeff, N);
         max_ix++; // largest node index + 1. to store rotated orbitals with different id
         TaskManager tasks(max_n);
         for (int nn = 0; nn < max_n; nn++) {
@@ -966,32 +916,23 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             if (parentid == -1) {
                 // root node, split if output needed
                 for (int i = 0; i < M; i++) {
-                    if (makeReal)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
-                }
-                for (int i = N; i < Meff; i++) {
-                    if (makeImag)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
+                         split[i] = 1.0;
                 }
                 csize = sizecoeff;
             } else {
                 // note that it will wait until data is available
-                nodeSplits.get_data(parentid, Meff, split.data());
+                nodeSplits.get_data(parentid, M, split.data());
                 csize = sizecoeffW;
             }
             std::vector<int> orbiVec;
             std::vector<int> orbjVec;
-            for (int i = 0; i < Meff; i++) {  // loop over rotated orbitals
+            for (int i = 0; i < M; i++) {  // loop over rotated orbitals
                 if (split[i] < 0.0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
 
             // 4b) rotate this node
-            DoubleMatrix coeffBlock(csize, Neff); // largest possible used size
+            DoubleMatrix coeffBlock(csize, N); // largest possible used size
             nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
             coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
 
@@ -1001,7 +942,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
             for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
                 for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
-                    Un(j, i) = Ureal(orbjVec[j], orbiVec[i]);
+                    Un(j, i) = std::real(U(orbjVec[j], orbiVec[i]));
                 }
             }
 
@@ -1019,12 +960,12 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                 if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
                 nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
             }
-            nodeSplits.put_data(indexVec_ref[n], Meff, needsplit.data());
+            nodeSplits.put_data(indexVec_ref[n], M, needsplit.data());
         }
         mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
     }
 
-    // 5) reconstruct trees using rotated nodes.
+     // 5) reconstruct trees using rotated nodes.
 
     // only serial case can use OMP, because MPI cannot be used by threads
     if (serial) {
@@ -1032,23 +973,304 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         // operation is writing the coefficient into the tree)
 
 #pragma omp parallel for schedule(static)
-        for (int j = 0; j < Meff; j++) {
+        for (int j = 0; j < M; j++) {
             if (coeffpVec[j].size()==0) continue;
-            if (j < M) {
-                if (!Psi[j].hasReal()) Psi[j].alloc(0);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-            } else {
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(0);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+            Psi[j].alloc(0);
+            Psi[j].real().clear();
+            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+        }
+
+    } else { // MPI case
+
+        for (int j = 0; j < M; j++) {
+            if (not mpi::my_func(j)) continue;
+            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
+            std::vector<double *> coeffpVec; //
+            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
+            int ix = 0;
+            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
+                std::vector<int> nodeidVec;
+                double *dataVec; // will be allocated by bank
+                nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
+                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
+                int shift = 0;
+                for (int n = 0; n < nodeidVec.size(); n++) {
+                    assert(nodeidVec[n] - max_ix >= 0);                // unrotated nodes have been deleted
+                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
+                    ix2coef[nodeidVec[n] - max_ix] = ix++;
+                    csize = sizecoeffW;
+                    if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff;
+                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
+                    shift += csize;
+                }
             }
+            Psi[j].alloc(0);
+            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+
+            for (double *p : pointerstodelete) delete[] p;
+            pointerstodelete.clear();
         }
+    }
+}
 
+/** @brief Make a linear combination of functions
+ *
+ * Uses "local" representation: treats one node at a time.
+ * For each node, all functions are transformed simultaneously
+ * by a dense matrix multiplication.
+ * Phi input functions, Psi output functions
+ * Phi must be complex.
+ */
+void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
+
+    // The principle of this routine is that nodes for all orbitals are rotated one by one using matrix multiplication.
+    // The routine avoids moving data where possible; it manipulates pointers and indices instead.
+    // MPI version does not use OMP yet, Serial version uses OMP
+    // size of input is N, size of output is M
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
+    int N = Phi.size();
+    int M = Psi.size();
+    if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
+    if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
+
+    // 1) make union tree without coefficients. Note that the ref tree is always real (in fact it has no coeff)
+    FunctionTree<3> refTree(*Phi.vecMRA);
+    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+    std::vector<double> scalefac_ref;
+    std::vector<double *> coeffVec_ref; // not used!
+    std::vector<int> indexVec_ref;      // serialIx of the nodes
+    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
+    int max_ix;
+    // get a list of all nodes in union tree, identified by their serialIx indices
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    for (int j = 0; j < N; j++) {
+        if (!mpi::my_func(j)) continue;
+        if (Phi[j].isreal()) MSG_ABORT("This function only use complex input");
+    }
+
+    for (int i = 0; i < M; i++) {
+        Psi[i].func_ptr->data.isreal = 0;
+        Psi[i].func_ptr->data.iscomplex = 1;
+    }
+
+    // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+
+    BankAccount nodesPhi;             // to put the original nodes
+    BankAccount nodesRotated;         // to put the rotated nodes
+
+    // used for serial only:
+    std::vector<std::vector<ComplexDouble *>> coeffVec(N);
+    std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
+    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
+                                                    // orbital given the node index in the reference tree
+    if (serial) {
+        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<double> scalefac;
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            Phi[j].complex().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec[j]) {
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
+            }
+        }
+    } else { // MPI case
+        // send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(Phi, refTree, nodesPhi);
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+    }
+
+    // 4) rotate all the nodes
+    IntMatrix split_serial;                             // in the serial case all split are stored in one array
+    std::vector<std::vector<ComplexDouble *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    int csize;                                          // size of the current coefficients (different for roots and branches)
+    std::vector<ComplexMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+   // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    if (serial) {
+        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
+        split_serial.resize(M, max_n); // not used in the MPI case
+        for (int n = 0; n < max_n; n++) {
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            ix2coef_ref[node_ix] = n;
+            for (int i = 0; i < M; i++) split_serial(i, n) = 1;
+        }
+        std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
+        // assumes the nodes are ordered such that parents are treated before children. BFS or DFS ok.
+        // NB: the n must be traversed approximately in the right order: thread n may have to wait until some other
+        // preceding n is finished.
+#pragma omp parallel for schedule(dynamic)
+        for (int n = 0; n < max_n; n++) {
+            int csize;
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n
+            std::vector<int> orbjVec; // to remember which orbital corresponds to each column of coeffBlock
+            if (node2orbVec[node_ix].size() <= 0) continue;
+            csize = sizecoeffW;
+            if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff
+
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbjVec.push_back(j);
+            }
+
+            // 4b) make a list of rotated orbitals needed for this node
+            // OMP must wait until parent is ready
+            while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) {
+#pragma omp flush
+            };
+
+            std::vector<int> orbiVec;
+            for (int i = 0; i < M; i++) { // loop over all rotated orbitals
+                if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4c) rotate this node
+            ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
+            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); }
+            }
+            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
+            // HERE IT HAPPENS!
+            // TODO: conjugation
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 4d) store and make rotated node pointers
+            // for now we allocate in buffer, in future could be directly allocated in the final trees
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // make all norms:
+            for (int i = 0; i < orbiVec.size(); i++) {
+                // check if parent must be split
+                if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) {
+                    // mark this node for this orbital for later split
+#pragma omp critical
+                    {
+                        ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size();
+                        coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers
+                    }
+                    // check norms for split
+                    double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
+                    int kstart = 0;
+                    if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
+                    for (int k = kstart; k < csize; k++) wnorm += std::real(rotatedCoeff(k, i) * std::conj(rotatedCoeff(k, i)));
+                    if (thres < wnorm or prec < 0)
+                        split_serial(orbiVec[i], n) = 1;
+                    else
+                        split_serial(orbiVec[i], n) = 0;
+                } else {
+                    ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used
+                    split_serial(orbiVec[i], n) = 0;          // do not split if parent does not need to be split
+                }
+            }
+            nodeReady[n] = 1;
+#pragma omp critical
+            {
+                // this ensures that rotatedCoeff is not deleted, when getting out of scope
+                rotatedCoeffVec.push_back(std::move(rotatedCoeff));
+            }
+        }
     } else { // MPI case
 
-        for (int j = 0; j < Meff; j++) {
-            if (not mpi::my_func(j % M)) continue;
+        // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
+        std::vector<double> split(M, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibility with bank
+        std::vector<double> needsplit(M, 1.0); // which orbitals need splitting
+        BankAccount nodeSplits;
+        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
+
+        ComplexMatrix coeffBlock(sizecoeff, N);
+        max_ix++; // largest node index + 1. to store rotated orbitals with different id
+        TaskManager tasks(max_n);
+        for (int nn = 0; nn < max_n; nn++) {
+            int n = tasks.next_task();
+            if (n < 0) break;
+            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
+            // 4a) make list of orbitals that should split the parent node, i.e. include this node
+            int parentid = parindexVec_ref[n];
+            if (parentid == -1) {
+                // root node, split if output needed
+                for (int i = 0; i < M; i++) {
+                    split[i] = 1.0;
+                }
+                csize = sizecoeff;
+            } else {
+                // note that it will wait until data is available
+                nodeSplits.get_data(parentid, M, split.data());
+                csize = sizecoeffW;
+            }
+            std::vector<int> orbiVec;
+            std::vector<int> orbjVec;
+            for (int i = 0; i < M; i++) {  // loop over rotated orbitals
+                if (split[i] < 0.0) continue; // parent node has too small wavelets
+                orbiVec.push_back(i);
+            }
+
+            // 4b) rotate this node
+            ComplexMatrix coeffBlock(csize, N); // largest possible used size
+            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
+            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
+
+            // chunk of U, with reorganized indices (native complex, so no separate real and imag blocks):
+            ComplexMatrix Un(orbjVec.size(), orbiVec.size());
+            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
+
+            for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
+                for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
+                    Un(j, i) = U(orbjVec[j], orbiVec[i]);
+                }
+            }
+
+            // HERE IT HAPPENS
+            // TODO conjugation
+            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix multiplication
+
+            // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while
+            // in cache).
+            for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals
+                needsplit[orbiVec[i]] = -1.0;          // default, do not split
+                // check if this node/orbital needs further refinement
+                double wnorm = 0.0;
+                int kwstart = csize - sizecoeffW; // do not include scaling
+                for (int k = kwstart; k < csize; k++) wnorm += std::real(rotatedCoeff.col(i)[k] * std::conj(rotatedCoeff.col(i)[k]));
+                if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
+                nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
+            }
+            nodeSplits.put_data(indexVec_ref[n], M, needsplit.data());
+        }
+        mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
+    }
+
+     // 5) reconstruct trees using rotated nodes.
+
+    // only serial case can use OMP, because MPI cannot be used by threads
+    if (serial) {
+        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
+        // operation is writing the coefficient into the tree)
+
+#pragma omp parallel for schedule(static)
+        for (int j = 0; j < M; j++) {
+           if (coeffpVec[j].size()==0) continue;
+            Psi[j].alloc(0); //All data is stored in coeffpVec[j]
+            Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+       }
+    } else { // MPI case
+        for (int j = 0; j < M; j++) {
+            if (not mpi::my_func(j)) continue;
             // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
             std::vector<double *> coeffpVec; //
             std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
@@ -1077,9 +1299,9 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                 Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             } else {
                 // Imag part
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(0);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+                if (!Psi[j].hasImag()) Psi[j].alloc(0);
+                Psi[j].imag().clear();
+                Psi[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
             }
             for (double *p : pointerstodelete) delete[] p;
             pointerstodelete.clear();
@@ -1193,9 +1415,6 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
         return out; // do nothing
     }
 
-    int Neff = N;
-    if (PsihasReIm[1]) Neff = 2 * N; // Imag and Real treated independently. We always treat real part of Psi
-
     std::vector<double> scalefac_ref;
     std::vector<double *> coeffVec_ref; // not used!
     std::vector<int> indexVec_ref;      // serialIx of the nodes
@@ -1213,10 +1432,10 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     BankAccount nodesMultiplied; // to put the multiplied nodes
 
     // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
+    std::vector<std::vector<double *>> coeffVec(N);
+    std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
     std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
                                                     // orbital given the node index in the reference tree
     if (serial) {
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
@@ -1257,9 +1476,9 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     }
 
     // 3) mutiply for each node
-    std::vector<std::vector<double *>> coeffpVec(Neff); // to put pointers to the multiplied coefficient for each orbital in serial case
+    std::vector<std::vector<double *>> coeffpVec(N); // to put pointers to the multiplied coefficient for each orbital in serial case
     std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
-    std::vector<std::map<int, int>> ix2coef(Neff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    std::vector<std::map<int, int>> ix2coef(N);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
     DoubleVector NODEP = DoubleVector::Zero(nCoefs);
     DoubleVector NODEF = DoubleVector::Zero(nCoefs);
 
@@ -1377,7 +1596,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
             }
 
             // 3b) fetch all orbitals at this node
-            DoubleMatrix coeffBlock(nCoefs, Neff); // largest possible used size
+            DoubleMatrix coeffBlock(nCoefs, N); // largest possible used size
             std::vector<int> orbjVec;
             nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
             coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
@@ -1411,7 +1630,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
         // operation is writing the coefficient into the tree)
 
 #pragma omp parallel for schedule(static)
-        for (int j = 0; j < Neff; j++) {
+        for (int j = 0; j < N; j++) {
             if (j < N) {
                 if (Phi[j].hasReal()) {
                     out[j].alloc(0);
@@ -1422,18 +1641,18 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                     out[j].real().calcSquareNorm();
                 }
             } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(0);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
+                if (Phi[j].hasImag()) {
+                    out[j].alloc(0);
+                    out[j].imag().clear();
+                    out[j].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
                     out[j].imag().mwTransform(BottomUp);
                     out[j].imag().calcSquareNorm();
                 }
             }
         }
     } else {
-        for (int j = 0; j < Neff; j++) {
-            if (not mpi::my_func(j % N) and not all) continue;
+        for (int j = 0; j < N; j++) {
+            if (not mpi::my_func(j) and not all) continue;
             // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
             std::vector<double *> coeffpVec; //
             std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx in refTree
@@ -1467,14 +1686,14 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                     if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
                 }
             } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(0);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    out[j % N].imag().mwTransform(BottomUp);
-                    out[j % N].imag().calcSquareNorm();
-                    // out[j % N].imag().crop(prec, 1.0, false);
-                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
+                if (Phi[j].hasImag()) {
+                    out[j].alloc(0);
+                    out[j].imag().clear();
+                    out[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
+                    out[j].imag().mwTransform(BottomUp);
+                    out[j].imag().calcSquareNorm();
+                    // out[j].imag().crop(prec, 1.0, false);
+                    if (nrefine > 0) Phi[j].imag().crop(prec, 1.0, false);
                 }
             }
 
@@ -1522,12 +1741,10 @@ ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi) {
  * MPI: Rank distribution of output vector is the same as input vector
  *
  */
-ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
-    // NB: must be spinseparated at this point!
-
+ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
     int N = BraKet.size();
     ComplexMatrix S = ComplexMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    DoubleMatrix Sreal = S.real();
     MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
 
     // 1) make union tree without coefficients
@@ -1548,9 +1765,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     int max_n = indexVec_ref.size();
 
     // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::vector<std::vector<ComplexDouble *>> coeffVec(N);
     std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
                                                      // the orbital given the node index in the reference tree
 
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
@@ -1564,27 +1781,165 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
+            BraKet[j].complex().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2node[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVec[ix].push_back(j);
+            }
+        }
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(BraKet, refTree, nodesBraKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    // 3) make dot product for all the nodes and accumulate into S
+
+    int ibank = 0;
+#pragma omp parallel if (serial)
+    {
+    ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
+
+#pragma omp for schedule(dynamic)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        int csize;
+        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+        std::vector<int> orbVec;       // identifies which orbitals use this node
+        if (serial and node2orbVec[node_ix].size() <= 0) continue;
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+
+        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+        if (serial) {
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            if (parindexVec_ref[n] < 0) shift = 0;
+            ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2node[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                orbVec.push_back(j);
+            }
+            if (orbVec.size() > 0) {
+                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
+                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
                 }
             }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
+        } else { // MPI case
+            ComplexMatrix coeffBlock(csize, N);
+            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+            if (orbVec.size() > 0) {
+                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
+                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                for (int i = 0; i < orbVec.size(); i++) {
+                    for (int j = 0; j < orbVec.size(); j++) {
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
+                        S(orbVec[i], orbVec[j]) += S_temp(i, j);
+                    }
                 }
             }
         }
+    }
+    if (serial) {
+#pragma omp critical
+        for (int i = 0; i < N; i++) {
+            for (int j = 0; j < N; j++) {
+                S(i, j) += S_omp(i, j);
+            }
+        }
+    }
+
+    }
+    IntVector conjMat = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
+        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
+    }
+    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
+
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j <= i; j++) {
+            if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact Hermitian symmetry
+        }
+    }
+
+    // Assumes linearity: result is sum of all nodes contributions
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
+    // NB: should be spinseparated at this point!
+    if (BraKet[0].iscomplex() ){
+        return calc_overlap_matrix_cplx(BraKet);
+   }
+
+    int N = BraKet.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, N);
+    DoubleMatrix Sreal = S.real();
+    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
+
+    // 1) make union tree without coefficients
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double> scalefac;
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    int max_ix;                       // largest index value (not used here)
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVec(N);
+    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
+                                                     // the orbital given the node index in the reference tree
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+    mrcpp::BankAccount nodesBraKet;
+
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2node[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVec[ix].push_back(j);
+            }
+        }
     } else { // MPI case
         // 2) send own nodes to bank, identifying them through the serialIx of refTree
         save_nodes(BraKet, refTree, nodesBraKet);
@@ -1594,7 +1949,11 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     // 3) make dot product for all the nodes and accumulate into S
 
     int ibank = 0;
-#pragma omp parallel for schedule(dynamic) if (serial)
+#pragma omp parallel if (serial)
+    {
+    ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
+
+#pragma omp for schedule(dynamic)
     for (int n = 0; n < max_n; n++) {
         if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
         int csize;
@@ -1617,23 +1976,20 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 orbVec.push_back(j);
             }
             if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
                             continue;
-                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
+                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
             }
         } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
+            DoubleMatrix coeffBlock(csize, N);
             nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
 
             if (orbVec.size() > 0) {
@@ -1642,15 +1998,25 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0] != 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
                             continue;
-                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        S(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
             }
         }
+    }
+    if (serial) {
+#pragma omp critical
+        for (int i = 0; i < N; i++) {
+            for (int j = 0; j < N; j++) {
+                S(i, j) += S_omp(i, j);
+            }
+        }
+    }
+
     }
     IntVector conjMat = IntVector::Zero(N);
     for (int i = 0; i < N; i++) {
@@ -1661,8 +2027,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
 
     for (int i = 0; i < N; i++) {
         for (int j = 0; j <= i; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N);
-            S.imag()(i, j) = conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
             if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri
         }
     }
@@ -1718,7 +2082,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                                                         // the orbital given the node index in the reference tree
     mrcpp::BankAccount nodesBra;
     mrcpp::BankAccount nodesKet;
-
     // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
     if (serial) {
         // 2) make list of all coefficients, and their reference indices
@@ -1819,9 +2182,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
-                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
                             continue;
                         // must ensure that threads are not competing
                         double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
@@ -1847,9 +2210,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].data.n1[0] != Ket[orbVecKet[j] % M].data.n1[0] and
-                            Bra[orbVecBra[i] % N].data.n1[0] != 0 and
-                            Ket[orbVecKet[j] % M].data.n1[0] != 0)
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
                             continue;
                         Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
@@ -1985,9 +2348,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -2007,9 +2370,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].data.n1[0] != BraKet[orbVec[j] % N].data.n1[0] and
-                            BraKet[orbVec[i] % N].data.n1[0] != 0 and
-                            BraKet[orbVec[j] % N].data.n1[0]!= 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -2062,17 +2425,36 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
         if(my_func(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
     }
 }
+
+/** @brief Orthogonalize Bra against Ket in place: Bra -= (<Ket|Bra> / <Ket|Ket>) * Ket, for every component of Bra
+ * @param[in] prec: not used by this implementation. @param[in,out] Bra: function that is modified. @param[in] Ket: function projected out.
+ * @details Assumes dot(Bra, Ket) returns <Bra|Ket>, so the coefficient <Ket|Bra> is its conjugate. A zero-norm Ket is not guarded against. */
+template <int D>
+void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket) {
+    const double sq_norm = Ket.squaredNorm();
+    const ComplexDouble coef = -std::conj(dot(Bra, Ket)) / sq_norm; // -<Ket|Bra>/<Ket|Ket>; the unconjugated overlap is only correct when it is real
+    for (int i = 0; i < Bra.Ncomp(); i++) {
+        if (Bra.isreal()) {
+            Bra.CompD[i]->add_inplace(coef.real(), *Ket.CompD[i]); // real Bra: only the real part is applied, and the real trees of Ket are read
+        } else {
+            Bra.CompC[i]->add_inplace(coef, *Ket.CompC[i]);
+        }
+    }
+}
+
 template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket);
 template void project(CompFunction<3>& out, RepresentableFunction<3, double>& f, double prec);
 template void project(CompFunction<3>& out, RepresentableFunction<3, ComplexDouble>& f, double prec);
-template void multiply(CompFunction<3> &out, CompFunction<3> inp_a, CompFunction<3> inp_b, double prec, bool absPrec, bool useMaxNorms);
-template void multiply(CompFunction<3>& out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double>& f, double prec, int nrefine = 0);
-template void multiply(CompFunction<3>& out, FunctionTree<3, ComplexDouble> &inp_a, RepresentableFunction<3, ComplexDouble>& f, double prec, int nrefine = 0);
-template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
-template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, ComplexDouble> &f, double prec, int nrefine = 0);
+template void multiply(CompFunction<3> &out, CompFunction<3> inp_a, CompFunction<3> inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply(CompFunction<3>& out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double>& f, double prec, int nrefine = 0, bool conjugate);
+template void multiply(CompFunction<3>& out, FunctionTree<3, ComplexDouble> &inp_a, RepresentableFunction<3, ComplexDouble>& f, double prec, int nrefine = 0, bool conjugate);
+template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0, bool conjugate);
+template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate);
 template void deep_copy(CompFunction<3>* out, const CompFunction<3> &inp);
 template void deep_copy(CompFunction<3>& out, const CompFunction<3> &inp);
-template void add(CompFunction<3> &out, ComplexDouble a, CompFunction<3> inp_a, ComplexDouble b, CompFunction<3> inp_b, double prec);
+template void add(CompFunction<3> &out, ComplexDouble a, CompFunction<3> inp_a, ComplexDouble b, CompFunction<3> inp_b, double prec, bool conjugate);
+template void linear_combination(CompFunction<3> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<3>> &inp, double prec, bool conjugate);
 template double node_norm_dot(CompFunction<3> bra, CompFunction<3> ket);
+    template void orthogonalize(double prec, CompFunction<3> &Bra, CompFunction<3> &Ket);
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 0588d8fd0..7f857c914 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -11,7 +11,7 @@ template <int D>
 struct CompFunctionData {
     // additional data that describe the overall multicomponent function (defined by user):
     // occupancy, quantum number, norm, etc.
-    int Ncomp{1}; // number of components defined
+    int Ncomp{0}; // number of components defined
     int rank{-1}; // rank (index) if part of a vector
     int conj{0}; // soft conjugate (all components)
     int CompFn1{0};
@@ -59,7 +59,6 @@ template <int D> class TreePtr final {
             this->shared_mem_cplx = new mrcpp::SharedMemory<ComplexDouble>(mpi::comm_share, mpi::shared_memory_size);
 #endif
         }
-
     }
 
     ~TreePtr() {
@@ -70,6 +69,15 @@ template <int D> class TreePtr final {
             if (this->cplx[i] != nullptr) delete this->cplx[i];
         }
     }
+    CompFunctionData<D> data;
+    int& Ncomp = data.Ncomp; //number of components defined
+    int& rank = data.rank; // rank (index) if part of a vector
+    int& conj = data.conj; // soft conjugate
+    int& isreal = data.isreal; // T=double
+    int& iscomplex = data.iscomplex; // T=DoubleComplex
+    int& share = data.shared;
+    int* Nchunks = data.Nchunks;
+
     bool is_shared = false;
     friend class CompFunction<D>;
 protected:
@@ -92,27 +100,22 @@ template <int D> class CompFunction {
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
     virtual ~CompFunction() = default;
 
-    FunctionTree<D, double>* (&CompD)[4] = func_ptr->real; // so that we can use name CompD instead of func_ptr.real
-    FunctionTree<D, ComplexDouble>* (&CompC)[4] = func_ptr->cplx;
+//    FunctionTree<D, double>* (&CompD)[4]; //  = func_ptr->real so that we can use name CompD instead of func_ptr.real
+//    FunctionTree<D, ComplexDouble>* (&CompC)[4]; // = func_ptr->cplx
+    FunctionTree<D, double>** CompD; //  = func_ptr->real so that we can use name CompD instead of func_ptr.real
+    FunctionTree<D, ComplexDouble>** CompC; // = func_ptr->cplx
 
     std::string name;
 
     // additional data that describe each component (defined by user):
-    CompFunctionData<D> data;
-    int& Ncomp = data.Ncomp; //number of components defined
-    int& rank = data.rank; // rank (index) if part of a vector
-    int& conj = data.conj; // soft conjugate
-    int& isreal = data.isreal; // T=double
-    int& iscomplex = data.iscomplex; // T=DoubleComplex
-    int& share = data.shared;
-    int* Nchunks = data.Nchunks; // number of chunks of each component tree
-
-    // ComplexFunctions are only defined for D=3
-    // template <int D_ = D, typename std::enable_if<D_ == 3, int>::type = 0>
-     //CompFunction(ComplexFunction cplxfunc);
-    // template <int D_ = 3, typename std::enable_if<D_ == 3, int>::type = 0>
-     //operator ComplexFunction() const;
-    // CompFunction destructor
+    CompFunctionData<D> data() const {return func_ptr->data;}
+    int Ncomp() const {return func_ptr->data.Ncomp;} //number of components defined
+    int rank() const {return func_ptr->data.rank;} // rank (index) if part of a vector
+    int conj() const {return func_ptr->data.conj;} // soft conjugate
+    int isreal() const {return func_ptr->data.isreal;} // T=double
+    int iscomplex() const {return func_ptr->data.iscomplex;} // T=DoubleComplex
+    int share() const {return func_ptr->data.shared;}
+    int* Nchunks() const {return func_ptr->data.Nchunks;} // number of chunks of each component tree
 
     CompFunction paramCopy() const;
     ComplexDouble integrate() const;
@@ -120,8 +123,9 @@ template <int D> class CompFunction {
     double squaredNorm() const;
     void alloc(int i = 0);
     void setReal(FunctionTree<D, double> *tree, int i = 0);
-    void setRank(int i) {rank = i;};
-    const int getRank() const {return rank;};
+    void setCplx(FunctionTree<D, ComplexDouble> *tree, int i = 0);
+    void setRank(int i) {func_ptr->rank = i;};
+    const int getRank() const {return func_ptr->rank;};
     void add(ComplexDouble c, CompFunction<D> inp);
 
     int crop(double prec);
@@ -140,15 +144,13 @@ template <int D> class CompFunction {
     //NB: All below should be revised. Now only for backwards compatibility to ComplexFunction class
 
     void free(int type) {free();}
-    bool hasReal()  const {return isreal;}
-    bool hasImag()  const {return iscomplex;}
-    bool isShared() const {return data.shared;}
-    bool conjugate() const {return data.conj;}
-    CompFunction<D> dagger();
+    bool hasReal()  const {return isreal();}
+    bool hasImag()  const {return iscomplex();}
+    bool isShared() const {return share();}
+    bool conjugate() const {return conj();}
+    void dagger();
     FunctionTree<D, double> &imag(int i = 0); //does not make sense now
     const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
-
-protected:
     std::shared_ptr<mrcpp::TreePtr<D>> func_ptr;
 
 };
@@ -158,23 +160,23 @@ void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp);
 template <int D>
 void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp);
 template <int D>
-void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec);
+void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate = false);
 template <int D>
-void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec);
+void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 template <int D>
-void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false);
+void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false);
+void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
 template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0);
+void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
 template <int D>
@@ -185,6 +187,8 @@ template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec);
 template <int D>
 void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
+template <int D>
+void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket);
 
 class CompFunctionVector : public std::vector<CompFunction<3>> {
 public:
@@ -195,8 +199,9 @@ class CompFunctionVector : public std::vector<CompFunction<3>> {
 
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec = -1.0);
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
+void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
 void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
-CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
+CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, CompFunction<3> *Func = nullptr, int nrefine = 1, bool all = false);
 void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
 ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket);
 ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi);
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 7387ffdbc..bdd722015 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -265,13 +265,13 @@ bool my_func(int j) {
 
 /** @brief Test if function belongs to this MPI rank */
 bool my_func(const CompFunction<3>& func) {
-    return my_func(func.rank);
+    return my_func(func.rank()); // forward the rank read through the rank() accessor to the my_func(int) overload
 }
 
 
 /** @brief Test if function belongs to this MPI rank */
 bool my_func(CompFunction<3> *func) {
-    return my_func(func->rank);
+    return my_func(func->rank()); // forward the rank read through the rank() accessor to the my_func(int) overload
 }
 
 /** @brief Free all function pointers not belonging to this MPI rank */
@@ -332,15 +332,15 @@ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
 // send a component function with MPI
 void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
-    for (int i = 0; i < func.data.Ncomp; i++) {
+    for (int i = 0; i < func.Ncomp(); i++) { // first pass: refresh the chunk counts stored in the header
         //make sure that Nchunks is up to date
-        if (func.isreal) func.Nchunks[i] = func.CompD[i]->getNChunks();
-        else func.Nchunks[i] = func.CompC[i]->getNChunks();
+        if (func.isreal()) func.Nchunks()[i] = func.CompD[i]->getNChunks(); // Nchunks() returns a pointer into func_ptr->data, so this writes through the const reference
+        else func.Nchunks()[i] = func.CompC[i]->getNChunks();
     }
-    MPI_Send(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, dst, 0, comm);
-    for (int i = 0; i < func.data.Ncomp; i++) {
-        if (func.isreal) mrcpp::send_tree(*func.CompD[i], dst, tag, comm, func.Nchunks[i]);
-        else mrcpp::send_tree(*func.CompC[i], dst, tag, comm, func.Nchunks[i]);
+    MPI_Send(&func.func_ptr->data, sizeof(CompFunctionData<3>), MPI_BYTE, dst, 0, comm); // header struct is sent as raw bytes with tag 0, not the caller's tag
+    for (int i = 0; i < func.Ncomp(); i++) { // second pass: send each component tree with the caller's tag
+        if (func.isreal()) mrcpp::send_tree(*func.CompD[i], dst, tag, comm, func.Nchunks()[i]);
+        else mrcpp::send_tree(*func.CompC[i], dst, tag, comm, func.Nchunks()[i]);
     }
 #endif
 }
@@ -349,12 +349,12 @@ void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm)
 void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     MPI_Status status;
-    int func_ncomp_in = func.Ncomp;
-    MPI_Recv(&func.data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status);
-    for (int i = 0; i < func.data.Ncomp; i++) {
+    int func_ncomp_in = func.Ncomp(); // component count before the header below overwrites it
+    MPI_Recv(&func.func_ptr->data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status); // header struct arrives as raw bytes with tag 0, not the caller's tag
+    for (int i = 0; i < func.Ncomp(); i++) { // Ncomp() now reflects the received header
         if (func_ncomp_in <= i) func.alloc(i);
-        if (func.isreal) mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks[i]);
-        else  mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks[i]);
+        if (func.isreal()) mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks()[i]); // chunk count comes from the received header
+        else  mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks()[i]);
     }
 #endif
 }
@@ -363,8 +363,8 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
 void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
 #ifdef MRCPP_HAS_MPI
-        for (int comp = 0; comp < func.Ncomp; comp++) {
-            if (func.isreal) mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
+        for (int comp = 0; comp < func.Ncomp(); comp++) {
+            if (func.isreal()) mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
             else  mrcpp::share_tree(*func.CompC[comp], src, tag, comm);
         }
 #endif
@@ -508,7 +508,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
+        if (Phi[j].isreal()) tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
+        if (Phi[j].iscomplex()) tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
@@ -529,7 +530,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_orb(j)) continue;
-        tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
+        if (Phi[j].isreal()) tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
+        if (Phi[j].iscomplex()) tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);

From 5d8ee63e91b9a86de453fb4183de14aaeec9fd5c Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Wed, 21 Aug 2024 16:51:34 +0200
Subject: [PATCH 25/38] multiply, apply, rescale, mixed types

---
 src/treebuilders/apply.cpp            |  16 +-
 src/treebuilders/grid.cpp             |   4 +-
 src/treebuilders/multiply.cpp         |  55 ++
 src/treebuilders/multiply.h           |   5 +
 src/trees/FunctionNode.cpp            |  58 ++
 src/trees/FunctionNode.h              |  11 +-
 src/trees/FunctionTree.cpp            | 119 ++++-
 src/trees/FunctionTree.h              |   4 +-
 src/trees/FunctionTreeVector.h        |   3 +-
 src/trees/MWTree.cpp                  |   5 +-
 src/trees/MWTree.h                    |   2 +-
 src/trees/MultiResolutionAnalysis.cpp |   7 +-
 src/trees/NodeAllocator.h             |   1 +
 src/utils/Bank.cpp                    |  22 +
 src/utils/Bank.h                      |   1 +
 src/utils/CompFunction.cpp            | 733 +++++++++++++++++---------
 src/utils/CompFunction.h              |   9 +-
 src/utils/ComplexFunction.cpp         |   1 -
 18 files changed, 766 insertions(+), 290 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 2ab9b7955..d2bb1c286 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -462,7 +462,7 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
 }
 
 template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
-    ComplexDouble defaultMetric[4][4];
+   ComplexDouble defaultMetric[4][4];
     for (int i=0; i<4; i++){
         for (int j=0; j<4; j++){
             if (i==j) defaultMetric[i][j] = 1.0;
@@ -482,11 +482,17 @@ template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, C
                     }
                     out.func_ptr->isreal = 1;
                 } else {
-                    apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
+                    if (inp.isreal() ){
+                        apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
+                        out.CompD[icomp]->CopyTreeToComplex(out.CompC[ocomp]);
+                        out.func_ptr->isreal = 0;
+                        out.func_ptr->iscomplex = 1;
+                  } else {
+                        apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
                     }
-                    out.func_ptr->iscomplex = 1;
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
+                   }
                 }
             }
         }
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index f5d28779b..0b9867820 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -238,11 +238,11 @@ template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTre
 template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
     out.free();
     out.func_ptr->data = inp.func_ptr->data;
-    out.alloc(inp.Ncomp());
+    out.alloc(inp.Ncomp()-1); // NOTE(review): argument is now Ncomp-1 (so -1 when inp has no components); presumably alloc() takes the highest component index - confirm
     for (int i = 0; i < inp.Ncomp(); i++) {
         if (inp.isreal()) build_grid(*out.CompD[i], *inp.CompD[i]);
         if (inp.iscomplex()) build_grid(*out.CompC[i], *inp.CompC[i]);
-    }
+   }
 }
 
 /** @brief Clear the MW coefficients of a function representation
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index c4bc01edf..7abefc7a3 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -360,6 +360,59 @@ template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &
     return result;
 }
 
+
+/** @returns Dot product <bra|ket> of a complex-valued bra tree and a real-valued ket tree
+ *
+ * @param[in] bra: Bra side input function (FunctionTree<D, ComplexDouble>)
+ * @param[in] ket: Ket side input function (FunctionTree<D, double>)
+ *
+ * @details The dot product is computed with the trees in compressed form, i.e.
+ * scaling coefs only on root nodes, wavelet coefs on all nodes. Since wavelet
+ * functions are orthonormal through ALL scales and the root scaling functions
+ * are orthonormal to all finer level wavelet functions, this becomes a rather
+ * efficient procedure as you only need to compute the dot product where the
+ * grids overlap.
+ * Aborts with "Trees not compatible" when the MRAs of bra and ket differ.
+ */
+template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra, FunctionTree<D, double> &ket) {
+    if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible");
+    MWNodeVector<D, ComplexDouble> nodeTable;
+    TreeIterator<D, ComplexDouble> it(bra);
+    it.setReturnGenNodes(false);
+    while (it.next()) { // collect pointers to the bra nodes visited by the iterator
+        MWNode<D, ComplexDouble> &node = it.getNode();
+        nodeTable.push_back(&node);
+    }
+    int nNodes = nodeTable.size();
+    ComplexDouble result = 0.0;
+    ComplexDouble locResult = 0.0;
+    // OMP is disabled in order to get EXACT results (to the very last digit): the
+    // order of summation makes the result differ beyond the 14th digit or so.
+    // OMP does improve the performance, but it is not worth it for the time being.
+    //#pragma omp parallel firstprivate(n_nodes, locResult) num_threads(mrcpp_get_num_threads())
+    //		shared(nodeTable,rhs,result)
+    //    {
+    //#pragma omp for schedule(guided)
+    for (int n = 0; n < nNodes; n++) {
+        const auto &braNode = static_cast<const FunctionNode<D, ComplexDouble> &>(*nodeTable[n]);
+        const MWNode<D, double> *mwNode = ket.findNode(braNode.getNodeIndex());
+        if (mwNode == nullptr) continue; // findNode returned no ket node for this index: nothing to add
+
+        const auto &ketNode = static_cast<const FunctionNode<D, double> &>(*mwNode);
+        if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode); // scaling part is added for root nodes only
+        locResult += dot_wavelet(braNode, ketNode);
+    }
+    //#pragma omp critical
+    result += locResult;
+    //    }
+    return result;
+}
+template <int D> ComplexDouble dot(FunctionTree<D, double> &bra, FunctionTree<D, ComplexDouble> &ket) { // real bra, complex ket: implemented via the (complex, real) overload
+    ket.setConjugate(!ket.conjugate()); // temporarily flip ket's conjugate flag, since ket is passed in the bra position below
+    ComplexDouble prod =  dot(ket, bra); // arguments swapped to reuse the (complex bra, real ket) overload
+    ket.setConjugate(!ket.conjugate()); // flip back so ket leaves with its original flag
+    return prod;
+}
 /** @brief abs-dot product of two MW function representations
  *
  * @param[in] bra: Bra side input function
@@ -642,6 +695,8 @@ template void dot<3, ComplexDouble>(double prec,
 template ComplexDouble dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket);
 template ComplexDouble dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket);
 template ComplexDouble dot<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket);
+template ComplexDouble dot<3>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, double> &ket);
+template ComplexDouble dot<3>(FunctionTree<3, double> &bra, FunctionTree<3, ComplexDouble> &ket);
 
 template double node_norm_dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket, bool exact);
 template double node_norm_dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket, bool exact);
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index 5994edfa5..d6b33f2d5 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -41,6 +41,11 @@ template <int D, typename T> void dot(double prec,
 template <int D, typename T> T dot(FunctionTree<D, T> &bra,
                             FunctionTree<D, T> &ket);
 
+template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra,
+                            FunctionTree<D, double> &ket);
+template <int D> ComplexDouble dot(FunctionTree<D, double> &bra,
+                            FunctionTree<D, ComplexDouble> &ket);
+
 template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra,
                                       FunctionTree<D, T> &ket,
                                       bool exact = false);
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index c7b21a334..56648fb7b 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -463,6 +463,32 @@ template <> void FunctionNode<3>::reCompress() {
     return result;
 }
 
+/** Inner product of a complex bra node and a real ket node in the scaling basis.
+ *
+ * Sums the products of the first getKp1_d() coefficients of the two nodes. The
+ * scaling basis is fully orthonormal, so the inner product is simply the dot
+ * product of the coefficient vectors. Assumes the nodes have identical support.
+ * NB: the bra coefficients are complex conjugated, unless the conjugate flag of
+ * the tree owning bra is set, in which case they are used unchanged.
+ */
+    template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const ComplexDouble *a = bra.getCoefs();
+    const double *b = ket.getCoefs();
+
+    int size = bra.getKp1_d(); // number of coefficients included in the sum
+    ComplexDouble result = 0.0;
+    // the bra is conjugated by default; a set conjugate flag on its tree cancels that
+    if (bra.getMWTree().conjugate()){
+        for (int i = 0; i < size; i++) result += a[i] * b[i];
+    } else {
+        for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
+    }
+    return result;
+}
+
 /** Inner product of the functions represented by the wavelet basis of the nodes.
  *
  * Integrates the product of the functions represented by the wavelet basis on
@@ -492,6 +518,7 @@ template <> void FunctionNode<3>::reCompress() {
 #endif
 }
 
+
 /** Inner product of the functions represented by the wavelet basis of the nodes.
  *
  * Integrates the product of the functions represented by the wavelet basis on
@@ -528,6 +555,34 @@ template <> void FunctionNode<3>::reCompress() {
     return result;
 }
 
+/** Inner product of a complex bra node and a real ket node in the wavelet basis.
+ *
+ * Returns zero if either node is a GenNode. Otherwise sums the products of the
+ * (getTDim() - 1) * getKp1_d() coefficients that follow the first getKp1_d()
+ * ones. The wavelet basis is fully orthonormal, so the inner product is simply
+ * the dot product of the coefficient vectors. Assumes identical node support.
+ * NB: bra is complex conjugated unless the conjugate flag of its tree is set.
+ */
+    template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
+    if (bra.isGenNode() or ket.isGenNode()) return 0.0;
+
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const ComplexDouble *a = bra.getCoefs();
+    const double *b = ket.getCoefs();
+
+    int start = bra.getKp1_d(); // index of the first coefficient included in the sum
+    int size = (bra.getTDim() - 1) * start; // number of coefficients included in the sum
+    ComplexDouble result = 0.0;
+    if (bra.getMWTree().conjugate()){ // conjugate flag set: bra coefficients are used unchanged
+        for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
+    } else {
+        for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
+    }
+    return result;
+}
+
 template double dot_scaling(const FunctionNode<1, double> &bra, const FunctionNode<1, double> &ket);
 template double dot_scaling(const FunctionNode<2, double> &bra, const FunctionNode<2, double> &ket);
 template double dot_scaling(const FunctionNode<3, double> &bra, const FunctionNode<3, double> &ket);
@@ -550,4 +605,7 @@ template ComplexDouble dot_wavelet(const FunctionNode<1, ComplexDouble> &bra, co
 template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, ComplexDouble> &ket);
 template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, ComplexDouble> &ket);
 
+template ComplexDouble dot_scaling(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket);
+
 } // namespace mrcpp
diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h
index 8f2ff4fb5..a985008a4 100644
--- a/src/trees/FunctionNode.h
+++ b/src/trees/FunctionNode.h
@@ -89,12 +89,9 @@ ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const Funct
 template <int D>
 ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
 
-    /*double FunctionNode<D, double>::dot_scaling (const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
-double FunctionNode<D, double>::dot_scaling(const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
-ComplexDouble FunctionNode<D, ComplexDouble>::dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
-ComplexDouble FunctionNode<D, ComplexDouble>::dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
-
+template <int D>
+ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
+template <int D>
+ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
 
-    template <int D, typename T> T dot_scaling(const FunctionNode<D, T > &bra, const FunctionNode<D, T> &ket);
-    template <int D, typename T> T dot_wavelet(const FunctionNode<D, T> &bra, const FunctionNode<D, T> &ket);    */
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index de11ebc7d..98a88851e 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -56,6 +56,7 @@ FunctionTree<D, T>::FunctionTree(const MultiResolutionAnalysis<D> &mra, SharedMe
         : MWTree<D, T>(mra, name)
         , RepresentableFunction<D, T>(mra.getWorldBox().getLowerBounds().data(), mra.getWorldBox().getUpperBounds().data()) {
     int nodesPerChunk = 2048; // Large chunks are required for not leading to memory fragmentation (32 MB on "Betzy" 2023)
+    // nodesPerChunk is the same for real and complex trees: the size (in MB) of the complex chunks is twice as large
     int coefsGenNodes = this->getKp1_d();
     int coefsRegNodes = this->getTDim() * this->getKp1_d();
     this->nodeAllocator_p = std::make_unique<NodeAllocator<D, T>>(this, sh_mem, coefsRegNodes, nodesPerChunk);
@@ -103,7 +104,7 @@ template <int D, typename T> void FunctionTree<D, T>::allocRootNodes() {
 
 // FunctionTree destructor
 template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
-    this->deleteRootNodes();
+    if (this->getNNodes()>0) this->deleteRootNodes();
 }
 
 /** @brief Write the tree structure to disk, for later use
@@ -936,27 +937,123 @@ template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag()
     return out;
 }
 
-
-/** From real to complex tree. Copy everything, and convert double to ComplexDouble for the coefficents.  */
-template <int D, typename T> FunctionTree<D, ComplexDouble>* FunctionTree<D, T>::CopyTreeToComplex() {
-    FunctionTree<D, ComplexDouble>* outTree = new FunctionTree<D, ComplexDouble> (this->getMRA());
-    int nChunks=getNChunks();
+    /*
+template<>
+void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree) {
+    //void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree, FunctionTree<3, double>* inTree) {
+    FunctionTree<3, double>* inTree = this;
+    delete outTree;
+    outTree = new FunctionTree<3, ComplexDouble> (inTree->getMRA());
+    int nChunks=inTree->getNChunks();
     outTree->getNodeAllocator().init(nChunks, true); //also allocate coefficients
     int Ncoefperchunk = outTree->getNodeAllocator().getCoefChunkSize()/sizeof(ComplexDouble);
+    // real and complex trees have the same Ncoefperchunk.
     for (int iChunk = 0; iChunk < nChunks; iChunk++) {
-        //MWNode<D, double> * inNode = inTree.getNodeAllocator().getNodeChunk(iChunk); //TODO
-        //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree.getNodeAllocator().getNodeChunk(iChunk);//TODO
+        MWNode<3, double> * inNode = inTree->getNodeAllocator().getNodeChunk(iChunk);
+        MWNode<3, ComplexDouble> * outNode = outTree->getNodeAllocator().getNodeChunk(iChunk);
+        //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree->getNodeAllocator().getNodeChunk(iChunk);
+        int nNodes = std::min(inTree->getNNodes(), inTree->getNodeAllocator().getMaxNodesPerChunk());
+        for (int i = 0; i < nNodes; i++) {
+            outNode[i] = *reinterpret_cast<MWNode<3, std::complex<double>>*>(&inNode[i]); // could be improved
+        }
         ComplexDouble* Ccoefs;
+        int ncoefs = nNodes * inTree->getNodeAllocator().getNCoefs();
         Ccoefs = outTree->getNodeAllocator().getCoefChunk(iChunk);
-        auto InCoefs = this->getNodeAllocator().getCoefChunk(iChunk); // can be type double* or ComplexDouble*
-        for (int i = 0; i < Ncoefperchunk; i++) {
+        auto InCoefs = inTree->getNodeAllocator().getCoefChunk(iChunk);
+        for (int i = 0; i < ncoefs; i++) {
             Ccoefs[i] = InCoefs[i];
         }
     }
     outTree->getNodeAllocator().reassemble();
-    return outTree;
+}*/
+
+/*
+ * From real to complex tree: rebuilds this tree as a newly allocated complex tree in outTree, converting each double coefficient to ComplexDouble.
+ * The tree outTree pointed to on entry is deleted first, so it must be nullptr or an owned tree other than this one. Should use a deep_copy if generalized in the future.
+ */
+
+template<>
+void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree) {
+    delete outTree;
+    double ref=0.0; // NOTE(review): not used anywhere in this function
+    outTree = new FunctionTree<3, ComplexDouble> (this->getMRA());
+    std::vector<MWNode<3, double> *> instack;   // nodes of this tree still to be copied
+    std::vector<MWNode<3, ComplexDouble> *> outstack; // matching nodes of outTree, pushed and popped in lockstep with instack
+    outTree->clearEndNodeTable();
+    for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
+        instack.push_back(this->getRootBox().getNodes()[rIdx]);
+        outstack.push_back(outTree->getRootBox().getNodes()[rIdx]);
+    }
+    int nNodes = std::min(this->getNNodes(), this->getNodeAllocator().getMaxNodesPerChunk()); // NOTE(review): not used below
+    int ncoefs = this->getNodeAllocator().getNCoefs(); // number of coefficients copied for every node
+    while (instack.size() > 0) {
+        // inNode and outNode are the same node in space, but on different trees
+        MWNode<3, ComplexDouble> *outNode = outstack.back();
+        outstack.pop_back();
+        MWNode<3, double> *inNode = instack.back();
+        instack.pop_back();
+        // copy coefficients, converting each double to ComplexDouble:
+        double* incoefs = inNode->getCoefs();
+        ComplexDouble* outcoefs = outNode->getCoefs();
+        for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
+        outNode->setHasCoefs();
+        outNode->calcNorms();
+
+        if (inNode->getNChildren() > 0) {
+            if (outNode->getNChildren() < inNode->getNChildren()) outNode->createChildren(true);
+            for (int i = 0; i < inNode->getNChildren(); i++) {
+                instack.push_back(inNode->children[i]);
+                outstack.push_back(outNode->children[i]);
+            }
+        } else {
+            outTree->endNodeTable.push_back(outNode); // childless input node: register its copy in the end node table
+        }
+    }
+    outTree->calcSquareNorm(); // NOTE(review): repeated on the next line with deep = true
+    outTree->calcSquareNorm(true); // deep = true: calcNorms() is called on every end node before summing
 }
 
+// For testing: rebuilds this tree node by node into a newly allocated real tree; the tree outTree pointed to on entry is deleted first.
+template<>
+void FunctionTree<3, double>::CopyTreeToReal(FunctionTree<3, double>* &outTree) {
+    delete outTree;
+    double ref=0.0; // NOTE(review): not used anywhere in this function
+    // outTree is rebuilt from scratch on the MRA of this tree
+    outTree = new FunctionTree<3, double> (this->getMRA());
+    std::vector<MWNode<3, double> *> instack;   // nodes of this tree still to be copied
+    std::vector<MWNode<3, double> *> outstack; // matching nodes of outTree, pushed and popped in lockstep with instack
+    outTree->clearEndNodeTable();
+    for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
+        instack.push_back(this->getRootBox().getNodes()[rIdx]);
+        outstack.push_back(outTree->getRootBox().getNodes()[rIdx]);
+    }
+    int nNodes = std::min(this->getNNodes(), this->getNodeAllocator().getMaxNodesPerChunk()); // NOTE(review): not used below
+    int ncoefs = this->getNodeAllocator().getNCoefs(); // number of coefficients copied for every node
+    while (instack.size() > 0) {
+        // inNode and outNode are the same node in space, but on different trees
+        MWNode<3, double> *outNode = outstack.back();
+        outstack.pop_back();
+        MWNode<3, double> *inNode = instack.back();
+        instack.pop_back();
+        // copy coefficients one by one:
+        double* incoefs = inNode->getCoefs();
+        double* outcoefs = outNode->getCoefs();
+        for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
+        outNode->setHasCoefs();
+        outNode->calcNorms();
+
+        if (inNode->getNChildren() > 0) {
+            outNode->clearIsEndNode();
+            if (outNode->getNChildren() < inNode->getNChildren()) outNode->createChildren(true);
+            for (int i = 0; i < inNode->getNChildren(); i++) {
+                instack.push_back(inNode->children[i]);
+                outstack.push_back(outNode->children[i]);
+            }
+        } else {
+            outTree->endNodeTable.push_back(outNode); // childless input node: register its copy in the end node table
+        }
+    } // NOTE(review): unlike CopyTreeToComplex, calcSquareNorm() is not called on outTree here
+}
 
 template class FunctionTree<1, double>;
 template class FunctionTree<2, double>;
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 110ceb07e..3e45b2871 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -116,9 +116,11 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     // tools for use of local (nodes are stored in Bank) representation
     int saveNodesAndRmCoeff(); // put all nodes coefficients in Bank and delete all coefficients
     void deep_copy(FunctionTree<D, T> *out);
-    FunctionTree<D, ComplexDouble>* CopyTreeToComplex();
     FunctionTree<D, double> *Real();
     FunctionTree<D, double> *Imag();
+    void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &out);
+    void CopyTreeToReal(FunctionTree<3, double>* &out); //for testing
+
 protected:
     std::unique_ptr<NodeAllocator<D, T>> genNodeAllocator_p{nullptr};
     std::ostream &print(std::ostream &o) const override;
diff --git a/src/trees/FunctionTreeVector.h b/src/trees/FunctionTreeVector.h
index a9ed84d91..142113e1f 100644
--- a/src/trees/FunctionTreeVector.h
+++ b/src/trees/FunctionTreeVector.h
@@ -39,11 +39,12 @@ template <int D, typename T = double> using FunctionTreeVector = std::vector<Coe
  *  @param[in] fs: Vector to clear
  *  @param[in] dealloc: Option to free FunctionTree pointer before clearing
  */
-  template <int D, typename T> void clear(FunctionTreeVector<D, T> &fs, bool dealloc = false) {
+template <int D, typename T> void clear(FunctionTreeVector<D, T> &fs, bool dealloc = false) {
     if (dealloc) {
         for (auto &t : fs) {
             auto f = std::get<1>(t);
             if (f != nullptr) delete f;
+            f = nullptr;
         }
     }
     fs.clear();
diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp
index c849517da..d7ebaba55 100644
--- a/src/trees/MWTree.cpp
+++ b/src/trees/MWTree.cpp
@@ -106,10 +106,11 @@ template <int D, typename T> void MWTree<D, T>::clear() {
  * @details The norm is calculated using endNodes only. The specific
  * type of norm which is computed will depend on the derived class
  */
-template <int D, typename T> void MWTree<D, T>::calcSquareNorm() {
+template <int D, typename T> void MWTree<D, T>::calcSquareNorm(bool deep) {
     double treeNorm = 0.0;
     for (int n = 0; n < this->getNEndNodes(); n++) {
-        const MWNode<D, T> &node = getEndMWNode(n);
+        MWNode<D, T> &node = getEndMWNode(n);
+        if (deep) node.calcNorms();
         assert(node.hasCoefs());
         treeNorm += node.getSquareNorm();
     }
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index 9bec2c1bc..b19d356c8 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -73,7 +73,7 @@ class BankAccount;
 
     /** @returns Squared L2 norm of the function */
     double getSquareNorm() const { return this->squareNorm; }
-    void calcSquareNorm();
+    void calcSquareNorm(bool deep = false);
     void clearSquareNorm() { this->squareNorm = -1.0; }
 
     int getOrder() const { return this->order; }
diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp
index 6eaabb120..b72a8c5e8 100644
--- a/src/trees/MultiResolutionAnalysis.cpp
+++ b/src/trees/MultiResolutionAnalysis.cpp
@@ -146,7 +146,12 @@ template <int D> bool MultiResolutionAnalysis<D>::operator==(const MultiResoluti
  *  For more information about the meaning of equality for BoundingBox and ScalingBasis objets, see their respective classes.
  */
 template <int D> bool MultiResolutionAnalysis<D>::operator!=(const MultiResolutionAnalysis<D> &mra) const {
-    return !(*this == mra);
+    // Two MRAs differ as soon as one of basis, world or maxDepth differs.
+    // A comparison operator must not have side effects, so nothing is written
+    // to std::cout here, and every member is compared only once.
+    if (this->basis != mra.basis) return true;
+    if (this->world != mra.world) return true;
+    return this->maxDepth != mra.maxDepth;
 }
 
 /**
diff --git a/src/trees/NodeAllocator.h b/src/trees/NodeAllocator.h
index b426d0021..69065d1dc 100644
--- a/src/trees/NodeAllocator.h
+++ b/src/trees/NodeAllocator.h
@@ -64,6 +64,7 @@ namespace mrcpp {
     int getNChunksUsed() const { return (this->topStack + this->maxNodesPerChunk - 1) / this->maxNodesPerChunk; }
     int getNodeChunkSize() const { return this->maxNodesPerChunk * this->sizeOfNode; }
     int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); }
+    int getMaxNodesPerChunk() const { return this->maxNodesPerChunk; }
 
     T * getCoef_p(int sIdx);
     MWNode<D, T> * getNode_p(int sIdx);
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index bc700ce1b..0c9cc3769 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -977,6 +977,28 @@ int BankAccount::get_orbblock(int orbid, double *&data, std::vector<int> &nodeid
     return 1;
 }
 
+
+// Get all data with identity orbid (same orbital, different nodes) as complex values; data is allocated here with new[] and not released by this function.
+int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector<int> &nodeidVec, int bankstart) {
+#ifdef MRCPP_HAS_MPI
+    MPI_Status status;
+    int nodeid = wrk_rank + bankstart; // used below (modulo bank_size) to select the bank to contact
+    // get the entire superblock and also the nodeid of each column
+    int messages[message_size];
+    messages[0] = GET_ORBBLOCK;
+    messages[1] = account_id;
+    messages[2] = orbid;
+    MPI_Send(messages, 3, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank);
+    MPI_Recv(metadata_block, size_metadata, MPI_INT, bankmaster[nodeid % bank_size], 1, comm_bank, &status);
+    nodeidVec.resize(metadata_block[1]);
+    int totsize = metadata_block[2]; // counted in doubles: it is the MPI_DOUBLE count of the last receive
+    if (totsize > 0) MPI_Recv(nodeidVec.data(), metadata_block[1], MPI_INT, bankmaster[nodeid % bank_size], 2, comm_bank, &status);
+    data = new ComplexDouble[totsize/2]; // two doubles per complex value; NOTE(review): assumes totsize is even
+    if (totsize > 0) MPI_Recv(data, totsize, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status);
+#endif
+    return 1; // also returned without MPI, in which case data and nodeidVec are left untouched
+}
+
 // creator. NB: collective
 BankAccount::BankAccount(int iclient, MPI_Comm comm) {
     this->account_id = dataBank.openAccount(iclient, comm);
diff --git a/src/utils/Bank.h b/src/utils/Bank.h
index 5ecbe6a7e..69719c530 100644
--- a/src/utils/Bank.h
+++ b/src/utils/Bank.h
@@ -114,6 +114,7 @@ class BankAccount {
     int get_nodeblock(int nodeid, double *data, std::vector<int> &idVec);
     int get_nodeblock(int nodeid, ComplexDouble *data, std::vector<int> &idVec);
     int get_orbblock(int orbid, double *&data, std::vector<int> &nodeidVec, int bankstart);
+    int get_orbblock(int orbid, ComplexDouble *&data, std::vector<int> &nodeidVec, int bankstart);
 };
 
 class TaskManager {
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index c6a391947..630b84cb4 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -9,6 +9,17 @@
 #include "treebuilders/multiply.h"
 #include "CompFunction.h"
 
+/* Some rules for CompFunction:
+ * Ncomp is the number of components. If Ncomp>0, the corresponding trees must exist (they can be empty roots only).
+ * The other trees should be set to nullptr.
+ * The trees and data can be shared among several CompFunctions; this is managed automatically by "std::make_shared".
+ * Normally the CompFunction must be either real or complex (or neither if none is defined anyway).
+ * However, it is allowed in some cases to have both, and the code should preferably allow this. (It is used temporarily
+ * when we need a Complex type, but the trees are real: the tree is then copied as a complex tree in the same CompFunction).
+ * TreePtr (aka func_ptr) is the part potentially shared with others with "std::make_shared". It contains the pointers to the trees.
+ * The static data (number of components, real/complex, conjugation, integers used for spin etc.) are stored in func_ptr.data.
+ */
+
 namespace mrcpp {
 
   template <int D>
@@ -106,7 +117,7 @@ namespace mrcpp {
           func_ptr = compfunc.func_ptr;
           CompD = func_ptr->real;
           CompC = func_ptr->cplx;
-     }
+      }
       return *this;
   }
 
@@ -205,17 +216,21 @@ double CompFunction<D>::squaredNorm() const {
 //  Allocates all the ialloc+1 trees, with indices 0,...ialloc
 //  ialloc is the largest index allocated. ialloc=0 allocates one tree.
 template <int D>
-void CompFunction<D>::alloc(int ialloc) {
+void CompFunction<D>::alloc(int ialloc, bool zero) {
       if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
       if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
       for (int i = 0; i < ialloc + 1; i++) {
           delete CompD[i];
           delete CompC[i];
+          CompD[i] = nullptr;
+          CompC[i] = nullptr;
           if (isreal()) {
               CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
+              if (zero) CompD[i]->setZero();
           }
           if (iscomplex()) {
               CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
+              if (zero) CompC[i]->setZero();
           }
           func_ptr->Ncomp = std::max(Ncomp(), i + 1);
       }
@@ -223,25 +238,30 @@ void CompFunction<D>::alloc(int ialloc) {
           //delete possible remaining components
           delete CompD[i];
           delete CompC[i];
+          CompD[i] = nullptr;
+          CompC[i] = nullptr;
       }
-
- }
+}
 
 template <int D>
 void CompFunction<D>::free() {
-    //TODO: shared memory handling
     for (int i = 0; i < Ncomp(); i++) {
         delete CompD[i];
         delete CompC[i];
+        CompD[i] = nullptr; // reset both slots so the deleted trees cannot be reached through them again
+        CompC[i] = nullptr;
     }
+    // disabled: if (this->func_ptr->shared_mem_real) this->func_ptr->shared_mem_real->clear();
+    // disabled: if (this->func_ptr->shared_mem_cplx) this->func_ptr->shared_mem_cplx->clear();
+    func_ptr->Ncomp = 0; // every component up to Ncomp() was deleted above
 }
 
 template <int D>
 int CompFunction<D>::getSizeNodes() const {
     int size_mb = 0; // Memory size in kB
     for (int i = 0; i < Ncomp(); i++) {
-        if (CompD[i]!= nullptr) size_mb +=CompD[i]->getSizeNodes();
-        if (CompC[i]!= nullptr) size_mb +=CompC[i]->getSizeNodes();
+        if (isreal() and CompD[i]!= nullptr) size_mb +=CompD[i]->getSizeNodes(); // real trees are counted only when isreal() is set
+        if (iscomplex() and CompC[i]!= nullptr) size_mb +=CompC[i]->getSizeNodes(); // complex trees are counted only when iscomplex() is set
     }
     return size_mb;
 }
@@ -250,8 +270,8 @@ template <int D>
 int CompFunction<D>::getNNodes() const {
     int nNodes = 0;
      for (int i = 0; i < Ncomp(); i++) {
-        if (CompD[i]!= nullptr) nNodes +=CompD[i]->getSizeNodes();
-        if (CompC[i]!= nullptr) nNodes +=CompC[i]->getSizeNodes();
+         if (isreal() and CompD[i]!= nullptr) nNodes +=CompD[i]->getNNodes(); // sums node counts (getNNodes), not memory sizes
+         if (iscomplex() and CompC[i]!= nullptr) nNodes +=CompC[i]->getNNodes(); // complex trees are counted only when iscomplex() is set
     }
     return nNodes;
 }
@@ -310,7 +330,7 @@ const FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) const {
 template <int D>
 void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
       func_ptr->isreal = 1;
-      if (CompD[i] != nullptr) delete CompD[i];
+      //if (CompD[i] != nullptr) delete CompD[i];
       CompD[i] = tree;
       if (tree != nullptr) {
           func_ptr->Ncomp = std::max(Ncomp(), i + 1);
@@ -320,7 +340,7 @@ void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
 template <int D>
 void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
       func_ptr->iscomplex = 1;
-      if (CompC[i] != nullptr) delete CompC[i];
+      //if (CompC[i] != nullptr) delete CompC[i];
       CompC[i] = tree;
       if (tree != nullptr) {
           func_ptr->Ncomp = std::max(Ncomp(), i + 1);
@@ -334,17 +354,12 @@ void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
  */
 template <int D>
 void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
+
     if (Ncomp()<inp.Ncomp()){
         func_ptr->data = inp.func_ptr->data;
-        alloc(inp.Ncomp()-1);
-        for (int i = 0; i < inp.Ncomp(); i++) {
-            if (inp.isreal()) {
-                CompD[i]->setZero();
-            } else {
-                CompC[i]->setZero();
-            }
-        }
+        alloc(inp.Ncomp()-1, true);
     }
+
     for (int i = 0; i < inp.Ncomp(); i++) {
         if (inp.isreal()) {
             CompD[i]->add_inplace(c.real(),*inp.CompD[i]);
@@ -354,7 +369,6 @@ void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
     }
 }
 
-
 template <int D>
 int CompFunction<D>::crop(double prec) {
     if (prec < 0.0) return 0;
@@ -375,10 +389,10 @@ void CompFunction<D>::rescale(ComplexDouble c) {
     bool need_to_rescale = not(isShared()) or mpi::share_master();
     if (need_to_rescale) {
         for (int i = 0; i < Ncomp(); i++) {
-            if (isreal()) {
-                CompD[i]->rescale(c.real());
-            } else {
+            if (iscomplex()) {
                 CompC[i]->rescale(c);
+            } else {
+                CompD[i]->rescale(c.real());
             }
         }
     } else MSG_ERROR("Not implemented");
@@ -453,21 +467,19 @@ template <int D>
     void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate) {
     double thrs = MachineZero;
     bool need_to_add = not(out.isShared()) or mpi::share_master();
+    bool share = out.isShared();
     out.func_ptr->data = inp[0].func_ptr->data;
-    out.func_ptr->iscomplex = 0;
-    out.func_ptr->isreal = 1;
-    for (int i = 0; i < inp.size(); i++) {
-        if(inp[i].iscomplex()){
-            out.func_ptr->iscomplex = 1;
-            out.func_ptr->isreal = 0;
-        }
+    out.func_ptr->data.shared = share; // we don' inherit the shareness
+    out.alloc(out.Ncomp()-1);
+    for (int i = 1; i < inp.size(); i++) {
+        if(inp[i].iscomplex() and !inp[0].iscomplex()) MSG_ABORT("mixed types not implemented");
     }
-    out = inp[0].paramCopy();
     for (int comp = 0; comp < inp[0].Ncomp(); comp++) {
         if (inp[0].isreal()) {
             FunctionTreeVector<D, double> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
+                if (inp[i].getNNodes()==0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i].real(), inp[i].CompD[comp]));
             }
             if (need_to_add) {
@@ -486,13 +498,14 @@ template <int D>
             FunctionTreeVector<D, ComplexDouble> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
+                if (inp[i].getNNodes()==0 or inp[i].CompC[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp]));
             }
             if (need_to_add) {
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
-                       build_grid(*out.CompC[comp], fvec);
-                       mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
+                        build_grid(*out.CompC[comp], fvec);
+                        mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
                     } else {
                         mrcpp::add(prec, *out.CompC[comp], fvec, -1, false, conjugate);
                     }
@@ -520,69 +533,94 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
 template <int D>
 void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
+    bool out_allocated = true;
+    if (out.Ncomp() == 0) out_allocated = false;
+    std::cout<<"multiply "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<" "<<out.isreal()<<" "<<out_allocated<<std::endl;
+   bool share = out.isShared();
     out.func_ptr->data = inp_a.func_ptr->data;
+    out.func_ptr->data.shared = share; // we don't inherit the shared status from the input
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
         if (inp_a.isreal() and inp_b.isreal()) {
-            delete out.CompD[comp];
-            FunctionTree<D, double> *tree = new FunctionTree<D, double>(inp_a.CompD[0]->getMRA());
             if (need_to_multiply) {
                 if (prec < 0.0) {
                     // Union grid
-                    build_grid(*tree, *inp_a.CompD[comp]);
-                    build_grid(*tree, *inp_b.CompD[comp]);
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate);
+                    build_grid(*out.CompD[comp], *inp_a.CompD[comp]);
+                    build_grid(*out.CompD[comp], *inp_b.CompD[comp]);
+                    mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate);
                 } else {
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate);
-                }
+                    if (!out_allocated) out.alloc(out.Ncomp()-1);
+                    mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate);
+               }
             }
-            out.CompD[comp] = tree;
         } else {
-            out.func_ptr->iscomplex = 1;
-            out.func_ptr->isreal = 0;
-            // if one of the input is real, we simply make a new complex copy of it
+           // if one of the input is real, we simply make a new complex copy of it
             bool inp_aisReal = inp_a.isreal();
             bool inp_bisReal = inp_b.isreal();
             if(inp_aisReal) {
-                inp_a.CompC[comp] = inp_a.CompD[comp]->CopyTreeToComplex();
-                inp_a.func_ptr->iscomplex = true;
-                inp_a.func_ptr->isreal = false;
+               inp_a.CompD[comp]->CopyTreeToComplex(inp_a.CompC[comp]);
+               inp_a.func_ptr->iscomplex = true;
+               inp_a.func_ptr->isreal = false;
             }
             if(inp_bisReal) {
-                inp_b.CompC[comp] = inp_b.CompD[comp]->CopyTreeToComplex();
+                inp_b.CompD[comp]->CopyTreeToComplex(inp_b.CompC[comp]);
                 inp_b.func_ptr->iscomplex = true;
                 inp_b.func_ptr->isreal = false;
             }
-
-            delete out.CompC[comp];
-            FunctionTree<D, ComplexDouble> *tree = new FunctionTree<D, ComplexDouble>(inp_a.CompC[0]->getMRA());
             ComplexDouble coef = 1.0;
             if (need_to_multiply) {
                 if (prec < 0.0) {
                     // Union grid
-                    build_grid(*tree, *inp_a.CompC[comp]);
-                    build_grid(*tree, *inp_b.CompC[comp]);
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
-                } else {
+                    out.func_ptr->iscomplex = 1;
+                    out.func_ptr->isreal = 0;
+                    delete out.CompD[comp];
+                    delete out.CompC[comp];
+                    if (!out_allocated) out.alloc(out.Ncomp()-1);
+                    build_grid(*out.CompC[comp], *inp_a.CompC[comp]);
+                    build_grid(*out.CompC[comp], *inp_b.CompC[comp]);
+                    mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
+                } else {// note that this assumes Ncomp=1
                     // Adaptive grid
-                    mrcpp::multiply(prec, *tree, coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
+                    std::cout<<"Adaptive grid "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
+                   if (out.CompD[comp] != nullptr) { //NB: func_ptr has already been overwritten!
+                       std::cout<<"copoy to complex  "<<" "<<out.CompD[comp]->getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
+                         if(out.CompD[comp]->getNNodes() > 0){
+                            out.CompD[comp]->CopyTreeToComplex(out.CompC[comp]);
+                            out.func_ptr->iscomplex = 1;
+                            out.func_ptr->isreal = 0;
+                            delete out.CompD[comp];
+                            out.CompD[comp] = nullptr;
+                        } else {
+                             out.func_ptr->iscomplex = 1;
+                             out.func_ptr->isreal = 0;
+                             out.alloc(out.Ncomp()-1);
+                        }
+                    } else {
+                        out.func_ptr->iscomplex = 1;
+                        out.func_ptr->isreal = 0;
+                        if (!out_allocated) out.alloc(out.Ncomp()-1);
+                    }
+                     std::cout<<"before "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
+                  mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
                 }
             }
-            out.CompC[comp] = tree;
             // restore original tree
             if(inp_aisReal) {
                 delete inp_a.CompC[comp];
+                inp_a.CompC[comp] = nullptr;
                 inp_a.func_ptr->iscomplex = false;
                 inp_a.func_ptr->isreal = true;
             }
             if(inp_bisReal) {
                 delete inp_b.CompC[comp];
+                inp_b.CompC[comp] = nullptr;
                 inp_b.func_ptr->iscomplex = false;
                 inp_b.func_ptr->isreal = true;
             }
         }
     }
     mpi::share_function(out, 0, 9911, mpi::comm_share);
+   std::cout<<"final multiply "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
 
 }
 
@@ -644,6 +682,10 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
           if (bra.isreal() and ket.isreal()) {
               dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]);
+          } else  if (bra.isreal() and ket.iscomplex()) {
+              dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompC[comp]);
+          } else  if (bra.iscomplex() and ket.isreal()) {
+              dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompD[comp]);
           } else {
               dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
           }
@@ -730,19 +772,15 @@ void CompFunctionVector::distribute() {
  * For each node, all functions are transformed simultaneously
  * by a dense matrix multiplication.
  * Phi input functions, Psi output functions
- *
+ * Phi and Psi are complex.
  */
-void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
-
-    if (Phi[0].iscomplex() ){
-        rotate_cplx(Phi, U, Psi, prec);
-        return;
-    }
+void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
 
-    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
+    // The principle of this routine is that nodes for all orbitals are rotated one by one using matrix multiplication.
     // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
     // MPI version does not use OMP yet, Serial version uses OMP
     // size of input is N, size of output is M
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     int N = Phi.size();
     int M = Psi.size();
     if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
@@ -762,37 +800,41 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     // get a list of all nodes in union tree, identified by their serialIx indices
     refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
     int max_n = indexVec_ref.size();
+
+    for (int j = 0; j < N; j++) {
+        if (!mpi::my_func(j)) continue;
+        if (Phi[j].isreal()) MSG_ABORT("This function only use complex input");
+    }
+
     for (int i = 0; i < M; i++) {
-        Psi[i].func_ptr->data.isreal = 1;
-        Psi[i].func_ptr->data.iscomplex = 0;
+        Psi[i].func_ptr->data.isreal = 0;
+        Psi[i].func_ptr->data.iscomplex = 1;
     }
 
     // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
 
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     BankAccount nodesPhi;             // to put the original nodes
     BankAccount nodesRotated;         // to put the rotated nodes
 
     // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(N);
+    std::vector<std::vector<ComplexDouble *>> coeffVec(N);
     std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
     std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
                                                     // orbital given the node index in the reference tree
     if (serial) {
-
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
         std::vector<int> parindexVec; // serialIx of the parent nodes
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            Phi[j].complex().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
             // make a map that gives j from indexVec
             int orb_node_ix = 0;
             for (int ix : indexVec[j]) {
-                orb2node[j][ix] = orb_node_ix++;
-                if (ix < 0) continue;
-                node2orbVec[ix].push_back(j);
+                    orb2node[j][ix] = orb_node_ix++;
+                    if (ix < 0) continue;
+                    node2orbVec[ix].push_back(j);
             }
         }
     } else { // MPI case
@@ -803,11 +845,11 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
     // 4) rotate all the nodes
     IntMatrix split_serial;                             // in the serial case all split are stored in one array
-    std::vector<std::vector<double *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::vector<ComplexDouble *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
     std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
     int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
-    // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    std::vector<ComplexMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+   // j indices are for unrotated orbitals, i indices are for rotated orbitals
     if (serial) {
         std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
         split_serial.resize(M, max_n); // not use in the MPI case
@@ -816,9 +858,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             ix2coef_ref[node_ix] = n;
             for (int i = 0; i < M; i++) split_serial(i, n) = 1;
         }
-
         std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
-
         // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
         // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
         // n is finished.
@@ -834,7 +874,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
             int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
             if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
             for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
                 int orb_node_ix = orb2node[j][node_ix];
                 for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
@@ -854,12 +894,13 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             }
 
             // 4c) rotate this node
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
+            ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
             for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); }
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); }
             }
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
             // HERE IT HAPPENS!
+            // TODO: conjugation
             rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
 
             // 4d) store and make rotated node pointers
@@ -879,7 +920,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                     double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
                     int kstart = 0;
                     if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
-                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
+                    for (int k = kstart; k < csize; k++) wnorm += std::real(rotatedCoeff(k, i) * std::conj(rotatedCoeff(k, i)));
                     if (thres < wnorm or prec < 0)
                         split_serial(orbiVec[i], n) = 1;
                     else
@@ -904,7 +945,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         BankAccount nodeSplits;
         mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
 
-        DoubleMatrix coeffBlock(sizecoeff, N);
+        ComplexMatrix coeffBlock(sizecoeff, N);
         max_ix++; // largest node index + 1. to store rotated orbitals with different id
         TaskManager tasks(max_n);
         for (int nn = 0; nn < max_n; nn++) {
@@ -916,7 +957,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             if (parentid == -1) {
                 // root node, split if output needed
                 for (int i = 0; i < M; i++) {
-                         split[i] = 1.0;
+                    split[i] = 1.0;
                 }
                 csize = sizecoeff;
             } else {
@@ -932,21 +973,22 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             }
 
             // 4b) rotate this node
-            DoubleMatrix coeffBlock(csize, N); // largest possible used size
+            ComplexMatrix coeffBlock(csize, N); // largest possible used size
             nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
             coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
 
             // chunk of U, with reorganized indices and separate blocks for real and imag:
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
+            ComplexMatrix Un(orbjVec.size(), orbiVec.size());
+            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
 
             for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
                 for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
-                    Un(j, i) = std::real(U(orbjVec[j], orbiVec[i]));
+                    Un(j, i) = U(orbjVec[j], orbiVec[i]);
                 }
             }
 
             // HERE IT HAPPENS
+            // TODO: conjugation
             rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
 
             // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while
@@ -956,7 +998,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                 // check if this node/orbital needs further refinement
                 double wnorm = 0.0;
                 int kwstart = csize - sizecoeffW; // do not include scaling
-                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
+                for (int k = kwstart; k < csize; k++) wnorm += std::real(rotatedCoeff.col(i)[k] * std::conj(rotatedCoeff.col(i)[k]));
                 if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
                 nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
             }
@@ -974,24 +1016,21 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
-            if (coeffpVec[j].size()==0) continue;
-            Psi[j].alloc(0);
-            Psi[j].real().clear();
-            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-        }
-
+           if (coeffpVec[j].size()==0) continue;
+            Psi[j].alloc(0); //All data is stored in coeffpVec[j]
+            Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+       }
     } else { // MPI case
-
         for (int j = 0; j < M; j++) {
             if (not mpi::my_func(j)) continue;
             // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
-            std::vector<double *> coeffpVec; //
+            std::vector<ComplexDouble *> coeffpVec; //
             std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
             int ix = 0;
-            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
+            std::vector<ComplexDouble *> pointerstodelete; // list of temporary arrays to clean up
             for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
                 std::vector<int> nodeidVec;
-                double *dataVec; // will be allocated by bank
+                ComplexDouble *dataVec; // will be allocated by bank
                 nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
                 if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
                 int shift = 0;
@@ -1005,30 +1044,37 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                     shift += csize;
                 }
             }
+
             Psi[j].alloc(0);
-            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+            Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
 
-            for (double *p : pointerstodelete) delete[] p;
+            for (ComplexDouble *p : pointerstodelete) delete[] p;
             pointerstodelete.clear();
         }
     }
 }
 
+
+
 /** @brief Make a linear combination of functions
  *
  * Uses "local" representation: treats one node at a time.
  * For each node, all functions are transformed simultaneously
  * by a dense matrix multiplication.
  * Phi input functions, Psi output functions
- * Phi must be complex.
+ *
  */
-void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
+void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
 
-    // The principle of this routine is that nodes for all orbitals are rotated one by one using matrix multiplication.
+    if (Phi[0].iscomplex() ){
+        rotate_cplx(Phi, U, Psi, prec);
+        return;
+    }
+
+    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
     // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
     // MPI version does not use OMP yet, Serial version uses OMP
     // size of input is N, size of output is M
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     int N = Phi.size();
     int M = Psi.size();
     if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
@@ -1048,41 +1094,37 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
     // get a list of all nodes in union tree, identified by their serialIx indices
     refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
     int max_n = indexVec_ref.size();
-
-    for (int j = 0; j < N; j++) {
-        if (!mpi::my_func(j)) continue;
-        if (Phi[j].isreal()) MSG_ABORT("This function only use complex input");
-    }
-
     for (int i = 0; i < M; i++) {
-        Psi[i].func_ptr->data.isreal = 0;
-        Psi[i].func_ptr->data.iscomplex = 1;
+        Psi[i].func_ptr->data.isreal = 1;
+        Psi[i].func_ptr->data.iscomplex = 0;
     }
 
     // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
 
+    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     BankAccount nodesPhi;             // to put the original nodes
     BankAccount nodesRotated;         // to put the rotated nodes
 
     // used for serial only:
-    std::vector<std::vector<ComplexDouble *>> coeffVec(N);
+    std::vector<std::vector<double *>> coeffVec(N);
     std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
     std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
                                                     // orbital given the node index in the reference tree
     if (serial) {
+
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
         std::vector<int> parindexVec; // serialIx of the parent nodes
         std::vector<double> scalefac;
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            Phi[j].complex().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
+            Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
             // make a map that gives j from indexVec
             int orb_node_ix = 0;
             for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
+                orb2node[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVec[ix].push_back(j);
             }
         }
     } else { // MPI case
@@ -1093,11 +1135,11 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 
     // 4) rotate all the nodes
     IntMatrix split_serial;                             // in the serial case all split are stored in one array
-    std::vector<std::vector<ComplexDouble *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
+    std::vector<std::vector<double *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
     std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
     int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<ComplexMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
-   // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+    // j indices are for unrotated orbitals, i indices are for rotated orbitals
     if (serial) {
         std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
         split_serial.resize(M, max_n); // not use in the MPI case
@@ -1106,7 +1148,9 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             ix2coef_ref[node_ix] = n;
             for (int i = 0; i < M; i++) split_serial(i, n) = 1;
         }
+
         std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
+
         // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
         // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
         // n is finished.
@@ -1122,7 +1166,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 
             int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
             if (parindexVec_ref[n] < 0) shift = 0;
-            ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
             for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
                 int orb_node_ix = orb2node[j][node_ix];
                 for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
@@ -1142,13 +1186,12 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             }
 
             // 4c) rotate this node
-            ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
             for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); }
+                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = std::real(U(orbjVec[j], orbiVec[i])); }
             }
-            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
             // HERE IT HAPPENS!
-            // TODO: conjugaison
             rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
 
             // 4d) store and make rotated node pointers
@@ -1168,7 +1211,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
                     double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
                     int kstart = 0;
                     if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
-                    for (int k = kstart; k < csize; k++) wnorm += std::real(rotatedCoeff(k, i) * std::conj(rotatedCoeff(k, i)));
+                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
                     if (thres < wnorm or prec < 0)
                         split_serial(orbiVec[i], n) = 1;
                     else
@@ -1193,7 +1236,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
         BankAccount nodeSplits;
         mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
 
-        ComplexMatrix coeffBlock(sizecoeff, N);
+        DoubleMatrix coeffBlock(sizecoeff, N);
         max_ix++; // largest node index + 1. to store rotated orbitals with different id
         TaskManager tasks(max_n);
         for (int nn = 0; nn < max_n; nn++) {
@@ -1205,7 +1248,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             if (parentid == -1) {
                 // root node, split if output needed
                 for (int i = 0; i < M; i++) {
-                    split[i] = 1.0;
+                         split[i] = 1.0;
                 }
                 csize = sizecoeff;
             } else {
@@ -1221,22 +1264,21 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             }
 
             // 4b) rotate this node
-            ComplexMatrix coeffBlock(csize, N); // largest possible used size
+            DoubleMatrix coeffBlock(csize, N); // largest possible used size
             nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
             coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
 
             // chunk of U, with reorganized indices and separate blocks for real and imag:
-            ComplexMatrix Un(orbjVec.size(), orbiVec.size());
-            ComplexMatrix rotatedCoeff(csize, orbiVec.size());
+            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
+            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
 
             for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
                 for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
-                    Un(j, i) = U(orbjVec[j], orbiVec[i]);
+                    Un(j, i) = std::real(U(orbjVec[j], orbiVec[i]));
                 }
             }
 
             // HERE IT HAPPENS
-            // TODO conjugaison
             rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
 
             // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while
@@ -1246,7 +1288,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
                 // check if this node/orbital needs further refinement
                 double wnorm = 0.0;
                 int kwstart = csize - sizecoeffW; // do not include scaling
-                for (int k = kwstart; k < csize; k++) wnorm += std::real(rotatedCoeff.col(i)[k] * std::conj(rotatedCoeff.col(i)[k]));
+                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
                 if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
                 nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
             }
@@ -1264,11 +1306,14 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
-           if (coeffpVec[j].size()==0) continue;
-            Psi[j].alloc(0); //All data is stored in coeffpVec[j]
-            Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-       }
+            if (coeffpVec[j].size()==0) continue;
+            Psi[j].alloc(0);
+            Psi[j].real().clear();
+            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
+        }
+
     } else { // MPI case
+
         for (int j = 0; j < M; j++) {
             if (not mpi::my_func(j)) continue;
             // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
@@ -1292,24 +1337,15 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
                     shift += csize;
                 }
             }
-            if (j < M) {
-                // Real part
-                if (!Psi[j].hasReal()) Psi[j].alloc(0);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            } else {
-                // Imag part
-                if (!Psi[j].hasImag()) Psi[j].alloc(0);
-                Psi[j].imag().clear();
-                Psi[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            }
+            Psi[j].alloc(0);
+            Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
+
             for (double *p : pointerstodelete) delete[] p;
             pointerstodelete.clear();
         }
     }
 }
 
-
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec) {
     rotate(Phi, U, Phi, prec);
     return;
@@ -1323,6 +1359,7 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
     int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
     int max_nNodes = refTree.getNNodes();
     std::vector<double *> coeffVec;
+    std::vector<ComplexDouble *> coeffVec_cplx;
     std::vector<double> scalefac;
     std::vector<int> indexVec;    // SerialIx of the node in refOrb
     std::vector<int> parindexVec; // SerialIx of the parent node
@@ -1331,7 +1368,7 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
     for (int j = 0; j < N; j++) {
         if (not mpi::my_func(j)) continue;
         // make vector with all coef address and their index in the union grid
-        if (Phi[j].hasReal()) {
+        if (Phi[j].isreal()) {
             Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
@@ -1347,9 +1384,9 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
                 }
             }
         }
-        // Imaginary parts are considered as orbitals with an orbid shifted by N
-        if (Phi[j].hasImag()) {
-            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
+        // Complex components
+        if (Phi[j].iscomplex()) {
+            Phi[j].complex().makeCoeffVector(coeffVec_cplx, indexVec, parindexVec, scalefac, max_ix, refTree);
             int max_n = indexVec.size();
             // send node coefs from Phi[j] to bank
             for (int i = 0; i < max_n; i++) {
@@ -1358,9 +1395,9 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
                 int csize = sizecoeffW;
                 if (parindexVec[i] < 0) csize = sizecoeff;
                 if (sizes > 0) { // fixed size
-                    account.put_nodedata(j + N, indexVec[i], sizes, coeffVec[i]);
+                    account.put_nodedata(j, indexVec[i], sizes, coeffVec_cplx[i]);
                 } else {
-                    account.put_nodedata(j + N, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
+                    account.put_nodedata(j, indexVec[i], csize, &(coeffVec_cplx[i][sizecoeff - csize]));
                 }
             }
         }
@@ -1377,7 +1414,6 @@ void save_nodes(CompFunctionVector &Phi, FunctionTree<3> &refTree, BankAccount &
  * Input trees are extended by one scale at most.
  */
 CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec, CompFunction<3> *Func, int nrefine, bool all) {
-
     int N = Phi.size();
     const int D = 3;
     bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
@@ -1388,9 +1424,10 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     for (int i = 0; i < N; i++) {
         if (!mpi::my_func(i)) continue;
         int irefine = 0;
-        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
+        while (Phi[i].isreal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
+        if (Phi[i].iscomplex()) MSG_ABORT("Not yet implemented");
         irefine = 0;
-        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
+        //        while (Phi[i].iscomplex() and irefine < nrefine and refine_grid(Phi[i].complex(), f) > 0) irefine++;
     }
 
     // 1b) make union tree without coefficients
@@ -1827,7 +1864,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
             }
             if (orbVec.size() > 0) {
                 ComplexMatrix S_temp(orbVec.size(), orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
                         if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
@@ -1845,14 +1882,14 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
             if (orbVec.size() > 0) {
                 ComplexMatrix S_temp(orbVec.size(), orbVec.size());
                 coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
                         if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
                             BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
                             BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
                             continue;
-                        S(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
             }
@@ -1868,12 +1905,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
     }
 
     }
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
     for (int i = 0; i < N; i++) {
         for (int j = 0; j <= i; j++) {
@@ -1894,7 +1925,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
 
     int N = BraKet.size();
     ComplexMatrix S = ComplexMatrix::Zero(N, N);
-    DoubleMatrix Sreal = S.real();
+
     MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
 
     // 1) make union tree without coefficients
@@ -1947,7 +1978,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     }
 
     // 3) make dot product for all the nodes and accumulate into S
-
     int ibank = 0;
 #pragma omp parallel if (serial)
     {
@@ -2018,12 +2048,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     }
 
     }
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
 
     for (int i = 0; i < N; i++) {
         for (int j = 0; j <= i; j++) {
@@ -2039,16 +2063,46 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
 
 /** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
  *
+ *  Will take the conjugate of bra before integrating
  */
-ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) {
+ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket) {
+    std::cout<<" calc_overlap_matrix start"<<std::endl;
     mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
-
+    bool braisreal = !Bra[0].iscomplex();
+    bool ketisreal = !Ket[0].iscomplex();
+    if (braisreal or ketisreal) {
+        // temporary solution: copy as complex trees
+        if(braisreal){
+            for (int i = 0; i < Bra.size(); i++) { // bound by Bra itself: Ket may hold a different number of functions
+                Bra[i].CompD[0]->CopyTreeToComplex(Bra[i].CompC[0]); // duplicate the real tree as a complex tree
+                Bra[i].func_ptr->iscomplex = 1;                      // NB: this permanently changes the caller's Bra
+            }
+        }
+        if(ketisreal){
+            for (int i = 0; i < Ket.size(); i++) {
+                Ket[i].CompD[0]->CopyTreeToComplex(Ket[i].CompC[0]);
+                Ket[i].func_ptr->iscomplex = 1;
+            }
+        }
+    }
     MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
 
     int N = Bra.size();
     int M = Ket.size();
     ComplexMatrix S = ComplexMatrix::Zero(N, M);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * M); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+
+    IntVector conjMatBra = IntVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(Bra[i])) continue;
+        conjMatBra[i] = (Bra[i].conjugate()) ? 1 : 0;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
+    IntVector conjMatKet = IntVector::Zero(M);
+    for (int i = 0; i < M; i++) {
+        if (!mrcpp::mpi::my_func(Ket[i])) continue;
+        conjMatKet[i] = (Ket[i].conjugate()) ? 1 : 0;
+    }
+    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
 
     // 1) make union tree without coefficients for Bra (supposed smallest)
     mrcpp::FunctionTree<3> refTree(*mra);
@@ -2072,16 +2126,18 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
 
     // only used for serial case:
-    std::vector<std::vector<double *>> coeffVecBra(2 * N);
+    std::vector<std::vector<ComplexDouble *>> coeffVecBra(N);
     std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeBra(2 * N); // for a given orbital and a given node, gives the node index in
+    std::vector<std::map<int, int>> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in
                                                         // the orbital given the node index in the reference tree
-    std::vector<std::vector<double *>> coeffVecKet(2 * M);
+    std::vector<std::vector<ComplexDouble *>> coeffVecKet(M);
     std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeKet(2 * M); // for a given orbital and a given node, gives the node index in
+    std::vector<std::map<int, int>> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in
                                                         // the orbital given the node index in the reference tree
     mrcpp::BankAccount nodesBra;
     mrcpp::BankAccount nodesKet;
+    std::cout<<" 1) calc_overlap_matrix start"<<std::endl;
+
     // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
     if (serial) {
         // 2) make list of all coefficients, and their reference indices
@@ -2092,49 +2148,227 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
         std::vector<int> indexVec;    // serialIx of the nodes
         for (int j = 0; j < N; j++) {
             // make vector with all coef pointers and their indices in the union grid
-            if (Bra[j].hasReal()) {
-                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j);
-                }
-            }
-            if (Bra[j].hasImag()) {
-                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j + N);
-                }
+            Bra[j].complex().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2nodeBra[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVecBra[ix].push_back(j);
             }
         }
         for (int j = 0; j < M; j++) {
-            if (Ket[j].hasReal()) {
-                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j);
+            Ket[j].complex().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2nodeKet[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVecKet[ix].push_back(j);
+            }
+        }
+
+    } else { // MPI case
+        // 2) send own nodes to bank, identifying them through the serialIx of refTree
+        save_nodes(Bra, refTree, nodesBra);
+        save_nodes(Ket, refTree, nodesKet);
+        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
+    }
+
+    std::cout<<" 2) calc_overlap_matrix"<<std::endl;
+    // 3) make dot product for all the nodes and accumulate into S
+    int totsiz = 0;
+    int totget = 0;
+    int mxtotsiz = 0;
+    int ibank = 0;
+#pragma omp parallel if (serial)
+    {
+    ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread
+
+    //#pragma omp for schedule(dynamic)
+#pragma omp for schedule(static)
+    for (int n = 0; n < max_n; n++) {
+        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+        int csize;
+        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
+        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
+        if (parindexVec_ref[n] < 0)
+            csize = sizecoeff;
+        else
+            csize = sizecoeffW;
+        if (serial) {
+            int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
+            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+            ComplexMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
+            ComplexMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
+            if (parindexVec_ref[n] < 0) shift = 0;
+
+            for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeBra[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
+                orbVecBra.push_back(j);
+            }
+            for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
+                int orb_node_ix = orb2nodeKet[j][node_ix];
+                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
+                orbVecKet.push_back(j);
+            }
+
+            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                if (not conjMatBra[0] and not conjMatKet[0]) { // flags of element 0 are taken for the whole vector
+                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet; // bra^H * ket
+                } else if (conjMatBra[0] and not conjMatKet[0]) { // bra flagged conjugate: conj(conj(b)) = b
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet; // bra^T * ket
+                } else if (not conjMatBra[0] and conjMatKet[0]) { // ket flagged conjugate: use conj(k)
+                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet.conjugate(); // bra^H * conj(ket)
+                } else if (conjMatBra[0] and conjMatKet[0]) { // both flagged conjugate
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.conjugate(); // bra^T * conj(ket)
+                } else MSG_ABORT("Unexpected case");
+                for (int i = 0; i < orbVecBra.size(); i++) {
+                    for (int j = 0; j < orbVecKet.size(); j++) {
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
+                        S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                    }
                 }
+
             }
-            if (Ket[j].hasImag()) {
-                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j + M][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j + M);
+        } else { // MPI case
+
+            ComplexMatrix coeffBlockBra(csize, N);
+            ComplexMatrix coeffBlockKet(csize, M);
+            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
+            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
+            totsiz += orbVecBra.size() * orbVecKet.size();
+            mxtotsiz += N * M;
+            totget += orbVecBra.size() + orbVecKet.size();
+            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
+                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
+                if (not conjMatBra[0] and not conjMatKet[0]) { // flags of element 0 are taken for the whole vector
+                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet; // bra^H * ket
+                } else if (conjMatBra[0] and not conjMatKet[0]) { // bra flagged conjugate: conj(conj(b)) = b
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet; // bra^T * ket
+                } else if (not conjMatBra[0] and conjMatKet[0]) { // ket flagged conjugate: use conj(k)
+                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet.conjugate(); // bra^H * conj(ket)
+                } else if (conjMatBra[0] and conjMatKet[0]) { // both flagged conjugate
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.conjugate(); // bra^T * conj(ket)
+                } else MSG_ABORT("Unexpected case");
+
+                for (int i = 0; i < orbVecBra.size(); i++) {
+                    for (int j = 0; j < orbVecKet.size(); j++) {
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
+                        S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                    }
                 }
             }
         }
+    }
+    if (serial) {
+#pragma omp critical
+        for (int i = 0; i < N; i++) {
+            for (int j = 0; j < M; j++) {
+                S(i, j) += S_omp(i, j);
+            }
+        }
+    }
+    }
+
+    std::cout<<" 4) calc_overlap_matrix"<<std::endl;
+
+    // 4) collect results from all MPI. Linearity: result is sum of all node contributions
+
+    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+
+    return S;
+}
+
+/** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
+ *
+ */
+ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) {
+
+    if (Bra[0].iscomplex() or Ket[0].iscomplex()){
+         return calc_overlap_matrix_cplx(Bra, Ket);
+    }
+
+
+    mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
+
+    MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
+
+    int N = Bra.size();
+    int M = Ket.size();
+    ComplexMatrix S = ComplexMatrix::Zero(N, M);
+
+    // 1) make union tree without coefficients for Bra (supposed smallest)
+    mrcpp::FunctionTree<3> refTree(*mra);
+    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk);
+    // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero.
+
+    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
+    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
+
+    // get a list of all nodes in union grid, as defined by their indices
+    std::vector<double *> coeffVec_ref;
+    std::vector<int> indexVec_ref;    // serialIx of the nodes
+    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
+    std::vector<double> scalefac;
+    int max_ix;
+
+    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
+    int max_n = indexVec_ref.size();
+    max_ix++;
+
+    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
+
+    // only used for serial case:
+    std::vector<std::vector<double *>> coeffVecBra(N);
+    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    std::vector<std::vector<double *>> coeffVecKet(M);
+    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::vector<std::map<int, int>> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in
+                                                        // the orbital given the node index in the reference tree
+    mrcpp::BankAccount nodesBra;
+    mrcpp::BankAccount nodesKet;
+    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
+    if (serial) {
+        // 2) make list of all coefficients, and their reference indices
+        // for different orbitals, indexVec will give the same index for the same node in space
+        // TODO? : do not copy coefficients, but use directly the pointers
+        // could OMP parallelize, but is fast anyway
+        std::vector<int> parindexVec; // serialIx of the parent nodes
+        std::vector<int> indexVec;    // serialIx of the nodes
+        for (int j = 0; j < N; j++) {
+            // make vector with all coef pointers and their indices in the union grid
+            Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2nodeBra[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVecBra[ix].push_back(j);
+            }
+        }
+        for (int j = 0; j < M; j++) {
+            Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
+            // make a map that gives j from indexVec
+            int orb_node_ix = 0;
+            for (int ix : indexVec) {
+                orb2nodeKet[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVecKet[ix].push_back(j);
+            }
+        }
 
     } else { // MPI case
         // 2) send own nodes to bank, identifying them through the serialIx of refTree
@@ -2148,8 +2382,11 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     int totget = 0;
     int mxtotsiz = 0;
     int ibank = 0;
-    //For some unknown reason the h2_mag_lda test sometimes fails when schedule(dynamic) is chosen
-#pragma omp parallel for schedule(static) if (serial)
+    //#pragma omp parallel if (serial)
+    {
+    DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread
+    //NB: dynamic does give strange errors?
+#pragma omp for schedule(static)
     for (int n = 0; n < max_n; n++) {
         if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
         int csize;
@@ -2180,24 +2417,21 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
             if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
                 DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
                         if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
                             Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
                             Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
                             continue;
-                        // must ensure that threads are not competing
-                        double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
+                        S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
             }
-        } else {
+        } else { // MPI case
 
-            DoubleMatrix coeffBlockBra(csize, 2 * N);
-            DoubleMatrix coeffBlockKet(csize, 2 * M);
+            DoubleMatrix coeffBlockBra(csize, N);
+            DoubleMatrix coeffBlockKet(csize, M);
             nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
             nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
             totsiz += orbVecBra.size() * orbVecKet.size();
@@ -2214,32 +2448,21 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                             Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
                             Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
                             continue;
-                        Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                        S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
             }
         }
     }
-
-    IntVector conjMatBra = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_func(Bra[i])) continue;
-        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
-    IntVector conjMatKet = IntVector::Zero(M);
-    for (int i = 0; i < M; i++) {
-        if (!mrcpp::mpi::my_func(Ket[i])) continue;
-        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMatBra[i] * conjMatKet[j] * Sreal(i + N, j + M);
-            S.imag()(i, j) = conjMatKet[j] * Sreal(i, j + M) - conjMatBra[i] * Sreal(i + N, j);
+    if (serial) {
+#pragma omp critical
+        for (int i = 0; i < N; i++) {
+            for (int j = 0; j < M; j++) {
+                S(i, j) += S_omp(i, j);
+            }
         }
     }
+    }
 
     // 4) collect results from all MPI. Linearity: result is sum of all node contributions
 
@@ -2248,13 +2471,15 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     return S;
 }
 
+
 /** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
  *
  */
 DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
+    std::cout<<" calc_norm_overlap_matrix "<<std::endl;
     int N = BraKet.size();
     DoubleMatrix S = DoubleMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
+    DoubleMatrix Sreal = DoubleMatrix::Zero(N, N); // N x N; the former 2N x 2N rr,ri,ir,ii block layout no longer applies
     MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
 
     // 1) make union tree without coefficients
@@ -2275,9 +2500,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
     int max_n = indexVec_ref.size();
 
     // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
+    std::vector<std::vector<double *>> coeffVec(N);
     std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
+    std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
                                                      // the orbital given the node index in the reference tree
 
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
@@ -2360,7 +2585,7 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 }
             }
         } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
+            DoubleMatrix coeffBlock(csize, N);
             nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
 
             if (orbVec.size() > 0) {
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 7f857c914..57095ae6c 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -67,6 +67,8 @@ template <int D> class TreePtr final {
         for (int i = 0; i < 4; i++) {
             if (this->real[i] != nullptr) delete this->real[i];
             if (this->cplx[i] != nullptr) delete this->cplx[i];
+             this->real[i] = nullptr;
+             this->cplx[i] = nullptr;
         }
     }
     CompFunctionData<D> data;
@@ -100,8 +102,6 @@ template <int D> class CompFunction {
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
     virtual ~CompFunction() = default;
 
-//    FunctionTree<D, double>* (&CompD)[4]; //  = func_ptr->real so that we can use name CompD instead of func_ptr.real
-//    FunctionTree<D, ComplexDouble>* (&CompC)[4]; // = func_ptr->cplx
     FunctionTree<D, double>** CompD; //  = func_ptr->real so that we can use name CompD instead of func_ptr.real
     FunctionTree<D, ComplexDouble>** CompC; // = func_ptr->cplx
 
@@ -121,7 +121,7 @@ template <int D> class CompFunction {
     ComplexDouble integrate() const;
     double norm() const;
     double squaredNorm() const;
-    void alloc(int i = 0);
+    void alloc(int i = 0, bool zero = true);
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setCplx(FunctionTree<D, ComplexDouble> *tree, int i = 0);
     void setRank(int i) {func_ptr->rank = i;};
@@ -199,7 +199,7 @@ class CompFunctionVector : public std::vector<CompFunction<3>> {
 
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec = -1.0);
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
-void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
+//void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
 void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
 CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, CompFunction<3> *Func = nullptr, int nrefine = 1, bool all = false);
 void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
@@ -207,6 +207,7 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket);
 ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi);
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet);
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket);
+//ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket);
 DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet);
 void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket);
 
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
index 63d855727..625f19a62 100644
--- a/src/utils/ComplexFunction.cpp
+++ b/src/utils/ComplexFunction.cpp
@@ -769,7 +769,6 @@ void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, do
     std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
                                                     // orbital given the node index in the reference tree
     if (serial) {
-
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
         std::vector<int> parindexVec; // serialIx of the parent nodes
         std::vector<double> scalefac;

From 6624a73fadcb9083ef86fad24c2635bfb882bfe2 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Thu, 22 Aug 2024 16:06:36 +0200
Subject: [PATCH 26/38] orthogonalize for complex type

---
 src/trees/FunctionTree.cpp |  80 +++++++++++++++++++++
 src/trees/FunctionTree.h   |   2 +
 src/utils/CompFunction.cpp | 140 +++++++++++++++++++++----------------
 src/utils/CompFunction.h   |   2 +-
 4 files changed, 163 insertions(+), 61 deletions(-)

diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 98a88851e..299fb884d 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -1013,6 +1013,86 @@ void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>*
     outTree->calcSquareNorm(true);
 }
 
+template <> void FunctionTree<2, double>::CopyTreeToComplex(FunctionTree<2, ComplexDouble>* &outTree) {
+    delete outTree;
+    // Replace outTree by a complex-valued tree on the same MRA, filled node by node from this real tree.
+    outTree = new FunctionTree<2, ComplexDouble> (this->getMRA());
+    std::vector<MWNode<2, double> *> instack;   // node from this
+    std::vector<MWNode<2, ComplexDouble> *> outstack; // node from outTree
+    outTree->clearEndNodeTable();
+    for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
+        instack.push_back(this->getRootBox().getNodes()[rIdx]);
+        outstack.push_back(outTree->getRootBox().getNodes()[rIdx]);
+    }
+    // Both stacks are pushed and popped in lockstep, so their tops always pair up.
+    int ncoefs = this->getNodeAllocator().getNCoefs();
+    while (instack.size() > 0) {
+        // inNode and outNode are the same node in space, but on different trees
+        MWNode<2, ComplexDouble> *outNode = outstack.back();
+        outstack.pop_back();
+        MWNode<2, double> *inNode = instack.back();
+        instack.pop_back();
+        // copy coefficients:
+        double* incoefs = inNode->getCoefs();
+        ComplexDouble* outcoefs = outNode->getCoefs();
+        for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
+        outNode->setHasCoefs();
+        outNode->calcNorms();
+
+        if (inNode->getNChildren() > 0) {
+            if (outNode->getNChildren() < inNode->getNChildren()) outNode->createChildren(true);
+            for (int i = 0; i < inNode->getNChildren(); i++) {
+                instack.push_back(inNode->children[i]);
+                outstack.push_back(outNode->children[i]);
+            }
+        } else {
+            outTree->endNodeTable.push_back(outNode);
+        }
+    }
+    outTree->calcSquareNorm();
+    outTree->calcSquareNorm(true);
+}
+
+template <> void FunctionTree<1, double>::CopyTreeToComplex(FunctionTree<1, ComplexDouble>* &outTree) {
+    delete outTree;
+    // Replace outTree by a complex-valued tree on the same MRA, filled node by node from this real tree.
+    outTree = new FunctionTree<1, ComplexDouble> (this->getMRA());
+    std::vector<MWNode<1, double> *> instack;   // node from this
+    std::vector<MWNode<1, ComplexDouble> *> outstack; // node from outTree
+    outTree->clearEndNodeTable();
+    for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
+        instack.push_back(this->getRootBox().getNodes()[rIdx]);
+        outstack.push_back(outTree->getRootBox().getNodes()[rIdx]);
+    }
+    // Both stacks are pushed and popped in lockstep, so their tops always pair up.
+    int ncoefs = this->getNodeAllocator().getNCoefs();
+    while (instack.size() > 0) {
+        // inNode and outNode are the same node in space, but on different trees
+        MWNode<1, ComplexDouble> *outNode = outstack.back();
+        outstack.pop_back();
+        MWNode<1, double> *inNode = instack.back();
+        instack.pop_back();
+        // copy coefficients:
+        double* incoefs = inNode->getCoefs();
+        ComplexDouble* outcoefs = outNode->getCoefs();
+        for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
+        outNode->setHasCoefs();
+        outNode->calcNorms();
+
+        if (inNode->getNChildren() > 0) {
+            if (outNode->getNChildren() < inNode->getNChildren()) outNode->createChildren(true);
+            for (int i = 0; i < inNode->getNChildren(); i++) {
+                instack.push_back(inNode->children[i]);
+                outstack.push_back(outNode->children[i]);
+            }
+        } else {
+            outTree->endNodeTable.push_back(outNode);
+        }
+    }
+    outTree->calcSquareNorm();
+    outTree->calcSquareNorm(true);
+}
+
 // for testing
 template<>
 void FunctionTree<3, double>::CopyTreeToReal(FunctionTree<3, double>* &outTree) {
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 3e45b2871..05961eb7d 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -119,6 +119,8 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     FunctionTree<D, double> *Real();
     FunctionTree<D, double> *Imag();
     void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &out);
+    void CopyTreeToComplex(FunctionTree<2, ComplexDouble>* &out);
+    void CopyTreeToComplex(FunctionTree<1, ComplexDouble>* &out);
     void CopyTreeToReal(FunctionTree<3, double>* &out); //for testing
 
 protected:
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 630b84cb4..abe3d592e 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -392,7 +392,16 @@ void CompFunction<D>::rescale(ComplexDouble c) {
             if (iscomplex()) {
                 CompC[i]->rescale(c);
             } else {
-                CompD[i]->rescale(c.real());
+                if(std::abs(c.imag())>MachineZero){ // complex factor on a real tree: convert to complex first (works only for NComp==1)
+                    CompD[i]->CopyTreeToComplex(CompC[i]);
+                    delete CompD[i];
+                    CompD[i] = nullptr;
+                    func_ptr->iscomplex = true; // one flag for all components, hence the NComp==1 restriction
+                    func_ptr->isreal = false;
+                    CompC[i]->rescale(c);
+                } else {
+                    CompD[i]->rescale(c.real());
+                }
             }
         }
     } else MSG_ERROR("Not implemented");
@@ -535,13 +544,13 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
     bool out_allocated = true;
     if (out.Ncomp() == 0) out_allocated = false;
-    std::cout<<"multiply "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<" "<<out.isreal()<<" "<<out_allocated<<std::endl;
-   bool share = out.isShared();
+    bool share = out.isShared();
     out.func_ptr->data = inp_a.func_ptr->data;
     out.func_ptr->data.shared = share; // we don' inherit the shareness
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
         if (inp_a.isreal() and inp_b.isreal()) {
             if (need_to_multiply) {
+                if (!out_allocated) out.alloc(out.Ncomp()-1);
                 if (prec < 0.0) {
                     // Union grid
                     build_grid(*out.CompD[comp], *inp_a.CompD[comp]);
@@ -549,7 +558,6 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                     mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], 0, false, false, conjugate);
                 } else {
                     // Adaptive grid
-                    if (!out_allocated) out.alloc(out.Ncomp()-1);
                     mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate);
                }
             }
@@ -581,10 +589,8 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                     mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
                 } else {// note that this assumes Ncomp=1
                     // Adaptive grid
-                    std::cout<<"Adaptive grid "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
-                   if (out.CompD[comp] != nullptr) { //NB: func_ptr has alreadybeen overwritten!
-                       std::cout<<"copoy to complex  "<<" "<<out.CompD[comp]->getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
-                         if(out.CompD[comp]->getNNodes() > 0){
+                    if (out.CompD[comp] != nullptr) { //NB: func_ptr has alreadybeen overwritten!
+                        if(out.CompD[comp]->getNNodes() > 0){
                             out.CompD[comp]->CopyTreeToComplex(out.CompC[comp]);
                             out.func_ptr->iscomplex = 1;
                             out.func_ptr->isreal = 0;
@@ -600,8 +606,7 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                         out.func_ptr->isreal = 0;
                         if (!out_allocated) out.alloc(out.Ncomp()-1);
                     }
-                     std::cout<<"before "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
-                  mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
+                    mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
                 }
             }
             // restore original tree
@@ -620,7 +625,6 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
         }
     }
     mpi::share_function(out, 0, 9911, mpi::comm_share);
-   std::cout<<"final multiply "<<" "<<out.getNNodes()<<" "<<inp_a.getNNodes()<<" "<<inp_b.getNNodes()<<std::endl;
 
 }
 
@@ -1867,10 +1871,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                       // if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                       //     BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                       //     BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                       //     continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -1885,10 +1889,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                      //      continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2010,10 +2014,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                      //      continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2028,10 +2032,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                      //      continue;
                         S(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2066,7 +2070,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
  *  Will take the conjugate of bra before integrating
  */
 ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket) {
-    std::cout<<" calc_overlap_matrix start"<<std::endl;
     mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
     bool braisreal = !Bra[0].iscomplex();
     bool ketisreal = !Ket[0].iscomplex();
@@ -2136,7 +2139,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
                                                         // the orbital given the node index in the reference tree
     mrcpp::BankAccount nodesBra;
     mrcpp::BankAccount nodesKet;
-    std::cout<<" 1) calc_overlap_matrix start"<<std::endl;
 
     // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
     if (serial) {
@@ -2175,13 +2177,13 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
         mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
     }
 
-    std::cout<<" 2) calc_overlap_matrix"<<std::endl;
     // 3) make dot product for all the nodes and accumulate into S
     int totsiz = 0;
     int totget = 0;
     int mxtotsiz = 0;
     int ibank = 0;
-#pragma omp parallel if (serial)
+    //the omp crashes sometime for unknown reasons!
+//#pragma omp parallel if (serial)
     {
     ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread
 
@@ -2227,10 +2229,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
                 } else MSG_ABORT("Unexpected case");
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                        //                       if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+             //                  Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+            //                   Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+               //                continue;
                         S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2261,10 +2263,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
 
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                        //     if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                    //        Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                     //       Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                     //       continue;
                         S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2281,12 +2283,28 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     }
     }
 
-    std::cout<<" 4) calc_overlap_matrix"<<std::endl;
 
     // 4) collect results from all MPI. Linearity: result is sum of all node contributions
 
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
 
+    // restore input
+    if(braisreal){
+        for (int i = 0; i < Bra.size(); i++) { // bound by Bra itself: Bra and Ket may differ in size
+            delete Bra[i].CompC[0];
+            Bra[i].CompC[0] = nullptr;
+            Bra[i].func_ptr->iscomplex = 0;
+            Bra[i].func_ptr->isreal = 1;
+        }
+    }
+    if(ketisreal){
+        for (int i = 0; i < Ket.size(); i++) {
+            delete Ket[i].CompC[0];
+            Ket[i].CompC[0] = nullptr;
+            Ket[i].func_ptr->iscomplex = 0;
+            Ket[i].func_ptr->isreal = 1;
+        }
+    }
     return S;
 }
 
@@ -2420,10 +2438,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
 
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                       // if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                       //     Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                       //     Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                       //     continue;
                         S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2444,10 +2462,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
+                      //  if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                      //      Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                      //      Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                      //      continue;
                         S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2476,7 +2494,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
  *
  */
 DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
-    std::cout<<" calc_norm_overlap_matrix "<<std::endl;
     int N = BraKet.size();
     DoubleMatrix S = DoubleMatrix::Zero(N, N);
     DoubleMatrix Sreal = DoubleMatrix::Zero(N, N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
@@ -2573,10 +2590,10 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
-                            continue;
+                     //   if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                     //       BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                      //      BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                      //      continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
 #pragma omp atomic
@@ -2595,10 +2612,10 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
-                            continue;
+                     //   if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                     //       BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                     //       BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                     //       continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2660,10 +2677,13 @@ void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket) {
     double sq_norm = Ket.squaredNorm();
     for (int i = 0; i < Bra.Ncomp(); i++) {
         if (Bra.isreal()) {
+            if (std::abs(overlap.imag())>MachineZero) MSG_ABORT("NOT IMPLEMENTED"); // only overlap.real() is applied to a real Bra below
             Bra.CompD[i]->add_inplace(-overlap.real()/sq_norm,*Ket.CompD[i]);
         } else {
-            Bra.CompC[i]->add_inplace(-overlap/sq_norm,*Ket.CompC[i]);
-        }
+            if (Ket.isreal()) MSG_ABORT("NOT IMPLEMENTED");
+            Bra.CompC[i]->add_inplace(-std::conj(overlap/sq_norm),*Ket.CompC[i]);
+            overlap = dot(Bra, Ket);
+       }
     }
 }
 
@@ -2680,6 +2700,6 @@ template void deep_copy(CompFunction<3>& out, const CompFunction<3> &inp);
 template void add(CompFunction<3> &out, ComplexDouble a, CompFunction<3> inp_a, ComplexDouble b, CompFunction<3> inp_b, double prec, bool conjugate);
 template void linear_combination(CompFunction<3> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<3>> &inp, double prec, bool conjugate);
 template double node_norm_dot(CompFunction<3> bra, CompFunction<3> ket);
-    template void orthogonalize(double prec, CompFunction<3> &Bra, CompFunction<3> &Ket);
+template void orthogonalize(double prec, CompFunction<3> &Bra, CompFunction<3> &Ket);
 
 } // namespace mrcpp
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 57095ae6c..9fca0f2e4 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -24,7 +24,7 @@ struct CompFunctionData {
     // additional data that describe each component (defined by user):
     // occupancy, quantum number, norm, etc.
     //Note: defined with fixed size to ease copying and MPI send
-    int n1[4]{0,0,0,0}; // 0: neutral. values 1 and 2 are orthogonal to each other (product = 0)
+    int n1[4]{0,0,0,0}; // NOT ENFORCED (spin tests in operator_comosition fails):  0: neutral. values 1 and 2 are orthogonal to each other (product = 0)
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
     int n4[4]{0,0,0,0};

From 1b29dea2c3b1fa97783935f5740b2250ad8bee02 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 23 Aug 2024 17:36:05 +0200
Subject: [PATCH 27/38] all tests passes

---
 src/utils/CompFunction.cpp    | 114 +++++++++++++++++++---------------
 src/utils/CompFunction.h      |   4 +-
 src/utils/ComplexFunction.cpp |   6 +-
 src/utils/ComplexFunction.h   |   2 +-
 4 files changed, 70 insertions(+), 56 deletions(-)

diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index abe3d592e..bc5b51cd3 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -195,12 +195,12 @@ ComplexDouble CompFunction<D>::integrate() const {
 
 template <int D>
 double CompFunction<D>::norm() const {
-    double norm = squaredNorm();
+    double norm = getSquareNorm();
     if (norm > 0.0) norm = std::sqrt(norm);
     return norm;
 }
 template <int D>
-double CompFunction<D>::squaredNorm() const {
+double CompFunction<D>::getSquareNorm() const {
     double norm = 0.0;
     for (int i = 0; i < Ncomp(); i++) {
         if (isreal() and CompD[i]!= nullptr) {
@@ -634,9 +634,15 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
  */
 template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
-    if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
+   if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
     if (inp_a.isreal() != 1) MSG_ERROR("Not implemented");
-    multiply(out, *inp_a.CompD[0], f, prec, nrefine, conjugate);
+    if (conjugate) MSG_ERROR("Not implemented");
+    CompFunctionVector CompVec; // Should use vector<CompFunction>?
+    CompVec.push_back(inp_a);
+    CompFunctionVector CompVecOut;
+    CompVecOut = multiply(CompVec, f, prec, nullptr, nrefine, true); // vector overload: Func = nullptr, all = true
+    out = CompVecOut[0];
+    // Old body (the FunctionTree overload it called forwards back to this function): multiply(out, *inp_a.CompD[0], f, prec, nrefine, conjugate);
 }
 
 /** @brief out = inp_a * f
@@ -645,9 +651,15 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  */
 template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
+    MSG_ERROR("Not implemented");
     if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
     if (inp_a.iscomplex() != 1) MSG_ERROR("Not implemented");
-    multiply(out, *inp_a.CompC[0], f, prec, nrefine, conjugate);
+    if (conjugate) MSG_ERROR("Not implemented");
+    CompFunctionVector CompVec; // Should use vector<CompFunction>?
+    CompVec.push_back(inp_a);
+    CompFunctionVector CompVecOut;
+    // CompVecOut = multiply(CompVec, f, prec, nrefine, true);
+    if (!CompVecOut.empty()) out = CompVecOut[0]; // CompVecOut stays empty while the call above is disabled
 
 }
 
@@ -656,9 +668,10 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  */
 template <int D>
 void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
-    CompFunction<D> func_a(1);
+    CompFunction<D> func_a;
     func_a.func_ptr->isreal = 1;
     func_a.func_ptr->iscomplex = 0;
+    func_a.alloc(0);
     func_a.CompD[0] = &inp_a;
     multiply(out, func_a, f, prec, nrefine, conjugate);
     func_a.CompD[0] = nullptr;
@@ -1421,7 +1434,6 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     int N = Phi.size();
     const int D = 3;
     bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
-
     // 1a) extend grid where f is large (around nuclei)
     // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree?
 
@@ -1451,7 +1463,9 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
     CompFunctionVector out(N);
-    CompFunctionVector outtest(N);
+    for (int i = 0; i < N; i++) {
+        out[i] = Phi[i].paramCopy(); // one tree-less parameter copy per input function (index i, not 0)
+    }
     if (not PsihasReIm[0] and not PsihasReIm[1]) {
         return out; // do nothing
     }
@@ -1871,10 +1885,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                       // if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                       //     BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                       //     BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                       //     continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -1889,10 +1903,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                      //      continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2014,10 +2028,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                      //      continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2032,10 +2046,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                      //  if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                      //      BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                      //      BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                      //      continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2229,10 +2243,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
                 } else MSG_ABORT("Unexpected case");
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        //                       if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-             //                  Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-            //                   Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-               //                continue;
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2263,10 +2277,10 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
 
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                        //     if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                    //        Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                     //       Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                     //       continue;
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2438,10 +2452,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
 
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                       // if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                       //     Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                       //     Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                       //     continue;
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2462,10 +2476,10 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
                 S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
                 for (int i = 0; i < orbVecBra.size(); i++) {
                     for (int j = 0; j < orbVecKet.size(); j++) {
-                      //  if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                      //      Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                      //      Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                      //      continue;
+                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
+                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                            continue;
                         S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
                     }
                 }
@@ -2590,10 +2604,10 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                     //   if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                     //       BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                      //      BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
-                      //      continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                            continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
 #pragma omp atomic
@@ -2612,10 +2626,10 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                     //   if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                     //       BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                     //       BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
-                     //       continue;
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
+                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                            continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
                 }
@@ -2652,7 +2666,7 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
     int M = Ket.size();
     DoubleVector Ketnorms = DoubleVector::Zero(M);
     for (int i = 0; i < M; i++) {
-        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i].squaredNorm();
+        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i].getSquareNorm();
     }
     mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
     ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
@@ -2674,7 +2688,7 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
 template <int D>
 void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket) {
     ComplexDouble overlap = dot(Bra, Ket);
-    double sq_norm = Ket.squaredNorm();
+    double sq_norm = Ket.getSquareNorm();
     for (int i = 0; i < Bra.Ncomp(); i++) {
         if (Bra.isreal()) {
             if (abs(overlap.imag())>MachineZero) MSG_ABORT("NOT IMPLEMENTED");
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 9fca0f2e4..523d45b92 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -24,7 +24,7 @@ struct CompFunctionData {
     // additional data that describe each component (defined by user):
     // occupancy, quantum number, norm, etc.
     //Note: defined with fixed size to ease copying and MPI send
-    int n1[4]{0,0,0,0}; // NOT ENFORCED (spin tests in operator_comosition fails):  0: neutral. values 1 and 2 are orthogonal to each other (product = 0)
+    int n1[4]{0,0,0,0}; // 0: neutral. other wise different values are orthogonal to each other (product = 0)
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
     int n4[4]{0,0,0,0};
@@ -120,7 +120,7 @@ template <int D> class CompFunction {
     CompFunction paramCopy() const;
     ComplexDouble integrate() const;
     double norm() const;
-    double squaredNorm() const;
+    double getSquareNorm() const;
     void alloc(int i = 0, bool zero = true);
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setCplx(FunctionTree<D, ComplexDouble> *tree, int i = 0);
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
index 625f19a62..f60d90ab5 100644
--- a/src/utils/ComplexFunction.cpp
+++ b/src/utils/ComplexFunction.cpp
@@ -169,13 +169,13 @@ ComplexDouble ComplexFunction::integrate() const {
 
 /** @brief Returns the norm of the orbital */
 double ComplexFunction::norm() const {
-    double norm = squaredNorm();
+    double norm = getSquareNorm();
     if (norm > 0.0) norm = std::sqrt(norm);
     return norm;
 }
 
 /** @brief Returns the squared norm of the orbital */
-double ComplexFunction::squaredNorm() const {
+double ComplexFunction::getSquareNorm() const {
     double sq_r = -1.0;
     double sq_i = -1.0;
     if (hasReal()) sq_r = real().getSquareNorm();
@@ -1996,7 +1996,7 @@ void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
     int M = Ket.size();
     DoubleVector Ketnorms = DoubleVector::Zero(M);
     for (int i = 0; i < M; i++) {
-        if (mpi::my_orb(Ket[i])) Ketnorms(i)  = Ket[i].squaredNorm();
+        if (mpi::my_orb(Ket[i])) Ketnorms(i)  = Ket[i].getSquareNorm();
     }
     mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
     ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
index c43d3475c..7ded01255 100644
--- a/src/utils/ComplexFunction.h
+++ b/src/utils/ComplexFunction.h
@@ -145,7 +145,7 @@ class ComplexFunction {
     void setImag(mrcpp::FunctionTree<3, double> *tree);
 
     double norm() const;
-    double squaredNorm() const;
+    double getSquareNorm() const;
     ComplexDouble integrate() const;
 
     int crop(double prec);

From d9b07c274b0d8d87cc57ecf23fc9f4139467fc6d Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 26 Aug 2024 14:45:45 +0200
Subject: [PATCH 28/38] mixed complex/real linear combination

---
 src/treebuilders/add.cpp                |   3 +-
 src/utils/CompFunction.cpp              | 118 +++++++++++++++---------
 src/utils/CompFunction.h                |   4 +-
 src/utils/math_utils.cpp                |   2 +
 src/utils/parallel.cpp                  |  11 +--
 src/utils/parallel.h                    |   2 +-
 tests/operators/derivative_operator.cpp |   9 +-
 7 files changed, 90 insertions(+), 59 deletions(-)

diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp
index 7d7518b25..f7dfbe8c2 100644
--- a/src/treebuilders/add.cpp
+++ b/src/treebuilders/add.cpp
@@ -99,7 +99,7 @@ void add(double prec,
  * no coefs).
  *
  */
-    template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec, bool conjugate) {
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec, bool conjugate) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
@@ -113,6 +113,7 @@ void add(double prec,
     Timer trans_t;
     out.mwTransform(BottomUp);
     out.calcSquareNorm();
+
     trans_t.stop();
 
     Timer clean_t;
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index bc5b51cd3..4ccb63c45 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -86,12 +86,13 @@ namespace mrcpp {
  * Empty functions (trees defined but zero)
  */
   template <int D>
-  CompFunction<D>::CompFunction(const CompFunctionData<D>& indata)
+  CompFunction<D>::CompFunction(const CompFunctionData<D>& indata, bool alloc)
   { func_ptr = std::make_shared<TreePtr<D>>(indata.shared);
     func_ptr->data = indata;
     CompD = func_ptr->real;
     CompC = func_ptr->cplx;
-    this->alloc(Ncomp()-1);
+    if (alloc) this->alloc(Ncomp()-1);
+    else this->free();
   }
 
 /** @brief Copy constructor
@@ -126,8 +127,8 @@ template <int D>
  *
  * Returns a copy without defined trees.
  */
-CompFunction<D> CompFunction<D>::paramCopy() const {
-    return CompFunction<D>(func_ptr->data);
+CompFunction<D> CompFunction<D>::paramCopy(bool alloc) const {
+    return CompFunction<D>(func_ptr->data, alloc);
 }
 
 
@@ -251,8 +252,8 @@ void CompFunction<D>::free() {
         CompD[i] = nullptr;
         CompC[i] = nullptr;
     }
-    //    if (this->func_ptr->shared_mem_real) this->func_ptr->shared_mem_real->clear();
-    //if (this->func_ptr->shared_mem_cplx) this->func_ptr->shared_mem_cplx->clear();
+    if (this->func_ptr->shared_mem_real) this->func_ptr->shared_mem_real->clear();
+    if (this->func_ptr->shared_mem_cplx) this->func_ptr->shared_mem_cplx->clear();
     func_ptr->Ncomp = 0;
 }
 
@@ -361,9 +362,16 @@ void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
     }
 
     for (int i = 0; i < inp.Ncomp(); i++) {
-        if (inp.isreal()) {
+        if (inp.isreal() and std::abs(c.imag())<MachineZero) { // magnitude test: a negative imaginary part is still complex
             CompD[i]->add_inplace(c.real(),*inp.CompD[i]);
         } else {
+            if (this->isreal()){
+                CompD[i]->CopyTreeToComplex(CompC[i]);
+                delete CompD[i];
+                CompD[i] = nullptr;
+                func_ptr->iscomplex = true;
+                func_ptr->isreal = false;
+            }
             CompC[i]->add_inplace(c,*inp.CompC[i]);
         }
     }
@@ -404,7 +412,7 @@ void CompFunction<D>::rescale(ComplexDouble c) {
                 }
             }
         }
-    } else MSG_ERROR("Not implemented");
+    } else MSG_ABORT("Not implemented");
 }
 
 
@@ -479,16 +487,19 @@ template <int D>
     bool share = out.isShared();
     out.func_ptr->data = inp[0].func_ptr->data;
     out.func_ptr->data.shared = share; // we don' inherit the shareness
-    out.alloc(out.Ncomp()-1);
-    for (int i = 1; i < inp.size(); i++) {
-        if(inp[i].iscomplex() and !inp[0].iscomplex()) MSG_ABORT("mixed types not implemented");
+    bool iscomplex = false;
+    for (int i = 0; i < inp.size(); i++) if(inp[i].iscomplex() or std::abs(c[i].imag()) > MachineZero) iscomplex = true; // |Im(c)|: negative imaginary coefficients also force a complex output
+    if (iscomplex) {
+        out.func_ptr->data.iscomplex = 1;
+        out.func_ptr->data.isreal = 0;
     }
+    out.alloc(out.Ncomp()-1);
     for (int comp = 0; comp < inp[0].Ncomp(); comp++) {
-        if (inp[0].isreal()) {
+        if (not iscomplex) {
             FunctionTreeVector<D, double> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
-                if (inp[i].getNNodes()==0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue;
+                 if (inp[i].getNNodes()==0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i].real(), inp[i].CompD[comp]));
             }
             if (need_to_add) {
@@ -506,6 +517,13 @@ template <int D>
         } else {
             FunctionTreeVector<D, ComplexDouble> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
+                if (inp[i].isreal()) {
+                    inp[i].CompD[comp]->CopyTreeToComplex(inp[i].CompC[comp]);
+                    delete inp[i].CompD[comp];
+                    inp[i].CompD[comp] = nullptr;
+                    inp[i].func_ptr->iscomplex = true;
+                    inp[i].func_ptr->isreal = false;
+               }
                 if (std::norm(c[i]) < thrs) continue;
                 if (inp[i].getNNodes()==0 or inp[i].CompC[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp]));
@@ -514,7 +532,7 @@ template <int D>
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
                         build_grid(*out.CompC[comp], fvec);
-                        mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
+                      mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
                     } else {
                         mrcpp::add(prec, *out.CompC[comp], fvec, -1, false, conjugate);
                     }
@@ -537,16 +555,20 @@ void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b
 
 
 /** @brief out = inp_a * inp_b
- *
+ *  Takes conjugate of inp_a if conjugate=true
+ *  In case of mixed real/complex inputs, the real functions are converted into complex functions.
  */
 template <int D>
 void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
+    if (inp_b.func_ptr->conj) MSG_ABORT("Not implemented");
+    if (inp_a.func_ptr->conj) conjugate = (not conjugate);
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
     bool out_allocated = true;
     if (out.Ncomp() == 0) out_allocated = false;
     bool share = out.isShared();
     out.func_ptr->data = inp_a.func_ptr->data;
     out.func_ptr->data.shared = share; // we don' inherit the shareness
+    out.func_ptr->conj = false; // we don' inherit conjugaison
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
         if (inp_a.isreal() and inp_b.isreal()) {
             if (need_to_multiply) {
@@ -625,7 +647,6 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
         }
     }
     mpi::share_function(out, 0, 9911, mpi::comm_share);
-
 }
 
 /** @brief out = inp_a * f
@@ -634,9 +655,9 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
  */
 template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
-   if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
-    if (inp_a.isreal() != 1) MSG_ERROR("Not implemented");
-    if (conjugate) MSG_ERROR("Not implemented");
+    if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented");
+    if (inp_a.isreal() != 1) MSG_ABORT("Not implemented");
+    if (conjugate) MSG_ABORT("Not implemented");
     CompFunctionVector CompVec; // Should use vector<CompFunction>?
     CompVec.push_back(inp_a);
     CompFunctionVector CompVecOut;
@@ -651,10 +672,10 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  */
 template <int D>
 void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
-    MSG_ERROR("Not implemented");
-    if (inp_a.Ncomp() > 1) MSG_ERROR("Not implemented");
-    if (inp_a.iscomplex() != 1) MSG_ERROR("Not implemented");
-    if (conjugate) MSG_ERROR("Not implemented");
+    MSG_ABORT("Not implemented");
+    if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented");
+    if (inp_a.iscomplex() != 1) MSG_ABORT("Not implemented");
+    if (conjugate) MSG_ABORT("Not implemented");
     CompFunctionVector CompVec; // Should use vector<CompFunction>?
     CompVec.push_back(inp_a);
     CompFunctionVector CompVecOut;
@@ -695,17 +716,20 @@ void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, Repre
  */
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
+    if (bra.func_ptr->conj or ket.func_ptr->conj) MSG_ABORT("Not implemented");
     ComplexDouble dotprod = 0.0;
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
-          if (bra.isreal() and ket.isreal()) {
-              dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]);
-          } else  if (bra.isreal() and ket.iscomplex()) {
-              dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompC[comp]);
-          } else  if (bra.iscomplex() and ket.isreal()) {
-              dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompD[comp]);
-          } else {
-              dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
-          }
+        if (bra.func_ptr->data.n1[0] != ket.func_ptr->data.n1[0] and
+            bra.func_ptr->data.n1[0] != 0 and ket.func_ptr->data.n1[0]!= 0) continue;
+        if (bra.isreal() and ket.isreal()) {
+            dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]);
+        } else  if (bra.isreal() and ket.iscomplex()) {
+            dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompC[comp]);
+        } else  if (bra.iscomplex() and ket.isreal()) {
+            dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompD[comp]);
+        } else {
+            dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
+        }
     }
     if (bra.isreal() and ket.isreal()) {
         return dotprod.real();
@@ -725,6 +749,10 @@ double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
           if (bra.isreal() and ket.isreal()) {
               dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
+          } else  if (bra.isreal() and ket.iscomplex()) {
+              MSG_ABORT("Not implemented");
+          } else  if (bra.iscomplex() and ket.isreal()) {
+              MSG_ABORT("Not implemented");
           } else {
               dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
           }
@@ -800,6 +828,14 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
     bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     int N = Phi.size();
     int M = Psi.size();
+    for (int i = 0; i < M; i++){
+        for (int j = 0; j < 4; j++) { delete Psi[i].CompD[j]; Psi[i].CompD[j] = nullptr; } // start at j = 0 and clear the freed slots so no dangling pointer remains
+        Psi[i].func_ptr->isreal = 0;
+        Psi[i].func_ptr->iscomplex = 1;
+    }
+    for (int i = 0; i < N; i++){
+        if (Phi[i].func_ptr->conj) MSG_ABORT("Conjugaison not implemneted for rotations");
+    }
     if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
     if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
 
@@ -1852,7 +1888,6 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
     }
 
     // 3) make dot product for all the nodes and accumulate into S
-
     int ibank = 0;
 #pragma omp parallel if (serial)
     {
@@ -2090,7 +2125,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     if (braisreal or ketisreal) {
         // temporary solution: copy as complex trees
         if(braisreal){
-            for (int i = 0; i < Ket.size(); i++) {
+            for (int i = 0; i < Bra.size(); i++) {
                 Bra[i].CompD[0]->CopyTreeToComplex(Bra[i].CompC[0]);
                 Bra[i].func_ptr->iscomplex = 1;
             }
@@ -2196,13 +2231,12 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     int totget = 0;
     int mxtotsiz = 0;
     int ibank = 0;
-    //the omp crashes sometime for unknown reasons!
-//#pragma omp parallel if (serial)
+    //the omp crashes sometime for unknown reasons?
+#pragma omp parallel if (serial)
     {
     ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread
 
-    //#pragma omp for schedule(dynamic)
-#pragma omp for schedule(static)
+#pragma omp for schedule(dynamic)
     for (int n = 0; n < max_n; n++) {
         if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
         int csize;
@@ -2287,7 +2321,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
             }
         }
     }
-    if (serial) {
+   if (serial) {
 #pragma omp critical
         for (int i = 0; i < N; i++) {
             for (int j = 0; j < M; j++) {
@@ -2297,14 +2331,13 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     }
     }
 
-
     // 4) collect results from all MPI. Linearity: result is sum of all node contributions
 
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
 
     // restore input
     if(braisreal){
-        for (int i = 0; i < Ket.size(); i++) {
+        for (int i = 0; i < Bra.size(); i++) {
             delete Bra[i].CompC[0];
             Bra[i].CompC[0] = nullptr;
             Bra[i].func_ptr->iscomplex = 0;
@@ -2331,7 +2364,6 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
          return calc_overlap_matrix_cplx(Bra, Ket);
     }
 
-
     mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
 
     MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
@@ -2414,7 +2446,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     int totget = 0;
     int mxtotsiz = 0;
     int ibank = 0;
-    //#pragma omp parallel if (serial)
+#pragma omp parallel if (serial)
     {
     DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread
     //NB: dynamic does give strange errors?
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 523d45b92..ca9745727 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -96,7 +96,7 @@ template <int D> class CompFunction {
     CompFunction();
     CompFunction(int n1);
     CompFunction(int n1, bool share);
-    CompFunction(const CompFunctionData<D>& indata);
+    CompFunction(const CompFunctionData<D>& indata, bool alloc = false);
     CompFunction(const CompFunction<D> &compfunc);
     CompFunction(CompFunction<D> && compfunc);
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
@@ -117,7 +117,7 @@ template <int D> class CompFunction {
     int share() const {return func_ptr->data.shared;}
     int* Nchunks() const {return func_ptr->data.Nchunks;} // number of chunks of each component tree
 
-    CompFunction paramCopy() const;
+    CompFunction paramCopy(bool alloc = false) const;
     ComplexDouble integrate() const;
     double norm() const;
     double getSquareNorm() const;
diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp
index 5ee9294f6..6c4d9b02b 100644
--- a/src/utils/math_utils.cpp
+++ b/src/utils/math_utils.cpp
@@ -171,6 +171,7 @@ void math_utils::tensor_self_product(const VectorXd &A, MatrixXd &tprod) {
     for (int i = 0; i < Ar; i++) { tprod.block(i, 0, 1, Ar) = A(i) * A; }
 }
 
+/** Matrix multiplication of the filter with the input coefficient (type double)*/
 void math_utils::apply_filter(double *out, double *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
 #ifdef HAVE_BLAS
     cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
@@ -185,6 +186,7 @@ void math_utils::apply_filter(double *out, double *in, const MatrixXd &filter, i
 #endif
 }
 
+/** Matrix multiplication of the filter with the input coefficient (type complex)*/
 void math_utils::apply_filter(ComplexDouble *out, ComplexDouble *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
   //#ifdef HAVE_BLAS
 //    cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index bdd722015..d49fb83db 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -253,11 +253,6 @@ bool share_master() {
     return (share_rank == 0) ? true : false;
 }
 
-/** @brief Test if orbital belongs to this MPI rank (or is common)*/
-bool my_orb(int j) {
-    return ((j) % wrk_size == wrk_rank) ? true : false;
-}
-
 /** @brief Test if function belongs to this MPI rank */
 bool my_func(int j) {
     return ((j) % wrk_size == wrk_rank) ? true : false;
@@ -507,7 +502,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not my_orb(j)) continue;
+        if (not my_func(j)) continue;
         if (Phi[j].isreal()) tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
         if (Phi[j].iscomplex()) tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
@@ -529,7 +524,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not my_orb(j)) continue;
+        if (not my_func(j)) continue;
         if (Phi[j].isreal()) tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
         if (Phi[j].iscomplex()) tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
     }
@@ -551,7 +546,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
 
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
-        if (not my_orb(j)) continue;
+        if (not my_func(j)) continue;
         tree.appendTreeNoCoeff(Phi[j]);
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index a176fe7ab..12f1c0cf8 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -39,7 +39,7 @@ void barrier(MPI_Comm comm);
 
 bool grand_master();
 bool share_master();
-bool my_orb(int j);
+
 bool my_func(int j);
 bool my_func(const CompFunction<3>& func);
 bool my_func(CompFunction<3> *func);
diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index 773c12ec9..e4f887c8f 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -122,12 +122,13 @@ template <int D> void testDifferentiationABGV(double a, double b) {
     delete mra;
 }
 
+/* trees are defined as complex trees */
 template <int D> void testDifferentiationCplxABGV(double a, double b) {
     MultiResolutionAnalysis<D> *mra = initializeMRA<D>();
 
     double prec = 1.0e-3;
     ABGVOperator<D> diff(*mra, a, b);
-    ComplexDouble s = {1.1, 1.3};
+    ComplexDouble s = {1.1, 1.3}; // NB: Complex
 
     Coord<D> r_0;
     for (auto &x : r_0) x = pi;
@@ -137,7 +138,7 @@ template <int D> void testDifferentiationCplxABGV(double a, double b) {
         return std::exp(-R * R * s);
     };
 
-    auto df = [r_0, s](const Coord<D> &r) {
+    auto df = [r_0, s](const Coord<D> &r) { // analytical derivative of f
         double R = math_utils::calc_distance<D>(r, r_0);
         return -2.0 * s * std::exp(-R * R * s) * (r[0] - r_0[0]);
     };
@@ -148,11 +149,11 @@ template <int D> void testDifferentiationCplxABGV(double a, double b) {
     FunctionTree<D, ComplexDouble> df_tree(*mra);
     project<D, ComplexDouble>(prec / 10, df_tree, df);
 
-    FunctionTree<D, ComplexDouble> dg_tree(*mra);
+    FunctionTree<D, ComplexDouble> dg_tree(*mra); // MW derivative of f
     apply(dg_tree, diff, f_tree, 0);
 
     FunctionTree<D, ComplexDouble> err_tree(*mra);
-    add(-1.0, err_tree, {1.0, 0.0}, df_tree, {-1.0, 0.0}, dg_tree);
+    add(-1.0, err_tree, {1.0, 0.0}, df_tree, {-1.0, 0.0}, dg_tree);// difference between analytical and MW derivative of f.
 
     double df_norm = std::sqrt(df_tree.getSquareNorm());
     double abs_err = std::sqrt(err_tree.getSquareNorm());

From 9081abb4b4c16a2dfec54f170c6b8d1aa256038e Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 26 Aug 2024 15:45:00 +0200
Subject: [PATCH 29/38] redefined alloc for components

---
 src/treebuilders/apply.cpp |  4 +-
 src/treebuilders/grid.cpp  |  2 +-
 src/utils/CompFunction.cpp | 83 +++++++++++++++++++++++++-------------
 src/utils/CompFunction.h   |  5 ++-
 src/utils/parallel.cpp     |  4 +-
 5 files changed, 62 insertions(+), 36 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index d2bb1c286..ca49ec657 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -132,13 +132,13 @@ template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperat
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
                 if (inp.isreal()) {
-                    if (out.CompD[ocomp] == nullptr) out.alloc(ocomp);
+                    if (out.CompD[ocomp] == nullptr) out.alloc_comp(ocomp);
                     apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompD[ocomp]->rescale(metric[icomp][ocomp].real());
                     }
                 } else {
-                    if (out.CompC[ocomp] == nullptr) out.alloc(ocomp);
+                    if (out.CompC[ocomp] == nullptr) out.alloc_comp(ocomp);
                     apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
                     if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index 0b9867820..a93963477 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -238,7 +238,7 @@ template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTre
 template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
     out.free();
     out.func_ptr->data = inp.func_ptr->data;
-    out.alloc(inp.Ncomp()-1);
+    out.alloc(inp.Ncomp());
     for (int i = 0; i < inp.Ncomp(); i++) {
         if (inp.isreal()) build_grid(*out.CompD[i], *inp.CompD[i]);
         if (inp.iscomplex()) build_grid(*out.CompC[i], *inp.CompC[i]);
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 4ccb63c45..a8f4cf957 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -91,7 +91,7 @@ namespace mrcpp {
     func_ptr->data = indata;
     CompD = func_ptr->real;
     CompC = func_ptr->cplx;
-    if (alloc) this->alloc(Ncomp()-1);
+    if (alloc) this->alloc(Ncomp());
     else this->free();
   }
 
@@ -214,13 +214,14 @@ double CompFunction<D>::getSquareNorm() const {
 }
 
 //  Allocate empty trees. The tree must be defined as real or complex already.
-//  Allocates all the ialloc+1 trees, with indices 0,...ialloc
-//  ialloc is the largest index allocated. ialloc=0 allocates one tree.
+//  Allocates all nalloc trees, with indices 0,...,nalloc-1
+//  nalloc is the number of components allocated. nalloc=1 allocates one tree.
+//  Deletes all old trees if found.
 template <int D>
-void CompFunction<D>::alloc(int ialloc, bool zero) {
+void CompFunction<D>::alloc(int nalloc, bool zero) {
       if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
       if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
-      for (int i = 0; i < ialloc + 1; i++) {
+      for (int i = 0; i < nalloc; i++) {
           delete CompD[i];
           delete CompC[i];
           CompD[i] = nullptr;
@@ -235,7 +236,7 @@ void CompFunction<D>::alloc(int ialloc, bool zero) {
           }
           func_ptr->Ncomp = std::max(Ncomp(), i + 1);
       }
-      for (int i = ialloc + 1; i < Ncomp(); i++) {
+      for (int i = nalloc; i < Ncomp(); i++) {
           //delete possible remaining components
           delete CompD[i];
           delete CompC[i];
@@ -244,6 +245,30 @@ void CompFunction<D>::alloc(int ialloc, bool zero) {
       }
 }
 
+//  Allocate one empty tree for one specific component.
+//  The tree must be defined as real or complex already.
+//  ialloc is the index of the component to allocate. ialloc=0 allocates the tree with index zero.
+//  Deletes the old tree if found.
+template <int D>
+void CompFunction<D>::alloc_comp(int ialloc) {
+      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
+      if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
+      int i = ialloc;
+      delete CompD[i];
+      delete CompC[i];
+      CompD[i] = nullptr;
+      CompC[i] = nullptr;
+      if (isreal()) {
+          CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
+          CompD[i]->setZero();
+      }
+      if (iscomplex()) {
+          CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
+          CompC[i]->setZero();
+      }
+      func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+}
+
 template <int D>
 void CompFunction<D>::free() {
     for (int i = 0; i < Ncomp(); i++) {
@@ -293,7 +318,7 @@ void CompFunction<D>::dagger() {
 template <int D>
 FunctionTree<D, double> &CompFunction<D>::real(int i) {
     if (!isreal()) MSG_ABORT("not real function");
-    if (CompD[i] == nullptr) alloc(i);
+    if (CompD[i] == nullptr) alloc_comp(i);
     return *CompD[i];
 }
 template <int D> //NB: should return CompC in the future
@@ -306,7 +331,7 @@ FunctionTree<D, double>  &CompFunction<D>::imag(int i) {
 template <int D>
 FunctionTree<D, ComplexDouble>  &CompFunction<D>::complex(int i) {
     if (!iscomplex()) MSG_ABORT("not marked as a complex function");
-    if (CompC[i] == nullptr) alloc(i);
+    if (CompC[i] == nullptr) alloc_comp(i);
     return *CompC[i];
 }
 
@@ -358,7 +383,7 @@ void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
 
     if (Ncomp()<inp.Ncomp()){
         func_ptr->data = inp.func_ptr->data;
-        alloc(inp.Ncomp()-1, true);
+        alloc(inp.Ncomp(), true);
     }
 
     for (int i = 0; i < inp.Ncomp(); i++) {
@@ -430,7 +455,7 @@ template class CompFunction<3>;
   template <int D>
   void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp) {
       out->func_ptr->data = inp.func_ptr->data;
-      out->alloc(inp.Ncomp()-1);
+      out->alloc(inp.Ncomp());
       for (int i = 0; i < inp.Ncomp(); i++) {
           if (inp.isreal()) {
               inp.CompD[i]->deep_copy(out->CompD[i]);
@@ -448,7 +473,7 @@ template class CompFunction<3>;
   template <int D>
   void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp) {
       out.func_ptr->data = inp.func_ptr->data;
-      out.alloc(inp.Ncomp()-1);
+      out.alloc(inp.Ncomp());
       for (int i = 0; i < inp.Ncomp(); i++) {
           if (inp.isreal()) {
               inp.CompD[i]->deep_copy(out.CompD[i]);
@@ -493,7 +518,7 @@ template <int D>
         out.func_ptr->data.iscomplex = 1;
         out.func_ptr->data.isreal = 0;
     }
-    out.alloc(out.Ncomp()-1);
+    out.alloc(out.Ncomp());
     for (int comp = 0; comp < inp[0].Ncomp(); comp++) {
         if (not iscomplex) {
             FunctionTreeVector<D, double> fvec; // one component vector
@@ -572,7 +597,7 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
         if (inp_a.isreal() and inp_b.isreal()) {
             if (need_to_multiply) {
-                if (!out_allocated) out.alloc(out.Ncomp()-1);
+                if (!out_allocated) out.alloc(out.Ncomp());
                 if (prec < 0.0) {
                     // Union grid
                     build_grid(*out.CompD[comp], *inp_a.CompD[comp]);
@@ -605,7 +630,7 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                     out.func_ptr->isreal = 0;
                     delete out.CompD[comp];
                     delete out.CompC[comp];
-                    if (!out_allocated) out.alloc(out.Ncomp()-1);
+                    if (!out_allocated) out.alloc(out.Ncomp());
                     build_grid(*out.CompC[comp], *inp_a.CompC[comp]);
                     build_grid(*out.CompC[comp], *inp_b.CompC[comp]);
                     mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
@@ -621,12 +646,12 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                         } else {
                              out.func_ptr->iscomplex = 1;
                              out.func_ptr->isreal = 0;
-                             out.alloc(out.Ncomp()-1);
+                             out.alloc(out.Ncomp());
                         }
                     } else {
                         out.func_ptr->iscomplex = 1;
                         out.func_ptr->isreal = 0;
-                        if (!out_allocated) out.alloc(out.Ncomp()-1);
+                        if (!out_allocated) out.alloc(out.Ncomp());
                     }
                     mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], maxIter, absPrec, useMaxNorms, conjugate);
                 }
@@ -692,7 +717,7 @@ void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, Representabl
     CompFunction<D> func_a;
     func_a.func_ptr->isreal = 1;
     func_a.func_ptr->iscomplex = 0;
-    func_a.alloc(0);
+    func_a.alloc(1);
     func_a.CompD[0] = &inp_a;
     multiply(out, func_a, f, prec, nrefine, conjugate);
     func_a.CompD[0] = nullptr;
@@ -764,7 +789,7 @@ void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, d
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 1;
     out.func_ptr->iscomplex = 0;
-    if(out.Ncomp() < 1) out.alloc(0);
+    if(out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
@@ -774,7 +799,7 @@ void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 0;
     out.func_ptr->iscomplex = 1;
-    if(out.Ncomp() < 1) out.alloc(0);
+    if(out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
@@ -784,7 +809,7 @@ void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double p
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 1;
     out.func_ptr->iscomplex = 0;
-    if(out.Ncomp() < 1) out.alloc(0);
+    if(out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
 }
@@ -793,7 +818,7 @@ void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, d
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 0;
     out.func_ptr->iscomplex = 1;
-    if(out.Ncomp() < 1) out.alloc(0);
+    if(out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
  }
@@ -1070,7 +1095,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
            if (coeffpVec[j].size()==0) continue;
-            Psi[j].alloc(0); //All data is stored in coeffpVec[j]
+            Psi[j].alloc(1); //All data is stored in coeffpVec[j]
             Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
        }
     } else { // MPI case
@@ -1098,7 +1123,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
                 }
             }
 
-            Psi[j].alloc(0);
+            Psi[j].alloc(1);
             Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
 
             for (ComplexDouble *p : pointerstodelete) delete[] p;
@@ -1360,7 +1385,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
             if (coeffpVec[j].size()==0) continue;
-            Psi[j].alloc(0);
+            Psi[j].alloc(1);
             Psi[j].real().clear();
             Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
         }
@@ -1390,7 +1415,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
                     shift += csize;
                 }
             }
-            Psi[j].alloc(0);
+            Psi[j].alloc(1);
             Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
 
             for (double *p : pointerstodelete) delete[] p;
@@ -1724,7 +1749,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
         for (int j = 0; j < N; j++) {
             if (j < N) {
                 if (Phi[j].hasReal()) {
-                    out[j].alloc(0);
+                    out[j].alloc(1);
                     out[j].real().clear();
                     out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
                     // 6) reconstruct trees from end nodes
@@ -1733,7 +1758,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                 }
             } else {
                 if (Phi[j].hasImag()) {
-                    out[j].alloc(0);
+                    out[j].alloc(1);
                     out[j].imag().clear();
                     out[j].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
                     out[j].imag().mwTransform(BottomUp);
@@ -1766,7 +1791,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
             }
             if (j < N) {
                 if (Phi[j].hasReal()) {
-                    out[j].alloc(0);
+                    out[j].alloc(1);
                     out[j].real().clear();
                     out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
                     // 6) reconstruct trees from end nodes
@@ -1778,7 +1803,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
                 }
             } else {
                 if (Phi[j].hasImag()) {
-                    out[j].alloc(0);
+                    out[j].alloc(1);
                     out[j].imag().clear();
                     out[j].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
                     out[j].imag().mwTransform(BottomUp);
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index ca9745727..741b347eb 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -24,7 +24,7 @@ struct CompFunctionData {
     // additional data that describe each component (defined by user):
     // occupancy, quantum number, norm, etc.
     //Note: defined with fixed size to ease copying and MPI send
-    int n1[4]{0,0,0,0}; // 0: neutral. other wise different values are orthogonal to each other (product = 0)
+    int n1[4]{0,0,0,0}; // 0: neutral. otherwise different values are orthogonal to each other (product = 0)
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
     int n4[4]{0,0,0,0};
@@ -121,7 +121,8 @@ template <int D> class CompFunction {
     ComplexDouble integrate() const;
     double norm() const;
     double getSquareNorm() const;
-    void alloc(int i = 0, bool zero = true);
+    void alloc(int nalloc = 1, bool zero = true);
+    void alloc_comp(int i = 0); // allocate one specific component
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setCplx(FunctionTree<D, ComplexDouble> *tree, int i = 0);
     void setRank(int i) {func_ptr->rank = i;};
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index d49fb83db..0e33ff53b 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -272,7 +272,7 @@ bool my_func(CompFunction<3> *func) {
 /** @brief Free all function pointers not belonging to this MPI rank */
 void free_foreign(CompFunctionVector &Phi) {
     for (CompFunction<3> &i : Phi) {
-        if (not my_func(i)) i.alloc(0);
+        if (not my_func(i)) i.free();
     }
 }
 
@@ -347,7 +347,7 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     int func_ncomp_in = func.Ncomp();
     MPI_Recv(&func.func_ptr->data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status);
     for (int i = 0; i < func.Ncomp(); i++) {
-        if (func_ncomp_in <= i) func.alloc(i);
+        if (func_ncomp_in <= i) func.alloc(i+1);
         if (func.isreal()) mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks()[i]);
         else  mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks()[i]);
     }

From 9db89c68de78b488ddfc4ec289c796bec4cedfe0 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Wed, 28 Aug 2024 10:48:48 +0200
Subject: [PATCH 30/38] soft multiplicative factor for components

---
 src/treebuilders/apply.cpp |  10 ++--
 src/utils/CompFunction.cpp | 118 +++++++++++++++++++++++++++++++------
 src/utils/CompFunction.h   |   2 +
 3 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index ca49ec657..2c6e4aaa2 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -462,7 +462,8 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
 }
 
 template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
-   ComplexDouble defaultMetric[4][4];
+    //TODO: sums and not only each components independently
+    ComplexDouble defaultMetric[4][4];
     for (int i=0; i<4; i++){
         for (int j=0; j<4; j++){
             if (i==j) defaultMetric[i][j] = 1.0;
@@ -475,10 +476,11 @@ template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, C
     for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
         for (int ocomp = 0; ocomp < 4; ocomp++){
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal() and std::imag(metric[icomp][ocomp]) < MachinePrec) {
+                if (inp.isreal() and (std::imag(metric[icomp][ocomp]) < MachinePrec or inp.Ncomp() == 1) ) {
                     apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
+                    if (std::norm(metric[icomp][ocomp] - 1.0) > MachinePrec) {
+                        if(std::imag(metric[icomp][ocomp]) < MachinePrec) out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
+                        else out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; //TODO: multiply c1 in rescale?
                     }
                     out.func_ptr->isreal = 1;
                 } else {
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index a8f4cf957..4ab799f24 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -592,9 +592,10 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
     if (out.Ncomp() == 0) out_allocated = false;
     bool share = out.isShared();
     out.func_ptr->data = inp_a.func_ptr->data;
-    out.func_ptr->data.shared = share; // we don' inherit the shareness
-    out.func_ptr->conj = false; // we don' inherit conjugaison
+    out.func_ptr->data.shared = share; // we don't inherit the shareness
+    out.func_ptr->conj = false; // we don't inherit conjugaison
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
+        out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; // we could put this is coef if everything is real?
         if (inp_a.isreal() and inp_b.isreal()) {
             if (need_to_multiply) {
                 if (!out_allocated) out.alloc(out.Ncomp());
@@ -742,8 +743,9 @@ void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, Repre
 template <int D>
 ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
     if (bra.func_ptr->conj or ket.func_ptr->conj) MSG_ABORT("Not implemented");
-    ComplexDouble dotprod = 0.0;
+    ComplexDouble dotprodtot = 0.0;
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
+        ComplexDouble dotprod = 0.0;
         if (bra.func_ptr->data.n1[0] != ket.func_ptr->data.n1[0] and
             bra.func_ptr->data.n1[0] != 0 and ket.func_ptr->data.n1[0]!= 0) continue;
         if (bra.isreal() and ket.isreal()) {
@@ -755,11 +757,13 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
         } else {
             dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
         }
+        dotprod *= bra.func_ptr->data.c1[comp] * ket.func_ptr->data.c1[comp];
+        dotprodtot += dotprod;
     }
     if (bra.isreal() and ket.isreal()) {
-        return dotprod.real();
+        return dotprodtot.real();
     } else {
-        return dotprod;
+        return dotprodtot;
     }
 }
 
@@ -770,19 +774,22 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
  */
 template <int D>
 double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
-    double dotprod = 0.0;
+    double dotprodtot = 0.0;
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
-          if (bra.isreal() and ket.isreal()) {
-              dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
-          } else  if (bra.isreal() and ket.iscomplex()) {
-              MSG_ABORT("Not implemented");
-          } else  if (bra.iscomplex() and ket.isreal()) {
-              MSG_ABORT("Not implemented");
-          } else {
-              dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
-          }
-    }
-    return dotprod;
+        double dotprod = 0.0;
+        if (bra.isreal() and ket.isreal()) {
+            dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
+        } else  if (bra.isreal() and ket.iscomplex()) {
+            MSG_ABORT("Not implemented");
+        } else  if (bra.iscomplex() and ket.isreal()) {
+            MSG_ABORT("Not implemented");
+        } else {
+            dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
+        }
+        dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); //for fully complex values this does not really give the norm
+        dotprodtot += dotprod;
+   }
+    return dotprodtot;
 }
 
 void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, double prec) {
@@ -1992,6 +1999,20 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
 
     // Assumes linearity: result is sum of all nodes contributions
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+    // multiply by CompFunction multiplicative factor
+
+    ComplexVector Fac = ComplexVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
+        Fac[i] = BraKet[i].func_ptr->data.c1[0];
+    }
+
+    mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            S(i, j) *=  std::conj(Fac[i])*Fac[j];
+        }
+    }
 
     return S;
 }
@@ -2136,6 +2157,19 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     // Assumes linearity: result is sum of all nodes contributions
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
 
+    // multiply by CompFunction multiplicative factor
+    ComplexVector Fac = ComplexVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
+        Fac[i] = BraKet[i].func_ptr->data.c1[0];
+    }
+    mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            S(i, j) *=  std::conj(Fac[i])*Fac[j];
+        }
+    }
+
     return S;
 }
 
@@ -2360,6 +2394,25 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
 
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
 
+    // multiply by CompFunction multiplicative factor
+    ComplexVector FacBra = ComplexVector::Zero(N);
+    ComplexVector FacKet = ComplexVector::Zero(M);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(Bra[i])) continue;
+        FacBra[i] = Bra[i].func_ptr->data.c1[0];
+    }
+    for (int i = 0; i < M; i++) {
+        if (!mrcpp::mpi::my_func(Ket[i])) continue;
+        FacKet[i] = Ket[i].func_ptr->data.c1[0];
+    }
+    mrcpp::mpi::allreduce_vector(FacBra, mrcpp::mpi::comm_wrk);
+    mrcpp::mpi::allreduce_vector(FacKet, mrcpp::mpi::comm_wrk);
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            S(i, j) *=  std::conj(FacBra[i])*FacKet[j];
+        }
+    }
+
     // restore input
     if(braisreal){
         for (int i = 0; i < Bra.size(); i++) {
@@ -2557,6 +2610,25 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
 
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
 
+    // multiply by CompFunction multiplicative factor
+    ComplexVector FacBra = ComplexVector::Zero(N);
+    ComplexVector FacKet = ComplexVector::Zero(M);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(Bra[i])) continue;
+        FacBra[i] = Bra[i].func_ptr->data.c1[0];
+    }
+    for (int i = 0; i < M; i++) {
+        if (!mrcpp::mpi::my_func(Ket[i])) continue;
+        FacKet[i] = Ket[i].func_ptr->data.c1[0];
+    }
+    mrcpp::mpi::allreduce_vector(FacBra, mrcpp::mpi::comm_wrk);
+    mrcpp::mpi::allreduce_vector(FacKet, mrcpp::mpi::comm_wrk);
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < M; j++) {
+            S(i, j) *=  std::conj(FacBra[i])*FacKet[j];
+        }
+    }
+
     return S;
 }
 
@@ -2710,6 +2782,18 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
 
     // Assumes linearity: result is sum of all nodes contributions
     mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
+    // multiply by CompFunction multiplicative factor
+    ComplexVector Fac = ComplexVector::Zero(N);
+    for (int i = 0; i < N; i++) {
+        if (!mrcpp::mpi::my_func(BraKet[i])) continue;
+        Fac[i] = BraKet[i].func_ptr->data.c1[0];
+    }
+    mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
+    for (int i = 0; i < N; i++) {
+        for (int j = 0; j < N; j++) {
+            S(i, j) *=  std::norm(std::conj(Fac[i]))*std::norm(Fac[j]);
+        }
+    }
     return S;
 }
 
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 741b347eb..c31cfc443 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -28,6 +28,8 @@ struct CompFunctionData {
     int n2[4]{0,0,0,0};
     int n3[4]{0,0,0,0};
     int n4[4]{0,0,0,0};
+    //multiplicative scalar for the function. So far only actively used to take care of imag factor in momentum operator.
+    ComplexDouble c1[4]{{1.0,0.0},{1.0,0.0},{1.0,0.0},{1.0,0.0}};
     double d1[4]{0.0,0.0,0.0,0.0};
     double d2[4]{0.0,0.0,0.0,0.0};
     double d3[4]{0.0,0.0,0.0,0.0};

From cdd24cb0922becb27b1382a8e729a201c6abe625 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Thu, 19 Dec 2024 13:58:04 +0100
Subject: [PATCH 31/38] read and write orbitals in text format

---
 src/trees/FunctionTree.cpp  | 277 +++++++++++++++++++++++++++++++++++-
 src/trees/FunctionTree.h    |   2 +
 src/trees/MWNode.cpp        |  40 +++++-
 src/trees/MWNode.h          |   7 +-
 src/trees/MWTree.cpp        |   9 +-
 src/trees/MWTree.h          |   2 +-
 src/trees/NodeAllocator.cpp |   7 +-
 src/utils/CompFunction.cpp  |   8 +-
 src/utils/mpi_utils.h       |   2 +
 src/utils/parallel.cpp      | 107 +++++++++-----
 10 files changed, 402 insertions(+), 59 deletions(-)

diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 299fb884d..e80affaab 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -107,6 +107,274 @@ template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
     if (this->getNNodes()>0) this->deleteRootNodes();
 }
 
+
+/** @brief Read a previously stored tree assuming text/ASCII format,
+ *   in a representation using MADNESS conventions for n, l and index order.
+ * @param[in] file: File name
+ * @note This tree must have the exact same MRA as the one that was saved (dimension, world box and polynomial order are checked)
+ */
+template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::string &file) {
+    std::ifstream in(file);
+    int NDIM, k;
+    in>>NDIM;
+    if (NDIM != D) NOT_IMPLEMENTED_ABORT;
+    double coord[NDIM][2]; // [d][0], [d][1]: lower/upper world bound per dimension. NOTE(review): runtime-sized array (compiler extension)
+    for (int d = 0; d < NDIM; d++) in >> coord[d][0] >> coord[d][1];
+
+    int p = 1;
+    int rscale = this->getRootScale(); //root scale of target MRA (MRChem) . NB: negative
+    for (int i = rscale; i < 0; i++) p *= 2;
+    int L = p; //NB for now we assume the world as a cube going from -L to +L and L is a power of 2
+    // We require that the world box size is identical and a power of 2
+    double TXT_thres = 1.0e-14; // tolerance when comparing the world bounds read from file with -L and +L
+    for (int d = 0; d < NDIM; d++) {
+        if (std::abs(coord[d][0] + L) > TXT_thres) std::cout<<coord[d][0]<<" "<<L<<std::endl;;
+        if (std::abs(coord[d][0] + L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
+        if (std::abs(coord[d][1] - L) > TXT_thres) std::cout<<coord[d][1]<<" "<<L<<std::endl;;
+        if (std::abs(coord[d][1] - L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
+    }
+
+    int nChildren = 1;
+    for (int d=0; d<NDIM; d++) nChildren *= 2;
+
+    int nmax = 0; //deepest scale in TXT
+    in>>k;
+    if (k != this->getKp1()) NOT_IMPLEMENTED_ABORT;
+    k--; //MRChem defines k as highest polynomial order. MADNESS as number of polynomials
+
+    int ncoefs = 1; // number of coefficients for one single node (not a full MRChem MWnode which stores 2**D of them)
+    for (int i = 0; i < NDIM; i++) ncoefs *= k+1;
+
+    std::vector<std::vector<MWNode<D, T> *>> NodeTable(50); // node pointers, indexed by TXT scale. NOTE(review): n_in is not checked against the fixed size 50
+    std::map<int,int> mp; // number of child blocks filled so far in each node, keyed by the node serial index
+    // MRChem and MADNESS do not use the same index order for the quadrature points
+    // We read MADNESS convention (note that mapMRC[mapMRC[i]]=i for all i)
+    std::vector<int> mapMRC; // mapping vector
+    int kx = k;
+    int ky = k;
+    int kz = k;
+    if (D < 3) kz = 0;
+    if (D < 2) ky = 0;
+    int kp1 = k + 1;
+    // MADNESS: zyx and i=k,k-1,k-2... MRChem: xyz, i=0,1,2,3 ...
+    for (int x = kx; x >= 0; x--){
+        for (int y = ky; y >= 0; y--){
+            for (int z = kz; z >= 0; z--){
+                mapMRC.push_back(z*kp1*kp1 + y*kp1 + x);
+            }
+        }
+    }
+
+    MWNode<D, T> **roots = this->getRootBox().getNodes();
+    for (int rIdx = 0; rIdx < nChildren; rIdx++) {
+        roots[rIdx]->deleteChildren();
+        roots[rIdx]->zeroCoefs();
+    }
+    this->clearEndNodeTable();
+
+    int nread; // number of nodes to read
+    in>>nread;
+    while (nread-- > 0) {
+        // NB: MRChem stores quadrature points values in the PARENT node. 2**D nodes are stored in the same parent
+        int n; // MRChem scale
+        int n_in; // TXT scale
+        in >> n_in;
+        n = n_in + rscale - 1; //MRChem does not define root scale as zero.
+
+        std::array<int, D> l_in; // translation index TXT
+        std::array<int, D> l; // translation index MRChem
+        std::array<int, D> lp; // translation index MRChem, parent
+
+        for (int i = 0; i < NDIM; i++) in >> l_in[i];
+
+        //MRChem defines smallest l as -(2**n)*L , where -L is smallest world coordinate.
+        //note that root scale has 2**D nodes (if range is -L,L)
+        for (int i=0; i<NDIM; i++) {
+            l[i] = l_in[i] - std::pow(2,n)*L;
+            lp[i] = l_in[i]/2 - std::pow(2,n-1)*L; //for parent
+        }
+        NodeIndex<D> idx_p(n-1, lp); // index of parent node
+        MWNode<D, T> *node = &this->getNode(idx_p, true);
+        // note that node is not necessarily an endnode, but its children are always endnodes
+        // must find to which child of the parent node it corresponds
+        int c_ix = 0; // child index in the parent
+        int p = 1;
+        for (int i = 0; i < NDIM; i++) {
+            if (abs(l[i])%2 == 1)c_ix += p;
+            p *= 2;
+        }
+        T *values = node->getCoefs();
+        if(mp[node->getSerialIx()]==0){
+            //init to zero
+            node->zeroCoefs();
+            if (not node->isRootNode()) {
+                //also set siblings to zero if not set yet
+                MWNode<D, T> *parent = &node->getMWParent();
+                for (int cIdx = 0; cIdx < nChildren; cIdx++) {
+                    if (mp[parent->getMWChild(cIdx).getSerialIx()] == 0) parent->getMWChild(cIdx).zeroCoefs();
+                }
+            }
+        }
+        values += c_ix * ncoefs; //repoint to the right child position (ncoefs is for one child only)
+        for (int i = 0; i < ncoefs; i++) in >> values[mapMRC[i]]; // the index i is mapped
+        mp[node->getSerialIx()]++; //counts the number of children included
+        nmax = std::max(nmax, n_in); //deepest scale in TXT
+        if (mp[node->getSerialIx()] == 1) NodeTable[n_in].push_back(node);
+    }
+    in.close();
+    // transform all nodes from quadrature point values to scaling coefficients
+    for (int n = nmax; n > -1; n--) {
+        for (int i = 0; i < NodeTable[n].size(); i++){
+            MWNode<D, T> *node = NodeTable[n][i];
+            node->cvTransform(Backward);
+            node->calcNorms();
+        }
+    }
+    // now tree has only scaling coefficients or zeros on end nodes
+
+    // Transform into scaling and wavelets, starting by leaf nodes and copying scaling into parents
+    for (int n = nmax; n > -1; n--) {
+        for (int i = 0; i < NodeTable[n].size(); i++){
+            MWNode<D, T> *node = NodeTable[n][i];
+            if (mp[node->getSerialIx()] == nChildren ){
+                //node complete: transform into scaling and wavelets
+                if (node->isEndNode()){
+                    node->mwTransform(Compression);
+                    node->setHasCoefs();
+                    node->calcNorms();
+                    this->endNodeTable.push_back(node);
+                } else {
+                    // MRCPP requires that all nodes that have no children are end nodes
+                    // and all nodes are groups of 2**D siblings
+                    T* pcoefs = node->getCoefs(); // parent coefficients
+                    for (int cIdx = 0; cIdx < nChildren; cIdx++) {
+                        MWNode<D, T> *cnode = &node->getMWChild(cIdx);
+                        if (mp[cnode->getSerialIx()] != nChildren) {
+                            // This child is not defined. must take scaling from parent
+                            if (mp[cnode->getSerialIx()] > 0) std::cout<<"accounting error "<<std::endl;
+                            T* ccoefs = cnode->getCoefs(); // child coefficients
+                            for (int j = 0; j< ncoefs; j++)  ccoefs[j] = pcoefs[j + cIdx*ncoefs];
+                            for (int j = ncoefs; j< ncoefs*nChildren; j++)  ccoefs[j] = 0.0; // the remainder are set to zero
+                            this->endNodeTable.push_back(cnode); // add to the list of nodes
+                            cnode->setHasCoefs();
+                            cnode->calcNorms();
+                        }
+                    }
+                    node->mwTransform(Compression);
+                    node->setHasCoefs();
+                    node->calcNorms();
+                }
+                if ( not node->isRootNode() ) {
+                    // and copy the new scaling parts into parent
+                    MWNode<D, T> *parent = &node->getMWParent();
+                    // check if parent exist already, and put in the list if not.
+                    if (mp[parent->getSerialIx()] == 0) NodeTable[n-1].push_back(parent);
+                    int my_ix=-1;
+                    // find index among siblings
+                    for (int cIdx = 0; cIdx < nChildren; cIdx++) {
+                        if (&parent->getMWChild(cIdx) == node) my_ix = cIdx;
+                    }
+                    if(my_ix < 0)std::cout<<" DID NOT FIND INDEX"<<std::endl;
+                    T *ccoefs = node->getCoefs();
+                    T *pcoefs = parent->getCoefs();
+                    for (int j = 0; j< ncoefs; j++)  pcoefs[j+my_ix*ncoefs] = ccoefs[j];
+                    mp[parent->getSerialIx()]++;
+                }
+            } else {
+                std::cout<<" WARNING: found incomplete node "<<std::endl;
+            }
+        }
+    }
+    this->calcSquareNorm();
+}
+
+/** @brief Write the tree to disk in text/ASCII format in a representation
+ *   using MADNESS conventions for n, l and index order.
+ * @param[in] fname: File name
+ */
+template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::string &fname) {
+    int nRoots = this->getRootBox().size(); // NOTE(review): nRoots and roots are not used further down in this routine
+    MWNode<D, T> **roots = this->getRootBox().getNodes();
+
+    std::ofstream out(fname);
+    out << std::setprecision(14);
+    out << D <<std::endl;
+    int rscale = this->getRootScale();
+    std::array<double, D> sf = this->getMRA().getWorldBox().getScalingFactors();
+    double LMRChem = 1.0;
+    for (int i=0; i>rscale; i--) LMRChem *= 2; // we assume world is from -L to L, and a cube with 2 root nodes in each direction
+    for (int d=0; d<D; d++) {
+        out <<- sf[d]*LMRChem <<" "<< sf[d]*LMRChem << std::endl;
+    }
+    int kp1 = this->getKp1();
+    out << kp1 <<std::endl;
+    int ncoefs = 1;
+    for (int d = 0; d < D; d++) ncoefs*=kp1;
+    int Tdim = std::pow(2,D);
+
+    int nout = this->endNodeTable.size();
+    out << Tdim*nout <<std::endl; // could output only scaling coeff?
+
+    // MRChem and MADNESS do not use the same index order for the quadrature points
+    // We write into MADNESS convention (note that mapMRC[mapMRC[i]]=i for all i)
+    std::vector<int> mapMRC; // mapping vector
+    int kx = kp1 - 1;
+    int ky = kp1 - 1;
+    int kz = kp1 - 1;
+    if (D < 3) kz = 0;
+    if (D < 2) ky = 0;
+    // MADNESS: zyx and i=k,k-1,k-2... MRChem: xyz, i=0,1,2,3 ...
+    for (int x = kx; x >= 0; x--){
+        for (int y = ky; y >= 0; y--){
+            for (int z = kz; z >= 0; z--){
+                mapMRC.push_back(z*kp1*kp1 + y*kp1 + x);
+            }
+        }
+    }
+
+    int L = std::pow(2,-rscale);
+    int count = -1;
+    while (++count<nout) {
+        std::array<int, D> l;
+        NodeIndex<D> idx=this->endNodeTable[count]->getNodeIndex();
+        MWNode<D, T> *node = &(this->getNode(idx, false));
+        T *values = node->getCoefs();
+         int n = idx.getScale();
+         if(node->getSerialIx()==56 or (n==1 and idx.getTranslation(0)==0  and idx.getTranslation(1)==0  and idx.getTranslation(2)==0  )){ // NOTE(review): debug printout for a hard-coded node; reads translation index 2, so presumably only meant for D == 3
+             std::cout<<idx<<" "<<node->getSerialIx()<<" "<<node->getSquareNorm()<<" "<<node->getComponentNorm(0)<<std::endl;
+         }
+        node->mwTransform(Reconstruction); // NOTE(review): the node coefficients are transformed in place and not transformed back in this routine
+        node->cvTransform(Forward);
+        // we write each child node separately
+        for (int i = 0; i < D; i++) {
+            // l in interval [0, max], while in MRCPP it is defined in [-max/2, max/2-1]
+            l[i] = 2 * (idx.getTranslation(i) + std::pow(2,n)*L); //first child
+        }
+        for (int cix = 0; cix < Tdim; cix++) {
+            out<< n-rscale+2 <<" ";// scales start at zero. NB: children are one scale larger than node
+            for (int i = 0; i < D; i++){
+                int p = (cix>>i) & 1; // shift by one for odd child indices
+                out << l[i] + p << " ";
+            }
+            out << std::endl;
+            for (int i=0; i< ncoefs; i++) out<< values[cix*ncoefs + mapMRC[i]]<<" ";
+            out << std::endl;
+            if(node->getSerialIx()==56 or (n==1 and idx.getTranslation(0)==0  and idx.getTranslation(1)==0  and idx.getTranslation(2)==0 )){ // NOTE(review): same hard-coded debug condition as above
+            std::cout<< n-rscale+2 <<" ";// scales start at zero. NB: children are one scale larger than node
+            for (int i = 0; i < D; i++){
+                int p = (cix>>i) & 1; // shift by one for odd child indices
+                std::cout << l[i] + p << " ";
+            }
+            std::cout << std::endl;
+            T norm=0.0;
+             for (int i=0; i< ncoefs; i++) norm+=values[cix*ncoefs + i]*values[cix*ncoefs + i];
+             std::cout<<" norm quadrature "<<norm<<" "<<values[cix*ncoefs]<<std::endl;
+         }
+        }
+    }
+    out.close();
+
+}
 /** @brief Write the tree structure to disk, for later use
  * @param[in] file: File name, will get ".tree" extension
  */
@@ -117,7 +385,6 @@ template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string
 
     std::stringstream fname;
     fname << file << ".tree";
-
     std::fstream f;
     f.open(fname.str(), std::ios::out | std::ios::binary);
     if (not f.is_open()) MSG_ERROR("Unable to open file");
@@ -125,6 +392,7 @@ template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string
     // Write size of tree
     int nChunks = allocator.getNChunksUsed();
     f.write((char *)&nChunks, sizeof(int));
+    std::cout<<"saving. tree norm "<<this->getSquareNorm()<<", number of nodes "<<this->getNNodes()<<", Nchunks "<<nChunks<<" "<<nChunks*allocator.getCoefChunkSize()/1024<<"kB"<<std::endl;
 
     // Write tree data, chunk by chunk
     for (int iChunk = 0; iChunk < nChunks; iChunk++) {
@@ -132,6 +400,7 @@ template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string
         f.write((char *)allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize());
     }
     f.close();
+    this->saveTreeTXT("MRC.dat");
     print::time(10, "Time write", t1);
 }
 
@@ -141,6 +410,7 @@ template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string
  */
 template <int D, typename T> void FunctionTree<D, T>::loadTree(const std::string &file) {
     Timer t1;
+
     std::stringstream fname;
     fname << file << ".tree";
 
@@ -166,6 +436,8 @@ template <int D, typename T> void FunctionTree<D, T>::loadTree(const std::string
     Timer t2;
     allocator.reassemble();
     this->resetEndNodeTable();
+    this->calcSquareNorm(true);
+    std::cout<<"Loaded. tree norm "<<this->getSquareNorm()<<" number of nodes "<<this->getNNodes()<<" N chunks"<<nChunks<<" "<<nChunks*allocator.getCoefChunkSize()/1024<<"kB"<<std::endl;
     print::time(10, "Time rewrite pointers", t2);
 }
 
@@ -287,7 +559,8 @@ template <int D, typename T> T FunctionTree<D, T>::evalf_precise(const Coord<D>
 
     MWNode<D, T> &mw_node = this->getNodeOrEndNode(arg);
     auto &f_node = static_cast<FunctionNode<D, T> &>(mw_node);
-    auto result = f_node.evalf(arg);
+    std::cout<<f_node.getNodeIndex()<<" "<<f_node.getSerialIx()<<" "<<f_node.getSquareNorm()<<" "<<f_node.isEndNode()<<" "<<f_node.getComponentNorm(0)<<" "<<f_node.getComponentNorm(1)<<" "<<f_node.getComponentNorm(2)<<" "<<f_node.getComponentNorm(3)<<" "<<f_node.getComponentNorm(4)<<" "<<f_node.getComponentNorm(5)<<" "<<f_node.getComponentNorm(6)<<" "<<f_node.getComponentNorm(7)<<std::endl;
+   auto result = f_node.evalf(arg);
     this->deleteGenerated();
 
     // Adjust for scaling factor included in basis
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 05961eb7d..59ee8bfe4 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -72,7 +72,9 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     void setEndValues(Eigen::Matrix<T , Eigen::Dynamic, 1 > &data);
 
     void saveTree(const std::string &file);
+    void saveTreeTXT(const std::string &file);
     void loadTree(const std::string &file);
+    void loadTreeTXT(const std::string &file);
 
     // In place operations
     void square();
diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp
index 7bfa510de..039e91b31 100644
--- a/src/trees/MWNode.cpp
+++ b/src/trees/MWNode.cpp
@@ -403,7 +403,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
     }
 }
 
-/** @brief Generates scaling cofficients of children
+/** @brief Generates scaling coefficients of children
  *
  * @details If the node is a leafNode, it takes the scaling&wavelet
  * coefficients of the parent and it generates the scaling
@@ -421,6 +421,25 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
     MRCPP_UNSET_OMP_LOCK();
 }
 
+
+/** @brief Creates scaling coefficients of children
+ *
+ * @details If the node is a leafNode, it takes the scaling&wavelet
+ * coefficients of the parent and it generates the scaling
+ * coefficients for the children and stores
+ * them consecutively in the corresponding block of the parent,
+ * following the usual bitwise notation. The new nodes are permanently added to the tree.
+ */
+  template <int D, typename T> void MWNode<D, T>::threadSafeCreateChildren() {
+    if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; } // local trees are not handled by this routine
+    MRCPP_SET_OMP_LOCK();
+    if (isLeafNode()) { // nothing is done when isLeafNode() is false
+        createChildren(true);
+        giveChildrenCoefs();
+    }
+    MRCPP_UNSET_OMP_LOCK();
+}
+
 /** @brief Coefficient-Value transform
  *
  * @details This routine transforms the scaling coefficients of the node to the
@@ -431,7 +450,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1)
  *       representation, instead of s/d (scaling and wavelet).
  */
-  template <int D, typename T> void MWNode<D, T>::cvTransform(int operation) {
+    template <int D, typename T> void MWNode<D, T>::cvTransform(int operation, bool firstchild) {
     int kp1 = this->getKp1();
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     int kp1_d = this->getKp1_d();
@@ -443,8 +462,10 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
     T *out_vec = o_vec;
     T *in_vec = this->coefs;
 
+    int nChildren = this->getTDim();
+    if (firstchild) nChildren = 1;
     for (int i = 0; i < D; i++) {
-        for (int t = 0; t < this->getTDim(); t++) {
+        for (int t = 0; t < nChildren ; t++) {
             T *out = out_vec + t * kp1_d;
             T *in = in_vec + t * kp1_d;
             math_utils::apply_filter(out, in, S, kp1, kp1_dm1, 0.0);
@@ -881,7 +902,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 
 /** @brief Returns the quadrature points in a given node
  *
- * @param[in,out] pts: expanded quadrature points in a \f$ d \times 
+ * @param[in,out] pts: expanded quadrature points in a \f$ d \times
  * 2^d(k+1)^d \f$ matrix form.
  *
  * @details The primitive quadrature points of the children are used to obtain a
@@ -1081,8 +1102,9 @@ void MWNode<D, T>::cvTransform(int operation) {
  * routine always returns the appropriate node, and will generate nodes that
  * does not exist. Recursion starts at this node and ASSUMES the requested
  * node is in fact descending from this node.
+ * If create = true, the nodes are permanently added to the tree.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const NodeIndex<D> &idx) {
+    template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const NodeIndex<D> &idx, bool create) {
     if (getScale() == idx.getScale()) { // we're done
         if (tree->isLocal) {
 	   NOT_IMPLEMENTED_ABORT;
@@ -1097,10 +1119,14 @@ void MWNode<D, T>::cvTransform(int operation) {
     }
 
     assert(isAncestor(idx));
-    threadSafeGenChildren();
+    if  (create) {
+        threadSafeCreateChildren();
+    } else {
+        threadSafeGenChildren();
+    }
     int cIdx = getChildIndex(idx);
     assert(this->children[cIdx] != nullptr);
-    return this->children[cIdx]->retrieveNode(idx);
+    return this->children[cIdx]->retrieveNode(idx, create);
 }
 
 /** Node retriever that ALWAYS returns the requested node.
diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h
index eca15925d..6a96675a2 100644
--- a/src/trees/MWNode.h
+++ b/src/trees/MWNode.h
@@ -125,7 +125,7 @@ template <int D, typename T> class MWNode {
     virtual void deleteChildren();
     virtual void deleteParent();
 
-    virtual void cvTransform(int kind);
+    virtual void cvTransform(int kind, bool firstchild = false);
     virtual void mwTransform(int kind);
 
     double getNodeNorm(const NodeIndex<D> &idx) const;
@@ -169,6 +169,7 @@ template <int D, typename T> class MWNode {
     bool isComplex = false; //TODO put as one of the flags
     friend class FunctionTree<D, double>; // required if a ComplexDouble tree access a double node from another tree!
     friend class FunctionTree<D, ComplexDouble>;
+    int childSerialIx{-1};  ///< index of first child in serial Tree, or -1 for leafnodes/endnodes
 
 protected:
     MWTree<D, T> *tree{nullptr};    ///< Tree the node belongs to
@@ -190,7 +191,6 @@ template <int D, typename T> class MWNode {
 
     int serialIx{-1};       ///< index in serial Tree
     int parentSerialIx{-1}; ///< index of parent in serial Tree, or -1 for roots
-    int childSerialIx{-1};  ///< index of first child in serial Tree, or -1 for leafnodes/endnodes
 
     NodeIndex<D> nodeIndex;     ///< Scale and translation of the node
     HilbertPath<D> hilbertPath; ///< To be documented
@@ -226,7 +226,7 @@ template <int D, typename T> class MWNode {
     bool diffBranch(const MWNode<D , T> &rhs) const;
 
     MWNode<D , T> *retrieveNode(const Coord<D> &r, int depth);
-    MWNode<D , T> *retrieveNode(const NodeIndex<D> &idx);
+    MWNode<D , T> *retrieveNode(const NodeIndex<D> &idx, bool create = false);
     MWNode<D , T> *retrieveParent(const NodeIndex<D> &idx);
 
     const MWNode<D , T> *retrieveNodeNoGen(const NodeIndex<D> &idx) const;
@@ -238,6 +238,7 @@ template <int D, typename T> class MWNode {
     const MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx) const;
     MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx);
 
+    void threadSafeCreateChildren();
     void threadSafeGenChildren();
     void deleteGenerated();
 
diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp
index d7ebaba55..9fd5c907a 100644
--- a/src/trees/MWTree.cpp
+++ b/src/trees/MWTree.cpp
@@ -158,7 +158,7 @@ template <int D, typename T> void MWTree<D, T>::mwTransform(int type, bool overw
  *
  * @details It starts at the bottom of the tree (scaling coefficients
  * of the leaf nodes) and it generates the scaling and wavelet
- * coefficients if the parent node. It then proceeds recursively all the
+ * coefficients of the parent node. It then proceeds recursively all the
  * way up to the root nodes. This is generally used after a function
  * projection to purify the coefficients obtained by quadrature at
  * coarser scales which are therefore not precise enough.
@@ -335,8 +335,9 @@ template <int D, typename T> MWNode<D, T> *MWTree<D, T>::findNode(NodeIndex<D> i
  * node does not exist, it will be generated by MW
  * transform. Recursion starts at the appropriate rootNode and descends
  * from this.
+ * The nodes are permanently added to the tree if create = true
  */
-template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> idx) {
+    template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> idx, bool create) {
     if (getRootBox().isPeriodic()) periodic::index_manipulation<D>(idx, getRootBox().getPeriodic());
 
     MWNode<D, T> *out = nullptr;
@@ -345,7 +346,7 @@ template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> id
 #pragma omp critical(gen_parent)
         out = root.retrieveParent(idx);
     } else {
-        out = root.retrieveNode(idx);
+        out = root.retrieveNode(idx, create);
     }
     return *out;
 }
@@ -433,7 +434,7 @@ template <int D, typename T> const MWNode<D, T> &MWTree<D, T>::getNodeOrEndNode(
 /** @brief Returns the list of all EndNodes
  *
  * @details copies the list of all EndNode pointers into a new vector
- * and retunrs it.
+ * and returns it.
  */
 template <int D, typename T> MWNodeVector<D, T> *MWTree<D, T>::copyEndNodeTable() {
     auto *nVec = new MWNodeVector<D, T>;
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index b19d356c8..e3c656b7c 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -105,7 +105,7 @@ class BankAccount;
     MWNode<D, T> *findNode(NodeIndex<D> nIdx);
     const MWNode<D, T> *findNode(NodeIndex<D> nIdx) const;
 
-    MWNode<D, T> &getNode(NodeIndex<D> nIdx);
+    MWNode<D, T> &getNode(NodeIndex<D> nIdx, bool create = false);
     MWNode<D, T> &getNodeOrEndNode(NodeIndex<D> nIdx);
     const MWNode<D, T> &getNodeOrEndNode(NodeIndex<D> nIdx) const;
 
diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp
index b33d5ccf1..5b6db8a34 100644
--- a/src/trees/NodeAllocator.cpp
+++ b/src/trees/NodeAllocator.cpp
@@ -113,7 +113,6 @@ template <int D, typename T> T * NodeAllocator<D, T>::getCoefNoLock(int sIdx) {
 template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coefs) {
     MRCPP_SET_OMP_LOCK();
     if (nNodes <= 0 or nNodes > this->maxNodesPerChunk) MSG_ABORT("Cannot allocate " << nNodes << " nodes");
-
     // move topstack to start of next chunk if current chunk is too small
     int cIdx = this->topStack % (this->maxNodesPerChunk);
     bool chunkOverflow = ((cIdx + nNodes) > this->maxNodesPerChunk);
@@ -127,6 +126,10 @@ template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coe
     // return value is index of first new node
     auto sIdx = this->topStack;
 
+    // we require that the index for first child is a multiple of 2**D
+    // so that we can find the sibling rank using rank=sIdx%(2**D)
+    if (sIdx%nNodes != 0) MSG_ERROR(" node allocate error");
+
     // fill stack status
     auto &status = this->stackStatus;
     for (int i = sIdx; i < sIdx + nNodes; i++) {
@@ -145,7 +148,7 @@ template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coe
 
 template <int D, typename T> void NodeAllocator<D, T>::dealloc(int sIdx) {
     MRCPP_SET_OMP_LOCK();
-    if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx);
+   if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx);
     auto *node_p = getNodeNoLock(sIdx);
     node_p->~MWNode();
     this->stackStatus[sIdx] = 0; // mark as available
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index 4ab799f24..e7d3291ec 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -272,14 +272,18 @@ void CompFunction<D>::alloc_comp(int ialloc) {
 template <int D>
 void CompFunction<D>::free() {
     for (int i = 0; i < Ncomp(); i++) {
-        delete CompD[i];
-        delete CompC[i];
+        std::cout<<" delete "<<i<<std::endl;
+        if(CompD[i] != nullptr) delete CompD[i];
+         std::cout<<" delete C "<<i<<std::endl;
+       if(CompC[i] !=  nullptr) delete CompC[i];
         CompD[i] = nullptr;
         CompC[i] = nullptr;
+        std::cout<<" done delete "<<i<<std::endl;
     }
     if (this->func_ptr->shared_mem_real) this->func_ptr->shared_mem_real->clear();
     if (this->func_ptr->shared_mem_cplx) this->func_ptr->shared_mem_cplx->clear();
     func_ptr->Ncomp = 0;
+    std::cout<<" free done "<<std::endl;
 }
 
 template <int D>
diff --git a/src/utils/mpi_utils.h b/src/utils/mpi_utils.h
index 93211fd1a..0a854580f 100644
--- a/src/utils/mpi_utils.h
+++ b/src/utils/mpi_utils.h
@@ -51,7 +51,9 @@ extern int sh_group_rank;
 extern int is_bank;
 extern int is_bankclient;
 extern int bank_size;
+extern int bank_per_node;
 extern int omp_threads;
+extern int use_omp_num_threads;
 extern int tot_bank_size;
 extern int max_tag;
 extern int task_bank;
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 0e33ff53b..90a066771 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -54,7 +54,9 @@ int is_centralbank = 0;
 int is_bankclient = 1;
 int is_bankmaster = 0; // only one bankmaster is_bankmaster
 int bank_size = 0;
+int bank_per_node = 0;
 int omp_threads = -1; // can be set to force number of threads
+int use_omp_num_threads = -1; // can be set to use number of threads from env
 int tot_bank_size = 0; // size of bank, including the task manager
 int max_tag = 0;       // max value allowed by MPI
 vector<int> bankmaster;
@@ -83,6 +85,13 @@ void initialize() {
     // divide the world into groups
     // each group has its own group communicator definition
 
+    // count the number of processes per node
+    MPI_Comm node_comm;
+    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm);
+    int node_rank, node_size;
+    MPI_Comm_rank(node_comm, &node_rank);
+    MPI_Comm_size(node_comm, &node_size);
+
     // define independent group of MPI processes, that are not part of comm_wrk
     // for now the new group does not include comm_share
     comm_bank = MPI_COMM_WORLD; // clients and master
@@ -92,7 +101,15 @@ void initialize() {
     if (world_size < 2) {
         bank_size = 0;
     } else if (bank_size < 0) {
-        bank_size = max(world_size / 3, 1);
+        if (bank_per_node >= 0) {
+              bank_size = node_size * bank_per_node;
+        } else {
+            bank_size = max(world_size / 3, 1);
+        }
+    } else if (bank_size >=0 and bank_per_node >= 0) {
+        if (bank_size != node_size * bank_per_node and world_rank == 0)
+            std::cout<<"WARNING: bank_size and bank_per_node are incompatible "<<
+                bank_size<<" "<<bank_per_node<<std::endl;
     }
     if (world_size - bank_size < 1) MSG_ABORT("No MPI ranks left for working!");
     if (bank_size < 1 and world_size > 1) MSG_ABORT("Bank size must be at least one when using MPI!");
@@ -156,20 +173,6 @@ void initialize() {
     max_tag = *(int *)val / 2;
     id_shift = max_tag / 2; // half is reserved for non orbital.
 
-    // determine the number of threads we can assign to each mpi worker.
-    // mrcpp_get_num_procs is total number of hardware logical threads accessible by this mpi
-    // We assume that half of them are physical cores.
-    // mrcpp_get_max_threads is OMP_NUM_THREADS (environment variable).
-    // omp_threads_available is the total number of logical threads available on this compute-node
-    // We assume that half of them are physical cores.
-    //
-    // six conditions should be satisfied:
-    // 1) no one use more than mrcpp_get_num_procs()/2
-    // 2) NOT ENFORCED: no one use more than mrcpp_get_max_threads, as defined by rank 0
-    // 3) the total number of threads used on the compute-node must not exceed omp_threads_available/2
-    // 4) Bank needs only one thread
-    // 5) workers need as many threads as possible
-    // 6) at least one thread
 
     MPI_Comm comm_share_world;//all that share the memory
     MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share_world);
@@ -180,33 +183,61 @@ void initialize() {
     MPI_Allreduce(&is_bankclient, &n_wrk_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world);
 
     int omp_threads_available = thread::hardware_concurrency();
-    int nthreads = 1;
-    if (is_bankclient) nthreads = (omp_threads_available/2-n_bank_thisnode)/n_wrk_thisnode; // 3) and 5)
-
-    // do not exceed total number of cores accessible (assumed to be half the number of logical threads)
-    nthreads = min(nthreads, mrcpp_get_num_procs()); // 1)
 
-    // NB: we do not use OMP_NUM_THREADS. Use all cores accessible. Could change this in the future
-    // if OMP_NUM_THREADS is set, do not exceed
-    // we enforce that all compute nodes use the same OMP_NUM_THREADS. Rank 0 decides.
-    /* int my_OMP_NUM_THREADS = mrcpp_get_max_threads();
+    int nthreads = 1;
+    int my_OMP_NUM_THREADS = omp_get_max_threads();
     MPI_Bcast(&my_OMP_NUM_THREADS, 1, MPI_INT, 0, MPI_COMM_WORLD);
+    if (use_omp_num_threads) { // we assume that the user has set the environment variable
+        // OMP_NUM_THREADS, such that the total number of threads that can be used on each node is
+        // OMP_NUM_THREADS * (number of MPI processes per node)
+        // NB: OMP_NUM_THREADS is the number of threads for all MPI processes on one node.
+        // The bank need only one thread, and can give "their" remaining share to workers.
+        int total_omp_threads_per_node = my_OMP_NUM_THREADS * (n_bank_thisnode + n_wrk_thisnode);
+        nthreads = (total_omp_threads_per_node - n_bank_thisnode)/n_wrk_thisnode;
+    } else {
+        // we determine the number of threads by detecting what is available
+        // determine the number of threads we can assign to each mpi worker.
+        // mrcpp_get_num_procs is total number of hardware logical threads accessible by this mpi
+        // NB: We assume that half of them are physical cores (not easily detectable).
+        // mrcpp_get_max_threads is OMP_NUM_THREADS (environment variable) but is NOT USED.
+        // omp_threads_available is the total number of logical threads available on this compute-node
+        // We assume that half of them are physical cores.
+        //
+        // five conditions should be satisfied:
+        // 1) the total number of threads used on the compute-node must not exceed thread::hardware_concurrency()/2
+        // 2) no one use more than omp_get_num_procs()/2
+        // 3) Bank needs only one thread
+        // 4) workers need as many threads as possible (but all workers use same number of threads)
+        // 5) at least one thread
+        if (is_bankclient) nthreads = (omp_threads_available/2-n_bank_thisnode)/n_wrk_thisnode; // 1) and 4)
+        //cout<<nthreads<<" after direct calculation"<<endl;
+        // do not exceed total number of cores accessible (assumed to be half the number of logical threads)
+        nthreads = min(nthreads, omp_get_num_procs()/2); // 2)
+        //cout<<nthreads<<" after mrcpp_get_num_procs"<<endl;
+
+        // NB: we do not use OMP_NUM_THREADS. Use all cores accessible.
+
+        if (is_bank) nthreads = 1; // 3)
+
+        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<" threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<" my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
+
+        if (omp_threads > 0) {
+            if (omp_threads != nthreads and world_rank == 0) {
+                cout<<"Warning: recommended number of threads is "<<nthreads<<endl;
+                cout<<"setting number of threads to omp_threads, "<<max(1, omp_threads)<<endl;
+            }
+            nthreads = omp_threads;
+        }
+    }
+    nthreads = max(1, nthreads); // 5)
 
-    if (my_OMP_NUM_THREADS > 0) nthreads = min(nthreads, my_OMP_NUM_THREADS); // 2)
-    */
-
-    nthreads = max(1, nthreads); // 6)
-
-    if (is_bank) nthreads = 1; // 4)
-
-    //cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp:"<<omp_get_num_procs()<<" "<<omp_get_max_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<endl;
+    if (nthreads*n_wrk_thisnode+n_bank_thisnode < omp_threads_available/3 and world_rank == 0) {
+        std::cout<<"WARNING: only "<<nthreads*n_wrk_thisnode+n_bank_thisnode<<" threads used per node while "<<omp_threads_available<<" logical cpus are accessible "<<std::endl;
+    }
 
-    if (omp_threads > 0) {
-        if (omp_threads != nthreads and world_rank == 0) {
-            cout<<"Warning: recommended number of threads is "<<nthreads<<endl;
-            cout<<"setting number of threads to omp_threads, "<<omp_threads<<endl;
-        }
-        nthreads = omp_threads;
+    if (nthreads > omp_get_num_procs()) {
+        std::cout<<"WARNING: MPI rank "<<world_rank<<" will use "<<nthreads<<" but only "<<
+            omp_get_num_procs()<<" procs are accessible"<<std::endl;
     }
 
     omp::n_threads = nthreads;

From 1548bbfe02462c73db0531c5087597ef9dfab92f Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Mon, 23 Dec 2024 11:56:21 +0100
Subject: [PATCH 32/38] read and write set of orbitals

---
 src/trees/FunctionTree.cpp | 37 ++++++++++---------------------------
 src/utils/CompFunction.cpp |  6 +-----
 src/utils/CompFunction.h   |  2 ++
 src/utils/parallel.cpp     |  2 +-
 4 files changed, 14 insertions(+), 33 deletions(-)

diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index e80affaab..0d7c1174c 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -118,8 +118,8 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     int NDIM, k;
     in>>NDIM;
     if (NDIM != D) NOT_IMPLEMENTED_ABORT;
-    double coord[NDIM][2];
-    for (int d = 0; d < NDIM; d++) in >> coord[d][0] >> coord[d][1];
+    double coord[D][2];
+    for (int d = 0; d < D; d++) in >> coord[d][0] >> coord[d][1];
 
     int p = 1;
     int rscale = this->getRootScale(); //root scale of target MRA (MRChem) . NB: negative
@@ -127,7 +127,7 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     int L = p; //NB for now we assume the world as a cube going from -L to +L and L is a power of 2
     // We require that the world box size is identical and a power of 2
     double TXT_thres = 1.0e-14; // threshold for differences in scaling factors
-    for (int d = 0; d < NDIM; d++) {
+    for (int d = 0; d < D; d++) {
         if (std::abs(coord[d][0] + L) > TXT_thres) std::cout<<coord[d][0]<<" "<<L<<std::endl;;
         if (std::abs(coord[d][0] + L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
         if (std::abs(coord[d][1] - L) > TXT_thres) std::cout<<coord[d][1]<<" "<<L<<std::endl;;
@@ -135,7 +135,7 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     }
 
     int nChildren = 1;
-    for (int d=0; d<NDIM; d++) nChildren *= 2;
+    for (int d=0; d<D; d++) nChildren *= 2;
 
     int nmax = 0; //deppeset scale in TXT
     in>>k;
@@ -143,7 +143,7 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     k--; //MRChem defines k as highest polynomial order. MADNESS as number of polynomials
 
     int ncoefs = 1; // number of coefficents for one single node (not a full MRChem MWnode which stores 2**D of them)
-    for (int i = 0; i < NDIM; i++) ncoefs *= k+1;
+    for (int i = 0; i < D; i++) ncoefs *= k+1;
 
     std::vector<std::vector<MWNode<D, T> *>> NodeTable(50); // to store all the nodes pointers
     std::map<int,int> mp; // to store the number of children stored in each parent node
@@ -185,11 +185,11 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
         std::array<int, D> l; // translation index MRChem
         std::array<int, D> lp; // translation index MRChem, parent
 
-        for (int i = 0; i < NDIM; i++) in >> l_in[i];
+        for (int i = 0; i < D; i++) in >> l_in[i];
 
         //MRChem defines smallest l as -(2**n)*L , where -L is smallest world coordinate.
         //note that root scale has 2**D nodes (if range is -L,L)
-        for (int i=0; i<NDIM; i++) {
+        for (int i=0; i<D; i++) {
             l[i] = l_in[i] - std::pow(2,n)*L;
             lp[i] = l_in[i]/2 - std::pow(2,n-1)*L; //for parent
         }
@@ -199,7 +199,7 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
         // must find to which child of the parent node it corresponds
         int c_ix = 0; // child index in the parent
         int p = 1;
-        for (int i = 0; i < NDIM; i++) {
+        for (int i = 0; i < D; i++) {
             if (abs(l[i])%2 == 1)c_ix += p;
             p *= 2;
         }
@@ -339,10 +339,7 @@ template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::str
         NodeIndex<D> idx=this->endNodeTable[count]->getNodeIndex();
         MWNode<D, T> *node = &(this->getNode(idx, false));
         T *values = node->getCoefs();
-         int n = idx.getScale();
-         if(node->getSerialIx()==56 or (n==1 and idx.getTranslation(0)==0  and idx.getTranslation(1)==0  and idx.getTranslation(2)==0  )){
-             std::cout<<idx<<" "<<node->getSerialIx()<<" "<<node->getSquareNorm()<<" "<<node->getComponentNorm(0)<<std::endl;
-         }
+        int n = idx.getScale();
         node->mwTransform(Reconstruction);
         node->cvTransform(Forward);
         // we write for each children nodes separately
@@ -359,17 +356,6 @@ template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::str
             out << std::endl;
             for (int i=0; i< ncoefs; i++) out<< values[cix*ncoefs + mapMRC[i]]<<" ";
             out << std::endl;
-            if(node->getSerialIx()==56 or (n==1 and idx.getTranslation(0)==0  and idx.getTranslation(1)==0  and idx.getTranslation(2)==0 )){
-            std::cout<< n-rscale+2 <<" ";// scales start at zero. NB: children are one scale larger than node
-            for (int i = 0; i < D; i++){
-                int p = (cix>>i) & 1; // shift by one for odd child indices
-                std::cout << l[i] + p << " ";
-            }
-            std::cout << std::endl;
-            T norm=0.0;
-             for (int i=0; i< ncoefs; i++) norm+=values[cix*ncoefs + i]*values[cix*ncoefs + i];
-             std::cout<<" norm quadrature "<<norm<<" "<<values[cix*ncoefs]<<std::endl;
-         }
         }
     }
     out.close();
@@ -392,7 +378,6 @@ template <int D, typename T> void FunctionTree<D, T>::saveTree(const std::string
     // Write size of tree
     int nChunks = allocator.getNChunksUsed();
     f.write((char *)&nChunks, sizeof(int));
-    std::cout<<"saving. tree norm "<<this->getSquareNorm()<<", number of nodes "<<this->getNNodes()<<", Nchunks "<<nChunks<<" "<<nChunks*allocator.getCoefChunkSize()/1024<<"kB"<<std::endl;
 
     // Write tree data, chunk by chunk
     for (int iChunk = 0; iChunk < nChunks; iChunk++) {
@@ -437,7 +422,6 @@ template <int D, typename T> void FunctionTree<D, T>::loadTree(const std::string
     allocator.reassemble();
     this->resetEndNodeTable();
     this->calcSquareNorm(true);
-    std::cout<<"Loaded. tree norm "<<this->getSquareNorm()<<" number of nodes "<<this->getNNodes()<<" N chunks"<<nChunks<<" "<<nChunks*allocator.getCoefChunkSize()/1024<<"kB"<<std::endl;
     print::time(10, "Time rewrite pointers", t2);
 }
 
@@ -559,8 +543,7 @@ template <int D, typename T> T FunctionTree<D, T>::evalf_precise(const Coord<D>
 
     MWNode<D, T> &mw_node = this->getNodeOrEndNode(arg);
     auto &f_node = static_cast<FunctionNode<D, T> &>(mw_node);
-    std::cout<<f_node.getNodeIndex()<<" "<<f_node.getSerialIx()<<" "<<f_node.getSquareNorm()<<" "<<f_node.isEndNode()<<" "<<f_node.getComponentNorm(0)<<" "<<f_node.getComponentNorm(1)<<" "<<f_node.getComponentNorm(2)<<" "<<f_node.getComponentNorm(3)<<" "<<f_node.getComponentNorm(4)<<" "<<f_node.getComponentNorm(5)<<" "<<f_node.getComponentNorm(6)<<" "<<f_node.getComponentNorm(7)<<std::endl;
-   auto result = f_node.evalf(arg);
+    auto result = f_node.evalf(arg);
     this->deleteGenerated();
 
     // Adjust for scaling factor included in basis
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index e7d3291ec..d392202e7 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -272,18 +272,14 @@ void CompFunction<D>::alloc_comp(int ialloc) {
 template <int D>
 void CompFunction<D>::free() {
     for (int i = 0; i < Ncomp(); i++) {
-        std::cout<<" delete "<<i<<std::endl;
         if(CompD[i] != nullptr) delete CompD[i];
-         std::cout<<" delete C "<<i<<std::endl;
-       if(CompC[i] !=  nullptr) delete CompC[i];
+        if(CompC[i] !=  nullptr) delete CompC[i];
         CompD[i] = nullptr;
         CompC[i] = nullptr;
-        std::cout<<" done delete "<<i<<std::endl;
     }
     if (this->func_ptr->shared_mem_real) this->func_ptr->shared_mem_real->clear();
     if (this->func_ptr->shared_mem_cplx) this->func_ptr->shared_mem_cplx->clear();
     func_ptr->Ncomp = 0;
-    std::cout<<" free done "<<std::endl;
 }
 
 template <int D>
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index c31cfc443..c7d930317 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -116,6 +116,8 @@ template <int D> class CompFunction {
     int conj() const {return func_ptr->data.conj;} // soft conjugate
     int isreal() const {return func_ptr->data.isreal;} // T=double
     int iscomplex() const {return func_ptr->data.iscomplex;} // T=DoubleComplex
+    void defreal() {func_ptr->data.isreal = 1;} // define as real
+    void defcomplex() {func_ptr->data.iscomplex = 1;} // define as complex
     int share() const {return func_ptr->data.shared;}
     int* Nchunks() const {return func_ptr->data.Nchunks;} // number of chunks of each component tree
 
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 90a066771..47fbf226e 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -219,7 +219,7 @@ void initialize() {
 
         if (is_bank) nthreads = 1; // 3)
 
-        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<" threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<" my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
+        //        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<" threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<" my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
 
         if (omp_threads > 0) {
             if (omp_threads != nthreads and world_rank == 0) {

From f1a5ef47bc99f2a25b124950070e91a3fd297ce0 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 24 Jan 2025 12:22:46 +0100
Subject: [PATCH 33/38] clang-format

---
 examples/mpi_matrix.cpp                       |    2 +-
 examples/scf.cpp                              |    5 +-
 examples/schrodinger_semigroup1d.cpp          |   91 +-
 examples/tree_cleaner.cpp                     |    2 +-
 src/functions/AnalyticFunction.h              |    4 +-
 src/functions/BoysFunction.cpp                |    2 +-
 src/functions/BoysFunction.h                  |    2 +-
 src/functions/GaussExp.h                      |    2 +-
 src/functions/Gaussian.h                      |    2 +-
 src/functions/JpowerIntegrals.cpp             |   38 +-
 src/functions/Polynomial.cpp                  |   22 +-
 src/functions/Polynomial.h                    |    4 +-
 src/functions/RepresentableFunction.h         |    2 +-
 src/functions/function_utils.cpp              |    4 +-
 src/operators/ConvolutionOperator.cpp         |   12 +-
 src/operators/ConvolutionOperator.h           |   12 +-
 src/operators/HeatKernel.h                    |    2 +-
 src/operators/HeatOperator.h                  |    2 +-
 src/operators/OperatorState.h                 |   10 +-
 src/operators/OperatorStatistics.h            |    2 +-
 src/operators/TimeEvolutionOperator.cpp       |   67 +-
 src/operators/TimeEvolutionOperator.h         |   17 +-
 src/treebuilders/AdditionCalculator.h         |    8 +-
 src/treebuilders/AnalyticAdaptor.h            |    4 +-
 src/treebuilders/ConvolutionCalculator.cpp    |   16 +-
 src/treebuilders/CopyAdaptor.h                |    2 +-
 src/treebuilders/DerivativeCalculator.cpp     |   29 +-
 src/treebuilders/DerivativeCalculator.h       |    2 +-
 src/treebuilders/MultiplicationCalculator.h   |    8 +-
 src/treebuilders/ProjectionCalculator.cpp     |    4 +-
 src/treebuilders/SquareCalculator.h           |    4 +-
 ...meEvolution_CrossCorrelationCalculator.cpp |   72 +-
 ...TimeEvolution_CrossCorrelationCalculator.h |   26 +-
 src/treebuilders/TreeBuilder.cpp              |    4 +-
 src/treebuilders/TreeCalculator.h             |    6 +-
 src/treebuilders/WaveletAdaptor.h             |    8 +-
 src/treebuilders/add.cpp                      |  181 +-
 src/treebuilders/add.h                        |   26 +-
 src/treebuilders/apply.cpp                    |  216 +-
 src/treebuilders/complex_apply.cpp            |   34 +-
 src/treebuilders/grid.cpp                     |    6 +-
 src/treebuilders/map.cpp                      |    3 +-
 src/treebuilders/map.h                        |    3 +-
 src/treebuilders/multiply.cpp                 |  362 +--
 src/treebuilders/multiply.h                   |   70 +-
 src/treebuilders/project.cpp                  |    2 -
 src/trees/BandWidth.cpp                       |    1 -
 src/trees/BandWidth.h                         |    2 +-
 src/trees/CornerOperatorTree.cpp              |   33 +-
 src/trees/CornerOperatorTree.h                |    1 -
 src/trees/FunctionNode.cpp                    |   66 +-
 src/trees/FunctionNode.h                      |   24 +-
 src/trees/FunctionTree.cpp                    |  305 ++-
 src/trees/FunctionTree.h                      |   12 +-
 src/trees/MWNode.cpp                          |  204 +-
 src/trees/MWNode.h                            |   55 +-
 src/trees/MWTree.cpp                          |   30 +-
 src/trees/MWTree.h                            |   12 +-
 src/trees/MultiResolutionAnalysis.cpp         |   22 +-
 src/trees/NodeAllocator.cpp                   |   39 +-
 src/trees/NodeAllocator.h                     |   27 +-
 src/trees/NodeBox.h                           |    4 +-
 src/trees/OperatorTree.cpp                    |   71 +-
 src/trees/OperatorTree.h                      |    6 +-
 src/trees/TreeIterator.cpp                    |    8 +-
 src/trees/TreeIterator.h                      |    2 +-
 src/utils/Bank.cpp                            |  113 +-
 src/utils/CompFunction.cpp                    | 1227 +++++-----
 src/utils/CompFunction.h                      |  152 +-
 src/utils/ComplexFunction.cpp                 | 2015 -----------------
 src/utils/ComplexFunction.h                   |  199 --
 src/utils/Plotter.cpp                         |   72 +-
 src/utils/Plotter.h                           |    6 +-
 src/utils/Printer.cpp                         |    2 +-
 src/utils/math_utils.cpp                      |    9 +-
 src/utils/mpi_utils.cpp                       |    9 +-
 src/utils/mpi_utils.h                         |    8 +-
 src/utils/parallel.cpp                        |   92 +-
 src/utils/parallel.h                          |    2 +-
 src/utils/tree_utils.cpp                      |   14 +-
 src/utils/tree_utils.h                        |    5 +-
 tests/operators/derivative_operator.cpp       |  113 +-
 tests/operators/poisson_operator.cpp          |    2 +-
 .../schrodinger_evolution_operator.cpp        |   75 +-
 tests/treebuilders/map.cpp                    |   14 +-
 tests/treebuilders/multiplication.cpp         |   30 +-
 86 files changed, 1865 insertions(+), 4620 deletions(-)
 delete mode 100644 src/utils/ComplexFunction.cpp
 delete mode 100644 src/utils/ComplexFunction.h

diff --git a/examples/mpi_matrix.cpp b/examples/mpi_matrix.cpp
index 69c370a70..536774c7f 100644
--- a/examples/mpi_matrix.cpp
+++ b/examples/mpi_matrix.cpp
@@ -54,7 +54,7 @@ int main(int argc, char **argv) {
         };
         mrcpp::FunctionTree<3> *tree = new mrcpp::FunctionTree<3>(MRA);
         if (i % wsize == wrank) {
-	  mrcpp::project<3, double>(prec, *tree, f);
+            mrcpp::project<3, double>(prec, *tree, f);
             tree->normalize();
         }
         f_vec.push_back(std::make_tuple(1.0, tree));
diff --git a/examples/scf.cpp b/examples/scf.cpp
index fe34d936b..880830c91 100644
--- a/examples/scf.cpp
+++ b/examples/scf.cpp
@@ -21,10 +21,7 @@ void setupNuclearPotential(double Z, FunctionTree<D> &V) {
 
     // Smoothing parameter
     auto c = 0.00435 * prec / std::pow(Z, 5);
-    auto u = [](double r) -> double {
-        return std::erf(r) / r +
-               1.0 / (3.0 * std::sqrt(mrcpp::pi)) * (std::exp(-r * r) + 16.0 * std::exp(-4.0 * r * r));
-    };
+    auto u = [](double r) -> double { return std::erf(r) / r + 1.0 / (3.0 * std::sqrt(mrcpp::pi)) * (std::exp(-r * r) + 16.0 * std::exp(-4.0 * r * r)); };
     auto f = [u, c, Z](const Coord<3> &r) -> double {
         auto x = std::sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2]);
         return -1.0 * Z * u(x / c) / c;
diff --git a/examples/schrodinger_semigroup1d.cpp b/examples/schrodinger_semigroup1d.cpp
index 6035aa3c3..657d296f1 100644
--- a/examples/schrodinger_semigroup1d.cpp
+++ b/examples/schrodinger_semigroup1d.cpp
@@ -1,13 +1,11 @@
 #include "MRCPP/MWFunctions"
-#include <MRCPP/MWOperators>
-#include <MRCPP/Printer>
 #include "MRCPP/Plotter"
-#include <MRCPP/Timer>
-#include "operators/TimeEvolutionOperator.h"
 #include "functions/special_functions.h"
+#include "operators/TimeEvolutionOperator.h"
 #include "treebuilders/complex_apply.h"
-
-
+#include <MRCPP/MWOperators>
+#include <MRCPP/Printer>
+#include <MRCPP/Timer>
 
 const auto min_scale = 0;
 const auto max_depth = 25;
@@ -15,15 +13,14 @@ const auto max_depth = 25;
 const auto order = 4;
 const auto prec = 1.0e-7;
 
-int finest_scale = 10; //for time evolution operator construction (not recommended to use more than 10)
-int max_Jpower = 20;  //the amount of J integrals to be used in construction (20 should be enough)
+int finest_scale = 10; // for time evolution operator construction (not recommended to use more than 10)
+int max_Jpower = 20;   // the number of J integrals to be used in construction (20 should be enough)
 
 // Time moments:
-double t1 = 0.001;         //initial time moment (not recommended to use more than 0.001)
-double delta_t = 0.001;    //time step (not recommended to use less than 0.001)
-double t2 = delta_t + t1;  //final time moment
+double t1 = 0.001;        // initial time moment (not recommended to use more than 0.001)
+double delta_t = 0.001;   // time step (not recommended to use less than 0.001)
+double t2 = delta_t + t1; // final time moment
 
-    
 /**
  * @brief Exploring free-particle time evolution.
  * @details We check the time propagator.
@@ -41,17 +38,16 @@ double t2 = delta_t + t1;  //final time moment
  *   \psi(x, t) = \sqrt{\frac{\sigma}{4it + \sigma}} e^{-\frac{(x - x_0)^2}{4it + \sigma}}
  *   .
  * \f]
- * 
+ *
  */
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
     auto timer = mrcpp::Timer();
 
     // Initialize printing
     auto printlevel = 0;
     mrcpp::Printer::init(printlevel);
     mrcpp::print::environment(0);
-    
+
     // Initialize world in the unit cube [0,1]
     auto basis = mrcpp::LegendreBasis(order);
     auto world = mrcpp::BoundingBox<1>(min_scale);
@@ -74,22 +70,10 @@ int main(int argc, char **argv)
     double x0 = 0.5;
 
     // Functions f(x) = psi(x, t1) and g(x) = psi(x, t2)
-    auto Re_f = [sigma, x0, t=t1](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real();
-    };
-    auto Im_f = [sigma, x0, t=t1](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag();
-    };
-    auto Re_g = [sigma, x0, t=t2](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real();
-    };
-    auto Im_g = [sigma, x0, t=t2](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag();
-    };
+    auto Re_f = [sigma, x0, t = t1](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real(); };
+    auto Im_f = [sigma, x0, t = t1](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag(); };
+    auto Re_g = [sigma, x0, t = t2](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real(); };
+    auto Im_g = [sigma, x0, t = t2](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag(); };
 
     // Projecting functions
     mrcpp::FunctionTree<1> Re_f_tree(MRA);
@@ -104,57 +88,56 @@ int main(int argc, char **argv)
     // Output function trees
     mrcpp::FunctionTree<1> Re_fout_tree(MRA);
     mrcpp::FunctionTree<1> Im_fout_tree(MRA);
-    
+
     // Complex objects for use in apply()
-    mrcpp::ComplexObject< mrcpp::ConvolutionOperator<1> > E(ReExp, ImExp);
-    mrcpp::ComplexObject< mrcpp::FunctionTree<1> > input(Re_f_tree, Im_f_tree);
-    mrcpp::ComplexObject< mrcpp::FunctionTree<1> > output(Re_fout_tree, Im_fout_tree);
+    mrcpp::ComplexObject<mrcpp::ConvolutionOperator<1>> E(ReExp, ImExp);
+    mrcpp::ComplexObject<mrcpp::FunctionTree<1>> input(Re_f_tree, Im_f_tree);
+    mrcpp::ComplexObject<mrcpp::FunctionTree<1>> output(Re_fout_tree, Im_fout_tree);
 
     mrcpp::print::header(0, "Applying operator");
     mrcpp::print::footer(0, timer, 2);
 
     // Apply operator Exp(delta_t) f(x)
     mrcpp::apply(prec, output, E, input);
-    
+
     mrcpp::print::header(0, "Checking the result on analytical solution");
     mrcpp::print::footer(0, timer, 2);
 
     // Check g(x) = Exp(delta_t) f(x)
-    mrcpp::FunctionTree<1> Re_error(MRA);  // = Re_fout_tree - Re_g_tree
-    mrcpp::FunctionTree<1> Im_error(MRA);  // = Im_fout_tree - Im_g_tree
-    
+    mrcpp::FunctionTree<1> Re_error(MRA); // = Re_fout_tree - Re_g_tree
+    mrcpp::FunctionTree<1> Im_error(MRA); // = Im_fout_tree - Im_g_tree
+
     // Re_error = Re_fout_tree - Re_g_tree
     add(prec, Re_error, 1.0, Re_fout_tree, -1.0, Re_g_tree);
     auto Re_integral = Re_error.integrate();
     auto Re_sq_norm = Re_error.getSquareNorm();
     mrcpp::print::value(0, "Integral of    Re(Exp(delta_t) f(x) - g(x)) =", Re_integral);
     mrcpp::print::value(0, "Square norm of Re(Exp(delta_t) f(x) - g(x)) =", Re_sq_norm);
-    
+
     // Im_error = Im_fout_tree - Im_g_tree
     add(prec, Im_error, 1.0, Im_fout_tree, -1.0, Im_g_tree);
     auto Im_integral = Im_error.integrate();
     auto Im_sq_norm = Im_error.getSquareNorm();
     mrcpp::print::value(0, "Integral of    Im(Exp(delta_t) f(x) - g(x)) =", Im_integral);
     mrcpp::print::value(0, "Square norm of Im(Exp(delta_t) f(x) - g(x)) =", Im_sq_norm);
-        
+
     mrcpp::print::header(0, "Saving plots to files");
     mrcpp::print::footer(0, timer, 2);
 
     // Set plotting parameters
-    int nPts = 1000;                                     
-    mrcpp::Coord<1> o{0.0};                   
-    mrcpp::Coord<1> a{1.0};                   
-    mrcpp::Plotter<1> plot(o);                           
+    int nPts = 1000;
+    mrcpp::Coord<1> o{0.0};
+    mrcpp::Coord<1> a{1.0};
+    mrcpp::Plotter<1> plot(o);
     plot.setRange(a);
 
-    plot.linePlot({nPts}, Re_error, "Re_error");             // Write to file Re_error.line
-    plot.linePlot({nPts}, Im_error, "Im_error");             // Write to file Im_error.line
-    plot.linePlot({nPts}, Re_f_tree, "Re_f_tree");           // Write to file Re_f_tree.line
-    plot.linePlot({nPts}, Im_f_tree, "Im_f_tree");           // Write to file Im_f_tree.line
-    plot.linePlot({nPts}, Re_g_tree, "Re_g_tree");           // Write to file Re_g_tree.line
-    plot.linePlot({nPts}, Im_g_tree, "Im_g_tree");           // Write to file Im_g_tree.line
-    
+    plot.linePlot({nPts}, Re_error, "Re_error");   // Write to file Re_error.line
+    plot.linePlot({nPts}, Im_error, "Im_error");   // Write to file Im_error.line
+    plot.linePlot({nPts}, Re_f_tree, "Re_f_tree"); // Write to file Re_f_tree.line
+    plot.linePlot({nPts}, Im_f_tree, "Im_f_tree"); // Write to file Im_f_tree.line
+    plot.linePlot({nPts}, Re_g_tree, "Re_g_tree"); // Write to file Re_g_tree.line
+    plot.linePlot({nPts}, Im_g_tree, "Im_g_tree"); // Write to file Im_g_tree.line
+
     mrcpp::print::footer(0, timer, 2);
     return 0;
 }
-
diff --git a/examples/tree_cleaner.cpp b/examples/tree_cleaner.cpp
index dd4d85a05..350f98e40 100644
--- a/examples/tree_cleaner.cpp
+++ b/examples/tree_cleaner.cpp
@@ -43,7 +43,7 @@ int main(int argc, char **argv) {
     auto iter = 0;
     auto n_nodes = 1;
     while (n_nodes > 0) {
-      mrcpp::project<D, double>(-1.0, f_tree, f);         // Projecting on fixed grid
+        mrcpp::project<D, double>(-1.0, f_tree, f); // Projecting on fixed grid
         n_nodes = mrcpp::refine_grid(f_tree, prec); // Refine grid
         mrcpp::clear_grid(f_tree);                  // Clear MW coefs
         printout(0, " iter " << std::setw(3) << iter++ << std::setw(45));
diff --git a/src/functions/AnalyticFunction.h b/src/functions/AnalyticFunction.h
index 7043d7fe6..aca20285b 100644
--- a/src/functions/AnalyticFunction.h
+++ b/src/functions/AnalyticFunction.h
@@ -40,9 +40,7 @@ template <int D, typename T = double> class AnalyticFunction : public Representa
     AnalyticFunction(std::function<T(const Coord<D> &r)> f, const double *a = nullptr, const double *b = nullptr)
             : RepresentableFunction<D, T>(a, b)
             , func(f) {}
-    AnalyticFunction(std::function<T(const Coord<D> &r)> f,
-                     const std::vector<double> &a,
-                     const std::vector<double> &b)
+    AnalyticFunction(std::function<T(const Coord<D> &r)> f, const std::vector<double> &a, const std::vector<double> &b)
             : AnalyticFunction(f, a.data(), b.data()) {}
 
     void set(std::function<T(const Coord<D> &r)> f) { this->func = f; }
diff --git a/src/functions/BoysFunction.cpp b/src/functions/BoysFunction.cpp
index 0a3364845..7b9f1ddb5 100644
--- a/src/functions/BoysFunction.cpp
+++ b/src/functions/BoysFunction.cpp
@@ -32,7 +32,7 @@
 namespace mrcpp {
 
 BoysFunction::BoysFunction(int n, double p)
-  : RepresentableFunction<1, double>()
+        : RepresentableFunction<1, double>()
         , order(n)
         , prec(p)
         , MRA(BoundingBox<1>(), InterpolatingBasis(13)) {}
diff --git a/src/functions/BoysFunction.h b/src/functions/BoysFunction.h
index cc5cc1916..f8b8824d1 100644
--- a/src/functions/BoysFunction.h
+++ b/src/functions/BoysFunction.h
@@ -30,7 +30,7 @@
 
 namespace mrcpp {
 
-  class BoysFunction final : public RepresentableFunction<1, double> {
+class BoysFunction final : public RepresentableFunction<1, double> {
 public:
     BoysFunction(int n, double prec = 1.0e-10);
 
diff --git a/src/functions/GaussExp.h b/src/functions/GaussExp.h
index f33549ec1..a4315e381 100644
--- a/src/functions/GaussExp.h
+++ b/src/functions/GaussExp.h
@@ -51,7 +51,7 @@ namespace mrcpp {
  *
  */
 
-    template <int D> class GaussExp : public RepresentableFunction<D, double> {
+template <int D> class GaussExp : public RepresentableFunction<D, double> {
 public:
     GaussExp(int nTerms = 0, double prec = GAUSS_EXP_PREC);
     GaussExp(const GaussExp<D> &gExp);
diff --git a/src/functions/Gaussian.h b/src/functions/Gaussian.h
index 7e79e052a..ddb039202 100644
--- a/src/functions/Gaussian.h
+++ b/src/functions/Gaussian.h
@@ -40,7 +40,7 @@
 
 namespace mrcpp {
 
-    template <int D> class Gaussian : public RepresentableFunction<D, double> {
+template <int D> class Gaussian : public RepresentableFunction<D, double> {
 public:
     Gaussian(double a, double c, const Coord<D> &r, const std::array<int, D> &p);
     Gaussian(const std::array<double, D> &a, double c, const Coord<D> &r, const std::array<int, D> &p);
diff --git a/src/functions/JpowerIntegrals.cpp b/src/functions/JpowerIntegrals.cpp
index 0d0d43181..179f6fcc6 100644
--- a/src/functions/JpowerIntegrals.cpp
+++ b/src/functions/JpowerIntegrals.cpp
@@ -24,47 +24,37 @@
  */
 
 #include "JpowerIntegrals.h"
-#include <algorithm>    // std::find_if_not
-
+#include <algorithm> // std::find_if_not
 
 namespace mrcpp {
 
-
-JpowerIntegrals::JpowerIntegrals(double a, int scaling, int M, double threshold)
-{
+JpowerIntegrals::JpowerIntegrals(double a, int scaling, int M, double threshold) {
     this->scaling = scaling;
     int N = 1 << scaling;
-    for(int l = 0; l < N; l++  )
-        integrals.push_back( calculate_J_power_integrals(l, a, M, threshold) );
-    for(int l = 1 - N; l < 0; l++  )
-        integrals.push_back( calculate_J_power_integrals(l, a, M, threshold) );
+    for (int l = 0; l < N; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold));
+    for (int l = 1 - N; l < 0; l++) integrals.push_back(calculate_J_power_integrals(l, a, M, threshold));
 }
 
-
 /// @brief in progress
 /// @param index - interger lying in the interval \f$ [ -2^n + 1, \ldots, 2^n - 1 ] \f$.
 /// @return in progress
-std::vector<std::complex<double>> & JpowerIntegrals::operator[](int index)
-{
-    if( index < 0 ) index += integrals.size();
+std::vector<std::complex<double>> &JpowerIntegrals::operator[](int index) {
+    if (index < 0) index += integrals.size();
     return integrals[index];
 }
 
-std::vector<std::complex<double>> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double threshold)
-{
+std::vector<std::complex<double>> JpowerIntegrals::calculate_J_power_integrals(int l, double a, int M, double threshold) {
     using namespace std::complex_literals;
 
     std::complex<double> J_0 = 0.25 * std::exp(-0.25i * M_PI) / std::sqrt(M_PI * a) * std::exp(0.25i * static_cast<double>(l * l) / a);
     std::complex<double> beta(0, 0.5 / a);
     auto alpha = static_cast<double>(l) * beta;
-    
+
     std::vector<std::complex<double>> J = {0.0, J_0};
 
-    for (int m = 0; m < M; m++)
-    {
+    for (int m = 0; m < M; m++) {
         std::complex<double> term1 = J[J.size() - 1] * alpha;
-        std::complex<double> term2
-        = J[J.size() - 2] * beta * static_cast<double>(m) / static_cast<double>(m + 2);
+        std::complex<double> term2 = J[J.size() - 2] * beta * static_cast<double>(m) / static_cast<double>(m + 2);
         std::complex<double> last = (term1 + term2) / static_cast<double>(m + 3);
         J.push_back(last);
     }
@@ -73,14 +63,10 @@ std::vector<std::complex<double>> JpowerIntegrals::calculate_J_power_integrals(i
     return J;
 }
 
-
 /// @details Removes negligible elements in \b J until it reaches a considerable value.
-void JpowerIntegrals::crop(std::vector<std::complex<double>> & J, double threshold)
-{
+void JpowerIntegrals::crop(std::vector<std::complex<double>> &J, double threshold) {
     // Lambda function to check if an element is negligible
-    auto isNegligible = [threshold](const std::complex<double>& c) {
-        return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold;
-    };
+    auto isNegligible = [threshold](const std::complex<double> &c) { return std::abs(c.real()) < threshold && std::abs(c.imag()) < threshold; };
     // Remove negligible elements from the end of the vector
     J.erase(std::find_if_not(J.rbegin(), J.rend(), isNegligible).base(), J.end());
 }
diff --git a/src/functions/Polynomial.cpp b/src/functions/Polynomial.cpp
index 964fe687b..c54acc148 100644
--- a/src/functions/Polynomial.cpp
+++ b/src/functions/Polynomial.cpp
@@ -45,7 +45,7 @@ namespace mrcpp {
 /** Construct polynomial of order zero with given size and bounds.
  * Includes default constructor. */
 Polynomial::Polynomial(int k, const double *a, const double *b)
-  : RepresentableFunction<1, double>(a, b) {
+        : RepresentableFunction<1, double>(a, b) {
     assert(k >= 0);
     this->N = 1.0;
     this->L = 0.0;
@@ -88,8 +88,8 @@ Polynomial &Polynomial::operator=(const Polynomial &poly) {
 /** Evaluate scaled and translated polynomial */
 double Polynomial::evalf(double x) const {
     if (isBounded()) {
-        if (x < this->getScaledLowerBound() ) return 0.0;
-        if (x > this->getScaledUpperBound() ) return 0.0;
+        if (x < this->getScaledLowerBound()) return 0.0;
+        if (x > this->getScaledUpperBound()) return 0.0;
     }
     double xp = 1.0;
     double y = 0.0;
@@ -146,12 +146,8 @@ Polynomial &Polynomial::operator*=(double c) {
 /** Calculate P = P*Q */
 Polynomial &Polynomial::operator*=(const Polynomial &Q) {
     Polynomial &P = *this;
-    if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) {
-        MSG_ERROR("Polynomials not defined on same scale.");
-    }
-    if (std::abs(P.getTranslation() - Q.getTranslation()) > MachineZero) {
-        MSG_ERROR("Polynomials not defined on same translation.");
-    }
+    if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); }
+    if (std::abs(P.getTranslation() - Q.getTranslation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same translation."); }
 
     int P_order = P.getOrder();
     int Q_order = Q.getOrder();
@@ -197,12 +193,8 @@ Polynomial &Polynomial::operator-=(const Polynomial &Q) {
 /** Calculate P = P + c*Q. */
 void Polynomial::addInPlace(double c, const Polynomial &Q) {
     Polynomial &P = *this;
-    if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) {
-        MSG_ERROR("Polynomials not defined on same scale.");
-    }
-    if (std::abs(P.getTranslation() - Q.getTranslation()) > MachineZero) {
-        MSG_ERROR("Polynomials not defined on same translation.");
-    }
+    if (std::abs(P.getDilation() - Q.getDilation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same scale."); }
+    if (std::abs(P.getTranslation() - Q.getTranslation()) > MachineZero) { MSG_ERROR("Polynomials not defined on same translation."); }
 
     int P_order = P.getOrder();
     int Q_order = Q.getOrder();
diff --git a/src/functions/Polynomial.h b/src/functions/Polynomial.h
index fadc2c988..93e3ec77d 100644
--- a/src/functions/Polynomial.h
+++ b/src/functions/Polynomial.h
@@ -44,7 +44,7 @@
 
 namespace mrcpp {
 
-  class Polynomial : public RepresentableFunction<1, double> {
+class Polynomial : public RepresentableFunction<1, double> {
 public:
     Polynomial(int k = 0, const double *a = nullptr, const double *b = nullptr);
     Polynomial(int k, const std::vector<double> &a, const std::vector<double> &b)
@@ -74,7 +74,7 @@ namespace mrcpp {
     void setDilation(double n) { this->N = n; }
     void setTranslation(double l) { this->L = l; }
     void dilate(double n) { this->N *= n; }
-    void translate(double l) { this->L += this->N*l; }
+    void translate(double l) { this->L += this->N * l; }
 
     int size() const { return this->coefs.size(); } ///< Length of coefs vector
     int getOrder() const;
diff --git a/src/functions/RepresentableFunction.h b/src/functions/RepresentableFunction.h
index 82381beaa..6123e3051 100644
--- a/src/functions/RepresentableFunction.h
+++ b/src/functions/RepresentableFunction.h
@@ -37,8 +37,8 @@
 
 #include "MRCPP/constants.h"
 #include "MRCPP/mrcpp_declarations.h"
-#include "trees/NodeIndex.h"
 #include "MRCPP/utils/math_utils.h"
+#include "trees/NodeIndex.h"
 
 namespace mrcpp {
 
diff --git a/src/functions/function_utils.cpp b/src/functions/function_utils.cpp
index 39f30a938..598c9b12a 100644
--- a/src/functions/function_utils.cpp
+++ b/src/functions/function_utils.cpp
@@ -33,9 +33,7 @@ double ObaraSaika_ab(int power_a, int power_b, double pos_a, double pos_b, doubl
 
 template <int D> double function_utils::calc_overlap(const GaussFunc<D> &a, const GaussFunc<D> &b) {
     double S = 1.0;
-    for (int d = 0; d < D; d++) {
-        S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]);
-    }
+    for (int d = 0; d < D; d++) { S *= ObaraSaika_ab(a.getPower()[d], b.getPower()[d], a.getPos()[d], b.getPos()[d], a.getExp()[d], b.getExp()[d]); }
     S *= a.getCoef() * b.getCoef();
     return S;
 }
diff --git a/src/operators/ConvolutionOperator.cpp b/src/operators/ConvolutionOperator.cpp
index 26bf44e72..9d37929aa 100644
--- a/src/operators/ConvolutionOperator.cpp
+++ b/src/operators/ConvolutionOperator.cpp
@@ -28,8 +28,8 @@
 #include "core/InterpolatingBasis.h"
 #include "core/LegendreBasis.h"
 
-#include "functions/Gaussian.h"
 #include "functions/GaussExp.h"
+#include "functions/Gaussian.h"
 
 #include "treebuilders/CrossCorrelationCalculator.h"
 #include "treebuilders/OperatorAdaptor.h"
@@ -75,8 +75,7 @@ ConvolutionOperator<D>::ConvolutionOperator(const MultiResolutionAnalysis<D> &mr
     Printer::setPrintLevel(oldlevel);
 }
 
-template <int D>
-void ConvolutionOperator<D>::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) {
+template <int D> void ConvolutionOperator<D>::initialize(GaussExp<1> &kernel, double k_prec, double o_prec) {
     auto k_mra = this->getKernelMRA();
     auto o_mra = this->getOperatorMRA();
 
@@ -86,10 +85,10 @@ void ConvolutionOperator<D>::initialize(GaussExp<1> &kernel, double k_prec, doub
     for (int i = 0; i < kernel.size(); i++) {
         // Rescale Gaussian for D-dim application
         auto *k_func = kernel.getFunc(i).copy();
-        k_func->setCoef( std::copysign( std::pow(std::abs(k_func->getCoef()), 1.0/D), k_func->getCoef() ) );
+        k_func->setCoef(std::copysign(std::pow(std::abs(k_func->getCoef()), 1.0 / D), k_func->getCoef()));
 
         FunctionTree<1> k_tree(k_mra);
-        mrcpp::build_grid(k_tree, *k_func);    // Generate empty grid to hold narrow Gaussian
+        mrcpp::build_grid(k_tree, *k_func);      // Generate empty grid to hold narrow Gaussian
         mrcpp::project(k_prec, k_tree, *k_func); // Project Gaussian starting from the empty grid
         delete k_func;
 
@@ -108,8 +107,7 @@ void ConvolutionOperator<D>::initialize(GaussExp<1> &kernel, double k_prec, doub
     }
 }
 
-template <int D>
-MultiResolutionAnalysis<1> ConvolutionOperator<D>::getKernelMRA() const {
+template <int D> MultiResolutionAnalysis<1> ConvolutionOperator<D>::getKernelMRA() const {
     const BoundingBox<D> &box = this->MRA.getWorldBox();
     const ScalingBasis &basis = this->MRA.getScalingBasis();
 
diff --git a/src/operators/ConvolutionOperator.h b/src/operators/ConvolutionOperator.h
index c9879d2a2..33d254e9d 100644
--- a/src/operators/ConvolutionOperator.h
+++ b/src/operators/ConvolutionOperator.h
@@ -32,7 +32,7 @@ namespace mrcpp {
 /** @class ConvolutionOperator
  *
  * @brief Convolution defined by a Gaussian expansion
- * 
+ *
  * @details Represents the operator
  * \f[
  * 	    T = \sum_{m=1}^M
@@ -51,13 +51,13 @@ namespace mrcpp {
  *      \sum_{m=1}^M \alpha_m \exp \left( - \beta_m |x|^2 \right)
  * \f]
  * which is passed as a parameter to the first two constructors.
- *  
+ *
  * @note Every \f$ T_d \left( \beta_m, \sqrt[D]{| \alpha_m |} \right) \f$ is the same
  * operator associated with the one-dimensional variable \f$ x_d \f$ for \f$ d = 1, \ldots, D \f$.
- * 
+ *
  * \todo: One may want to change the logic so that \f$ D \f$-root is evaluated on the previous step,
  * namely, when \f$ \alpha_m, \beta_m \f$ are calculated.
- * 
+ *
  */
 template <int D> class ConvolutionOperator : public MWOperator<D> {
 public:
@@ -71,9 +71,9 @@ template <int D> class ConvolutionOperator : public MWOperator<D> {
 
 protected:
     ConvolutionOperator(const MultiResolutionAnalysis<D> &mra)
-        : MWOperator<D>(mra, mra.getRootScale(), -10) {}
+            : MWOperator<D>(mra, mra.getRootScale(), -10) {}
     ConvolutionOperator(const MultiResolutionAnalysis<D> &mra, int root, int reach)
-        : MWOperator<D>(mra, root, reach) {}
+            : MWOperator<D>(mra, root, reach) {}
 
     void initialize(GaussExp<1> &kernel, double k_prec, double o_prec);
     void setBuildPrec(double prec) { this->build_prec = prec; }
diff --git a/src/operators/HeatKernel.h b/src/operators/HeatKernel.h
index b0303eee8..bc5a8adba 100644
--- a/src/operators/HeatKernel.h
+++ b/src/operators/HeatKernel.h
@@ -49,7 +49,7 @@ namespace mrcpp {
  *   t > 0
  *   .
  * \f]
- * 
+ *
  */
 template <int D> class HeatKernel final : public GaussExp<1> {
 public:
diff --git a/src/operators/HeatOperator.h b/src/operators/HeatOperator.h
index aabc60658..f96560a81 100644
--- a/src/operators/HeatOperator.h
+++ b/src/operators/HeatOperator.h
@@ -54,7 +54,7 @@ namespace mrcpp {
  *   t > 0
  *   .
  * \f]
- * 
+ *
  */
 template <int D> class HeatOperator final : public ConvolutionOperator<D> {
 public:
diff --git a/src/operators/OperatorState.h b/src/operators/OperatorState.h
index 855f53060..677375632 100644
--- a/src/operators/OperatorState.h
+++ b/src/operators/OperatorState.h
@@ -44,7 +44,7 @@ namespace mrcpp {
 
 template <int D, typename T> class OperatorState final {
 public:
-  OperatorState(MWNode<D, T> &gn, T *scr1)
+    OperatorState(MWNode<D, T> &gn, T *scr1)
             : gNode(&gn) {
         this->kp1 = this->gNode->getKp1();
         this->kp1_d = this->gNode->getKp1_d();
@@ -64,9 +64,9 @@ template <int D, typename T> class OperatorState final {
         }
     }
 
-  OperatorState(MWNode<D, T> &gn, std::vector<T> scr1)
+    OperatorState(MWNode<D, T> &gn, std::vector<T> scr1)
             : OperatorState(gn, scr1.data()) {}
-  void setFNode(MWNode<D, T> &fn) {
+    void setFNode(MWNode<D, T> &fn) {
         this->fNode = &fn;
         this->fData = this->fNode->getCoefs();
     }
@@ -89,8 +89,8 @@ template <int D, typename T> class OperatorState final {
     T **getAuxData() { return this->aux; }
     double **getOperData() { return this->oData; }
 
-  friend class ConvolutionCalculator<D, T>;
-  friend class DerivativeCalculator<D, T>;
+    friend class ConvolutionCalculator<D, T>;
+    friend class DerivativeCalculator<D, T>;
 
 private:
     int ft;
diff --git a/src/operators/OperatorStatistics.h b/src/operators/OperatorStatistics.h
index 9de97f8e0..883bb653c 100644
--- a/src/operators/OperatorStatistics.h
+++ b/src/operators/OperatorStatistics.h
@@ -32,7 +32,7 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class OperatorStatistics final {
+template <int D, typename T> class OperatorStatistics final {
 public:
     OperatorStatistics();
     ~OperatorStatistics();
diff --git a/src/operators/TimeEvolutionOperator.cpp b/src/operators/TimeEvolutionOperator.cpp
index 90f1f1ccd..09913a591 100644
--- a/src/operators/TimeEvolutionOperator.cpp
+++ b/src/operators/TimeEvolutionOperator.cpp
@@ -26,12 +26,11 @@
 #include "TimeEvolutionOperator.h"
 //#include "MRCPP/MWOperators"
 
-
 #include "core/InterpolatingBasis.h"
 #include "core/LegendreBasis.h"
 
-#include "functions/Gaussian.h"
 #include "functions/GaussExp.h"
+#include "functions/Gaussian.h"
 
 #include "treebuilders/CrossCorrelationCalculator.h"
 #include "treebuilders/DefaultCalculator.h"
@@ -42,8 +41,8 @@
 #include "treebuilders/project.h"
 
 #include "trees/BandWidth.h"
-#include "trees/FunctionTreeVector.h"
 #include "trees/CornerOperatorTree.h"
+#include "trees/FunctionTreeVector.h"
 
 #include "utils/Printer.h"
 #include "utils/Timer.h"
@@ -55,10 +54,8 @@
 
 #include "trees/OperatorNode.h"
 
-
 namespace mrcpp {
 
-
 /** @brief A uniform constructor for TimeEvolutionOperator class.
  *
  * @param[in] mra: MRA.
@@ -72,23 +69,21 @@ namespace mrcpp {
  *
  */
 template <int D>
-TimeEvolutionOperator<D>::TimeEvolutionOperator
-(const MultiResolutionAnalysis<D> &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower)
-    : ConvolutionOperator<D>(mra, mra.getRootScale(), -10)   //One can use ConvolutionOperator instead as well
+TimeEvolutionOperator<D>::TimeEvolutionOperator(const MultiResolutionAnalysis<D> &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower)
+        : ConvolutionOperator<D>(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well
 {
     int oldlevel = Printer::setPrintLevel(0);
     this->setBuildPrec(prec);
 
-    SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType() );
+    SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType());
     this->cross_correlation = &cross_correlation;
 
-    initialize(time, finest_scale, imaginary, max_Jpower);     //will go outside of the constructor in future
+    initialize(time, finest_scale, imaginary, max_Jpower); // will go outside of the constructor in future
 
-    this->initOperExp(1);   //this turns out to be important
+    this->initOperExp(1); // this turns out to be important
     Printer::setPrintLevel(oldlevel);
 }
 
-
 /** @brief An adaptive constructor for TimeEvolutionOperator class.
  *
  * @param[in] mra: MRA.
@@ -105,24 +100,21 @@ TimeEvolutionOperator<D>::TimeEvolutionOperator
  *
  */
 template <int D>
-TimeEvolutionOperator<D>::TimeEvolutionOperator
-(const MultiResolutionAnalysis<D> &mra, double prec, double time, bool imaginary, int max_Jpower)
-    : ConvolutionOperator<D>(mra, mra.getRootScale(), -10)   //One can use ConvolutionOperator instead as well
+TimeEvolutionOperator<D>::TimeEvolutionOperator(const MultiResolutionAnalysis<D> &mra, double prec, double time, bool imaginary, int max_Jpower)
+        : ConvolutionOperator<D>(mra, mra.getRootScale(), -10) // One can use ConvolutionOperator instead as well
 {
     int oldlevel = Printer::setPrintLevel(0);
     this->setBuildPrec(prec);
 
-    SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType() );
+    SchrodingerEvolution_CrossCorrelation cross_correlation(30, mra.getOrder(), mra.getScalingBasis().getScalingType());
     this->cross_correlation = &cross_correlation;
 
-    initialize(time, imaginary, max_Jpower);     //will go outside of the constructor in future
+    initialize(time, imaginary, max_Jpower); // will go outside of the constructor in future
 
-    this->initOperExp(1);   //this turns out to be important
+    this->initOperExp(1); // this turns out to be important
     Printer::setPrintLevel(oldlevel);
 }
 
-
-
 /** @brief Creates Re or Im of operator
  *
  * @details Adaptive down to scale \f$ N = 18 \f$.
@@ -132,9 +124,7 @@ TimeEvolutionOperator<D>::TimeEvolutionOperator
  * only needed ones, while building the tree (in progress).
  *
  */
-template <int D>
-void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_Jpower)
-{
+template <int D> void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_Jpower) {
     int N = 18;
 
     double o_prec = this->build_prec;
@@ -142,8 +132,7 @@ void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_J
     auto o_tree = std::make_unique<CornerOperatorTree>(o_mra, o_prec);
 
     std::map<int, JpowerIntegrals *> J;
-    for( int n = 0; n <= N+1; n ++ )
-        J[n] = new JpowerIntegrals(time * std::pow(4, n), n, max_Jpower);
+    for (int n = 0; n <= N + 1; n++) J[n] = new JpowerIntegrals(time * std::pow(4, n), n, max_Jpower);
     TimeEvolution_CrossCorrelationCalculator calculator(J, this->cross_correlation, imaginary);
 
     OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale(), true);
@@ -155,7 +144,7 @@ void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_J
     Timer trans_t;
     o_tree->mwTransform(BottomUp);
     o_tree->removeRoughScaleNoise();
-    //o_tree->clearSquareNorm(); //does not affect printing
+    // o_tree->clearSquareNorm(); //does not affect printing
     o_tree->calcSquareNorm();
     o_tree->setupOperNodeCache();
 
@@ -164,8 +153,7 @@ void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_J
 
     this->raw_exp.push_back(std::move(o_tree));
 
-    for( int n = 0; n <= N+1; n ++ )
-        delete J[n];
+    for (int n = 0; n <= N + 1; n++) delete J[n];
 }
 
 /** @brief Creates Re or Im of operator
@@ -173,9 +161,7 @@ void TimeEvolutionOperator<D>::initialize(double time, bool imaginary, int max_J
  * @details Uniform down to finest scale.
  *
  */
-template <int D>
-void TimeEvolutionOperator<D>::initialize(double time, int finest_scale, bool imaginary, int max_Jpower)
-{
+template <int D> void TimeEvolutionOperator<D>::initialize(double time, int finest_scale, bool imaginary, int max_Jpower) {
     double o_prec = this->build_prec;
     auto o_mra = this->getOperatorMRA();
 
@@ -186,12 +172,11 @@ void TimeEvolutionOperator<D>::initialize(double time, int finest_scale, bool im
     int N = finest_scale;
     double threshold = o_prec / 1000.0;
     std::map<int, JpowerIntegrals *> J;
-    for( int n = 0; n <= N+1; n ++ )
-        J[n] = new JpowerIntegrals(time * std::pow(4, n), n, max_Jpower, threshold);
+    for (int n = 0; n <= N + 1; n++) J[n] = new JpowerIntegrals(time * std::pow(4, n), n, max_Jpower, threshold);
     TimeEvolution_CrossCorrelationCalculator calculator(J, this->cross_correlation, imaginary);
 
     auto o_tree = std::make_unique<CornerOperatorTree>(o_mra, o_prec);
-    builder.build(*o_tree, calculator, uniform, N ); // Expand 1D kernel into 2D operator
+    builder.build(*o_tree, calculator, uniform, N); // Expand 1D kernel into 2D operator
 
     // Postprocess to make the operator functional
     Timer trans_t;
@@ -203,11 +188,9 @@ void TimeEvolutionOperator<D>::initialize(double time, int finest_scale, bool im
 
     this->raw_exp.push_back(std::move(o_tree));
 
-    for( int n = 0; n <= N+1; n ++ )
-        delete J[n];
+    for (int n = 0; n <= N + 1; n++) delete J[n];
 }
 
-
 /** @brief Creates Re or Im of operator (in progress)
  *
  * @details Tree construction starts uniformly and then continues adaptively down to scale \f$ N = 18 \f$.
@@ -216,8 +199,7 @@ void TimeEvolutionOperator<D>::initialize(double time, int finest_scale, bool im
  * @note This method is not ready for use and should not be used (in progress).
  *
  */
-template <int D> void TimeEvolutionOperator<D>::initializeSemiUniformly(double time, bool imaginary, int max_Jpower)
-{
+template <int D> void TimeEvolutionOperator<D>::initializeSemiUniformly(double time, bool imaginary, int max_Jpower) {
     MSG_ERROR("Not implemented yet method.");
 
     double o_prec = this->build_prec;
@@ -234,8 +216,7 @@ template <int D> void TimeEvolutionOperator<D>::initializeSemiUniformly(double t
 
     double threshold = o_prec / 1000.0;
     std::map<int, mrcpp::JpowerIntegrals *> J;
-    for( int n = 0; n <= N+1; n ++ )
-        J[n] = new mrcpp::JpowerIntegrals(time * std::pow(4, n), n, max_Jpower, threshold);
+    for (int n = 0; n <= N + 1; n++) J[n] = new mrcpp::JpowerIntegrals(time * std::pow(4, n), n, max_Jpower, threshold);
     mrcpp::TimeEvolution_CrossCorrelationCalculator calculator(J, this->cross_correlation, imaginary);
 
     OperatorAdaptor adaptor(o_prec, o_mra.getMaxScale());
@@ -252,11 +233,9 @@ template <int D> void TimeEvolutionOperator<D>::initializeSemiUniformly(double t
 
     this->raw_exp.push_back(std::move(o_tree));
 
-    for( int n = 0; n <= N+1; n ++ )
-        delete J[n];
+    for (int n = 0; n <= N + 1; n++) delete J[n];
 }
 
-
 template class TimeEvolutionOperator<1>;
 template class TimeEvolutionOperator<2>;
 template class TimeEvolutionOperator<3>;
diff --git a/src/operators/TimeEvolutionOperator.h b/src/operators/TimeEvolutionOperator.h
index 9e8623fcd..839ba7b40 100644
--- a/src/operators/TimeEvolutionOperator.h
+++ b/src/operators/TimeEvolutionOperator.h
@@ -25,30 +25,30 @@
 
 #pragma once
 
-#include "MWOperator.h"
 #include "ConvolutionOperator.h"
+#include "MWOperator.h"
 #include "core/SchrodingerEvolution_CrossCorrelation.h"
 
 namespace mrcpp {
 
-
 /** @class TimeEvolutionOperator
  *
  * @brief Semigroup of the free-particle Schrodinger equation
- * 
+ *
  * @details Represents the semigroup
  * \f$
  *      \exp \left( i t \partial_x^2 \right)
  *      .
  * \f$
  * Matrix elements (actual operator tree) of the operator can be obtained by calling getComponent(0, 0).
- *  
+ *
  * @note So far implementation is done for Legendre scaling functions in 1d.
- * 
+ *
  * \todo: Extend to D dimensinal on a general interval [a, b] in the future.
- * 
+ *
  */
-template <int D> class TimeEvolutionOperator : public ConvolutionOperator<D>   //One can use ConvolutionOperator instead as well
+template <int D>
+class TimeEvolutionOperator : public ConvolutionOperator<D> // One can use ConvolutionOperator instead as well
 {
 public:
     TimeEvolutionOperator(const MultiResolutionAnalysis<D> &mra, double prec, double time, int finest_scale, bool imaginary, int max_Jpower = 30);
@@ -63,12 +63,11 @@ template <int D> class TimeEvolutionOperator : public ConvolutionOperator<D>   /
     void initialize(double time, int finest_scale, bool imaginary, int max_Jpower);
     void initialize(double time, bool imaginary, int max_Jpower);
     void initializeSemiUniformly(double time, bool imaginary, int max_Jpower);
-    
+
     void setBuildPrec(double prec) { this->build_prec = prec; }
 
     double build_prec{-1.0};
     SchrodingerEvolution_CrossCorrelation *cross_correlation{nullptr};
 };
 
-
 } // namespace mrcpp
diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h
index bc3ff5250..45b6b8406 100644
--- a/src/treebuilders/AdditionCalculator.h
+++ b/src/treebuilders/AdditionCalculator.h
@@ -33,8 +33,8 @@ namespace mrcpp {
 template <int D, typename T> class AdditionCalculator final : public TreeCalculator<D, T> {
 public:
     AdditionCalculator(const FunctionTreeVector<D, T> &inp, bool conjugate = false)
-        : sum_vec(inp),
-          conj(conjugate) {}
+            : sum_vec(inp)
+            , conj(conjugate) {}
 
 private:
     FunctionTreeVector<D, T> sum_vec;
@@ -52,7 +52,7 @@ template <int D, typename T> class AdditionCalculator final : public TreeCalcula
             const double *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
-       }
+        }
         node_o.setHasCoefs();
         node_o.calcNorms();
     }
@@ -68,7 +68,7 @@ template <int D, typename T> class AdditionCalculator final : public TreeCalcula
             const ComplexDouble *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
             if (func_i.conjugate() xor conj) {
-               for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); }
+                for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); }
             } else {
                 for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
             }
diff --git a/src/treebuilders/AnalyticAdaptor.h b/src/treebuilders/AnalyticAdaptor.h
index d735933ec..3e9ca0613 100644
--- a/src/treebuilders/AnalyticAdaptor.h
+++ b/src/treebuilders/AnalyticAdaptor.h
@@ -30,10 +30,10 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class AnalyticAdaptor final : public TreeAdaptor<D, T> {
+template <int D, typename T> class AnalyticAdaptor final : public TreeAdaptor<D, T> {
 public:
     AnalyticAdaptor(const RepresentableFunction<D, T> &f, int ms)
-      : TreeAdaptor<D, T>(ms)
+            : TreeAdaptor<D, T>(ms)
             , func(&f) {}
 
 private:
diff --git a/src/treebuilders/ConvolutionCalculator.cpp b/src/treebuilders/ConvolutionCalculator.cpp
index 7da95813b..497fe0dd8 100644
--- a/src/treebuilders/ConvolutionCalculator.cpp
+++ b/src/treebuilders/ConvolutionCalculator.cpp
@@ -275,7 +275,7 @@ template <int D, typename T> void ConvolutionCalculator<D, T>::calcNode(MWNode<D
 }
 
 /** Apply each component (term) of the operator expansion to a node in f */
-  template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperComp(OperatorState<D, T> &os) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperComp(OperatorState<D, T> &os) {
     double fNorm = os.fNode->getComponentNorm(os.ft);
     int o_depth = os.fNode->getScale() - this->oper->getOperatorRoot();
     for (int i = 0; i < this->oper->size(); i++) {
@@ -289,13 +289,13 @@ template <int D, typename T> void ConvolutionCalculator<D, T>::calcNode(MWNode<D
 }
 
 /** @brief Apply a single operator component (term) to a single f-node.
- * 
+ *
  * @details Apply a single operator component (term) to a single f-node.
  * Whether the operator actualy is applied is determined by a screening threshold.
  * Here we make use of the sparcity of matrices \f$ A, B, C \f$.
- * 
+ *
  */
- template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperator(int i, OperatorState<D, T> &os) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::applyOperator(int i, OperatorState<D, T> &os) {
     MWNode<D, T> &gNode = *os.gNode;
     MWNode<D, T> &fNode = *os.fNode;
 
@@ -315,7 +315,7 @@ template <int D, typename T> void ConvolutionCalculator<D, T>::calcNode(MWNode<D
         int a = (os.gt & (1 << d)) >> d;
         int b = (os.ft & (1 << d)) >> d;
         int idx = (a << 1) + b;
-        if ( oTree.isOutsideBand(oTransl, o_depth, idx) ) { return; }
+        if (oTree.isOutsideBand(oTransl, o_depth, idx)) { return; }
 
         const OperatorNode &oNode = oTree.getNode(o_depth, oTransl);
         int oIdx = os.getOperIndex(d);
@@ -331,7 +331,7 @@ template <int D, typename T> void ConvolutionCalculator<D, T>::calcNode(MWNode<D
 
 /** Perorm the required linear algebra operations in order to apply an
 operator component to a f-node in a n-dimensional tesor space. */
-  template <int D, typename T> void ConvolutionCalculator<D, T>::tensorApplyOperComp(OperatorState<D, T> &os) {
+template <int D, typename T> void ConvolutionCalculator<D, T>::tensorApplyOperComp(OperatorState<D, T> &os) {
     T **aux = os.getAuxData();
     double **oData = os.getOperData();
     /*
@@ -361,8 +361,8 @@ operator component to a f-node in a n-dimensional tesor space. */
 #else
     */
     for (int i = 0; i < D; i++) {
-        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> f(aux[i], os.kp1, os.kp1_dm1);
-        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> g(aux[i + 1], os.kp1_dm1, os.kp1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> f(aux[i], os.kp1, os.kp1_dm1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> g(aux[i + 1], os.kp1_dm1, os.kp1);
         if (oData[i] != nullptr) {
             Eigen::Map<MatrixXd> op(oData[i], os.kp1, os.kp1);
             if (i == D - 1) { // Last dir: Add up into g
diff --git a/src/treebuilders/CopyAdaptor.h b/src/treebuilders/CopyAdaptor.h
index adeeb6766..a7825cca0 100644
--- a/src/treebuilders/CopyAdaptor.h
+++ b/src/treebuilders/CopyAdaptor.h
@@ -30,7 +30,7 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class CopyAdaptor final : public TreeAdaptor<D, T> {
+template <int D, typename T> class CopyAdaptor final : public TreeAdaptor<D, T> {
 public:
     CopyAdaptor(FunctionTree<D, T> &t, int ms, int *bw);
     CopyAdaptor(FunctionTreeVector<D, T> &t, int ms, int *bw);
diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp
index 9f384013e..8426e3d97 100644
--- a/src/treebuilders/DerivativeCalculator.cpp
+++ b/src/treebuilders/DerivativeCalculator.cpp
@@ -86,8 +86,8 @@ template <int D, typename T> void DerivativeCalculator<D, T>::printTimers() cons
     Printer::setPrecision(oldprec);
 }
 
-    template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D, T> &inpNode, MWNode<D, T> &outNode) {
-    //if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw");
+template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D, T> &inpNode, MWNode<D, T> &outNode) {
+    // if (this->oper->getMaxBandWidth() > 1) MSG_ABORT("Only implemented for zero bw");
     outNode.zeroCoefs();
     int nComp = (1 << D);
     T tmpCoefs[outNode.getNCoefs()];
@@ -102,18 +102,16 @@ template <int D, typename T> void DerivativeCalculator<D, T>::printTimers() cons
         for (int gt = 0; gt < nComp; gt++) {
             os.setGComponent(gt);
             applyOperator_bw0(os);
-       }
+        }
     }
-   // Multiply appropriate scaling factor. TODO: Could be included elsewhere
-    const double scaling_factor =
-        1.0/std::pow(outNode.getMWTree().getMRA().getWorldBox().getScalingFactor(this->applyDir), oper->getOrder());
-    if(abs(scaling_factor-1.0)>MachineZero){
+    // Multiply appropriate scaling factor. TODO: Could be included elsewhere
+    const double scaling_factor = 1.0 / std::pow(outNode.getMWTree().getMRA().getWorldBox().getScalingFactor(this->applyDir), oper->getOrder());
+    if (abs(scaling_factor - 1.0) > MachineZero) {
         for (int i = 0; i < outNode.getNCoefs(); i++) outNode.getCoefs()[i] *= scaling_factor;
     }
-    outNode.calcNorms(); //TODO:required? norms are not used for now
+    outNode.calcNorms(); // TODO:required? norms are not used for now
 }
 
-
 template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D, T> &gNode) {
     gNode.zeroCoefs();
 
@@ -146,8 +144,7 @@ template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D,
         }
     }
     // Multiply appropriate scaling factor
-    const double scaling_factor =
-        std::pow(gNode.getMWTree().getMRA().getWorldBox().getScalingFactor(this->applyDir), oper->getOrder());
+    const double scaling_factor = std::pow(gNode.getMWTree().getMRA().getWorldBox().getScalingFactor(this->applyDir), oper->getOrder());
     for (int i = 0; i < gNode.getNCoefs(); i++) gNode.getCoefs()[i] /= scaling_factor;
     this->calc_t[mrcpp_get_thread_num()].stop();
 
@@ -157,8 +154,7 @@ template <int D, typename T> void DerivativeCalculator<D, T>::calcNode(MWNode<D,
 }
 
 /** Return a vector of nodes in F affected by O, given a node in G */
-template <int D, typename T>
-MWNodeVector<D, T> DerivativeCalculator<D, T>::makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band) {
+template <int D, typename T> MWNodeVector<D, T> DerivativeCalculator<D, T>::makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band) {
     assert(this->applyDir >= 0);
     assert(this->applyDir < D);
 
@@ -183,7 +179,7 @@ MWNodeVector<D, T> DerivativeCalculator<D, T>::makeOperBand(const MWNode<D, T> &
 
 /** Apply a single operator component (term) to a single f-node assuming zero bandwidth */
 template <int D, typename T> void DerivativeCalculator<D, T>::applyOperator_bw0(OperatorState<D, T> &os) {
-    //cout<<" applyOperator "<<endl;
+    // cout<<" applyOperator "<<endl;
     MWNode<D, T> &gNode = *os.gNode;
     MWNode<D, T> &fNode = *os.fNode;
     const NodeIndex<D> &fIdx = *os.fIdx;
@@ -213,7 +209,6 @@ template <int D, typename T> void DerivativeCalculator<D, T>::applyOperator_bw0(
     tensorApplyOperComp(os);
 }
 
-
 /** Apply a single operator component (term) to a single f-node. Whether the
 operator actualy is applied is determined by a screening threshold. */
 template <int D, typename T> void DerivativeCalculator<D, T>::applyOperator(OperatorState<D, T> &os) {
@@ -304,8 +299,8 @@ template <int D, typename T> void DerivativeCalculator<D, T>::tensorApplyOperCom
 #else
     */
     for (int i = 0; i < D; i++) {
-        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> f(aux[i], os.kp1, os.kp1_dm1);
-        Eigen::Map<Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic >> g(aux[i + 1], os.kp1_dm1, os.kp1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> f(aux[i], os.kp1, os.kp1_dm1);
+        Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> g(aux[i + 1], os.kp1_dm1, os.kp1);
         if (oData[i] != nullptr) {
             Eigen::Map<MatrixXd> op(oData[i], os.kp1, os.kp1);
             if (i == D - 1) { // Last dir: Add up into g
diff --git a/src/treebuilders/DerivativeCalculator.h b/src/treebuilders/DerivativeCalculator.h
index 9adc48046..d9f435528 100644
--- a/src/treebuilders/DerivativeCalculator.h
+++ b/src/treebuilders/DerivativeCalculator.h
@@ -32,7 +32,7 @@ namespace mrcpp {
 
 template <int D, typename T> class DerivativeCalculator final : public TreeCalculator<D, T> {
 public:
-  DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D, T> &f);
+    DerivativeCalculator(int dir, DerivativeOperator<D> &o, FunctionTree<D, T> &f);
     ~DerivativeCalculator() override;
 
     MWNodeVector<D, T> *getInitialWorkVector(MWTree<D, T> &tree) const override;
diff --git a/src/treebuilders/MultiplicationCalculator.h b/src/treebuilders/MultiplicationCalculator.h
index 29c9e2574..8b40b5e27 100644
--- a/src/treebuilders/MultiplicationCalculator.h
+++ b/src/treebuilders/MultiplicationCalculator.h
@@ -33,8 +33,8 @@ namespace mrcpp {
 template <int D, typename T> class MultiplicationCalculator final : public TreeCalculator<D, T> {
 public:
     MultiplicationCalculator(const FunctionTreeVector<D, T> &inp, bool conjugate = false)
-            : prod_vec(inp),
-              conj(conjugate) {}
+            : prod_vec(inp)
+            , conj(conjugate) {}
 
 private:
     FunctionTreeVector<D, T> prod_vec;
@@ -60,7 +60,7 @@ template <int D, typename T> class MultiplicationCalculator final : public TreeC
         node_o.setHasCoefs();
         node_o.calcNorms();
     }
-    void calcNode(MWNode<D, ComplexDouble> &node_o)  {
+    void calcNode(MWNode<D, ComplexDouble> &node_o) {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         ComplexDouble *coefs_o = node_o.getCoefs();
         for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
@@ -73,7 +73,7 @@ template <int D, typename T> class MultiplicationCalculator final : public TreeC
             node_i.cvTransform(Forward);
             const ComplexDouble *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
-           if (func_i.conjugate() xor (conj and i==0)) {
+            if (func_i.conjugate() xor (conj and i == 0)) {
                 for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * std::conj(coefs_i[j]); }
             } else {
                 for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
diff --git a/src/treebuilders/ProjectionCalculator.cpp b/src/treebuilders/ProjectionCalculator.cpp
index e451ea69e..931733232 100644
--- a/src/treebuilders/ProjectionCalculator.cpp
+++ b/src/treebuilders/ProjectionCalculator.cpp
@@ -39,8 +39,8 @@ template <int D, typename T> void ProjectionCalculator<D, T>::calcNode(MWNode<D,
     Coord<D> r;
     T *coefs = node.getCoefs();
     for (int i = 0; i < node.getNCoefs(); i++) {
-      for (int d = 0; d < D; d++) { r[d] = scaling_factor[d] * exp_pts(d, i); }
-      coefs[i] = this->func->evalf(r);
+        for (int d = 0; d < D; d++) { r[d] = scaling_factor[d] * exp_pts(d, i); }
+        coefs[i] = this->func->evalf(r);
     }
 
     node.cvTransform(Backward);
diff --git a/src/treebuilders/SquareCalculator.h b/src/treebuilders/SquareCalculator.h
index 179c97e87..8d7be36b7 100644
--- a/src/treebuilders/SquareCalculator.h
+++ b/src/treebuilders/SquareCalculator.h
@@ -32,8 +32,8 @@ namespace mrcpp {
 template <int D, typename T> class SquareCalculator final : public TreeCalculator<D, T> {
 public:
     SquareCalculator(FunctionTree<D, T> &inp, bool conjugate = false)
-            : func(&inp),
-              conj(conjugate) {}
+            : func(&inp)
+            , conj(conjugate) {}
 
 private:
     FunctionTree<D, T> *func;
diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp
index a73f9a0d5..844f952d9 100644
--- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp
+++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.cpp
@@ -33,16 +33,14 @@ using Eigen::VectorXd;
 
 namespace mrcpp {
 
-
 /** @param[in] node: ...
  *  @details This will ... (work in progress)
- * 
- * 
- * 
- * 
+ *
+ *
+ *
+ *
  */
-void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node)
-{
+void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node) {
     node.zeroCoefs();
     int type = node.getMWTree().getMRA().getScalingBasis().getScalingType();
     switch (type) {
@@ -63,49 +61,43 @@ void TimeEvolution_CrossCorrelationCalculator::calcNode(MWNode<2> &node)
     node.calcNorms();
 }
 
-
-
 /** @param[in] node: ...
  *  @details This will ... (work in progress)
- * 
- * 
- * 
- * 
+ *
+ *
+ *
+ *
  */
-//template <int T>
-void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node)
-{
-    //std::cout << node;
-    // The scale of J power integrals:
-    //int scale = node.getScale() + 1;  //scale = n = (n - 1) + 1
-    
-    int t_dim = node.getTDim();       //t_dim = 4
-    int kp1_d = node.getKp1_d();      //kp1_d = (k + 1)^2
+// template <int T>
+void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node) {
+    // std::cout << node;
+    //  The scale of J power integrals:
+    // int scale = node.getScale() + 1;  //scale = n = (n - 1) + 1
+
+    int t_dim = node.getTDim();  // t_dim = 4
+    int kp1_d = node.getKp1_d(); // kp1_d = (k + 1)^2
 
     VectorXd vec_o = VectorXd::Zero(t_dim * kp1_d);
     const NodeIndex<2> &idx = node.getNodeIndex();
 
-    auto & J_power_inetgarls = *this->J_power_inetgarls[node.getScale() + 1];
-    
-    for (int i = 0; i < t_dim; i++)
-    {
+    auto &J_power_inetgarls = *this->J_power_inetgarls[node.getScale() + 1];
+
+    for (int i = 0; i < t_dim; i++) {
         NodeIndex<2> l = idx.child(i);
         int l_b = l[1] - l[0];
 
         int vec_o_segment_index = 0;
-        for( int p = 0; p <= node.getOrder(); p++ )
-            for( int j = 0; j <= node.getOrder(); j++ )
-            {
-                //std::min(M, N)  could be used for breaking the following loop
-                //this->cross_correlation->Matrix.size() should be big enough a priori
-                for( int k = 0; 2*k + p + j < J_power_inetgarls[l_b].size(); k++ )
-                {
+        for (int p = 0; p <= node.getOrder(); p++)
+            for (int j = 0; j <= node.getOrder(); j++) {
+                // std::min(M, N)  could be used for breaking the following loop
+                // this->cross_correlation->Matrix.size() should be big enough a priori
+                for (int k = 0; 2 * k + p + j < J_power_inetgarls[l_b].size(); k++) {
                     double J;
-                    if( this->imaginary ) J = J_power_inetgarls[l_b][2*k + p + j].imag();
-                    else J = J_power_inetgarls[l_b][2*k + p + j].real();
-                    vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index)
-                    +=
-                    J * cross_correlation->Matrix[k](p, j); //by default eigen library reads a transpose matrix from a file
+                    if (this->imaginary)
+                        J = J_power_inetgarls[l_b][2 * k + p + j].imag();
+                    else
+                        J = J_power_inetgarls[l_b][2 * k + p + j].real();
+                    vec_o.segment(i * kp1_d, kp1_d)(vec_o_segment_index) += J * cross_correlation->Matrix[k](p, j); // by default eigen library reads a transpose matrix from a file
                 }
                 vec_o_segment_index++;
             }
@@ -113,9 +105,9 @@ void TimeEvolution_CrossCorrelationCalculator::applyCcc(MWNode<2> &node)
 
     double *coefs = node.getCoefs();
     for (int i = 0; i < t_dim * kp1_d; i++) {
-        //auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0);
+        // auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0);
         coefs[i] = vec_o(i);
-        //std::cout<< "coefs[i] = " << coefs[i] << std::endl;
+        // std::cout<< "coefs[i] = " << coefs[i] << std::endl;
     }
 }
 
diff --git a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h
index b2d6d0542..f2a68295f 100644
--- a/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h
+++ b/src/treebuilders/TimeEvolution_CrossCorrelationCalculator.h
@@ -30,38 +30,36 @@
 #include "core/SchrodingerEvolution_CrossCorrelation.h"
 #include "functions/JpowerIntegrals.h"
 
-
 namespace mrcpp {
 
-
 /** @class TimeEvolution_CrossCorrelationCalculator
  *
  * @brief An efficient way to calculate ... (work in progress)
  *
  * @details An efficient way to calculate ... having the form
  * \f$ \ldots = \ldots \f$
- * 
- * 
- * 
+ *
+ *
+ *
  */
-class TimeEvolution_CrossCorrelationCalculator final : public TreeCalculator<2>
-{
+class TimeEvolution_CrossCorrelationCalculator final : public TreeCalculator<2> {
 public:
-    TimeEvolution_CrossCorrelationCalculator
-    (std::map<int, JpowerIntegrals *> &  J, SchrodingerEvolution_CrossCorrelation *cross_correlation, bool imaginary)
-        : J_power_inetgarls(J), cross_correlation(cross_correlation), imaginary(imaginary){}
-//private:
+    TimeEvolution_CrossCorrelationCalculator(std::map<int, JpowerIntegrals *> &J, SchrodingerEvolution_CrossCorrelation *cross_correlation, bool imaginary)
+            : J_power_inetgarls(J)
+            , cross_correlation(cross_correlation)
+            , imaginary(imaginary) {}
+    // private:
     std::map<int, JpowerIntegrals *> J_power_inetgarls;
     SchrodingerEvolution_CrossCorrelation *cross_correlation;
-    
+
     /// @brief If False then the calculator is using th real part of integrals, otherwise - the imaginary part.
     bool imaginary;
 
     void calcNode(MWNode<2> &node) override;
 
-    //template <int T>
+    // template <int T>
     void applyCcc(MWNode<2> &node);
-    //template <int T> void applyCcc(MWNode<2> &node, CrossCorrelationCache<T> &ccc);
+    // template <int T> void applyCcc(MWNode<2> &node, CrossCorrelationCache<T> &ccc);
 };
 
 } // namespace mrcpp
diff --git a/src/treebuilders/TreeBuilder.cpp b/src/treebuilders/TreeBuilder.cpp
index 225b55cb5..ba0e5d973 100644
--- a/src/treebuilders/TreeBuilder.cpp
+++ b/src/treebuilders/TreeBuilder.cpp
@@ -35,8 +35,7 @@
 
 namespace mrcpp {
 
-template <int D, typename T>
-void TreeBuilder<D, T>::build(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator, TreeAdaptor<D, T> &adaptor, int maxIter) const {
+template <int D, typename T> void TreeBuilder<D, T>::build(MWTree<D, T> &tree, TreeCalculator<D, T> &calculator, TreeAdaptor<D, T> &adaptor, int maxIter) const {
     Timer calc_t(false), split_t(false), norm_t(false);
     println(10, " == Building tree");
 
@@ -170,7 +169,6 @@ template class TreeBuilder<1, double>;
 template class TreeBuilder<2, double>;
 template class TreeBuilder<3, double>;
 
-
 template class TreeBuilder<1, ComplexDouble>;
 template class TreeBuilder<2, ComplexDouble>;
 template class TreeBuilder<3, ComplexDouble>;
diff --git a/src/treebuilders/TreeCalculator.h b/src/treebuilders/TreeCalculator.h
index 4e171d91c..1bf41f407 100644
--- a/src/treebuilders/TreeCalculator.h
+++ b/src/treebuilders/TreeCalculator.h
@@ -29,7 +29,7 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class TreeCalculator {
+template <int D, typename T> class TreeCalculator {
 public:
     TreeCalculator() = default;
     virtual ~TreeCalculator() = default;
@@ -42,7 +42,7 @@ namespace mrcpp {
             int nNodes = nodeVec.size();
 #pragma omp for schedule(guided)
             for (int n = 0; n < nNodes; n++) {
-	      MWNode<D, T> &node = *nodeVec[n];
+                MWNode<D, T> &node = *nodeVec[n];
                 calcNode(node);
             }
         }
@@ -50,7 +50,7 @@ namespace mrcpp {
     }
 
 protected:
-  virtual void calcNode(MWNode<D, T> &node) = 0;
+    virtual void calcNode(MWNode<D, T> &node) = 0;
     virtual void postProcess() {}
 };
 
diff --git a/src/treebuilders/WaveletAdaptor.h b/src/treebuilders/WaveletAdaptor.h
index 759f6b7ee..829039bf4 100644
--- a/src/treebuilders/WaveletAdaptor.h
+++ b/src/treebuilders/WaveletAdaptor.h
@@ -31,18 +31,16 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class WaveletAdaptor : public TreeAdaptor<D, T> {
+template <int D, typename T> class WaveletAdaptor : public TreeAdaptor<D, T> {
 public:
     WaveletAdaptor(double pr, int ms, bool ap = false, double sf = 1.0)
-      : TreeAdaptor<D, T>(ms)
+            : TreeAdaptor<D, T>(ms)
             , absPrec(ap)
             , prec(pr)
             , splitFac(sf) {}
     ~WaveletAdaptor() override = default;
 
-    void setPrecFunction(const std::function<double(const NodeIndex<D> &idx)> &prec_func) {
-        this->precFunc = prec_func;
-    }
+    void setPrecFunction(const std::function<double(const NodeIndex<D> &idx)> &prec_func) { this->precFunc = prec_func; }
 
 protected:
     bool absPrec;
diff --git a/src/treebuilders/add.cpp b/src/treebuilders/add.cpp
index f7dfbe8c2..4ee28cff6 100644
--- a/src/treebuilders/add.cpp
+++ b/src/treebuilders/add.cpp
@@ -61,16 +61,7 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D, typename T>
-void add(double prec,
-         FunctionTree<D, T> &out,
-         T a,
-         FunctionTree<D, T> &inp_a,
-         T b,
-         FunctionTree<D, T> &inp_b,
-         int maxIter,
-         bool absPrec,
-         bool conjugate) {
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, T a, FunctionTree<D, T> &inp_a, T b, FunctionTree<D, T> &inp_b, int maxIter, bool absPrec, bool conjugate) {
     FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back(std::make_tuple(a, &inp_a));
     tmp_vec.push_back(std::make_tuple(b, &inp_b));
@@ -134,137 +125,55 @@ template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, std:
     add(prec, out, inp_vec, maxIter, absPrec, conjugate);
 }
 
-template void add<1, double>(double prec,
-                     FunctionTree<1, double> &out,
-                     double a,
-                     FunctionTree<1, double> &tree_a,
-                     double b,
-                     FunctionTree<1, double> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<2, double>(double prec,
-                     FunctionTree<2, double> &out,
-                     double a,
-                     FunctionTree<2, double> &tree_a,
-                     double b,
-                     FunctionTree<2, double> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<3, double>(double prec,
-                     FunctionTree<3, double> &out,
-                     double a,
-                     FunctionTree<3, double> &tree_a,
-                     double b,
-                     FunctionTree<3, double> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+template void
+add<1, double>(double prec, FunctionTree<1, double> &out, double a, FunctionTree<1, double> &tree_a, double b, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool conjugate);
+template void
+add<2, double>(double prec, FunctionTree<2, double> &out, double a, FunctionTree<2, double> &tree_a, double b, FunctionTree<2, double> &tree_b, int maxIter, bool absPrec, bool conjugate);
+template void
+add<3, double>(double prec, FunctionTree<3, double> &out, double a, FunctionTree<3, double> &tree_a, double b, FunctionTree<3, double> &tree_b, int maxIter, bool absPrec, bool conjugate);
 
-template void add<1, double>(double prec,
-                     FunctionTree<1, double> &out,
-                     FunctionTreeVector<1, double> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<2, double>(double prec,
-                     FunctionTree<2, double> &out,
-                     FunctionTreeVector<2, double> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<3, double>(double prec,
-                     FunctionTree<3, double> &out,
-		     FunctionTreeVector<3, double> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+template void add<1, double>(double prec, FunctionTree<1, double> &out, FunctionTreeVector<1, double> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<2, double>(double prec, FunctionTree<2, double> &out, FunctionTreeVector<2, double> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<3, double>(double prec, FunctionTree<3, double> &out, FunctionTreeVector<3, double> &inp, int maxIter, bool absPrec, bool conjugate);
 
-template void add<1, double>(double prec,
-                     FunctionTree<1, double> &out,
-                     std::vector<FunctionTree<1, double> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<2, double>(double prec,
-                     FunctionTree<2, double> &out,
-                     std::vector<FunctionTree<2, double> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<3, double>(double prec,
-                     FunctionTree<3, double> &out,
-                     std::vector<FunctionTree<3, double> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-
-
-template void add<1, ComplexDouble>(double prec,
-                     FunctionTree<1, ComplexDouble> &out,
-                     ComplexDouble a,
-                     FunctionTree<1, ComplexDouble> &tree_a,
-                     ComplexDouble b,
-                     FunctionTree<1, ComplexDouble> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<2, ComplexDouble>(double prec,
-                     FunctionTree<2, ComplexDouble> &out,
-                     ComplexDouble a,
-                     FunctionTree<2, ComplexDouble> &tree_a,
-                     ComplexDouble b,
-                     FunctionTree<2, ComplexDouble> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<3, ComplexDouble>(double prec,
-                     FunctionTree<3, ComplexDouble> &out,
-                     ComplexDouble a,
-                     FunctionTree<3, ComplexDouble> &tree_a,
-                     ComplexDouble b,
-                     FunctionTree<3, ComplexDouble> &tree_b,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-
-template void add<1, ComplexDouble>(double prec,
-                     FunctionTree<1, ComplexDouble> &out,
-                     FunctionTreeVector<1, ComplexDouble> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<2, ComplexDouble>(double prec,
-                     FunctionTree<2, ComplexDouble> &out,
-                     FunctionTreeVector<2, ComplexDouble> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
-template void add<3, ComplexDouble>(double prec,
-                     FunctionTree<3, ComplexDouble> &out,
-                     FunctionTreeVector<3, ComplexDouble> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+template void add<1, double>(double prec, FunctionTree<1, double> &out, std::vector<FunctionTree<1, double> *> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<2, double>(double prec, FunctionTree<2, double> &out, std::vector<FunctionTree<2, double> *> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<3, double>(double prec, FunctionTree<3, double> &out, std::vector<FunctionTree<3, double> *> &inp, int maxIter, bool absPrec, bool conjugate);
 
 template void add<1, ComplexDouble>(double prec,
-                     FunctionTree<1, ComplexDouble> &out,
-                     std::vector<FunctionTree<1, ComplexDouble> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+                                    FunctionTree<1, ComplexDouble> &out,
+                                    ComplexDouble a,
+                                    FunctionTree<1, ComplexDouble> &tree_a,
+                                    ComplexDouble b,
+                                    FunctionTree<1, ComplexDouble> &tree_b,
+                                    int maxIter,
+                                    bool absPrec,
+                                    bool conjugate);
 template void add<2, ComplexDouble>(double prec,
-                     FunctionTree<2, ComplexDouble> &out,
-                     std::vector<FunctionTree<2, ComplexDouble> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+                                    FunctionTree<2, ComplexDouble> &out,
+                                    ComplexDouble a,
+                                    FunctionTree<2, ComplexDouble> &tree_a,
+                                    ComplexDouble b,
+                                    FunctionTree<2, ComplexDouble> &tree_b,
+                                    int maxIter,
+                                    bool absPrec,
+                                    bool conjugate);
 template void add<3, ComplexDouble>(double prec,
-                     FunctionTree<3, ComplexDouble> &out,
-                     std::vector<FunctionTree<3, ComplexDouble> *> &inp,
-                     int maxIter,
-                     bool absPrec,
-                     bool conjugate);
+                                    FunctionTree<3, ComplexDouble> &out,
+                                    ComplexDouble a,
+                                    FunctionTree<3, ComplexDouble> &tree_a,
+                                    ComplexDouble b,
+                                    FunctionTree<3, ComplexDouble> &tree_b,
+                                    int maxIter,
+                                    bool absPrec,
+                                    bool conjugate);
+
+template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTreeVector<1, ComplexDouble> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTreeVector<2, ComplexDouble> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTreeVector<3, ComplexDouble> &inp, int maxIter, bool absPrec, bool conjugate);
+
+template void add<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, std::vector<FunctionTree<1, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, std::vector<FunctionTree<2, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool conjugate);
+template void add<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, std::vector<FunctionTree<3, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool conjugate);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/add.h b/src/treebuilders/add.h
index a4e35c47b..1f94dbc9d 100644
--- a/src/treebuilders/add.h
+++ b/src/treebuilders/add.h
@@ -25,29 +25,11 @@
 
 #pragma once
 
-
 namespace mrcpp {
 
-template <int D, typename T> void add(double prec,
-                          FunctionTree<D, T> &out,
-                          T a,
-                          FunctionTree<D, T> &tree_a,
-                          T b,
-                          FunctionTree<D, T> &tree_b,
-                          int maxIter = -1,
-                          bool absPrec = false,
-                          bool conjugate = false);
-template <int D, typename T> void add(double prec,
-                          FunctionTree<D, T> &out,
-                          FunctionTreeVector<D, T> &inp,
-                          int maxIter = -1,
-                          bool absPrec = false,
-                          bool conjugate = false);
-template <int D, typename T> void add(double prec,
-                          FunctionTree<D, T> &out,
-                          std::vector<FunctionTree<D, T> *> &inp,
-                          int maxIter = -1,
-                          bool absPrec = false,
-                          bool conjugate = false);
+template <int D, typename T>
+void add(double prec, FunctionTree<D, T> &out, T a, FunctionTree<D, T> &tree_a, T b, FunctionTree<D, T> &tree_b, int maxIter = -1, bool absPrec = false, bool conjugate = false);
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false);
+template <int D, typename T> void add(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 2c6e4aaa2..205684cdf 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -91,7 +91,6 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
-
 /** @brief Application of MW integral convolution operator on Four component
  *
  * @param[in] prec: Build precision of output function
@@ -119,30 +118,26 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
 template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
 
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
-    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
-        for (int ocomp = 0; ocomp < 4; ocomp++){
+    if (metric == nullptr) { metric = defaultMetric; }
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
                 if (inp.isreal()) {
                     if (out.CompD[ocomp] == nullptr) out.alloc_comp(ocomp);
                     apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompD[ocomp]->rescale(metric[icomp][ocomp].real());
-                    }
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); }
                 } else {
                     if (out.CompC[ocomp] == nullptr) out.alloc_comp(ocomp);
                     apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                    }
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                 }
             }
         }
@@ -263,30 +258,27 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
     print::separator(10, ' ');
 }
 
-template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
+template <int D, typename T>
+void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
-    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
-        for (int ocomp = 0; ocomp < 4; ocomp++){
+    if (metric == nullptr) { metric = defaultMetric; }
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
                 if (inp.isreal()) {
                     apply(prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], precTrees[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
-                    }
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); } // real-valued tree: scale by the real part, as the other real branches do
                 } else {
                     apply(prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], precTrees[icomp], maxIter, absPrec);
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                    }
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                 }
             }
         }
@@ -321,29 +313,25 @@ template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T
 
 template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp.Comp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
+    if (metric == nullptr) { metric = defaultMetric; }
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] != nullptr) {
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
                     if (inp.isreal()) {
                         apply_on_unit_cell<D>(false, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); } // real-valued tree: scale by the real part of the metric entry
                     } else {
                         apply_on_unit_cell<D>(false, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                     }
                 }
             }
@@ -377,32 +365,27 @@ template <int D, typename T> void apply_near_field(double prec, FunctionTree<D,
     apply_on_unit_cell<D>(true, prec, out, oper, inp, maxIter, absPrec);
 }
 
-
 template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
-    for (int icomp = 0; icomp < 4; icomp++){
-        if (inp.Comp[icomp]!=nullptr) {
-            for (int ocomp = 0; ocomp < 4; ocomp++){
+    if (metric == nullptr) { metric = defaultMetric; }
+    for (int icomp = 0; icomp < 4; icomp++) {
+        if (inp.Comp[icomp] != nullptr) {
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
                     if (inp.isreal()) {
                         apply_on_unit_cell<D>(true, prec, *out.CompD[ocomp], oper, *inp.CompD[icomp], maxIter, absPrec);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            out.CompD[ocomp]->rescale(metric[icomp][ocomp]);
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompD[ocomp]->rescale(metric[icomp][ocomp].real()); } // real-valued tree: scale by the real part of the metric entry
                     } else {
                         apply_on_unit_cell<D>(true, prec, *out.CompC[ocomp], oper, *inp.CompC[icomp], maxIter, absPrec);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                     }
                 }
             }
@@ -462,39 +445,39 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
 }
 
 template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
-    //TODO: sums and not only each components independently
+    // TODO: sums and not only each components independently
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
-    for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
-        for (int ocomp = 0; ocomp < 4; ocomp++){
+    if (metric == nullptr) { metric = defaultMetric; }
+    for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
+        for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                if (inp.isreal() and (std::imag(metric[icomp][ocomp]) < MachinePrec or inp.Ncomp() == 1) ) {
+                if (inp.isreal() and (std::abs(std::imag(metric[icomp][ocomp])) < MachinePrec or inp.Ncomp() == 1)) { // test |Im|: a negative imaginary part must not be classified as a real factor
                     apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
                     if (std::norm(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                        if(std::imag(metric[icomp][ocomp]) < MachinePrec) out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
-                        else out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; //TODO: multiply c1 in rescale?
+                        if (std::abs(std::imag(metric[icomp][ocomp])) < MachinePrec) // real factor only when |Im| is negligible; otherwise fall through to the complex c1 coefficient
+                            out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
+                        else
+                            out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; // TODO: multiply c1 in rescale?
                     }
                     out.func_ptr->isreal = 1;
                 } else {
-                    if (inp.isreal() ){
+                    if (inp.isreal()) {
                         apply(*out.CompD[ocomp], oper, *inp.CompD[icomp], dir);
                         out.CompD[icomp]->CopyTreeToComplex(out.CompC[ocomp]);
                         out.func_ptr->isreal = 0;
                         out.func_ptr->iscomplex = 1;
-                  } else {
+                    } else {
                         apply(*out.CompC[ocomp], oper, *inp.CompC[icomp], dir);
                     }
-                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                         out.CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                   }
+                    if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { out.CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                 }
             }
         }
@@ -523,40 +506,36 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
     return out;
 }
 
-std::vector<CompFunction<3>*> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble  metric[4][4]) {
-    std::vector<CompFunction<3>*> out;
+std::vector<CompFunction<3> *> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble metric[4][4]) {
+    std::vector<CompFunction<3> *> out;
     ComplexDouble defaultMetric[4][4];
-    for (int i=0; i<4; i++){
-        for (int j=0; j<4; j++){
-            if (i==j) defaultMetric[i][j] = 1.0;
-            else defaultMetric[i][j] = 0.0;
+    for (int i = 0; i < 4; i++) {
+        for (int j = 0; j < 4; j++) {
+            if (i == j)
+                defaultMetric[i][j] = 1.0;
+            else
+                defaultMetric[i][j] = 0.0;
         }
     }
-    if (metric == nullptr) {
-        metric = defaultMetric;
-    }
+    if (metric == nullptr) { metric = defaultMetric; }
     for (int d = 0; d < 3; d++) {
         CompFunction<3> *grad_d = new CompFunction<3>();
-        for (int icomp = 0; icomp < inp.Ncomp(); icomp++){
-            for (int ocomp = 0; ocomp < 4; ocomp++){
+        for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
+            for (int ocomp = 0; ocomp < 4; ocomp++) {
                 if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
-                    grad_d->func_ptr->Ncomp=ocomp;
+                    grad_d->func_ptr->Ncomp = ocomp;
                     if (inp.isreal()) {
                         grad_d->func_ptr->isreal = 1;
                         grad_d->func_ptr->iscomplex = 0;
                         grad_d->CompD[ocomp] = new FunctionTree<3, double>(inp.CompD[0]->getMRA());
                         apply(*(grad_d->CompD[ocomp]), oper, *inp.CompD[icomp], d);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real());
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompD[ocomp]->rescale((metric[icomp][ocomp]).real()); }
                     } else {
                         grad_d->func_ptr->isreal = 0;
                         grad_d->func_ptr->iscomplex = 1;
                         grad_d->CompC[ocomp] = new FunctionTree<3, ComplexDouble>(inp.CompC[0]->getMRA());
                         apply(*(grad_d->CompC[ocomp]), oper, *inp.CompC[icomp], d);
-                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) {
-                            grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]);
-                        }
+                        if (abs(metric[icomp][ocomp] - 1.0) > MachinePrec) { grad_d->CompC[ocomp]->rescale(metric[icomp][ocomp]); }
                     }
                 }
             }
@@ -619,9 +598,12 @@ template void apply<3, double>(double prec, FunctionTree<3, double> &out, Convol
 template void apply<1>(double prec, CompFunction<1> &out, ConvolutionOperator<1> &oper, const CompFunction<1> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template void apply<2>(double prec, CompFunction<2> &out, ConvolutionOperator<2> &oper, const CompFunction<2> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template void apply<3>(double prec, CompFunction<3> &out, ConvolutionOperator<3> &oper, const CompFunction<3> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
-template void apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
-template void apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
-template void apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
+template void
+apply<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, FunctionTreeVector<1, double> &precTrees, int maxIter, bool absPrec);
+template void
+apply<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, FunctionTreeVector<2, double> &precTrees, int maxIter, bool absPrec);
+template void
+apply<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, FunctionTreeVector<3, double> &precTrees, int maxIter, bool absPrec);
 template void apply_far_field<1, double>(double prec, FunctionTree<1, double> &out, ConvolutionOperator<1> &oper, FunctionTree<1, double> &inp, int maxIter, bool absPrec);
 template void apply_far_field<2, double>(double prec, FunctionTree<2, double> &out, ConvolutionOperator<2> &oper, FunctionTree<2, double> &inp, int maxIter, bool absPrec);
 template void apply_far_field<3, double>(double prec, FunctionTree<3, double> &out, ConvolutionOperator<3> &oper, FunctionTree<3, double> &inp, int maxIter, bool absPrec);
@@ -641,15 +623,31 @@ template FunctionTreeVector<1, double> gradient<1>(DerivativeOperator<1> &oper,
 template FunctionTreeVector<2, double> gradient<2>(DerivativeOperator<2> &oper, FunctionTree<2, double> &inp);
 template FunctionTreeVector<3, double> gradient<3>(DerivativeOperator<3> &oper, FunctionTree<3, double> &inp);
 
-
-
 template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
 
-template void apply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, FunctionTreeVector<1, ComplexDouble> &precTrees, int maxIter, bool absPrec);
-template void apply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, FunctionTreeVector<2, ComplexDouble> &precTrees, int maxIter, bool absPrec);
-template void apply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, FunctionTreeVector<3, ComplexDouble> &precTrees, int maxIter, bool absPrec);
+template void apply<1, ComplexDouble>(double prec,
+                                      FunctionTree<1, ComplexDouble> &out,
+                                      ConvolutionOperator<1> &oper,
+                                      FunctionTree<1, ComplexDouble> &inp,
+                                      FunctionTreeVector<1, ComplexDouble> &precTrees,
+                                      int maxIter,
+                                      bool absPrec);
+template void apply<2, ComplexDouble>(double prec,
+                                      FunctionTree<2, ComplexDouble> &out,
+                                      ConvolutionOperator<2> &oper,
+                                      FunctionTree<2, ComplexDouble> &inp,
+                                      FunctionTreeVector<2, ComplexDouble> &precTrees,
+                                      int maxIter,
+                                      bool absPrec);
+template void apply<3, ComplexDouble>(double prec,
+                                      FunctionTree<3, ComplexDouble> &out,
+                                      ConvolutionOperator<3> &oper,
+                                      FunctionTree<3, ComplexDouble> &inp,
+                                      FunctionTreeVector<3, ComplexDouble> &precTrees,
+                                      int maxIter,
+                                      bool absPrec);
 template void apply_far_field<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, ConvolutionOperator<1> &oper, FunctionTree<1, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply_far_field<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, ConvolutionOperator<2> &oper, FunctionTree<2, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void apply_far_field<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, ConvolutionOperator<3> &oper, FunctionTree<3, ComplexDouble> &inp, int maxIter, bool absPrec);
diff --git a/src/treebuilders/complex_apply.cpp b/src/treebuilders/complex_apply.cpp
index 8b22d19f6..5cf0e3b08 100644
--- a/src/treebuilders/complex_apply.cpp
+++ b/src/treebuilders/complex_apply.cpp
@@ -24,7 +24,6 @@
  */
 
 #include "complex_apply.h"
-#include "apply.h"
 #include "ConvolutionCalculator.h"
 #include "CopyAdaptor.h"
 #include "DefaultCalculator.h"
@@ -33,6 +32,7 @@
 #include "TreeBuilder.h"
 #include "WaveletAdaptor.h"
 #include "add.h"
+#include "apply.h"
 #include "grid.h"
 #include "operators/ConvolutionOperator.h"
 #include "operators/DerivativeOperator.h"
@@ -42,7 +42,6 @@
 
 namespace mrcpp {
 
-
 /** @brief Application of MW integral convolution operator (complex version)
  *
  * @param[in] prec: Build precision of output function
@@ -58,7 +57,7 @@ namespace mrcpp {
  * - Repeat until convergence or `maxIter` is reached
  * - `prec < 0` or `maxIter = 0` means NO refinement
  * - `maxIter < 0` means no bound
- * 
+ *
  * The default is to work with relative precision
  * (stop when the wavelet coefficients are below a given (small) fraction of
  * function norm.
@@ -74,40 +73,25 @@ namespace mrcpp {
  * tree when the function is called (this grid should however be EMPTY, e.i.
  * no coefs).
  * \todo !!! Here should be given a method for greed cleaning !!!
- * 
+ *
  *
  */
-template <int D>
-void apply
-(
-    double prec, ComplexObject< FunctionTree<D> > &out,
-    ComplexObject< ConvolutionOperator<D> > &oper, ComplexObject< FunctionTree<D> > &inp,
-    int maxIter, bool absPrec
-)
-{
-    FunctionTree<D> temp1( inp.real->getMRA() );
-    FunctionTree<D> temp2( inp.real->getMRA() );
+template <int D> void apply(double prec, ComplexObject<FunctionTree<D>> &out, ComplexObject<ConvolutionOperator<D>> &oper, ComplexObject<FunctionTree<D>> &inp, int maxIter, bool absPrec) {
+    FunctionTree<D> temp1(inp.real->getMRA());
+    FunctionTree<D> temp2(inp.real->getMRA());
 
     apply(prec, temp1, *oper.real, *inp.real, maxIter, absPrec);
     apply(prec, temp2, *oper.imaginary, *inp.imaginary, maxIter, absPrec);
     add(prec, *out.real, 1.0, temp1, -1.0, temp2);
 
-    //temp1.setZero();
-    //temp2.setZero();
+    // temp1.setZero();
+    // temp2.setZero();
 
     apply(prec, temp1, *oper.imaginary, *inp.real, maxIter, absPrec);
     apply(prec, temp2, *oper.real, *inp.imaginary, maxIter, absPrec);
     add(prec, *out.imaginary, 1.0, temp1, 1.0, temp2);
 }
 
-
-template
-void apply <1>
-(
-    double prec, ComplexObject< FunctionTree<1> > &out,
-    ComplexObject< ConvolutionOperator<1> > &oper, ComplexObject< FunctionTree<1> > &inp,
-    int maxIter, bool absPrec
-);
-
+template void apply<1>(double prec, ComplexObject<FunctionTree<1>> &out, ComplexObject<ConvolutionOperator<1>> &oper, ComplexObject<FunctionTree<1>> &inp, int maxIter, bool absPrec);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/grid.cpp b/src/treebuilders/grid.cpp
index a93963477..0e7fb968b 100644
--- a/src/treebuilders/grid.cpp
+++ b/src/treebuilders/grid.cpp
@@ -109,7 +109,7 @@ template <int D> void build_grid(FunctionTree<D> &out, const GaussExp<D> &inp, i
         TreeBuilder<D> builder;
         DefaultCalculator<D> calculator;
         for (auto i = 0; i < inp.size(); i++) {
-	  AnalyticAdaptor<D> adaptor(inp.getFunc(i), maxScale);
+            AnalyticAdaptor<D> adaptor(inp.getFunc(i), maxScale);
             builder.build(out, calculator, adaptor, maxIter);
         }
     } else {
@@ -224,7 +224,6 @@ template <int D, typename T> void copy_grid(FunctionTree<D, T> &out, FunctionTre
     build_grid(out, inp);
 }
 
-
 /** @brief Build empty grid that is identical to another MW grid for every component
  *
  * @param[out] out: Output to be built
@@ -242,7 +241,7 @@ template <int D> void copy_grid(CompFunction<D> &out, CompFunction<D> &inp) {
     for (int i = 0; i < inp.Ncomp(); i++) {
         if (inp.isreal()) build_grid(*out.CompD[i], *inp.CompD[i]);
         if (inp.iscomplex()) build_grid(*out.CompC[i], *inp.CompC[i]);
-   }
+    }
 }
 
 /** @brief Clear the MW coefficients of a function representation
@@ -389,7 +388,6 @@ template int refine_grid<1, double>(FunctionTree<1, double> &out, const Represen
 template int refine_grid<2, double>(FunctionTree<2, double> &out, const RepresentableFunction<2, double> &inp);
 template int refine_grid<3, double>(FunctionTree<3, double> &out, const RepresentableFunction<3, double> &inp);
 
-
 template void build_grid<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &out, int scales);
 template void build_grid<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &out, int scales);
 template void build_grid<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &out, int scales);
diff --git a/src/treebuilders/map.cpp b/src/treebuilders/map.cpp
index ba064ca39..b363bf806 100644
--- a/src/treebuilders/map.cpp
+++ b/src/treebuilders/map.cpp
@@ -65,8 +65,7 @@ namespace mrcpp {
  * no coefs).
  *
  */
-template <int D>
-void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec) {
+template <int D> void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter, bool absPrec) {
 
     int maxScale = out.getMRA().getMaxScale();
     TreeBuilder<D, double> builder;
diff --git a/src/treebuilders/map.h b/src/treebuilders/map.h
index 4fe3cf72d..d1f86e201 100644
--- a/src/treebuilders/map.h
+++ b/src/treebuilders/map.h
@@ -30,7 +30,6 @@
 namespace mrcpp {
 template <int D, typename T> class FunctionTree;
 
-template <int D>
-void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter = -1, bool absPrec = false);
+template <int D> void map(double prec, FunctionTree<D, double> &out, FunctionTree<D, double> &inp, FMap<double, double> fmap, int maxIter = -1, bool absPrec = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index 7abefc7a3..84436d1e4 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -70,33 +70,25 @@ namespace mrcpp {
  *
  */
 template <int D, typename T>
-void multiply(double prec,
-              FunctionTree<D, T> &out,
-              T c,
-              FunctionTree<D, T> &inp_a,
-              FunctionTree<D, T> &inp_b,
-              int maxIter,
-              bool absPrec,
-              bool useMaxNorms,
-              bool conjugate ) {
+void multiply(double prec, FunctionTree<D, T> &out, T c, FunctionTree<D, T> &inp_a, FunctionTree<D, T> &inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     FunctionTreeVector<D, T> tmp_vec;
     tmp_vec.push_back({c, &inp_a});
     tmp_vec.push_back({1.0, &inp_b});
     multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate);
 }
-    /*
+/*
 template <int D> void multiply(double prec,
-                               FunctionTree<D, ComplexDouble> &out,
-                               ComplexDouble c,
-                               FunctionTree<D, ComplexDouble> &inp_a,
-                               FunctionTree<D, double> &inp_b,
-                               int maxIter = -1,
-                               bool absPrec = false,
-                               bool useMaxNorms = false) {
-    // we rather make a copy with ComplexDouble type only
-    FunctionTree<D, ComplexDouble>* inp_b_CPLX = inp_b.CopyTreeToComplex();
-    multiply(prec, out,c,inp_a,inp_b_CPLX,maxIter,absPrec,useMaxNorms);
-    } */
+                           FunctionTree<D, ComplexDouble> &out,
+                           ComplexDouble c,
+                           FunctionTree<D, ComplexDouble> &inp_a,
+                           FunctionTree<D, double> &inp_b,
+                           int maxIter = -1,
+                           bool absPrec = false,
+                           bool useMaxNorms = false) {
+// we rather make a copy with ComplexDouble type only
+FunctionTree<D, ComplexDouble>* inp_b_CPLX = inp_b.CopyTreeToComplex();
+multiply(prec, out,c,inp_a,inp_b_CPLX,maxIter,absPrec,useMaxNorms);
+} */
 
 /** @brief Multiplication of several MW function representations, adaptive grid
  *
@@ -122,14 +114,7 @@ template <int D> void multiply(double prec,
  * no coefs).
  *
  */
-template <int D, typename T>
-void multiply(double prec,
-              FunctionTree<D, T> &out,
-              FunctionTreeVector<D, T> &inp,
-              int maxIter,
-              bool absPrec,
-              bool useMaxNorms,
-              bool conjugate ) {
+template <int D, typename T> void multiply(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     for (auto i = 0; i < inp.size(); i++)
         if (out.getMRA() != get_func(inp, i).getMRA()) MSG_ABORT("Incompatible MRA");
 
@@ -163,14 +148,7 @@ void multiply(double prec,
     print::separator(10, ' ');
 }
 
-template <int D, typename T>
-void multiply(double prec,
-              FunctionTree<D, T> &out,
-              std::vector<FunctionTree<D, T> *> &inp,
-              int maxIter,
-              bool absPrec,
-              bool useMaxNorms,
-              bool conjugate ) {
+template <int D, typename T> void multiply(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     FunctionTreeVector<D, T> inp_vec;
     for (auto &t : inp) inp_vec.push_back({1.0, t});
     multiply(prec, out, inp_vec, maxIter, absPrec, useMaxNorms, conjugate);
@@ -243,8 +221,7 @@ template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, F
  * no coefs).
  *
  */
-template <int D, typename T>
-void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double p, int maxIter, bool absPrec) {
+template <int D, typename T> void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double p, int maxIter, bool absPrec) {
     if (out.getMRA() != inp.getMRA()) MSG_ABORT("Incompatible MRA");
     if (inp.conjugate()) MSG_ABORT("Not implemented");
 
@@ -286,13 +263,7 @@ void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double
  * @note The length of the input vectors must be the same.
  *
  */
-template <int D, typename T>
-void dot(double prec,
-         FunctionTree<D, T> &out,
-         FunctionTreeVector<D, T> &inp_a,
-         FunctionTreeVector<D, T> &inp_b,
-         int maxIter,
-         bool absPrec) {
+template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp_a, FunctionTreeVector<D, T> &inp_b, int maxIter, bool absPrec) {
     if (inp_a.size() != inp_b.size()) MSG_ABORT("Input length mismatch");
 
     FunctionTreeVector<D, T> tmp_vec;
@@ -360,7 +331,6 @@ template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &
     return result;
 }
 
-
 /** @returns Dot product <bra|ket> of two MW function representations
  *
  * @param[in] bra: Bra side input function
@@ -409,7 +379,7 @@ template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra, Function
 }
 template <int D> ComplexDouble dot(FunctionTree<D, double> &bra, FunctionTree<D, ComplexDouble> &ket) {
     ket.setConjugate(!ket.conjugate());
-    ComplexDouble prod =  dot(ket, bra);
+    ComplexDouble prod = dot(ket, bra);
     ket.setConjugate(!ket.conjugate());
     return prod;
 }
@@ -456,117 +426,27 @@ template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra, Funct
     return result;
 }
 
-template void multiply<1, double>(double prec,
-                          FunctionTree<1, double> &out,
-                          double c,
-                          FunctionTree<1, double> &tree_a,
-                          FunctionTree<1, double> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<2, double>(double prec,
-                          FunctionTree<2, double> &out,
-                          double c,
-                          FunctionTree<2, double> &tree_a,
-                          FunctionTree<2, double> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<3, double>(double prec,
-                          FunctionTree<3, double> &out,
-                          double c,
-                          FunctionTree<3, double> &tree_a,
-                          FunctionTree<3, double> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<1, double>(double prec,
-                          FunctionTree<1, double> &out,
-                          FunctionTreeVector<1, double> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<2, double>(double prec,
-                          FunctionTree<2, double> &out,
-                          FunctionTreeVector<2, double> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<3, double>(double prec,
-                          FunctionTree<3, double> &out,
-                          FunctionTreeVector<3, double> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<1, double>(double prec,
-                          FunctionTree<1, double> &out,
-                          std::vector<FunctionTree<1, double> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<2, double>(double prec,
-                          FunctionTree<2, double> &out,
-                          std::vector<FunctionTree<2, double> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<3, double>(double prec,
-                          FunctionTree<3, double> &out,
-                          std::vector<FunctionTree<3, double> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void power<1, double>(double prec,
-                       FunctionTree<1, double> &out,
-                       FunctionTree<1, double> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void power<2, double>(double prec,
-                       FunctionTree<2, double> &out,
-                       FunctionTree<2, double> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void power<3, double>(double prec,
-                       FunctionTree<3, double> &out,
-                       FunctionTree<3, double> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void square<1, double>(double prec,
-                        FunctionTree<1, double> &out,
-                        FunctionTree<1, double> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
-template void square<2, double>(double prec,
-                        FunctionTree<2, double> &out,
-                        FunctionTree<2, double> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
-template void square<3, double>(double prec,
-                        FunctionTree<3, double> &out,
-                        FunctionTree<3, double> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
-template void dot<1, double>(double prec,
-                     FunctionTree<1, double> &out,
-                     FunctionTreeVector<1, double> &inp_a,
-                     FunctionTreeVector<1, double> &inp_b,
-                     int maxIter,
-                     bool absPrec);
-template void dot<2, double>(double prec,
-                     FunctionTree<2, double> &out,
-                     FunctionTreeVector<2, double> &inp_a,
-                     FunctionTreeVector<2, double> &inp_b,
-                     int maxIter,
-                     bool absPrec);
-template void dot<3, double>(double prec,
-                     FunctionTree<3, double> &out,
-                     FunctionTreeVector<3, double> &inp_a,
-                     FunctionTreeVector<3, double> &inp_b,
-                     int maxIter,
-                     bool absPrec);
+template void
+multiply<1, double>(double prec, FunctionTree<1, double> &out, double c, FunctionTree<1, double> &tree_a, FunctionTree<1, double> &tree_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void
+multiply<2, double>(double prec, FunctionTree<2, double> &out, double c, FunctionTree<2, double> &tree_a, FunctionTree<2, double> &tree_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void
+multiply<3, double>(double prec, FunctionTree<3, double> &out, double c, FunctionTree<3, double> &tree_a, FunctionTree<3, double> &tree_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<1, double>(double prec, FunctionTree<1, double> &out, FunctionTreeVector<1, double> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<2, double>(double prec, FunctionTree<2, double> &out, FunctionTreeVector<2, double> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<3, double>(double prec, FunctionTree<3, double> &out, FunctionTreeVector<3, double> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<1, double>(double prec, FunctionTree<1, double> &out, std::vector<FunctionTree<1, double> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<2, double>(double prec, FunctionTree<2, double> &out, std::vector<FunctionTree<2, double> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<3, double>(double prec, FunctionTree<3, double> &out, std::vector<FunctionTree<3, double> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void power<1, double>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &tree, double pow, int maxIter, bool absPrec);
+template void power<2, double>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &tree, double pow, int maxIter, bool absPrec);
+template void power<3, double>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &tree, double pow, int maxIter, bool absPrec);
+template void square<1, double>(double prec, FunctionTree<1, double> &out, FunctionTree<1, double> &tree, int maxIter, bool absPrec, bool conjugate);
+template void square<2, double>(double prec, FunctionTree<2, double> &out, FunctionTree<2, double> &tree, int maxIter, bool absPrec, bool conjugate);
+template void square<3, double>(double prec, FunctionTree<3, double> &out, FunctionTree<3, double> &tree, int maxIter, bool absPrec, bool conjugate);
+template void dot<1, double>(double prec, FunctionTree<1, double> &out, FunctionTreeVector<1, double> &inp_a, FunctionTreeVector<1, double> &inp_b, int maxIter, bool absPrec);
+template void dot<2, double>(double prec, FunctionTree<2, double> &out, FunctionTreeVector<2, double> &inp_a, FunctionTreeVector<2, double> &inp_b, int maxIter, bool absPrec);
+template void dot<3, double>(double prec, FunctionTree<3, double> &out, FunctionTreeVector<3, double> &inp_a, FunctionTreeVector<3, double> &inp_b, int maxIter, bool absPrec);
 
 template double dot<1, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket);
 template double dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket);
@@ -576,121 +456,66 @@ template double node_norm_dot<1, double>(FunctionTree<1, double> &bra, FunctionT
 template double node_norm_dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket, bool exact);
 template double node_norm_dot<3, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket, bool exact);
 
-
-
-
-
-template void multiply<1, ComplexDouble>(double prec,
-                          FunctionTree<1, ComplexDouble> &out,
-                          ComplexDouble c,
-                          FunctionTree<1, ComplexDouble> &tree_a,
-                          FunctionTree<1, ComplexDouble> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<2, ComplexDouble>(double prec,
-                          FunctionTree<2, ComplexDouble> &out,
-                          ComplexDouble c,
-                          FunctionTree<2, ComplexDouble> &tree_a,
-                          FunctionTree<2, ComplexDouble> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<3, ComplexDouble>(double prec,
-                          FunctionTree<3, ComplexDouble> &out,
-                          ComplexDouble c,
-                          FunctionTree<3, ComplexDouble> &tree_a,
-                          FunctionTree<3, ComplexDouble> &tree_b,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
 template void multiply<1, ComplexDouble>(double prec,
-                          FunctionTree<1, ComplexDouble> &out,
-                          FunctionTreeVector<1, ComplexDouble> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
+                                         FunctionTree<1, ComplexDouble> &out,
+                                         ComplexDouble c,
+                                         FunctionTree<1, ComplexDouble> &tree_a,
+                                         FunctionTree<1, ComplexDouble> &tree_b,
+                                         int maxIter,
+                                         bool absPrec,
+                                         bool useMaxNorms,
+                                         bool conjugate);
 template void multiply<2, ComplexDouble>(double prec,
-                          FunctionTree<2, ComplexDouble> &out,
-                          FunctionTreeVector<2, ComplexDouble> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
+                                         FunctionTree<2, ComplexDouble> &out,
+                                         ComplexDouble c,
+                                         FunctionTree<2, ComplexDouble> &tree_a,
+                                         FunctionTree<2, ComplexDouble> &tree_b,
+                                         int maxIter,
+                                         bool absPrec,
+                                         bool useMaxNorms,
+                                         bool conjugate);
 template void multiply<3, ComplexDouble>(double prec,
-                          FunctionTree<3, ComplexDouble> &out,
-                          FunctionTreeVector<3, ComplexDouble> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<1, ComplexDouble>(double prec,
-                          FunctionTree<1, ComplexDouble> &out,
-                          std::vector<FunctionTree<1, ComplexDouble> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<2, ComplexDouble>(double prec,
-                          FunctionTree<2, ComplexDouble> &out,
-                          std::vector<FunctionTree<2, ComplexDouble> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void multiply<3, ComplexDouble>(double prec,
-                          FunctionTree<3, ComplexDouble> &out,
-                          std::vector<FunctionTree<3, ComplexDouble> *> &inp,
-                          int maxIter,
-                          bool absPrec,
-                          bool useMaxNorms, bool conjugate);
-template void power<1, ComplexDouble>(double prec,
-                       FunctionTree<1, ComplexDouble> &out,
-                       FunctionTree<1, ComplexDouble> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void power<2, ComplexDouble>(double prec,
-                       FunctionTree<2, ComplexDouble> &out,
-                       FunctionTree<2, ComplexDouble> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void power<3, ComplexDouble>(double prec,
-                       FunctionTree<3, ComplexDouble> &out,
-                       FunctionTree<3, ComplexDouble> &tree,
-                       double pow,
-                       int maxIter,
-                       bool absPrec);
-template void square<1, ComplexDouble>(double prec,
-                        FunctionTree<1, ComplexDouble> &out,
-                        FunctionTree<1, ComplexDouble> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
-template void square<2, ComplexDouble>(double prec,
-                        FunctionTree<2, ComplexDouble> &out,
-                        FunctionTree<2, ComplexDouble> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
-template void square<3, ComplexDouble>(double prec,
-                        FunctionTree<3, ComplexDouble> &out,
-                        FunctionTree<3, ComplexDouble> &tree,
-                        int maxIter,
-                        bool absPrec, bool conjugate);
+                                         FunctionTree<3, ComplexDouble> &out,
+                                         ComplexDouble c,
+                                         FunctionTree<3, ComplexDouble> &tree_a,
+                                         FunctionTree<3, ComplexDouble> &tree_b,
+                                         int maxIter,
+                                         bool absPrec,
+                                         bool useMaxNorms,
+                                         bool conjugate);
+template void multiply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTreeVector<1, ComplexDouble> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTreeVector<2, ComplexDouble> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void multiply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTreeVector<3, ComplexDouble> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void
+multiply<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, std::vector<FunctionTree<1, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void
+multiply<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, std::vector<FunctionTree<2, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void
+multiply<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, std::vector<FunctionTree<3, ComplexDouble> *> &inp, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate);
+template void power<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &tree, double pow, int maxIter, bool absPrec);
+template void power<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &tree, double pow, int maxIter, bool absPrec);
+template void power<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &tree, double pow, int maxIter, bool absPrec);
+template void square<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, FunctionTree<1, ComplexDouble> &tree, int maxIter, bool absPrec, bool conjugate);
+template void square<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, FunctionTree<2, ComplexDouble> &tree, int maxIter, bool absPrec, bool conjugate);
+template void square<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, FunctionTree<3, ComplexDouble> &tree, int maxIter, bool absPrec, bool conjugate);
 template void dot<1, ComplexDouble>(double prec,
-                     FunctionTree<1, ComplexDouble> &out,
-                     FunctionTreeVector<1, ComplexDouble> &inp_a,
-                     FunctionTreeVector<1, ComplexDouble> &inp_b,
-                     int maxIter,
-                     bool absPrec);
+                                    FunctionTree<1, ComplexDouble> &out,
+                                    FunctionTreeVector<1, ComplexDouble> &inp_a,
+                                    FunctionTreeVector<1, ComplexDouble> &inp_b,
+                                    int maxIter,
+                                    bool absPrec);
 template void dot<2, ComplexDouble>(double prec,
-                     FunctionTree<2, ComplexDouble> &out,
-                     FunctionTreeVector<2, ComplexDouble> &inp_a,
-                     FunctionTreeVector<2, ComplexDouble> &inp_b,
-                     int maxIter,
-                     bool absPrec);
+                                    FunctionTree<2, ComplexDouble> &out,
+                                    FunctionTreeVector<2, ComplexDouble> &inp_a,
+                                    FunctionTreeVector<2, ComplexDouble> &inp_b,
+                                    int maxIter,
+                                    bool absPrec);
 template void dot<3, ComplexDouble>(double prec,
-                     FunctionTree<3, ComplexDouble> &out,
-                     FunctionTreeVector<3, ComplexDouble> &inp_a,
-                     FunctionTreeVector<3, ComplexDouble> &inp_b,
-                     int maxIter,
-                     bool absPrec);
+                                    FunctionTree<3, ComplexDouble> &out,
+                                    FunctionTreeVector<3, ComplexDouble> &inp_a,
+                                    FunctionTreeVector<3, ComplexDouble> &inp_b,
+                                    int maxIter,
+                                    bool absPrec);
 
 template ComplexDouble dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket);
 template ComplexDouble dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket);
@@ -702,5 +527,4 @@ template double node_norm_dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &
 template double node_norm_dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket, bool exact);
 template double node_norm_dot<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket, bool exact);
 
-
 } // namespace mrcpp
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index d6b33f2d5..65cd44787 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -31,62 +31,34 @@ namespace mrcpp {
 template <int D, typename T> class RepresentableFunction;
 template <int D, typename T> class FunctionTree;
 
-template <int D, typename T> void dot(double prec,
-                          FunctionTree<D, T> &out,
-                          FunctionTreeVector<D, T> &inp_a,
-                          FunctionTreeVector<D, T> &inp_b,
-                          int maxIter = -1,
-                          bool absPrec = false);
+template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp_a, FunctionTreeVector<D, T> &inp_b, int maxIter = -1, bool absPrec = false);
 
-template <int D, typename T> T dot(FunctionTree<D, T> &bra,
-                            FunctionTree<D, T> &ket);
+template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket);
 
-template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra,
-                            FunctionTree<D, double> &ket);
-template <int D> ComplexDouble dot(FunctionTree<D, double> &bra,
-                            FunctionTree<D, ComplexDouble> &ket);
+template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra, FunctionTree<D, double> &ket);
+template <int D> ComplexDouble dot(FunctionTree<D, double> &bra, FunctionTree<D, ComplexDouble> &ket);
 
-template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra,
-                                      FunctionTree<D, T> &ket,
-                                      bool exact = false);
+template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket, bool exact = false);
 
-template <int D, typename T> void multiply(double prec,
-                               FunctionTree<D, T> &out,
-                               T c,
-                               FunctionTree<D, T> &inp_a,
-                               FunctionTree<D, T> &inp_b,
-                               int maxIter = -1,
-                               bool absPrec = false,
-                               bool useMaxNorms = false,
-                               bool conjugate = false );
+template <int D, typename T>
+void multiply(double prec,
+              FunctionTree<D, T> &out,
+              T c,
+              FunctionTree<D, T> &inp_a,
+              FunctionTree<D, T> &inp_b,
+              int maxIter = -1,
+              bool absPrec = false,
+              bool useMaxNorms = false,
+              bool conjugate = false);
 
-template <int D, typename T> void multiply(double prec,
-                               FunctionTree<D, T> &out,
-                               std::vector<FunctionTree<D, T> *> &inp,
-                               int maxIter = -1,
-                               bool absPrec = false,
-                               bool useMaxNorms = false,
-                               bool conjugate = false );
+template <int D, typename T>
+void multiply(double prec, FunctionTree<D, T> &out, std::vector<FunctionTree<D, T> *> &inp, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 
-template <int D, typename T> void multiply(double prec,
-                               FunctionTree<D, T> &out,
-                               FunctionTreeVector<D, T> &inp,
-                               int maxIter = -1,
-                               bool absPrec = false,
-                               bool useMaxNorms = false,
-                               bool conjugate = false );
+template <int D, typename T>
+void multiply(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 
-template <int D, typename T> void power(double prec,
-                            FunctionTree<D, T> &out,
-                            FunctionTree<D, T> &inp,
-                            double p,
-                            int maxIter = -1,
-                            bool absPrec = false );
+template <int D, typename T> void power(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, double p, int maxIter = -1, bool absPrec = false);
 
-template <int D, typename T> void square(double prec,
-                             FunctionTree<D, T> &out,
-                             FunctionTree<D, T> &inp,
-                             int maxIter = -1,
-                             bool absPrec = false, bool conjugate = false);
+template <int D, typename T> void square(double prec, FunctionTree<D, T> &out, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false, bool conjugate = false);
 
 } // namespace mrcpp
diff --git a/src/treebuilders/project.cpp b/src/treebuilders/project.cpp
index 95360b901..7eea89416 100644
--- a/src/treebuilders/project.cpp
+++ b/src/treebuilders/project.cpp
@@ -137,7 +137,6 @@ template void project<1, double>(double prec, FunctionTreeVector<1, double> &out
 template void project<2, double>(double prec, FunctionTreeVector<2, double> &out, std::vector<std::function<double(const Coord<2> &r)>> inp, int maxIter, bool absPrec);
 template void project<3, double>(double prec, FunctionTreeVector<3, double> &out, std::vector<std::function<double(const Coord<3> &r)>> inp, int maxIter, bool absPrec);
 
-
 template void project<1, ComplexDouble>(double prec, FunctionTree<1, ComplexDouble> &out, RepresentableFunction<1, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void project<2, ComplexDouble>(double prec, FunctionTree<2, ComplexDouble> &out, RepresentableFunction<2, ComplexDouble> &inp, int maxIter, bool absPrec);
 template void project<3, ComplexDouble>(double prec, FunctionTree<3, ComplexDouble> &out, RepresentableFunction<3, ComplexDouble> &inp, int maxIter, bool absPrec);
@@ -149,5 +148,4 @@ template void project<1, ComplexDouble>(double prec, FunctionTreeVector<1, Compl
 template void project<2, ComplexDouble>(double prec, FunctionTreeVector<2, ComplexDouble> &out, std::vector<std::function<ComplexDouble(const Coord<2> &r)>> inp, int maxIter, bool absPrec);
 template void project<3, ComplexDouble>(double prec, FunctionTreeVector<3, ComplexDouble> &out, std::vector<std::function<ComplexDouble(const Coord<3> &r)>> inp, int maxIter, bool absPrec);
 
-
 } // namespace mrcpp
diff --git a/src/trees/BandWidth.cpp b/src/trees/BandWidth.cpp
index 530d738b8..a79814d2f 100644
--- a/src/trees/BandWidth.cpp
+++ b/src/trees/BandWidth.cpp
@@ -44,7 +44,6 @@ void BandWidth::setWidth(int depth, int index, int wd) {
     if (wd > this->widths(depth, 4)) { this->widths(depth, 4) = wd; }
 }
 
-
 std::ostream &BandWidth::print(std::ostream &o) const {
     o << "  *BandWidths:" << std::endl;
     o << "   n      T   C   B   A  |  max " << std::endl;
diff --git a/src/trees/BandWidth.h b/src/trees/BandWidth.h
index 85a2d5a43..b4ee49e8d 100644
--- a/src/trees/BandWidth.h
+++ b/src/trees/BandWidth.h
@@ -51,7 +51,7 @@ class BandWidth final {
     int getMaxWidth(int depth) const { return (depth > getDepth()) ? -1 : this->widths(depth, 4); }
     int getWidth(int depth, int index) const { return (depth > getDepth()) ? -1 : this->widths(depth, index); }
     void setWidth(int depth, int index, int wd);
-    
+
     friend std::ostream &operator<<(std::ostream &o, const BandWidth &bw) { return bw.print(o); }
 
 private:
diff --git a/src/trees/CornerOperatorTree.cpp b/src/trees/CornerOperatorTree.cpp
index 9b7ecb24b..6de235dd3 100644
--- a/src/trees/CornerOperatorTree.cpp
+++ b/src/trees/CornerOperatorTree.cpp
@@ -24,24 +24,23 @@
  */
 
 #include "CornerOperatorTree.h"
+#include "BandWidth.h"
 #include "OperatorNode.h"
 #include "utils/Printer.h"
-#include "BandWidth.h"
 
 using namespace Eigen;
 
 namespace mrcpp {
 
-
 /** @brief Calculates band widths of the non-standard form matrices.
  *
  * @param[in] prec: Precision used for thresholding
- * 
+ *
  * @details It is starting from \f$ l = 2^n \f$ and updating the band width value each time we encounter
  * considerable value while keeping decreasing down to \f$ l = 0 \f$, that stands for the distance to the diagonal.
  * This procedure is repeated for each matrix \f$ A, B \f$ and \f$ C \f$.
- * 
- */ 
+ *
+ */
 void CornerOperatorTree::calcBandWidth(double prec) {
     if (this->bandWidth == nullptr) clearBandWidth();
     this->bandWidth = new BandWidth(getDepth());
@@ -50,11 +49,10 @@ void CornerOperatorTree::calcBandWidth(double prec) {
     getMaxTranslations(max_transl);
 
     if (prec < 0.0) prec = this->normPrec;
-    double thrs = std::max(MachinePrec, prec / 10.0); //should be enough due to oscillating behaviour of corner matrix elements (it's affected by polynomial order)
-    
-    for (int depth = 0; depth < this->getDepth(); depth++)
-    {
-        int l = (1<<depth) - 1;
+    double thrs = std::max(MachinePrec, prec / 10.0); // should be enough due to oscillating behaviour of corner matrix elements (it's affected by polynomial order)
+
+    for (int depth = 0; depth < this->getDepth(); depth++) {
+        int l = (1 << depth) - 1;
         this->bandWidth->setWidth(depth, 0, l);
         bool done = false;
 
@@ -62,7 +60,7 @@ void CornerOperatorTree::calcBandWidth(double prec) {
             done = true;
             MWNode<2> *node = findNode(NodeIndex<2>(depth, {l, 0}));
             for (int k = 1; k < 4; k++) {
-                if ( (node != nullptr) && (node->getComponentNorm(k) > thrs)) {
+                if ((node != nullptr) && (node->getComponentNorm(k) > thrs)) {
                     this->bandWidth->setWidth(depth, k, l);
                     done = false;
                 }
@@ -73,20 +71,17 @@ void CornerOperatorTree::calcBandWidth(double prec) {
     println(100, "\nOperator BandWidth" << *this->bandWidth);
 }
 
-
 /** @brief Checks if the distance to diagonal is lesser than the operator band width.
  *
  * @param[in] oTransl: distance to diagonal
  * @param[in] o_depth: scaling order
  * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$.
- * 
- * @returns True if \b oTransl is outside of the corner band (close to diagonal) and False otherwise. 
- * 
- */ 
-bool CornerOperatorTree::isOutsideBand(int oTransl, int o_depth, int idx)
-{
+ *
+ * @returns True if \b oTransl is outside of the corner band (close to diagonal) and False otherwise.
+ *
+ */
+bool CornerOperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) {
     return abs(oTransl) < this->bandWidth->getWidth(o_depth, idx);
 }
 
-
 } // namespace mrcpp
diff --git a/src/trees/CornerOperatorTree.h b/src/trees/CornerOperatorTree.h
index 06f6f6136..0ac2ad5bd 100644
--- a/src/trees/CornerOperatorTree.h
+++ b/src/trees/CornerOperatorTree.h
@@ -29,7 +29,6 @@
 
 namespace mrcpp {
 
-
 /** @class CornerOperatorTree
  *
  * @brief Special case of OperatorTree class
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index 56648fb7b..db0005aa6 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -169,15 +169,17 @@ template <int D, typename T> T FunctionNode<D, T>::integrateValues() const {
     Eigen::Matrix<T, Eigen::Dynamic, 1> coefs;
     this->getCoefs(coefs);
     int ncoefs = coefs.size();
-    int ncoefChild = ncoefs/(1<<D);
+    int ncoefChild = ncoefs / (1 << D);
     T cc[ncoefChild];
     // factorize out the children
-    for (int i = 0; i < ncoefChild; i++)cc[i]=coefs[i];
-    for (int j = 1; j < (1<<D); j++) for (int i = 0; i < ncoefChild; i++)cc[i]+=coefs[j*ncoefChild+i];
+    for (int i = 0; i < ncoefChild; i++) cc[i] = coefs[i];
+    for (int j = 1; j < (1 << D); j++)
+        for (int i = 0; i < ncoefChild; i++) cc[i] += coefs[j * ncoefChild + i];
 
     int nc = 0;
     T sum = 0.0;
-    if (D > 3) MSG_ABORT("Not Implemented")
+    if (D > 3)
+        MSG_ABORT("Not Implemented")
     else if (D == 3) {
         for (int i = 0; i < qOrder; i++) {
             T sumj = 0.0;
@@ -188,18 +190,21 @@ template <int D, typename T> T FunctionNode<D, T>::integrateValues() const {
             }
             sum += sumj * weights[i];
         }
-    } else if (D==2) {
+    } else if (D == 2) {
         for (int j = 0; j < qOrder; j++) {
-                T sumk = 0.0;
-                for (int k = 0; k < qOrder; k++) sumk += cc[nc++] * weights[k];
-                sum += sumk * weights[j];
+            T sumk = 0.0;
+            for (int k = 0; k < qOrder; k++) sumk += cc[nc++] * weights[k];
+            sum += sumk * weights[j];
         }
-    } else if (D==1) for (int k = 0; k < qOrder; k++) sum += cc[nc++] * weights[k];
-
-    int n = D * (this->getScale() + 1) ; // NB: one extra scale
-    int two_n = (1<<abs(n)); // 2**n;
-    if(n>0)sum/=two_n;
-    else sum*=two_n;
+    } else if (D == 1)
+        for (int k = 0; k < qOrder; k++) sum += cc[nc++] * weights[k];
+
+    int n = D * (this->getScale() + 1); // NB: one extra scale
+    int two_n = (1 << abs(n));          // 2**n;
+    if (n > 0)
+        sum /= two_n;
+    else
+        sum *= two_n;
     return sum;
 }
 
@@ -212,7 +217,7 @@ template <int D, typename T> void FunctionNode<D, T>::setValues(const Matrix<T,
     this->calcNorms();
 }
 
-  template <int D, typename T> void FunctionNode<D, T>::getValues(Matrix<T , Eigen::Dynamic, 1 > &vec) {
+template <int D, typename T> void FunctionNode<D, T>::getValues(Matrix<T, Eigen::Dynamic, 1> &vec) {
     if (this->isGenNode()) {
         MWNode<D, T> copy(*this);
         vec = Eigen::Matrix<T, Eigen::Dynamic, 1>::Zero(copy.getNCoefs());
@@ -411,7 +416,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D> double dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
+template <int D> double dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
@@ -428,7 +433,6 @@ template <> void FunctionNode<3>::reCompress() {
 #endif
 }
 
-
 /** Inner product of the functions represented by the scaling basis of the nodes.
  *
  * Integrates the product of the functions represented by the scaling basis on
@@ -437,7 +441,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
@@ -447,14 +451,14 @@ template <> void FunctionNode<3>::reCompress() {
     int size = bra.getKp1_d();
     ComplexDouble result = 0.0;
     // note that bra is conjugated by default
-    if (bra.getMWTree().conjugate()){
-        if (ket.getMWTree().conjugate()){
+    if (bra.getMWTree().conjugate()) {
+        if (ket.getMWTree().conjugate()) {
             for (int i = 0; i < size; i++) result += a[i] * std::conj(b[i]);
         } else {
             for (int i = 0; i < size; i++) result += a[i] * b[i];
         }
     } else {
-        if (ket.getMWTree().conjugate()){
+        if (ket.getMWTree().conjugate()) {
             for (int i = 0; i < size; i++) result += std::conj(a[i]) * std::conj(b[i]);
         } else {
             for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
@@ -471,7 +475,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
     assert(bra.hasCoefs());
     assert(ket.hasCoefs());
 
@@ -481,7 +485,7 @@ template <> void FunctionNode<3>::reCompress() {
     int size = bra.getKp1_d();
     ComplexDouble result = 0.0;
     // note that bra is conjugated by default
-    if (bra.getMWTree().conjugate()){
+    if (bra.getMWTree().conjugate()) {
         for (int i = 0; i < size; i++) result += a[i] * b[i];
     } else {
         for (int i = 0; i < size; i++) result += std::conj(a[i]) * b[i];
@@ -497,8 +501,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D>
-    double dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
+template <int D> double dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket) {
     if (bra.isGenNode() or ket.isGenNode()) return 0.0;
 
     assert(bra.hasCoefs());
@@ -518,7 +521,6 @@ template <> void FunctionNode<3>::reCompress() {
 #endif
 }
 
-
 /** Inner product of the functions represented by the wavelet basis of the nodes.
  *
  * Integrates the product of the functions represented by the wavelet basis on
@@ -527,7 +529,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket) {
     if (bra.isGenNode() or ket.isGenNode()) return 0.0;
 
     assert(bra.hasCoefs());
@@ -539,14 +541,14 @@ template <> void FunctionNode<3>::reCompress() {
     int start = bra.getKp1_d();
     int size = (bra.getTDim() - 1) * start;
     ComplexDouble result = 0.0;
-    if (bra.getMWTree().conjugate()){
-        if (ket.getMWTree().conjugate()){
+    if (bra.getMWTree().conjugate()) {
+        if (ket.getMWTree().conjugate()) {
             for (int i = 0; i < size; i++) result += a[start + i] * std::conj(b[start + i]);
         } else {
             for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
         }
     } else {
-        if (ket.getMWTree().conjugate()){
+        if (ket.getMWTree().conjugate()) {
             for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * std::conj(b[start + i]);
         } else {
             for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
@@ -563,7 +565,7 @@ template <> void FunctionNode<3>::reCompress() {
  * coefficient vectors. Assumes the nodes have identical support.
  * NB: will take conjugate of bra in case of complex values.
  */
-    template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket) {
     if (bra.isGenNode() or ket.isGenNode()) return 0.0;
 
     assert(bra.hasCoefs());
@@ -575,7 +577,7 @@ template <> void FunctionNode<3>::reCompress() {
     int start = bra.getKp1_d();
     int size = (bra.getTDim() - 1) * start;
     ComplexDouble result = 0.0;
-    if (bra.getMWTree().conjugate()){
+    if (bra.getMWTree().conjugate()) {
         for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
     } else {
         for (int i = 0; i < size; i++) result += std::conj(a[start + i]) * b[start + i];
diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h
index a985008a4..d1f7c3639 100644
--- a/src/trees/FunctionNode.h
+++ b/src/trees/FunctionNode.h
@@ -49,7 +49,7 @@ template <int D, typename T> class FunctionNode final : public MWNode<D, T> {
 
     T integrate() const;
 
-    void setValues(const Eigen::Matrix<T , Eigen::Dynamic, 1> &vec);
+    void setValues(const Eigen::Matrix<T, Eigen::Dynamic, 1> &vec);
     void getValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &vec);
     void getAbsCoefs(T *absCoefs);
 
@@ -79,19 +79,13 @@ template <int D, typename T> class FunctionNode final : public MWNode<D, T> {
     T integrateInterpolating() const;
     T integrateValues() const;
 };
-template <int D>
-double dot_scaling (const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
-template <int D>
-double dot_wavelet(const FunctionNode<D, double > &bra, const FunctionNode<D, double > &ket);
-
-template <int D>
-ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
-template <int D>
-ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
-
-template <int D>
-ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
-template <int D>
-ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
+template <int D> double dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket);
+template <int D> double dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, double> &ket);
+
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, ComplexDouble> &ket);
+
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
 
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 0d7c1174c..6cf245f2e 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -30,13 +30,13 @@
 #include "FunctionNode.h"
 #include "NodeAllocator.h"
 
+#include "treebuilders/grid.h"
 #include "utils/Bank.h"
 #include "utils/Printer.h"
 #include "utils/Timer.h"
 #include "utils/mpi_utils.h"
 #include "utils/periodic_utils.h"
 #include "utils/tree_utils.h"
-#include "treebuilders/grid.h"
 
 using namespace Eigen;
 
@@ -104,10 +104,9 @@ template <int D, typename T> void FunctionTree<D, T>::allocRootNodes() {
 
 // FunctionTree destructor
 template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
-    if (this->getNNodes()>0) this->deleteRootNodes();
+    if (this->getNNodes() > 0) this->deleteRootNodes();
 }
 
-
 /** @brief Read a previously stusing MADNESS conventions for n, l and index order.ored tree assuming text/ASCII format,
  *   in a representation
  * @param[in] file: File name
@@ -116,37 +115,39 @@ template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
 template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::string &file) {
     std::ifstream in(file);
     int NDIM, k;
-    in>>NDIM;
+    in >> NDIM;
     if (NDIM != D) NOT_IMPLEMENTED_ABORT;
     double coord[D][2];
     for (int d = 0; d < D; d++) in >> coord[d][0] >> coord[d][1];
 
     int p = 1;
-    int rscale = this->getRootScale(); //root scale of target MRA (MRChem) . NB: negative
+    int rscale = this->getRootScale(); // root scale of target MRA (MRChem). NB: negative
     for (int i = rscale; i < 0; i++) p *= 2;
-    int L = p; //NB for now we assume the world as a cube going from -L to +L and L is a power of 2
+    int L = p; // NB for now we assume the world as a cube going from -L to +L and L is a power of 2
     // We require that the world box size is identical and a power of 2
     double TXT_thres = 1.0e-14; // threshold for differences in scaling factors
     for (int d = 0; d < D; d++) {
-        if (std::abs(coord[d][0] + L) > TXT_thres) std::cout<<coord[d][0]<<" "<<L<<std::endl;;
+        if (std::abs(coord[d][0] + L) > TXT_thres) std::cout << coord[d][0] << " " << L << std::endl;
+        ;
         if (std::abs(coord[d][0] + L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
-        if (std::abs(coord[d][1] - L) > TXT_thres) std::cout<<coord[d][1]<<" "<<L<<std::endl;;
+        if (std::abs(coord[d][1] - L) > TXT_thres) std::cout << coord[d][1] << " " << L << std::endl;
+        ;
         if (std::abs(coord[d][1] - L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
     }
 
     int nChildren = 1;
-    for (int d=0; d<D; d++) nChildren *= 2;
+    for (int d = 0; d < D; d++) nChildren *= 2;
 
-    int nmax = 0; //deppeset scale in TXT
-    in>>k;
+    int nmax = 0; // deepest scale in TXT
+    in >> k;
     if (k != this->getKp1()) NOT_IMPLEMENTED_ABORT;
-    k--; //MRChem defines k as highest polynomial order. MADNESS as number of polynomials
+    k--; // MRChem defines k as highest polynomial order. MADNESS as number of polynomials
 
     int ncoefs = 1; // number of coefficents for one single node (not a full MRChem MWnode which stores 2**D of them)
-    for (int i = 0; i < D; i++) ncoefs *= k+1;
+    for (int i = 0; i < D; i++) ncoefs *= k + 1;
 
     std::vector<std::vector<MWNode<D, T> *>> NodeTable(50); // to store all the nodes pointers
-    std::map<int,int> mp; // to store the number of children stored in each parent node
+    std::map<int, int> mp;                                  // to store the number of children stored in each parent node
     // MRChem and MADNESS do not use the same indices order for the qudrature points
     // We read MADNESS convention (note that mapMRC[mapMRC[i]]=i for all i)
     std::vector<int> mapMRC; // mapping vector
@@ -157,11 +158,9 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     if (D < 2) ky = 0;
     int kp1 = k + 1;
     // MADNESS: zyx and i=k,k-1,k-2... MRChem: xyz, i=0,1,2,3 ...
-    for (int x = kx; x >= 0; x--){
-        for (int y = ky; y >= 0; y--){
-            for (int z = kz; z >= 0; z--){
-                mapMRC.push_back(z*kp1*kp1 + y*kp1 + x);
-            }
+    for (int x = kx; x >= 0; x--) {
+        for (int y = ky; y >= 0; y--) {
+            for (int z = kz; z >= 0; z--) { mapMRC.push_back(z * kp1 * kp1 + y * kp1 + x); }
         }
     }
 
@@ -173,58 +172,58 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     this->clearEndNodeTable();
 
     int nread; // number of nodes to read
-    in>>nread;
+    in >> nread;
     while (nread-- > 0) {
         // NB: MRChem stores quadrature points values in the PARENT node. 2**D nodes are stored in the same parent
-        int n; // TXT scale
+        int n;    // TXT scale
         int n_in; // MRChem scale
         in >> n_in;
-        n = n_in + rscale - 1; //MRChem does not define root scale as zero.
+        n = n_in + rscale - 1; // MRChem does not define root scale as zero.
 
         std::array<int, D> l_in; // translation index TXT
-        std::array<int, D> l; // translation index MRChem
-        std::array<int, D> lp; // translation index MRChem, parent
+        std::array<int, D> l;    // translation index MRChem
+        std::array<int, D> lp;   // translation index MRChem, parent
 
         for (int i = 0; i < D; i++) in >> l_in[i];
 
-        //MRChem defines smallest l as -(2**n)*L , where -L is smallest world coordinate.
-        //note that root scale has 2**D nodes (if range is -L,L)
-        for (int i=0; i<D; i++) {
-            l[i] = l_in[i] - std::pow(2,n)*L;
-            lp[i] = l_in[i]/2 - std::pow(2,n-1)*L; //for parent
+        // MRChem defines smallest l as -(2**n)*L , where -L is smallest world coordinate.
+        // note that root scale has 2**D nodes (if range is -L,L)
+        for (int i = 0; i < D; i++) {
+            l[i] = l_in[i] - std::pow(2, n) * L;
+            lp[i] = l_in[i] / 2 - std::pow(2, n - 1) * L; // for parent
         }
-        NodeIndex<D> idx_p(n-1, lp); // index of parent node
+        NodeIndex<D> idx_p(n - 1, lp); // index of parent node
         MWNode<D, T> *node = &this->getNode(idx_p, true);
         // note that node is not necesssarily an endnode, but they children are always endnodes
         // must find to which child of the parent node it corresponds
         int c_ix = 0; // child index in the parent
         int p = 1;
         for (int i = 0; i < D; i++) {
-            if (abs(l[i])%2 == 1)c_ix += p;
+            if (abs(l[i]) % 2 == 1) c_ix += p;
             p *= 2;
         }
         T *values = node->getCoefs();
-        if(mp[node->getSerialIx()]==0){
-            //init to zero
+        if (mp[node->getSerialIx()] == 0) {
+            // init to zero
             node->zeroCoefs();
             if (not node->isRootNode()) {
-                //also set siblings to zero if not set yet
+                // also set siblings to zero if not set yet
                 MWNode<D, T> *parent = &node->getMWParent();
                 for (int cIdx = 0; cIdx < nChildren; cIdx++) {
                     if (mp[parent->getMWChild(cIdx).getSerialIx()] == 0) parent->getMWChild(cIdx).zeroCoefs();
                 }
             }
         }
-        values += c_ix * ncoefs; //repoint to the right child position (ncoefs is for one child only)
+        values += c_ix * ncoefs;                                  // repoint to the right child position (ncoefs is for one child only)
         for (int i = 0; i < ncoefs; i++) in >> values[mapMRC[i]]; // the indice i is mapped
-        mp[node->getSerialIx()]++; //counts the number of children included
-        nmax = std::max(nmax, n_in); //deepest scale in TXT
+        mp[node->getSerialIx()]++;                                // counts the number of children included
+        nmax = std::max(nmax, n_in);                              // deepest scale in TXT
         if (mp[node->getSerialIx()] == 1) NodeTable[n_in].push_back(node);
     }
     in.close();
     // transform all nodes from quadrature point values to scaling coefficients
     for (int n = nmax; n > -1; n--) {
-        for (int i = 0; i < NodeTable[n].size(); i++){
+        for (int i = 0; i < NodeTable[n].size(); i++) {
             MWNode<D, T> *node = NodeTable[n][i];
             node->cvTransform(Backward);
             node->calcNorms();
@@ -234,11 +233,11 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
 
     // Transform into scaling and wavelets, starting by leaf nodes and copying scaling into parents
     for (int n = nmax; n > -1; n--) {
-        for (int i = 0; i < NodeTable[n].size(); i++){
+        for (int i = 0; i < NodeTable[n].size(); i++) {
             MWNode<D, T> *node = NodeTable[n][i];
-            if (mp[node->getSerialIx()] == nChildren ){
-                //node complete: transform into scaling and wavelets
-                if (node->isEndNode()){
+            if (mp[node->getSerialIx()] == nChildren) {
+                // node complete: transform into scaling and wavelets
+                if (node->isEndNode()) {
                     node->mwTransform(Compression);
                     node->setHasCoefs();
                     node->calcNorms();
@@ -246,16 +245,16 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
                 } else {
                     // MRCPP requires that all nodes that have no children are end nodes
                     // and all nodes are groups of 2**D siblings
-                    T* pcoefs = node->getCoefs(); // parent coefficients
+                    T *pcoefs = node->getCoefs(); // parent coefficients
                     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
                         MWNode<D, T> *cnode = &node->getMWChild(cIdx);
                         if (mp[cnode->getSerialIx()] != nChildren) {
                             // This child is not defined. must take scaling from parent
-                            if (mp[cnode->getSerialIx()] > 0) std::cout<<"accounting error "<<std::endl;
-                            T* ccoefs = cnode->getCoefs(); // child coefficients
-                            for (int j = 0; j< ncoefs; j++)  ccoefs[j] = pcoefs[j + cIdx*ncoefs];
-                            for (int j = ncoefs; j< ncoefs*nChildren; j++)  ccoefs[j] = 0.0; // the remainder are set to zero
-                            this->endNodeTable.push_back(cnode); // add to the list of nodes
+                            if (mp[cnode->getSerialIx()] > 0) std::cout << "accounting error " << std::endl;
+                            T *ccoefs = cnode->getCoefs(); // child coefficients
+                            for (int j = 0; j < ncoefs; j++) ccoefs[j] = pcoefs[j + cIdx * ncoefs];
+                            for (int j = ncoefs; j < ncoefs * nChildren; j++) ccoefs[j] = 0.0; // the remainder are set to zero
+                            this->endNodeTable.push_back(cnode);                               // add to the list of nodes
                             cnode->setHasCoefs();
                             cnode->calcNorms();
                         }
@@ -264,24 +263,24 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
                     node->setHasCoefs();
                     node->calcNorms();
                 }
-                if ( not node->isRootNode() ) {
+                if (not node->isRootNode()) {
                     // and copy the new scaling parts into parent
                     MWNode<D, T> *parent = &node->getMWParent();
                     // check if parent exist already, and put in the list if not.
-                    if (mp[parent->getSerialIx()] == 0) NodeTable[n-1].push_back(parent);
-                    int my_ix=-1;
+                    if (mp[parent->getSerialIx()] == 0) NodeTable[n - 1].push_back(parent);
+                    int my_ix = -1;
                     // find index among siblings
                     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
                         if (&parent->getMWChild(cIdx) == node) my_ix = cIdx;
                     }
-                    if(my_ix < 0)std::cout<<" DID NOT FIND INDEX"<<std::endl;
+                    if (my_ix < 0) std::cout << " DID NOT FIND INDEX" << std::endl;
                     T *ccoefs = node->getCoefs();
                     T *pcoefs = parent->getCoefs();
-                    for (int j = 0; j< ncoefs; j++)  pcoefs[j+my_ix*ncoefs] = ccoefs[j];
+                    for (int j = 0; j < ncoefs; j++) pcoefs[j + my_ix * ncoefs] = ccoefs[j];
                     mp[parent->getSerialIx()]++;
                 }
             } else {
-                std::cout<<" WARNING: found incomplete node "<<std::endl;
+                std::cout << " WARNING: found incomplete node " << std::endl;
             }
         }
     }
@@ -298,22 +297,20 @@ template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::str
 
     std::ofstream out(fname);
     out << std::setprecision(14);
-    out << D <<std::endl;
+    out << D << std::endl;
     int rscale = this->getRootScale();
     std::array<double, D> sf = this->getMRA().getWorldBox().getScalingFactors();
     double LMRChem = 1.0;
-    for (int i=0; i>rscale; i--) LMRChem *= 2; // we assume world is from -L to L, and a cube with 2 root nodes in each direction
-    for (int d=0; d<D; d++) {
-        out <<- sf[d]*LMRChem <<" "<< sf[d]*LMRChem << std::endl;
-    }
+    for (int i = 0; i > rscale; i--) LMRChem *= 2; // we assume world is from -L to L, and a cube with 2 root nodes in each direction
+    for (int d = 0; d < D; d++) { out << -sf[d] * LMRChem << " " << sf[d] * LMRChem << std::endl; }
     int kp1 = this->getKp1();
-    out << kp1 <<std::endl;
+    out << kp1 << std::endl;
     int ncoefs = 1;
-    for (int d = 0; d < D; d++) ncoefs*=kp1;
-    int Tdim = std::pow(2,D);
+    for (int d = 0; d < D; d++) ncoefs *= kp1;
+    int Tdim = std::pow(2, D);
 
     int nout = this->endNodeTable.size();
-    out << Tdim*nout <<std::endl; // could output only scaling coeff?
+    out << Tdim * nout << std::endl; // could output only scaling coeff?
 
     // MRChem and MADNESS do not use the same indices order for the qudrature points
     // We write into MADNESS convention (note that mapMRC[mapMRC[i]]=i for all i)
@@ -324,19 +321,17 @@ template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::str
     if (D < 3) kz = 0;
     if (D < 2) ky = 0;
     // MADNESS: zyx and i=k,k-1,k-2... MRChem: xyz, i=0,1,2,3 ...
-    for (int x = kx; x >= 0; x--){
-        for (int y = ky; y >= 0; y--){
-            for (int z = kz; z >= 0; z--){
-                mapMRC.push_back(z*kp1*kp1 + y*kp1 + x);
-            }
+    for (int x = kx; x >= 0; x--) {
+        for (int y = ky; y >= 0; y--) {
+            for (int z = kz; z >= 0; z--) { mapMRC.push_back(z * kp1 * kp1 + y * kp1 + x); }
         }
     }
 
-    int L = std::pow(2,-rscale);
+    int L = std::pow(2, -rscale);
     int count = -1;
-    while (++count<nout) {
+    while (++count < nout) {
         std::array<int, D> l;
-        NodeIndex<D> idx=this->endNodeTable[count]->getNodeIndex();
+        NodeIndex<D> idx = this->endNodeTable[count]->getNodeIndex();
         MWNode<D, T> *node = &(this->getNode(idx, false));
         T *values = node->getCoefs();
         int n = idx.getScale();
@@ -345,21 +340,20 @@ template <int D, typename T> void FunctionTree<D, T>::saveTreeTXT(const std::str
         // we write for each children nodes separately
         for (int i = 0; i < D; i++) {
             // l in interval [0, max], while in MRCPP it is defined in [-max/2, max/2-1]
-            l[i] = 2 * (idx.getTranslation(i) + std::pow(2,n)*L); //first child
+            l[i] = 2 * (idx.getTranslation(i) + std::pow(2, n) * L); // first child
         }
         for (int cix = 0; cix < Tdim; cix++) {
-            out<< n-rscale+2 <<" ";// scales start at zero. NB: children are one scale larger than node
-            for (int i = 0; i < D; i++){
-                int p = (cix>>i) & 1; // shift by one for odd child indices
+            out << n - rscale + 2 << " "; // scales start at zero. NB: children are one scale larger than node
+            for (int i = 0; i < D; i++) {
+                int p = (cix >> i) & 1; // shift by one for odd child indices
                 out << l[i] + p << " ";
             }
             out << std::endl;
-            for (int i=0; i< ncoefs; i++) out<< values[cix*ncoefs + mapMRC[i]]<<" ";
+            for (int i = 0; i < ncoefs; i++) out << values[cix * ncoefs + mapMRC[i]] << " ";
             out << std::endl;
         }
     }
     out.close();
-
 }
 /** @brief Write the tree structure to disk, for later use
  * @param[in] file: File name, will get ".tree" extension
@@ -444,11 +438,10 @@ template <int D, typename T> T FunctionTree<D, T>::integrate() const {
     return jacobian * result;
 }
 
-
 /** @returns Integral of a representable function over the grid given by the tree */
-  template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M &f) {
-    //traverse tree, and treat end nodes only
-    std::vector<FunctionNode<3> *> stack;   // node from this
+template <> double FunctionTree<3, double>::integrateEndNodes(RepresentableFunction_M &f) {
+    // traverse tree, and treat end nodes only
+    std::vector<FunctionNode<3> *> stack; // node from this
     for (int i = 0; i < this->getRootBox().size(); i++) stack.push_back(&(this->getRootFuncNode(i)));
     int basis = getMRA().getScalingBasis().getScalingType();
     double result = 0.0;
@@ -458,13 +451,13 @@ template <int D, typename T> T FunctionTree<D, T>::integrate() const {
         if (Node->getNChildren() > 0) {
             for (int i = 0; i < Node->getNChildren(); i++) stack.push_back(&(Node->getFuncChild(i)));
         } else {
-            //end nodes
+            // end nodes
             Eigen::MatrixXd fmat = f.evalf(Node->nodeIndex);
             double *coefs = Node->getCoefs(); // save position of coeff, but do not use them!
             // The data in fmat is not organized so that two consecutive points are stored after each other in memory, so needs to copy before mwtransform, cannot use memory adress directly.
-            int nc=fmat.cols();
+            int nc = fmat.cols();
             double cc[nc];
-            for (int i = 0; i < nc; i++)cc[i]=fmat(0,i);
+            for (int i = 0; i < nc; i++) cc[i] = fmat(0, i);
             Node->attachCoefs(cc);
             result += Node->integrateValues();
             Node->attachCoefs(coefs); // put back original coeff
@@ -717,7 +710,7 @@ template <int D, typename T> void FunctionTree<D, T>::add_inplace(T c, FunctionT
  * function, i.e. no further grid refinement.
  *
  */
-template <int D, typename T> void FunctionTree<D, T>::absadd (T c, FunctionTree<D, T> &inp) {
+template <int D, typename T> void FunctionTree<D, T>::absadd(T c, FunctionTree<D, T> &inp) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
 #pragma omp parallel firstprivate(c) shared(inp) num_threads(mrcpp_get_num_threads())
     {
@@ -807,7 +800,7 @@ template <int D, typename T> void FunctionTree<D, T>::map(FMap<T, T> fmap) {
     this->calcSquareNorm();
 }
 
-template <int D, typename T> void FunctionTree<D, T>::getEndValues(Eigen::Matrix<T , Eigen::Dynamic, 1 > &data) {
+template <int D, typename T> void FunctionTree<D, T>::getEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     int nNodes = this->getNEndNodes();
     int nCoefs = this->getTDim() * this->getKp1_d();
@@ -823,7 +816,7 @@ template <int D, typename T> void FunctionTree<D, T>::getEndValues(Eigen::Matrix
     }
 }
 
-template <int D, typename T> void FunctionTree<D, T>::setEndValues(Eigen::Matrix<T , Eigen::Dynamic, 1 > &data) {
+template <int D, typename T> void FunctionTree<D, T>::setEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data) {
     if (this->getNGenNodes() != 0) MSG_ABORT("GenNodes not cleared");
     int nNodes = this->getNEndNodes();
     int nCoefs = this->getTDim() * this->getKp1_d();
@@ -877,20 +870,20 @@ template <int D, typename T> int FunctionTree<D, T>::crop(double prec, double sp
  * Set index -1 for nodes that are not present in refTree */
 template <int D, typename T>
 void FunctionTree<D, T>::makeCoeffVector(std::vector<T *> &coefs,
-                                      std::vector<int> &indices,
-                                      std::vector<int> &parent_indices,
-                                      std::vector<double> &scalefac,
-                                      int &max_index,
-                                      MWTree<D, double> &refTree,
-                                      std::vector<MWNode<D, double> *> *refNodes) {
+                                         std::vector<int> &indices,
+                                         std::vector<int> &parent_indices,
+                                         std::vector<double> &scalefac,
+                                         int &max_index,
+                                         MWTree<D, double> &refTree,
+                                         std::vector<MWNode<D, double> *> *refNodes) {
     coefs.clear();
     indices.clear();
     parent_indices.clear();
     max_index = 0;
     int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
     int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    std::vector<MWNode<D, double> *> refstack;  // nodes from refTree
-    std::vector<MWNode<D, T> *> thisstack; // nodes from this Tree
+    std::vector<MWNode<D, double> *> refstack; // nodes from refTree
+    std::vector<MWNode<D, T> *> thisstack;     // nodes from this Tree
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
         refstack.push_back(refTree.getRootBox().getNodes()[rIdx]);
         thisstack.push_back(this->getRootBox().getNodes()[rIdx]);
@@ -1009,8 +1002,8 @@ template <int D, typename T> void FunctionTree<D, T>::makeTreefromCoeff(MWTree<D
  *  Note that we do not use coefficients, so it does not matter what is real or complex
  */
 template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, double> &inTree) {
-    std::vector<MWNode<D, double> *> instack;   // node from inTree
-    std::vector<MWNode<D, T> *> thisstack; // node from this Tree
+    std::vector<MWNode<D, double> *> instack; // node from inTree
+    std::vector<MWNode<D, T> *> thisstack;    // node from this Tree
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < inTree.getRootBox().size(); rIdx++) {
         instack.push_back(inTree.getRootBox().getNodes()[rIdx]);
@@ -1046,11 +1039,10 @@ template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D
     }
 }
 
-
 /** Traverse tree using DFS and append same nodes as another tree, without coefficients */
 template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D, ComplexDouble> &inTree) {
-    std::vector<MWNode<D, ComplexDouble> *> instack;   // node from inTree
-    std::vector<MWNode<D, T> *> thisstack; // node from this Tree
+    std::vector<MWNode<D, ComplexDouble> *> instack; // node from inTree
+    std::vector<MWNode<D, T> *> thisstack;           // node from this Tree
     this->clearEndNodeTable();
     for (int rIdx = 0; rIdx < inTree.getRootBox().size(); rIdx++) {
         instack.push_back(inTree.getRootBox().getNodes()[rIdx]);
@@ -1086,7 +1078,6 @@ template <int D, typename T> void FunctionTree<D, T>::appendTreeNoCoeff(MWTree<D
     }
 }
 
-
 template <int D, typename T> void FunctionTree<D, T>::deleteGenerated() {
     for (int n = 0; n < this->getNEndNodes(); n++) this->getEndMWNode(n).deleteGenerated();
 }
@@ -1123,7 +1114,7 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
     int stack_p = 0;
     if (mpi::wrk_rank == 0) {
         int sizecoeff = (1 << 3) * this->getKp1_d();
-        sizecoeff *= 2; // double->ComplexDouble. Saved as twice as many doubles
+        sizecoeff *= 2;                                // double->ComplexDouble. Saved as twice as many doubles
         std::vector<MWNode<3, ComplexDouble> *> stack; // nodes from this Tree
         for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) { stack.push_back(this->getRootBox().getNodes()[rIdx]); }
         while (stack.size() > stack_p) {
@@ -1144,15 +1135,15 @@ template <> int FunctionTree<3, ComplexDouble>::saveNodesAndRmCoeff() {
  *
  * @details Exact copy without any binding between old and new tree
  */
-template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out){
+template <int D, typename T> void FunctionTree<D, T>::deep_copy(FunctionTree<D, T> *out) {
     copy_grid(*out, *this);
     copy_func(*out, *this);
 }
 
 /**  @brief New tree with only real part
  */
-template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Real(){
-    FunctionTree<D, double> *out = new FunctionTree<D, double> (this->getMRA(), this->getName());
+template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Real() {
+    FunctionTree<D, double> *out = new FunctionTree<D, double>(this->getMRA(), this->getName());
 #pragma omp parallel num_threads(mrcpp_get_num_threads())
     {
         int nNodes = this->getNEndNodes();
@@ -1173,8 +1164,8 @@ template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Real()
 
 /**  @brief New tree with only imaginary part
  */
-template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag(){
-    FunctionTree<D, double> *out = new FunctionTree<D, double> (this->getMRA(), this->getName());
+template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag() {
+    FunctionTree<D, double> *out = new FunctionTree<D, double>(this->getMRA(), this->getName());
 #pragma omp parallel num_threads(mrcpp_get_num_threads())
     {
         int nNodes = this->getNEndNodes();
@@ -1193,34 +1184,34 @@ template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag()
     return out;
 }
 
-    /*
+/*
 template<>
 void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree) {
-    //void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree, FunctionTree<3, double>* inTree) {
-    FunctionTree<3, double>* inTree = this;
-    delete outTree;
-    outTree = new FunctionTree<3, ComplexDouble> (inTree->getMRA());
-    int nChunks=inTree->getNChunks();
-    outTree->getNodeAllocator().init(nChunks, true); //also allocate coefficients
-    int Ncoefperchunk = outTree->getNodeAllocator().getCoefChunkSize()/sizeof(ComplexDouble);
-    // real and complex trees have the same Ncoefperchunk.
-    for (int iChunk = 0; iChunk < nChunks; iChunk++) {
-        MWNode<3, double> * inNode = inTree->getNodeAllocator().getNodeChunk(iChunk);
-        MWNode<3, ComplexDouble> * outNode = outTree->getNodeAllocator().getNodeChunk(iChunk);
-        //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree->getNodeAllocator().getNodeChunk(iChunk);
-        int nNodes = std::min(inTree->getNNodes(), inTree->getNodeAllocator().getMaxNodesPerChunk());
-        for (int i = 0; i < nNodes; i++) {
-            outNode[i] = *reinterpret_cast<MWNode<3, std::complex<double>>*>(&inNode[i]); // could be improved
-        }
-        ComplexDouble* Ccoefs;
-        int ncoefs = nNodes * inTree->getNodeAllocator().getNCoefs();
-        Ccoefs = outTree->getNodeAllocator().getCoefChunk(iChunk);
-        auto InCoefs = inTree->getNodeAllocator().getCoefChunk(iChunk);
-        for (int i = 0; i < ncoefs; i++) {
-            Ccoefs[i] = InCoefs[i];
-        }
+//void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree, FunctionTree<3, double>* inTree) {
+FunctionTree<3, double>* inTree = this;
+delete outTree;
+outTree = new FunctionTree<3, ComplexDouble> (inTree->getMRA());
+int nChunks=inTree->getNChunks();
+outTree->getNodeAllocator().init(nChunks, true); //also allocate coefficients
+int Ncoefperchunk = outTree->getNodeAllocator().getCoefChunkSize()/sizeof(ComplexDouble);
+// real and complex trees have the same Ncoefperchunk.
+for (int iChunk = 0; iChunk < nChunks; iChunk++) {
+    MWNode<3, double> * inNode = inTree->getNodeAllocator().getNodeChunk(iChunk);
+    MWNode<3, ComplexDouble> * outNode = outTree->getNodeAllocator().getNodeChunk(iChunk);
+    //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree->getNodeAllocator().getNodeChunk(iChunk);
+    int nNodes = std::min(inTree->getNNodes(), inTree->getNodeAllocator().getMaxNodesPerChunk());
+    for (int i = 0; i < nNodes; i++) {
+        outNode[i] = *reinterpret_cast<MWNode<3, std::complex<double>>*>(&inNode[i]); // could be improved
     }
-    outTree->getNodeAllocator().reassemble();
+    ComplexDouble* Ccoefs;
+    int ncoefs = nNodes * inTree->getNodeAllocator().getNCoefs();
+    Ccoefs = outTree->getNodeAllocator().getCoefChunk(iChunk);
+    auto InCoefs = inTree->getNodeAllocator().getCoefChunk(iChunk);
+    for (int i = 0; i < ncoefs; i++) {
+        Ccoefs[i] = InCoefs[i];
+    }
+}
+outTree->getNodeAllocator().reassemble();
 }*/
 
 /*
@@ -1228,12 +1219,11 @@ void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>*
  * Should use a deep_copy if generalized in the future.
  */
 
-template<>
-void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree) {
+template <> void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&outTree) {
     delete outTree;
-    double ref=0.0;
-    outTree = new FunctionTree<3, ComplexDouble> (this->getMRA());
-    std::vector<MWNode<3, double> *> instack;   // node from this
+    double ref = 0.0;
+    outTree = new FunctionTree<3, ComplexDouble>(this->getMRA());
+    std::vector<MWNode<3, double> *> instack;         // node from this
     std::vector<MWNode<3, ComplexDouble> *> outstack; // node from outTree
     outTree->clearEndNodeTable();
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
@@ -1249,8 +1239,8 @@ void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>*
         MWNode<3, double> *inNode = instack.back();
         instack.pop_back();
         // copy coefficients:
-        double* incoefs = inNode->getCoefs();
-        ComplexDouble* outcoefs = outNode->getCoefs();
+        double *incoefs = inNode->getCoefs();
+        ComplexDouble *outcoefs = outNode->getCoefs();
         for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
         outNode->setHasCoefs();
         outNode->calcNorms();
@@ -1269,11 +1259,11 @@ void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>*
     outTree->calcSquareNorm(true);
 }
 
-template <> void FunctionTree<2, double>::CopyTreeToComplex(FunctionTree<2, ComplexDouble>* &outTree) {
+template <> void FunctionTree<2, double>::CopyTreeToComplex(FunctionTree<2, ComplexDouble> *&outTree) {
     delete outTree;
-    double ref=0.0;
-    outTree = new FunctionTree<2, ComplexDouble> (this->getMRA());
-    std::vector<MWNode<2, double> *> instack;   // node from this
+    double ref = 0.0;
+    outTree = new FunctionTree<2, ComplexDouble>(this->getMRA());
+    std::vector<MWNode<2, double> *> instack;         // node from this
     std::vector<MWNode<2, ComplexDouble> *> outstack; // node from outTree
     outTree->clearEndNodeTable();
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
@@ -1289,8 +1279,8 @@ template <> void FunctionTree<2, double>::CopyTreeToComplex(FunctionTree<2, Comp
         MWNode<2, double> *inNode = instack.back();
         instack.pop_back();
         // copy coefficients:
-        double* incoefs = inNode->getCoefs();
-        ComplexDouble* outcoefs = outNode->getCoefs();
+        double *incoefs = inNode->getCoefs();
+        ComplexDouble *outcoefs = outNode->getCoefs();
         for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
         outNode->setHasCoefs();
         outNode->calcNorms();
@@ -1309,11 +1299,11 @@ template <> void FunctionTree<2, double>::CopyTreeToComplex(FunctionTree<2, Comp
     outTree->calcSquareNorm(true);
 }
 
-template <> void FunctionTree<1, double>::CopyTreeToComplex(FunctionTree<1, ComplexDouble>* &outTree) {
+template <> void FunctionTree<1, double>::CopyTreeToComplex(FunctionTree<1, ComplexDouble> *&outTree) {
     delete outTree;
-    double ref=0.0;
-    outTree = new FunctionTree<1, ComplexDouble> (this->getMRA());
-    std::vector<MWNode<1, double> *> instack;   // node from this
+    double ref = 0.0;
+    outTree = new FunctionTree<1, ComplexDouble>(this->getMRA());
+    std::vector<MWNode<1, double> *> instack;         // node from this
     std::vector<MWNode<1, ComplexDouble> *> outstack; // node from outTree
     outTree->clearEndNodeTable();
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
@@ -1329,8 +1319,8 @@ template <> void FunctionTree<1, double>::CopyTreeToComplex(FunctionTree<1, Comp
         MWNode<1, double> *inNode = instack.back();
         instack.pop_back();
         // copy coefficients:
-        double* incoefs = inNode->getCoefs();
-        ComplexDouble* outcoefs = outNode->getCoefs();
+        double *incoefs = inNode->getCoefs();
+        ComplexDouble *outcoefs = outNode->getCoefs();
         for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
         outNode->setHasCoefs();
         outNode->calcNorms();
@@ -1350,13 +1340,12 @@ template <> void FunctionTree<1, double>::CopyTreeToComplex(FunctionTree<1, Comp
 }
 
 // for testing
-template<>
-void FunctionTree<3, double>::CopyTreeToReal(FunctionTree<3, double>* &outTree) {
+template <> void FunctionTree<3, double>::CopyTreeToReal(FunctionTree<3, double> *&outTree) {
     delete outTree;
-    double ref=0.0;
+    double ref = 0.0;
     // FunctionTree<3, double>* inTree = this;
-    outTree = new FunctionTree<3, double> (this->getMRA());
-    std::vector<MWNode<3, double> *> instack;   // node from this
+    outTree = new FunctionTree<3, double>(this->getMRA());
+    std::vector<MWNode<3, double> *> instack;  // node from this
     std::vector<MWNode<3, double> *> outstack; // node from outTree
     outTree->clearEndNodeTable();
     for (int rIdx = 0; rIdx < this->getRootBox().size(); rIdx++) {
@@ -1372,8 +1361,8 @@ void FunctionTree<3, double>::CopyTreeToReal(FunctionTree<3, double>* &outTree)
         MWNode<3, double> *inNode = instack.back();
         instack.pop_back();
         // copy coefficients:
-        double* incoefs = inNode->getCoefs();
-        double* outcoefs = outNode->getCoefs();
+        double *incoefs = inNode->getCoefs();
+        double *outcoefs = outNode->getCoefs();
         for (int i = 0; i < ncoefs; i++) outcoefs[i] = incoefs[i];
         outNode->setHasCoefs();
         outNode->calcNorms();
diff --git a/src/trees/FunctionTree.h b/src/trees/FunctionTree.h
index 59ee8bfe4..9d976d6be 100644
--- a/src/trees/FunctionTree.h
+++ b/src/trees/FunctionTree.h
@@ -68,8 +68,8 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
 
     int getNGenNodes() const { return getGenNodeAllocator().getNNodes(); }
 
-    void getEndValues(Eigen::Matrix<T , Eigen::Dynamic, 1 > &data);
-    void setEndValues(Eigen::Matrix<T , Eigen::Dynamic, 1 > &data);
+    void getEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data);
+    void setEndValues(Eigen::Matrix<T, Eigen::Dynamic, 1> &data);
 
     void saveTree(const std::string &file);
     void saveTreeTXT(const std::string &file);
@@ -120,10 +120,10 @@ template <int D, typename T> class FunctionTree final : public MWTree<D, T>, pub
     void deep_copy(FunctionTree<D, T> *out);
     FunctionTree<D, double> *Real();
     FunctionTree<D, double> *Imag();
-    void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &out);
-    void CopyTreeToComplex(FunctionTree<2, ComplexDouble>* &out);
-    void CopyTreeToComplex(FunctionTree<1, ComplexDouble>* &out);
-    void CopyTreeToReal(FunctionTree<3, double>* &out); //for testing
+    void CopyTreeToComplex(FunctionTree<3, ComplexDouble> *&out);
+    void CopyTreeToComplex(FunctionTree<2, ComplexDouble> *&out);
+    void CopyTreeToComplex(FunctionTree<1, ComplexDouble> *&out);
+    void CopyTreeToReal(FunctionTree<3, double> *&out); // for testing
 
 protected:
     std::unique_ptr<NodeAllocator<D, T>> genNodeAllocator_p{nullptr};
diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp
index 039e91b31..c8ce03dac 100644
--- a/src/trees/MWNode.cpp
+++ b/src/trees/MWNode.cpp
@@ -45,8 +45,8 @@ namespace mrcpp {
  *
  * @details Should be used only by NodeAllocator to obtain
  *  virtual table pointers for the derived classes. */
-  template <int D, typename T>
-  MWNode<D, T>::MWNode()
+template <int D, typename T>
+MWNode<D, T>::MWNode()
         : tree(nullptr)
         , parent(nullptr)
         , nodeIndex()
@@ -163,7 +163,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  *
  * @details Recursive deallocation of a node and all its decendants
  */
-  template <int D, typename T> MWNode<D, T>::~MWNode() {
+template <int D, typename T> MWNode<D, T>::~MWNode() {
     if (this->isLooseNode()) this->freeCoefs();
     MRCPP_DESTROY_OMP_LOCK();
 }
@@ -174,7 +174,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * called (derived classes must implement their own version). This was
  * to avoid having pure virtual methods in the base class.
  */
-  template <int D, typename T> void MWNode<D, T>::dealloc() {
+template <int D, typename T> void MWNode<D, T>::dealloc() {
     NOT_REACHED_ABORT;
 }
 
@@ -184,7 +184,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * are not treated by the NodeAllocator class.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::allocCoefs(int n_blocks, int block_size) {
+template <int D, typename T> void MWNode<D, T>::allocCoefs(int n_blocks, int block_size) {
     if (this->n_coefs != 0) MSG_ABORT("n_coefs should be zero");
     if (this->isAllocated()) MSG_ABORT("Coefs already allocated");
     if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!");
@@ -202,7 +202,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * are not treated by the NodeAllocator class.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::freeCoefs() {
+template <int D, typename T> void MWNode<D, T>::freeCoefs() {
     if (not this->isLooseNode()) MSG_ABORT("Only loose nodes here!");
 
     if (this->coefs != nullptr) delete[] this->coefs;
@@ -216,7 +216,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
 
 /** @brief Printout of node coefficients
  */
-  template <int D, typename T> void MWNode<D, T>::printCoefs() const {
+template <int D, typename T> void MWNode<D, T>::printCoefs() const {
     if (not this->isAllocated()) MSG_ABORT("Node is not allocated");
     println(0, "\nMW coefs");
     int kp1_d = this->getKp1_d();
@@ -228,7 +228,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
 
 /** @brief wraps the MW coefficients into an eigen vector object
  */
-  template <int D, typename T> void MWNode<D, T>::getCoefs(Eigen::Matrix<T, Eigen::Dynamic, 1> &c) const {
+template <int D, typename T> void MWNode<D, T>::getCoefs(Eigen::Matrix<T, Eigen::Dynamic, 1> &c) const {
     if (not this->isAllocated()) MSG_ABORT("Node is not allocated");
     if (not this->hasCoefs()) MSG_ABORT("Node has no coefs");
     if (this->n_coefs == 0) MSG_ABORT("ncoefs == 0");
@@ -239,7 +239,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
 /** @brief sets all MW coefficients and the norms to zero
  *
  */
-  template <int D, typename T> void MWNode<D, T>::zeroCoefs() {
+template <int D, typename T> void MWNode<D, T>::zeroCoefs() {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated " << *this);
 
     for (int i = 0; i < this->n_coefs; i++) { this->coefs[i] = 0.0; }
@@ -249,7 +249,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
 
 /** @brief Attach a set of coefs to this node. Only used locally (the tree is not aware of this).
  */
-  template <int D, typename T> void MWNode<D, T>::attachCoefs(T *coefs) {
+template <int D, typename T> void MWNode<D, T>::attachCoefs(T *coefs) {
     this->coefs = coefs;
     this->setHasCoefs();
 }
@@ -264,7 +264,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-  template <int D, typename T> void MWNode<D, T>::setCoefBlock(int block, int block_size, const T *c) {
+template <int D, typename T> void MWNode<D, T>::setCoefBlock(int block, int block_size, const T *c) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = c[i]; }
 }
@@ -279,7 +279,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-  template <int D, typename T> void MWNode<D, T>::addCoefBlock(int block, int block_size, const T *c) {
+template <int D, typename T> void MWNode<D, T>::addCoefBlock(int block, int block_size, const T *c) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] += c[i]; }
 }
@@ -293,7 +293,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * (given scaling/wavelet in each direction). Its size is then \f$
  * (k+1)^D \f$ and the index is between 0 and \f$ 2^D-1 \f$.
  */
-  template <int D, typename T> void MWNode<D, T>::zeroCoefBlock(int block, int block_size) {
+template <int D, typename T> void MWNode<D, T>::zeroCoefBlock(int block, int block_size) {
     if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
     for (int i = 0; i < block_size; i++) { this->coefs[block * block_size + i] = 0.0; }
 }
@@ -309,7 +309,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * already be present and its memory allocated for this to work
  * properly.
  */
-  template <int D, typename T> void MWNode<D, T>::giveChildrenCoefs(bool overwrite) {
+template <int D, typename T> void MWNode<D, T>::giveChildrenCoefs(bool overwrite) {
     assert(this->isBranchNode());
     if (not this->isAllocated()) MSG_ABORT("Not allocated!");
     if (not this->hasCoefs()) MSG_ABORT("No coefficients!");
@@ -345,7 +345,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * node. The scaling coefficients of the selected child are then
  * copied/summed in the correct child node.
  */
-  template <int D, typename T> void MWNode<D, T>::giveChildCoefs(int cIdx, bool overwrite) {
+template <int D, typename T> void MWNode<D, T>::giveChildCoefs(int cIdx, bool overwrite) {
 
     MWNode<D, T> node_i = *this;
 
@@ -371,7 +371,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  *
  * \warning This routine is only used in connection with Periodic Boundary Conditions
  */
-  template <int D, typename T> void MWNode<D, T>::giveParentCoefs(bool overwrite) {
+template <int D, typename T> void MWNode<D, T>::giveParentCoefs(bool overwrite) {
     MWNode<D, T> node = *this;
     MWNode<D, T> &parent = getMWParent();
     int kp1_d = this->getKp1_d();
@@ -393,11 +393,11 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * them consecutively in the corresponding block of the parent,
  * following the usual bitwise notation.
  */
-  template <int D, typename T> void MWNode<D, T>::copyCoefsFromChildren() {
+template <int D, typename T> void MWNode<D, T>::copyCoefsFromChildren() {
     int kp1_d = this->getKp1_d();
     int nChildren = this->getTDim();
     for (int cIdx = 0; cIdx < nChildren; cIdx++) {
-      MWNode<D, T> &child = getMWChild(cIdx);
+        MWNode<D, T> &child = getMWChild(cIdx);
         if (not child.hasCoefs()) MSG_ABORT("Child has no coefs");
         setCoefBlock(cIdx, kp1_d, child.getCoefs());
     }
@@ -411,7 +411,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * them consecutively in the corresponding block of the parent,
  * following the usual bitwise notation.
  */
-  template <int D, typename T> void MWNode<D, T>::threadSafeGenChildren() {
+template <int D, typename T> void MWNode<D, T>::threadSafeGenChildren() {
     if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; }
     MRCPP_SET_OMP_LOCK();
     if (isLeafNode()) {
@@ -421,7 +421,6 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
     MRCPP_UNSET_OMP_LOCK();
 }
 
-
 /** @brief Creates scaling coefficients of children
  *
  * @details If the node is a leafNode, it takes the scaling&wavelet
@@ -430,7 +429,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * them consecutively in the corresponding block of the parent,
  * following the usual bitwise notation. The new node is permanently added to the tree.
  */
-  template <int D, typename T> void MWNode<D, T>::threadSafeCreateChildren() {
+template <int D, typename T> void MWNode<D, T>::threadSafeCreateChildren() {
     if (tree->isLocal) { NOT_IMPLEMENTED_ABORT; }
     MRCPP_SET_OMP_LOCK();
     if (isLeafNode()) {
@@ -450,7 +449,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
  * NOTE: this routine assumes a 0/1 (scaling on child 0 and 1)
  *       representation, instead of s/d (scaling and wavelet).
  */
-    template <int D, typename T> void MWNode<D, T>::cvTransform(int operation, bool firstchild) {
+template <int D, typename T> void MWNode<D, T>::cvTransform(int operation, bool firstchild) {
     int kp1 = this->getKp1();
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     int kp1_d = this->getKp1_d();
@@ -465,7 +464,7 @@ MWNode<D, T>::MWNode(const MWNode<D, T> &node, bool allocCoef, bool SetCoef)
     int nChildren = this->getTDim();
     if (firstchild) nChildren = 1;
     for (int i = 0; i < D; i++) {
-        for (int t = 0; t < nChildren ; t++) {
+        for (int t = 0; t < nChildren; t++) {
             T *out = out_vec + t * kp1_d;
             T *in = in_vec + t * kp1_d;
             math_utils::apply_filter(out, in, S, kp1, kp1_dm1, 0.0);
@@ -541,25 +540,25 @@ void MWNode<D, T>::cvTransform(int operation) {
 */
 
 /** @brief Multiwavelet transform
-  *
-  * @details Application of the filters on one node to pass from a 0/1 (scaling
-  * on child 0 and 1) representation to an s/d (scaling and
-  * wavelet) representation. Bit manipulation is used in order to
-  * determine the correct filters and whether to apply them or just
-  * pass to the next couple of indexes. The starting coefficients are
-  * preserved until the application is terminated, then they are
-  * overwritten. With minor modifications this code can also be used
-  * for the inverse mw transform (just use the transpose filters) or
-  * for the application of an operator (using A, B, C and T parts of an
-  * operator instead of G1, G0, H1, H0). This is the version where the
-  * three directions are operated one after the other. Although this
-  * is formally faster than the other algorithm, the separation of the
-  * three dimensions prevent the possibility to use the norm of the
-  * operator in order to discard a priori negligible contributions.
-  *
-  *  * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1).
-  */
-  template <int D, typename T> void MWNode<D, T>::mwTransform(int operation) {
+ *
+ * @details Application of the filters on one node to pass from a 0/1 (scaling
+ * on child 0 and 1) representation to an s/d (scaling and
+ * wavelet) representation. Bit manipulation is used in order to
+ * determine the correct filters and whether to apply them or just
+ * pass to the next couple of indexes. The starting coefficients are
+ * preserved until the application is terminated, then they are
+ * overwritten. With minor modifications this code can also be used
+ * for the inverse mw transform (just use the transpose filters) or
+ * for the application of an operator (using A, B, C and T parts of an
+ * operator instead of G1, G0, H1, H0). This is the version where the
+ * three directions are operated one after the other. Although this
+ * is formally faster than the other algorithm, the separation of the
+ * three dimensions prevent the possibility to use the norm of the
+ * operator in order to discard a priori negligible contributions.
+ *
+ *  * @param[in] operation: compression (s0,s1->s,d) or reconstruction (s,d->s0,s1).
+ */
+template <int D, typename T> void MWNode<D, T>::mwTransform(int operation) {
     int kp1 = this->getKp1();
     int kp1_dm1 = math_utils::ipow(kp1, D - 1);
     int kp1_d = this->getKp1_d();
@@ -599,19 +598,19 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief Set all norms to Undefined. */
-  template <int D, typename T> void MWNode<D, T>::clearNorms() {
+template <int D, typename T> void MWNode<D, T>::clearNorms() {
     this->squareNorm = -1.0;
     for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = -1.0; }
 }
 
 /** @brief Set all norms to zero. */
-  template <int D, typename T> void MWNode<D, T>::zeroNorms() {
+template <int D, typename T> void MWNode<D, T>::zeroNorms() {
     this->squareNorm = 0.0;
     for (int i = 0; i < this->getTDim(); i++) { this->componentNorms[i] = 0.0; }
 }
 
 /** @brief Calculate and store square norm and component norms, if allocated. */
-  template <int D, typename T> void MWNode<D, T>::calcNorms() {
+template <int D, typename T> void MWNode<D, T>::calcNorms() {
     this->squareNorm = 0.0;
     for (int i = 0; i < this->getTDim(); i++) {
         double norm_i = calcComponentNorm(i);
@@ -621,7 +620,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief Calculate and return the squared scaling norm. */
-  template <int D, typename T> double MWNode<D, T>::getScalingNorm() const {
+template <int D, typename T> double MWNode<D, T>::getScalingNorm() const {
     double sNorm = this->getComponentNorm(0);
     if (sNorm >= 0.0) {
         return sNorm * sNorm;
@@ -631,7 +630,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief Calculate and return the squared wavelet norm. */
-  template <int D, typename T> double MWNode<D, T>::getWaveletNorm() const {
+template <int D, typename T> double MWNode<D, T>::getWaveletNorm() const {
     double wNorm = 0.0;
     for (int i = 1; i < this->getTDim(); i++) {
         double norm_i = this->getComponentNorm(i);
@@ -645,7 +644,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief Calculate the norm of one component (NOT the squared norm!). */
-  template <int D, typename T> double MWNode<D, T>::calcComponentNorm(int i) const {
+template <int D, typename T> double MWNode<D, T>::calcComponentNorm(int i) const {
     if (this->isGenNode() and i != 0) return 0.0;
     assert(this->isAllocated());
     assert(this->hasCoefs());
@@ -655,18 +654,18 @@ void MWNode<D, T>::cvTransform(int operation) {
     int start = i * size;
 
     double sq_norm = 0.0;
-//#ifdef HAVE_BLAS
-//    sq_norm = cblas_ddot(size, &c[start], 1, &c[start], 1);
-//#else
+    //#ifdef HAVE_BLAS
+    //    sq_norm = cblas_ddot(size, &c[start], 1, &c[start], 1);
+    //#else
     for (int i = start; i < start + size; i++) { sq_norm += std::norm(c[i]); }
-//#endif
+    //#endif
     return std::sqrt(sq_norm);
 }
 
 /** @brief Update the coefficients of the node by a mw transform of the scaling
  * coefficients of the children.
  */
-  template <int D, typename T> void MWNode<D, T>::reCompress() {
+template <int D, typename T> void MWNode<D, T>::reCompress() {
     if (this->isGenNode()) NOT_IMPLEMENTED_ABORT;
     if (this->isBranchNode()) {
         if (not this->isAllocated()) MSG_ABORT("Coefs not allocated");
@@ -683,12 +682,12 @@ void MWNode<D, T>::cvTransform(int operation) {
  * @param[in] splitFac: factor used in the split check (larger factor means tighter threshold for finer nodes)
  * @param[in] absPrec: flag to switch from relative (false) to absolute (true) precision.
  */
-  template <int D, typename T> bool MWNode<D, T>::crop(double prec, double splitFac, bool absPrec) {
+template <int D, typename T> bool MWNode<D, T>::crop(double prec, double splitFac, bool absPrec) {
     if (this->isEndNode()) {
         return true;
     } else {
         for (int i = 0; i < this->getTDim(); i++) {
-	  MWNode<D, T> &child = *this->children[i];
+            MWNode<D, T> &child = *this->children[i];
             if (child.crop(prec, splitFac, absPrec)) {
                 if (tree_utils::split_check(*this, prec, splitFac, absPrec) == false) {
                     this->deleteChildren();
@@ -700,15 +699,15 @@ void MWNode<D, T>::cvTransform(int operation) {
     return false;
 }
 
-  template <int D, typename T> void MWNode<D, T>::createChildren(bool coefs) {
+template <int D, typename T> void MWNode<D, T>::createChildren(bool coefs) {
     NOT_REACHED_ABORT;
 }
 
-  template <int D, typename T> void MWNode<D, T>::genChildren() {
+template <int D, typename T> void MWNode<D, T>::genChildren() {
     NOT_REACHED_ABORT;
 }
 
-  template <int D, typename T> void MWNode<D, T>::genParent() {
+template <int D, typename T> void MWNode<D, T>::genParent() {
     NOT_REACHED_ABORT;
 }
 
@@ -717,11 +716,11 @@ void MWNode<D, T>::cvTransform(int operation) {
  * @details
  * Leaves node as LeafNode and children[] as null pointer.
  */
-  template <int D, typename T> void MWNode<D, T>::deleteChildren() {
+template <int D, typename T> void MWNode<D, T>::deleteChildren() {
     if (this->isLeafNode()) return;
     for (int cIdx = 0; cIdx < getTDim(); cIdx++) {
         if (this->children[cIdx] != nullptr) {
-	  MWNode<D, T> &child = getMWChild(cIdx);
+            MWNode<D, T> &child = getMWChild(cIdx);
             child.deleteChildren();
             child.dealloc();
         }
@@ -732,7 +731,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief Recursive deallocation of parent and all their forefathers. */
-  template <int D, typename T> void MWNode<D, T>::deleteParent() {
+template <int D, typename T> void MWNode<D, T>::deleteParent() {
     if (this->parent == nullptr) return;
     MWNode<D, T> &parent = getMWParent();
     parent.deleteParent();
@@ -741,9 +740,8 @@ void MWNode<D, T>::cvTransform(int operation) {
     this->parent = nullptr;
 }
 
-
 /** @brief Deallocation of all generated nodes . */
-  template <int D, typename T> void MWNode<D, T>::deleteGenerated() {
+template <int D, typename T> void MWNode<D, T>::deleteGenerated() {
     if (this->isBranchNode()) {
         if (this->isEndNode()) {
             this->deleteChildren();
@@ -754,7 +752,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief returns the coordinates of the centre of the node */
-  template <int D, typename T> Coord<D> MWNode<D, T>::getCenter() const {
+template <int D, typename T> Coord<D> MWNode<D, T>::getCenter() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -764,7 +762,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief returns the upper bounds of the D-interval defining the node  */
-  template <int D, typename T> Coord<D> MWNode<D, T>::getUpperBounds() const {
+template <int D, typename T> Coord<D> MWNode<D, T>::getUpperBounds() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -774,7 +772,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 
 /** @brief returns the lower bounds of the D-interval defining the node  */
-  template <int D, typename T> Coord<D> MWNode<D, T>::getLowerBounds() const {
+template <int D, typename T> Coord<D> MWNode<D, T>::getLowerBounds() const {
     auto two_n = std::pow(2.0, -getScale());
     auto scaling_factor = getMWTree().getMRA().getWorldBox().getScalingFactors();
     auto &l = getNodeIndex();
@@ -791,7 +789,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * to be followed at the current scale in oder to get to the requested
  * node at the final scale. The result is the index of the child needed.
  * The index is obtained by bit manipulation of of the translation indices. */
-  template <int D, typename T> int MWNode<D, T>::getChildIndex(const NodeIndex<D> &nIdx) const {
+template <int D, typename T> int MWNode<D, T>::getChildIndex(const NodeIndex<D> &nIdx) const {
     assert(isAncestor(nIdx));
     int cIdx = 0;
     int diffScale = nIdx.getScale() - getScale() - 1;
@@ -811,7 +809,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  *
  * @detailsGiven a point in space, determines which child should be followed
  * to get to the corresponding terminal node. */
-  template <int D, typename T> int MWNode<D, T>::getChildIndex(const Coord<D> &r) const {
+template <int D, typename T> int MWNode<D, T>::getChildIndex(const Coord<D> &r) const {
     assert(hasCoord(r));
     int cIdx = 0;
     double sFac = std::pow(2.0, -getScale());
@@ -836,7 +834,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * grid of quadrature points.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::getPrimitiveQuadPts(MatrixXd &pts) const {
+template <int D, typename T> void MWNode<D, T>::getPrimitiveQuadPts(MatrixXd &pts) const {
     int kp1 = this->getKp1();
     pts = MatrixXd::Zero(D, kp1);
 
@@ -861,7 +859,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * nodes.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::getPrimitiveChildPts(MatrixXd &pts) const {
+template <int D, typename T> void MWNode<D, T>::getPrimitiveChildPts(MatrixXd &pts) const {
     int kp1 = this->getKp1();
     pts = MatrixXd::Zero(D, 2 * kp1);
 
@@ -886,7 +884,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * vectors of quadrature points.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::getExpandedQuadPts(Eigen::MatrixXd &pts) const {
+template <int D, typename T> void MWNode<D, T>::getExpandedQuadPts(Eigen::MatrixXd &pts) const {
     MatrixXd prim_pts;
     getPrimitiveQuadPts(prim_pts);
 
@@ -910,7 +908,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * vectors of quadrature points.
  *
  */
-  template <int D, typename T> void MWNode<D, T>::getExpandedChildPts(MatrixXd &pts) const {
+template <int D, typename T> void MWNode<D, T>::getExpandedChildPts(MatrixXd &pts) const {
     MatrixXd prim_pts;
     getPrimitiveChildPts(prim_pts);
 
@@ -944,7 +942,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * the node does not exist, or if it is a GenNode. Recursion starts at at this
  * node and ASSUMES the requested node is in fact decending from this node.
  */
-  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) const {
+template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) const {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -968,7 +966,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * the node does not exist, or if it is a GenNode. Recursion starts at at this
  * node and ASSUMES the requested node is in fact decending from this node.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeNoGen(const NodeIndex<D> &idx) {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -994,7 +992,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) const {
+template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) const {
     if (getDepth() == depth or this->isEndNode()) { return this; }
     int cIdx = getChildIndex(r);
     assert(this->children[cIdx] != nullptr);
@@ -1013,7 +1011,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const Coord<D> &r, int depth) {
     if (getDepth() == depth or this->isEndNode()) { return this; }
     int cIdx = getChildIndex(r);
     assert(this->children[cIdx] != nullptr);
@@ -1031,7 +1029,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-  template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) const {
+template <int D, typename T> const MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) const {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -1057,7 +1055,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * this node and ASSUMES the requested node is in fact decending from
  * this node.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNodeOrEndNode(const NodeIndex<D> &idx) {
     if (getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return this;
@@ -1082,7 +1080,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * that does not exist. Recursion starts at this node and ASSUMES the
  * requested node is in fact decending from this node.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const Coord<D> &r, int depth) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const Coord<D> &r, int depth) {
     if (depth < 0) MSG_ABORT("Invalid argument");
 
     if (getDepth() == depth) { return this; }
@@ -1104,22 +1102,22 @@ void MWNode<D, T>::cvTransform(int operation) {
  * node is in fact descending from this node.
  * If create = true, the nodes are permanently added to the tree.
  */
-    template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const NodeIndex<D> &idx, bool create) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveNode(const NodeIndex<D> &idx, bool create) {
     if (getScale() == idx.getScale()) { // we're done
         if (tree->isLocal) {
-	   NOT_IMPLEMENTED_ABORT;
+            NOT_IMPLEMENTED_ABORT;
             // has to fetch coeff in Bank. NOT USED YET
-            //int ncoefs = (1 << D) * this->getKp1_d();
-            //coefs = new double[ncoefs]; // TODO must be cleaned at some stage
-            //coefs = new double[ncoefs]; // TODO must be cleaned at some stage
-            //tree->getNodeCoeff(idx, coefs);
+            // int ncoefs = (1 << D) * this->getKp1_d();
+            // coefs = new double[ncoefs]; // TODO must be cleaned at some stage
+            // coefs = new double[ncoefs]; // TODO must be cleaned at some stage
+            // tree->getNodeCoeff(idx, coefs);
         }
         assert(getNodeIndex() == idx);
         return this;
     }
 
     assert(isAncestor(idx));
-    if  (create) {
+    if (create) {
         threadSafeCreateChildren();
     } else {
         threadSafeGenChildren();
@@ -1141,7 +1139,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * does not exist. Recursion starts at this node and ASSUMES the requested
  * node is in fact related to this node.
  */
-  template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveParent(const NodeIndex<D> &idx) {
+template <int D, typename T> MWNode<D, T> *MWNode<D, T>::retrieveParent(const NodeIndex<D> &idx) {
     if (getScale() < idx.getScale()) MSG_ABORT("Scale error")
     if (getScale() == idx.getScale()) return this;
     if (this->parent == nullptr) {
@@ -1160,7 +1158,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  * found, do not generate any new node, but rather give the value of the norm
  * assuming the function is uniformly distributed within the node.
  */
-  template <int D, typename T> double MWNode<D, T>::getNodeNorm(const NodeIndex<D> &idx) const {
+template <int D, typename T> double MWNode<D, T>::getNodeNorm(const NodeIndex<D> &idx) const {
     if (this->getScale() == idx.getScale()) { // we're done
         assert(getNodeIndex() == idx);
         return std::sqrt(this->squareNorm);
@@ -1178,7 +1176,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  *
  * @param[in] r: point coordinates
  */
-  template <int D, typename T> bool MWNode<D, T>::hasCoord(const Coord<D> &r) const {
+template <int D, typename T> bool MWNode<D, T>::hasCoord(const Coord<D> &r) const {
     double sFac = std::pow(2.0, -getScale());
     const NodeIndex<D> &l = getNodeIndex();
     //    println(1, "[" << r[0] << "," << r[1] << "," << r[2] << "]");
@@ -1196,7 +1194,7 @@ void MWNode<D, T>::cvTransform(int operation) {
 
 /** Testing if nodes are compatible wrt NodeIndex and Tree (order, rootScale,
  * relPrec, etc). */
-  template <int D, typename T> bool MWNode<D, T>::isCompatible(const MWNode<D, T> &node) {
+template <int D, typename T> bool MWNode<D, T>::isCompatible(const MWNode<D, T> &node) {
     NOT_IMPLEMENTED_ABORT;
     //    if (nodeIndex != node.nodeIndex) {
     //        println(0, "nodeIndex mismatch" << std::endl);
@@ -1214,7 +1212,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  *
  * @param[in] idx: the NodeIndex of the requested node
  */
-  template <int D, typename T> bool MWNode<D, T>::isAncestor(const NodeIndex<D> &idx) const {
+template <int D, typename T> bool MWNode<D, T>::isAncestor(const NodeIndex<D> &idx) const {
     int relScale = idx.getScale() - getScale();
     if (relScale < 0) return false;
     const NodeIndex<D> &l = getNodeIndex();
@@ -1225,7 +1223,7 @@ void MWNode<D, T>::cvTransform(int operation) {
     return true;
 }
 
-  template <int D, typename T> bool MWNode<D, T>::isDecendant(const NodeIndex<D> &idx) const {
+template <int D, typename T> bool MWNode<D, T>::isDecendant(const NodeIndex<D> &idx) const {
     NOT_IMPLEMENTED_ABORT;
 }
 
@@ -1233,7 +1231,7 @@ void MWNode<D, T>::cvTransform(int operation) {
  *
  * @param[in] o: the output stream
  */
-  template <int D, typename T> std::ostream &MWNode<D, T>::print(std::ostream &o) const {
+template <int D, typename T> std::ostream &MWNode<D, T>::print(std::ostream &o) const {
     std::string flags = "       ";
     o << getNodeIndex();
     if (isRootNode()) flags[0] = 'R';
@@ -1264,14 +1262,14 @@ void MWNode<D, T>::cvTransform(int operation) {
  * normalization is such that a constant function gives constant value,
  * i.e. *not* same normalization as a squareNorm
  */
-  template <int D, typename T> void MWNode<D, T>::setMaxSquareNorm() {
+template <int D, typename T> void MWNode<D, T>::setMaxSquareNorm() {
     auto n = this->getScale();
     this->maxWSquareNorm = calcScaledWSquareNorm();
     this->maxSquareNorm = calcScaledSquareNorm();
 
     if (not this->isEndNode()) {
         for (int i = 0; i < this->getTDim(); i++) {
-	  MWNode<D, T> &child = *this->children[i];
+            MWNode<D, T> &child = *this->children[i];
             child.setMaxSquareNorm();
             this->maxSquareNorm = std::max(this->maxSquareNorm, child.maxSquareNorm);
             this->maxWSquareNorm = std::max(this->maxWSquareNorm, child.maxWSquareNorm);
@@ -1280,23 +1278,23 @@ void MWNode<D, T>::cvTransform(int operation) {
 }
 /** @brief recursively reset maxSquaredNorm and maxWSquareNorm of parent and descendants to value -1
  */
-  template <int D, typename T> void MWNode<D, T>::resetMaxSquareNorm() {
+template <int D, typename T> void MWNode<D, T>::resetMaxSquareNorm() {
     auto n = this->getScale();
     this->maxSquareNorm = -1.0;
     this->maxWSquareNorm = -1.0;
     if (not this->isEndNode()) {
         for (int i = 0; i < this->getTDim(); i++) {
-	  MWNode<D, T> &child = *this->children[i];
+            MWNode<D, T> &child = *this->children[i];
             child.resetMaxSquareNorm();
         }
     }
 }
 
-  template class MWNode<1, double>;
-  template class MWNode<2, double>;
-  template class MWNode<3, double>;
-  template class MWNode<1, ComplexDouble>;
-  template class MWNode<2, ComplexDouble>;
-  template class MWNode<3, ComplexDouble>;
+template class MWNode<1, double>;
+template class MWNode<2, double>;
+template class MWNode<3, double>;
+template class MWNode<1, ComplexDouble>;
+template class MWNode<2, ComplexDouble>;
+template class MWNode<3, ComplexDouble>;
 
 } // namespace mrcpp
diff --git a/src/trees/MWNode.h b/src/trees/MWNode.h
index 6a96675a2..f86313846 100644
--- a/src/trees/MWNode.h
+++ b/src/trees/MWNode.h
@@ -23,14 +23,13 @@
  * <https://mrcpp.readthedocs.io/>
  */
 
-
 #pragma once
 
 #include <Eigen/Core>
 
 #include "MRCPP/macros.h"
-#include "utils/omp_utils.h"
 #include "utils/math_utils.h"
+#include "utils/omp_utils.h"
 
 #include "HilbertPath.h"
 #include "MWTree.h"
@@ -55,7 +54,7 @@ namespace mrcpp {
 template <int D, typename T> class MWNode {
 public:
     MWNode(const MWNode<D, T> &node, bool allocCoef = true, bool SetCoef = true);
-    MWNode<D , T> &operator=(const MWNode<D , T> &node) = delete;
+    MWNode<D, T> &operator=(const MWNode<D, T> &node) = delete;
     virtual ~MWNode();
 
     int getKp1() const { return getMWTree().getKp1(); }
@@ -102,12 +101,12 @@ template <int D, typename T> class MWNode {
     void getExpandedChildPts(Eigen::MatrixXd &pts) const;
 
     MWTree<D, T> &getMWTree() { return static_cast<MWTree<D, T> &>(*this->tree); }
-    MWNode<D , T> &getMWParent() { return static_cast<MWNode<D , T> &>(*this->parent); }
-    MWNode<D , T> &getMWChild(int i) { return static_cast<MWNode<D , T> &>(*this->children[i]); }
+    MWNode<D, T> &getMWParent() { return static_cast<MWNode<D, T> &>(*this->parent); }
+    MWNode<D, T> &getMWChild(int i) { return static_cast<MWNode<D, T> &>(*this->children[i]); }
 
     const MWTree<D, T> &getMWTree() const { return static_cast<const MWTree<D, T> &>(*this->tree); }
-    const MWNode<D , T> &getMWParent() const { return static_cast<const MWNode<D , T> &>(*this->parent); }
-    const MWNode<D , T> &getMWChild(int i) const { return static_cast<const MWNode<D , T> &>(*this->children[i]); }
+    const MWNode<D, T> &getMWParent() const { return static_cast<const MWNode<D, T> &>(*this->parent); }
+    const MWNode<D, T> &getMWChild(int i) const { return static_cast<const MWNode<D, T> &>(*this->children[i]); }
 
     void zeroCoefs();
     void setCoefBlock(int block, int block_size, const T *c);
@@ -155,7 +154,7 @@ template <int D, typename T> class MWNode {
     void clearIsRootNode() { CLEAR_BITS(status, FlagRootNode); }
     void clearIsAllocated() { CLEAR_BITS(status, FlagAllocated); }
 
-    friend std::ostream &operator<<(std::ostream &o, const MWNode<D , T> &nd) { return nd.print(o); }
+    friend std::ostream &operator<<(std::ostream &o, const MWNode<D, T> &nd) { return nd.print(o); }
 
     friend class TreeBuilder<D, T>;
     friend class MultiplicationCalculator<D, T>;
@@ -166,27 +165,27 @@ template <int D, typename T> class MWNode {
     friend class FunctionNode<D, T>;
     friend class OperatorNode;
     friend class DerivativeCalculator<D, T>;
-    bool isComplex = false; //TODO put as one of the flags
+    bool isComplex = false;               // TODO put as one of the flags
     friend class FunctionTree<D, double>; // required if a ComplexDouble tree access a double node from another tree!
     friend class FunctionTree<D, ComplexDouble>;
-    int childSerialIx{-1};  ///< index of first child in serial Tree, or -1 for leafnodes/endnodes
+    int childSerialIx{-1}; ///< index of first child in serial Tree, or -1 for leafnodes/endnodes
 
 protected:
     MWTree<D, T> *tree{nullptr};    ///< Tree the node belongs to
-    MWNode<D , T> *parent{nullptr};  ///< Parent node
-    MWNode<D , T> *children[1 << D]; ///< 2^D children
+    MWNode<D, T> *parent{nullptr};  ///< Parent node
+    MWNode<D, T> *children[1 << D]; ///< 2^D children
 
     double squareNorm{-1.0};       ///< Squared norm of all 2^D (k+1)^D coefficients
     double componentNorms[1 << D]; ///< Squared norms of the separeted 2^D components
     double maxSquareNorm{-1.0};    ///< Largest squared norm among itself and descendants.
     double maxWSquareNorm{-1.0};   ///< Largest wavelet squared norm among itself and descendants.
                                    ///< NB: must be set before used.
-    T *coefs{nullptr};     ///< the 2^D (k+1)^D MW coefficients
-                           ///< For example, in case of a one dimensional function \f$ f \f$
-                           ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$,
-                           ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$
-                           ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$.
-                           ///< Here \f$ n, l \f$ are unique for every node.
+    T *coefs{nullptr};             ///< the 2^D (k+1)^D MW coefficients
+                                   ///< For example, in case of a one dimensional function \f$ f \f$
+                                   ///< this array equals \f$ s_0, \ldots, s_k, d_0, \ldots, d_k \f$,
+                                   ///< where scaling coefficients \f$ s_j = s_{jl}^n(f) \f$
+                                   ///< and wavelet coefficients \f$ d_j = d_{jl}^n(f) \f$.
+                                   ///< Here \f$ n, l \f$ are unique for every node.
     int n_coefs{0};
 
     int serialIx{-1};       ///< index in serial Tree
@@ -223,20 +222,20 @@ template <int D, typename T> class MWNode {
     int getChildIndex(const NodeIndex<D> &nIdx) const;
     int getChildIndex(const Coord<D> &r) const;
 
-    bool diffBranch(const MWNode<D , T> &rhs) const;
+    bool diffBranch(const MWNode<D, T> &rhs) const;
 
-    MWNode<D , T> *retrieveNode(const Coord<D> &r, int depth);
-    MWNode<D , T> *retrieveNode(const NodeIndex<D> &idx, bool create = false);
-    MWNode<D , T> *retrieveParent(const NodeIndex<D> &idx);
+    MWNode<D, T> *retrieveNode(const Coord<D> &r, int depth);
+    MWNode<D, T> *retrieveNode(const NodeIndex<D> &idx, bool create = false);
+    MWNode<D, T> *retrieveParent(const NodeIndex<D> &idx);
 
-    const MWNode<D , T> *retrieveNodeNoGen(const NodeIndex<D> &idx) const;
-    MWNode<D , T> *retrieveNodeNoGen(const NodeIndex<D> &idx);
+    const MWNode<D, T> *retrieveNodeNoGen(const NodeIndex<D> &idx) const;
+    MWNode<D, T> *retrieveNodeNoGen(const NodeIndex<D> &idx);
 
-    const MWNode<D , T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth) const;
-    MWNode<D , T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth);
+    const MWNode<D, T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth) const;
+    MWNode<D, T> *retrieveNodeOrEndNode(const Coord<D> &r, int depth);
 
-    const MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx) const;
-    MWNode<D , T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx);
+    const MWNode<D, T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx) const;
+    MWNode<D, T> *retrieveNodeOrEndNode(const NodeIndex<D> &idx);
 
     void threadSafeCreateChildren();
     void threadSafeGenChildren();
diff --git a/src/trees/MWTree.cpp b/src/trees/MWTree.cpp
index 9fd5c907a..6a646d33f 100644
--- a/src/trees/MWTree.cpp
+++ b/src/trees/MWTree.cpp
@@ -26,10 +26,10 @@
 #include "MWTree.h"
 
 #include "MWNode.h"
-#include "NodeIndex.h"
-#include "TreeIterator.h"
 #include "MultiResolutionAnalysis.h"
 #include "NodeAllocator.h"
+#include "NodeIndex.h"
+#include "TreeIterator.h"
 #include "utils/Bank.h"
 #include "utils/Printer.h"
 #include "utils/math_utils.h"
@@ -49,11 +49,11 @@ namespace mrcpp {
  * root nodes. The information for the root node configuration to use
  * is in the mra object which is passed to the constructor.
  */
-  template <int D, typename T>
+template <int D, typename T>
 MWTree<D, T>::MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n)
         : MRA(mra)
-        , order(mra.getOrder()) /// polynomial order
-        , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) ///nr of scaling coefficients \f$ (k+1)^D \f$
+        , order(mra.getOrder())                          /// polynomial order
+        , kp1_d(math_utils::ipow(mra.getOrder() + 1, D)) /// nr of scaling coefficients \f$ (k+1)^D \f$
         , name(n)
         , squareNorm(-1.0)
         , rootBox(mra.getWorldBox()) {
@@ -68,11 +68,11 @@ template <int D, typename T> MWTree<D, T>::~MWTree() {
 }
 
 /** @brief Deletes all the nodes in the tree
-  *
-  * @details This method will recursively delete all the nodes,
-  * including the root nodes. Derived classes will call this method
-  * when the object is deleted.
-  */
+ *
+ * @details This method will recursively delete all the nodes,
+ * including the root nodes. Derived classes will call this method
+ * when the object is deleted.
+ */
 template <int D, typename T> void MWTree<D, T>::deleteRootNodes() {
     for (int i = 0; i < this->rootBox.size(); i++) {
         MWNode<D, T> &root = this->getRootMWNode(i);
@@ -337,7 +337,7 @@ template <int D, typename T> MWNode<D, T> *MWTree<D, T>::findNode(NodeIndex<D> i
  * from this.
  * The nodes are permanently added to the tree if create = true
  */
-    template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> idx, bool create) {
+template <int D, typename T> MWNode<D, T> &MWTree<D, T>::getNode(NodeIndex<D> idx, bool create) {
     if (getRootBox().isPeriodic()) periodic::index_manipulation<D>(idx, getRootBox().getPeriodic());
 
     MWNode<D, T> *out = nullptr;
@@ -461,7 +461,6 @@ template <int D, typename T> void MWTree<D, T>::resetEndNodeTable() {
     }
 }
 
-
 template <int D, typename T> int MWTree<D, T>::countBranchNodes(int depth) {
     NOT_IMPLEMENTED_ABORT;
 }
@@ -550,8 +549,10 @@ template <int D, typename T> void MWTree<D, T>::makeMaxSquareNorms() {
  */
 template <int D, typename T> int MWTree<D, T>::getIx(NodeIndex<D> nIdx) {
     if (this->isLocal == false) MSG_ERROR("getIx only implemented in local representation");
-    if(NodeIndex2serialIx.count(nIdx) == 0) return -1;
-    else return NodeIndex2serialIx[nIdx];
+    if (NodeIndex2serialIx.count(nIdx) == 0)
+        return -1;
+    else
+        return NodeIndex2serialIx[nIdx];
 }
 
 template <int D, typename T> void MWTree<D, T>::getNodeCoeff(NodeIndex<D> nIdx, T *data) {
@@ -566,7 +567,6 @@ template class MWTree<1, double>;
 template class MWTree<2, double>;
 template class MWTree<3, double>;
 
-
 template class MWTree<1, ComplexDouble>;
 template class MWTree<2, ComplexDouble>;
 template class MWTree<3, ComplexDouble>;
diff --git a/src/trees/MWTree.h b/src/trees/MWTree.h
index e3c656b7c..b0261aca6 100644
--- a/src/trees/MWTree.h
+++ b/src/trees/MWTree.h
@@ -26,8 +26,8 @@
 #pragma once
 
 #include <Eigen/Core>
-#include <memory>
 #include <map>
+#include <memory>
 
 #include "MRCPP/mrcpp_declarations.h"
 #include "utils/omp_utils.h"
@@ -61,7 +61,7 @@ class BankAccount;
  * present. See specific methods for details.
  *
  */
-  template <int D, typename T> class MWTree {
+template <int D, typename T> class MWTree {
 public:
     MWTree(const MultiResolutionAnalysis<D> &mra, const std::string &n);
     MWTree(const MWTree<D, T> &tree) = delete;
@@ -133,18 +133,18 @@ class BankAccount;
     int countLeafNodes(int depth = -1);
     int countAllocNodes(int depth = -1);
     int countNodes(int depth = -1);
-    bool isLocal = false; // to know whether the tree coeffcients are stored in the Bank
+    bool isLocal = false;         // to know whether the tree coeffcients are stored in the Bank
     int getIx(NodeIndex<D> nIdx); // gives serialIx of a stored node from its NodeIndex if isLocal
 
     void makeMaxSquareNorms(); // sets values for maxSquareNorm and maxWSquareNorm in all nodes
 
     NodeAllocator<D, T> &getNodeAllocator() { return *this->nodeAllocator_p; }
     const NodeAllocator<D, T> &getNodeAllocator() const { return *this->nodeAllocator_p; }
-    MWNodeVector<D, T> endNodeTable;          ///< Final projected nodes
+    MWNodeVector<D, T> endNodeTable; ///< Final projected nodes
 
     void getNodeCoeff(NodeIndex<D> nIdx, T *data); // fetch coefficient from a specific node stored in Bank
     bool conjugate() const { return this->conj; }
-    void setConjugate(bool conjug)  { this->conj = conjug; }
+    void setConjugate(bool conjug) { this->conj = conjug; }
 
     friend std::ostream &operator<<(std::ostream &o, const MWTree<D, T> &tree) { return tree.print(o); }
 
@@ -171,7 +171,7 @@ class BankAccount;
 
     // Tree data
     double squareNorm;
-    NodeBox<D, T> rootBox;                    ///< The actual container of nodes
+    NodeBox<D, T> rootBox;                 ///< The actual container of nodes
     std::vector<int> nodesAtDepth;         ///< Node counter
     std::vector<int> nodesAtNegativeDepth; ///< Node counter
 
diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp
index b72a8c5e8..2724bacf9 100644
--- a/src/trees/MultiResolutionAnalysis.cpp
+++ b/src/trees/MultiResolutionAnalysis.cpp
@@ -106,7 +106,8 @@ MultiResolutionAnalysis<D>::MultiResolutionAnalysis(const MultiResolutionAnalysi
  * @param[in] sb: Polynomial basis (MW) as a ScalingBasis object
  * @param[in] depth: Maximum allowed resolution depth, relative to root scale
  *
- *  @details Creates a MRA object from pre-existing BoundingBox and ScalingBasis objects. These objects are taken as reference. For more details about the constructor itself, see the first constructor.
+ *  @details Creates a MRA object from pre-existing BoundingBox and ScalingBasis objects. These objects are taken as reference. For more details about the constructor itself, see the first
+ * constructor.
  */
 template <int D>
 MultiResolutionAnalysis<D>::MultiResolutionAnalysis(const BoundingBox<D> &bb, const ScalingBasis &sb, int depth)
@@ -124,9 +125,9 @@ MultiResolutionAnalysis<D>::MultiResolutionAnalysis(const BoundingBox<D> &bb, co
  *
  * @param[in] mra: MRA object, taken by constant reference
  *
- *  @details Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis object) and maximum depth (integer), and false otherwise.
- *  Computations on different MRA cannot be combined, this operator can be used to make sure that the multiple MRAs are compatible.
- *  For more information about the meaning of equality for BoundingBox and ScalingBasis objets, see their respective classes.
+ *  @details Equality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis (ScalingBasis object), computational domain (BoundingBox object)
+ * and maximum depth (integer), and false otherwise. Computations on different MRAs cannot be combined; this operator can be used to make sure that multiple MRAs are compatible. For more
+ * information about the meaning of equality for BoundingBox and ScalingBasis objects, see their respective classes.
  */
 template <int D> bool MultiResolutionAnalysis<D>::operator==(const MultiResolutionAnalysis<D> &mra) const {
     if (this->basis != mra.basis) return false;
@@ -141,14 +142,17 @@ template <int D> bool MultiResolutionAnalysis<D>::operator==(const MultiResoluti
  *
  * @param[in] mra: MRA object, taken by constant reference
  *
- *  @details Inequality operator for the MultiResolutionAnalysis class, returns true if both MRAs have the same polynomial basis represented by a BoundingBox object, computational domain (ScalingBasis object) and maximum depth (integer), and false otherwise.
- *  Opposite of the == operator.
- *  For more information about the meaning of equality for BoundingBox and ScalingBasis objets, see their respective classes.
+ *  @details Inequality operator for the MultiResolutionAnalysis class, returns false if both MRAs have the same polynomial basis (ScalingBasis object), computational domain (BoundingBox
+ * object) and maximum depth (integer), and true otherwise. Opposite of the == operator. For more information about the meaning of equality for BoundingBox and ScalingBasis objects, see their
+ * respective classes.
  */
 template <int D> bool MultiResolutionAnalysis<D>::operator!=(const MultiResolutionAnalysis<D> &mra) const {
-    if (this->basis != mra.basis) std::cout<<"diff basis "<<this->basis<<std::endl <<"and  "<< mra.basis<<std::endl;
+    if (this->basis != mra.basis) std::cout << "diff basis " << this->basis << std::endl << "and  " << mra.basis << std::endl;
     if (this->basis != mra.basis) return true;
-    if (this->world != mra.world) std::cout<<"diff world "<<this->world<<std::endl <<"and  " <<" "<< mra.world<<std::endl;
+    if (this->world != mra.world)
+        std::cout << "diff world " << this->world << std::endl
+                  << "and  "
+                  << " " << mra.world << std::endl;
     if (this->world != mra.world) return true;
     if (this->maxDepth != mra.maxDepth) return true;
     return false;
diff --git a/src/trees/NodeAllocator.cpp b/src/trees/NodeAllocator.cpp
index 5b6db8a34..5079459aa 100644
--- a/src/trees/NodeAllocator.cpp
+++ b/src/trees/NodeAllocator.cpp
@@ -27,18 +27,19 @@
 
 #include <stack>
 
-#include "MWNode.h"
-#include "FunctionTree.h"
 #include "FunctionNode.h"
-#include "OperatorTree.h"
+#include "FunctionTree.h"
+#include "MWNode.h"
 #include "OperatorNode.h"
+#include "OperatorTree.h"
 
 #include "utils/Printer.h"
 #include "utils/mpi_utils.h"
 
 namespace mrcpp {
 
-template <int D, typename T> NodeAllocator<D, T>::NodeAllocator(FunctionTree<D, T> *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk)
+template <int D, typename T>
+NodeAllocator<D, T>::NodeAllocator(FunctionTree<D, T> *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk)
         : coefsPerNode(coefsPerNode)
         , maxNodesPerChunk(nodesPerChunk)
         , tree_p(tree)
@@ -54,7 +55,8 @@ template <int D, typename T> NodeAllocator<D, T>::NodeAllocator(FunctionTree<D,
     MRCPP_INIT_OMP_LOCK();
 }
 
-template <> NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory<double> *mem, int coefsPerNode, int nodesPerChunk)
+template <>
+NodeAllocator<2>::NodeAllocator(OperatorTree *tree, SharedMemory<double> *mem, int coefsPerNode, int nodesPerChunk)
         : coefsPerNode(coefsPerNode)
         , maxNodesPerChunk(nodesPerChunk)
         , tree_p(tree)
@@ -82,28 +84,28 @@ template <int D, typename T> NodeAllocator<D, T>::~NodeAllocator() {
     MRCPP_DESTROY_OMP_LOCK();
 }
 
-template <int D, typename T> MWNode<D, T> * NodeAllocator<D, T>::getNode_p(int sIdx) {
+template <int D, typename T> MWNode<D, T> *NodeAllocator<D, T>::getNode_p(int sIdx) {
     MRCPP_SET_OMP_LOCK();
     auto *node = getNodeNoLock(sIdx);
     MRCPP_UNSET_OMP_LOCK();
     return node;
 }
 
-template <int D, typename T> T * NodeAllocator<D, T>::getCoef_p(int sIdx) {
+template <int D, typename T> T *NodeAllocator<D, T>::getCoef_p(int sIdx) {
     MRCPP_SET_OMP_LOCK();
     auto *coefs = getCoefNoLock(sIdx);
     MRCPP_UNSET_OMP_LOCK();
     return coefs;
 }
 
-template <int D, typename T> MWNode<D, T> * NodeAllocator<D, T>::getNodeNoLock(int sIdx) {
+template <int D, typename T> MWNode<D, T> *NodeAllocator<D, T>::getNodeNoLock(int sIdx) {
     if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr;
     int chunk = sIdx / this->maxNodesPerChunk; // which chunk
     int cIdx = sIdx % this->maxNodesPerChunk;  // position in chunk
     return this->nodeChunks[chunk] + cIdx;
 }
 
-template <int D, typename T> T * NodeAllocator<D, T>::getCoefNoLock(int sIdx) {
+template <int D, typename T> T *NodeAllocator<D, T>::getCoefNoLock(int sIdx) {
     if (sIdx < 0 or sIdx >= this->stackStatus.size()) return nullptr;
     int chunk = sIdx / this->maxNodesPerChunk; // which chunk
     int idx = sIdx % this->maxNodesPerChunk;   // position in chunk
@@ -128,7 +130,7 @@ template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coe
 
     // we require that the index for first child is a multiple of 2**D
     // so that we can find the sibling rank using rank=sIdx%(2**D)
-    if (sIdx%nNodes != 0) MSG_ERROR(" node allocate error");
+    if (sIdx % nNodes != 0) MSG_ERROR(" node allocate error");
 
     // fill stack status
     auto &status = this->stackStatus;
@@ -148,11 +150,11 @@ template <int D, typename T> int NodeAllocator<D, T>::alloc(int nNodes, bool coe
 
 template <int D, typename T> void NodeAllocator<D, T>::dealloc(int sIdx) {
     MRCPP_SET_OMP_LOCK();
-   if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx);
+    if (sIdx < 0 or sIdx >= this->stackStatus.size()) MSG_ABORT("Invalid serial index: " << sIdx);
     auto *node_p = getNodeNoLock(sIdx);
     node_p->~MWNode();
-    this->stackStatus[sIdx] = 0; // mark as available
-    if (sIdx == this->topStack - 1) {  // top of stack
+    this->stackStatus[sIdx] = 0;      // mark as available
+    if (sIdx == this->topStack - 1) { // top of stack
         while (this->stackStatus[this->topStack - 1] == 0) {
             this->topStack--;
             if (this->topStack < 1) break;
@@ -167,10 +169,10 @@ template <int D, typename T> void NodeAllocator<D, T>::dealloc(int sIdx) {
 template <int D, typename T> void NodeAllocator<D, T>::deallocAllCoeff() {
     if (not this->isShared())
         for (auto &chunk : this->coefChunks) delete[] chunk;
-    else delete this->shmem_p;
+    else
+        delete this->shmem_p;
     this->shmem_p = nullptr;
     this->coefChunks.clear();
-
 }
 
 template <int D, typename T> void NodeAllocator<D, T>::init(int nChunks, bool coefs) {
@@ -221,8 +223,7 @@ template <int D, typename T> void NodeAllocator<D, T>::appendChunk(bool coefs) {
 template <int D, typename T> int NodeAllocator<D, T>::compress() {
     MRCPP_SET_OMP_LOCK();
     int nNodes = (1 << D);
-    if (this->maxNodesPerChunk * this->nodeChunks.size() <=
-        getTree().getNNodes() + this->maxNodesPerChunk + nNodes - 1) {
+    if (this->maxNodesPerChunk * this->nodeChunks.size() <= getTree().getNNodes() + this->maxNodesPerChunk + nNodes - 1) {
         MRCPP_UNSET_OMP_LOCK();
         return 0; // nothing to compress
     }
@@ -256,13 +257,13 @@ template <int D, typename T> int NodeAllocator<D, T>::deleteUnusedChunks() {
     // number of occupied chunks
     int nChunksTotal = getNChunks();
     int nChunksUsed = getNChunksUsed();
-    if(nChunksTotal == nChunksUsed) return 0; // no unused chunks
+    if (nChunksTotal == nChunksUsed) return 0; // no unused chunks
     assert(nChunksTotal >= nChunksUsed);
     for (int i = nChunksUsed; i < nChunksTotal; i++) delete[](char *)(this->nodeChunks[i]);
 
     if (isShared()) {
         // shared coefficients cannot be fully deallocated, only pointer is moved.
-       getMemory().sh_end_ptr -= (nChunksTotal - nChunksUsed) * this->coefsPerNode * this->maxNodesPerChunk;
+        getMemory().sh_end_ptr -= (nChunksTotal - nChunksUsed) * this->coefsPerNode * this->maxNodesPerChunk;
     } else {
         for (int i = nChunksUsed; i < nChunksTotal; i++) delete[] this->coefChunks[i];
     }
diff --git a/src/trees/NodeAllocator.h b/src/trees/NodeAllocator.h
index 69065d1dc..7e33b7e21 100644
--- a/src/trees/NodeAllocator.h
+++ b/src/trees/NodeAllocator.h
@@ -40,7 +40,7 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class NodeAllocator final {
+template <int D, typename T> class NodeAllocator final {
 public:
     NodeAllocator(OperatorTree *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk);
     NodeAllocator(FunctionTree<D, T> *tree, SharedMemory<T> *mem, int coefsPerNode, int nodesPerChunk);
@@ -66,26 +66,26 @@ namespace mrcpp {
     int getCoefChunkSize() const { return this->maxNodesPerChunk * this->coefsPerNode * sizeof(T); }
     int getMaxNodesPerChunk() const { return this->maxNodesPerChunk; }
 
-    T * getCoef_p(int sIdx);
-    MWNode<D, T> * getNode_p(int sIdx);
+    T *getCoef_p(int sIdx);
+    MWNode<D, T> *getNode_p(int sIdx);
 
-    T * getCoefChunk(int i) { return this->coefChunks[i]; }
-    MWNode<D, T> * getNodeChunk(int i) { return this->nodeChunks[i]; }
+    T *getCoefChunk(int i) { return this->coefChunks[i]; }
+    MWNode<D, T> *getNodeChunk(int i) { return this->nodeChunks[i]; }
 
     void print() const;
 
 protected:
-    int nNodes{0};                  // number of nodes actually in use
-    int topStack{0};                // index of last node on stack
-    int sizeOfNode{0};              // sizeof(NodeType)
-    int coefsPerNode{0};            // number of coef for one node
-    int maxNodesPerChunk{0};        // max number of nodes per allocation
+    int nNodes{0};           // number of nodes actually in use
+    int topStack{0};         // index of last node on stack
+    int sizeOfNode{0};       // sizeof(NodeType)
+    int coefsPerNode{0};     // number of coef for one node
+    int maxNodesPerChunk{0}; // max number of nodes per allocation
 
     std::vector<int> stackStatus{};
     std::vector<T *> coefChunks{};
     std::vector<MWNode<D, T> *> nodeChunks{};
 
-    char *cvptr{nullptr};           // pointer to virtual table
+    char *cvptr{nullptr};              // pointer to virtual table
     MWNode<D, T> *last_p{nullptr};     // pointer just after the last active node, i.e. where to put next node
     MWTree<D, T> *tree_p{nullptr};     // pointer to external object
     SharedMemory<T> *shmem_p{nullptr}; // pointer to external object
@@ -94,8 +94,8 @@ namespace mrcpp {
     MWTree<D, T> &getTree() { return *this->tree_p; }
     SharedMemory<T> &getMemory() { return *this->shmem_p; }
 
-    T * getCoefNoLock(int sIdx);
-    MWNode<D, T> * getNodeNoLock(int sIdx);
+    T *getCoefNoLock(int sIdx);
+    MWNode<D, T> *getNodeNoLock(int sIdx);
 
     void moveNodes(int nNodes, int srcIdx, int dstIdx);
     void appendChunk(bool coefs);
@@ -108,5 +108,4 @@ namespace mrcpp {
 #endif
 };
 
-
 } // namespace mrcpp
diff --git a/src/trees/NodeBox.h b/src/trees/NodeBox.h
index 3b53da538..7a7fc086e 100644
--- a/src/trees/NodeBox.h
+++ b/src/trees/NodeBox.h
@@ -30,7 +30,7 @@
 
 namespace mrcpp {
 
-  template <int D, typename T> class NodeBox final : public BoundingBox<D> {
+template <int D, typename T> class NodeBox final : public BoundingBox<D> {
 public:
     NodeBox(const NodeIndex<D> &idx, const std::array<int, D> &nb = {});
     NodeBox(const NodeBox<D, T> &box);
@@ -53,7 +53,7 @@ namespace mrcpp {
     MWNode<D, T> **getNodes() { return this->nodes; }
 
 protected:
-    int nOccupied;     ///< Number of non-zero pointers in box
+    int nOccupied;        ///< Number of non-zero pointers in box
     MWNode<D, T> **nodes; ///< Container of nodes
 
     void allocNodePointers();
diff --git a/src/trees/OperatorTree.cpp b/src/trees/OperatorTree.cpp
index 44963d465..890f2677c 100644
--- a/src/trees/OperatorTree.cpp
+++ b/src/trees/OperatorTree.cpp
@@ -25,9 +25,9 @@
 
 #include "OperatorTree.h"
 #include "BandWidth.h"
-#include "TreeIterator.h"
 #include "NodeAllocator.h"
 #include "OperatorNode.h"
+#include "TreeIterator.h"
 #include "utils/Printer.h"
 #include "utils/tree_utils.h"
 
@@ -98,15 +98,14 @@ void OperatorTree::clearBandWidth() {
     this->bandWidth = nullptr;
 }
 
-
 /** @brief Calculates band widths of the non-standard form matrices.
  *
  * @param[in] prec: Precision used for thresholding
- * 
+ *
  * @details It is starting from \f$ l = 0 \f$ and updating the band width value each time we encounter
- * considerable value while keeping increasing \f$ l \f$, that stands for the distance to the diagonal. 
- * 
- */ 
+ * considerable value while keeping increasing \f$ l \f$, that stands for the distance to the diagonal.
+ *
+ */
 void OperatorTree::calcBandWidth(double prec) {
     if (this->bandWidth == nullptr) clearBandWidth();
     this->bandWidth = new BandWidth(getDepth());
@@ -134,61 +133,45 @@ void OperatorTree::calcBandWidth(double prec) {
     println(100, "\nOperator BandWidth" << *this->bandWidth);
 }
 
-
 /** @brief Checks if the distance to diagonal is bigger than the operator band width.
  *
  * @param[in] oTransl: distance to diagonal
  * @param[in] o_depth: scaling order
  * @param[in] idx: index corresponding to one of the matrices \f$ A, B, C \f$ or \f$ T \f$.
- * 
- * @returns True if \b oTransl is outside of the band and False otherwise. 
- * 
- */ 
-bool OperatorTree::isOutsideBand(int oTransl, int o_depth, int idx)
-{
+ *
+ * @returns True if \b oTransl is outside of the band and False otherwise.
+ *
+ */
+bool OperatorTree::isOutsideBand(int oTransl, int o_depth, int idx) {
     return abs(oTransl) > this->bandWidth->getWidth(o_depth, idx);
 }
 
-
 /** @brief Cleans up end nodes.
  *
  * @param[in] trust_scale: there is no cleaning down below \b trust_scale (it speeds up operator building).
- * 
+ *
  * @details Traverses the tree and rewrites end nodes having branch node twins,
  * i. e. identical with respect to scale and translation.
  * This method is very handy, when an adaptive operator construction
  * can make a significunt noise at low scaling depth.
  * Its need comes from the fact that mwTransform up cannot override
  * rubbish that can potentially stick to end nodes at a particular level,
- * and as a result spread further up to the root with mwTransform. 
- * 
+ * and as a result spread further up to the root with mwTransform.
+ *
  */
-void OperatorTree::removeRoughScaleNoise(int trust_scale)
-{
-    MWNode<2> *p_rubbish;     //possibly inexact end node
-    MWNode<2> *p_counterpart; //exact branch node
-    for( int n = (this->getDepth() - 2 < trust_scale) ? this->getDepth() - 2 : trust_scale; n > this->getRootScale(); n--)
-    {
-        int N =  1<<n;
-        for( int m = 0; m < N; m++ )
-            for( int l = 0; l < N; l++ )
-            {
-                p_rubbish = this->findNode( NodeIndex<2>(n, {m, l}) );
-                if( p_rubbish != nullptr && p_rubbish->isEndNode() )
-                {
-                    for( int m1 = 0; m1 < N; m1++ )
-                        for( int l1 = 0; l1 < N; l1++ )
-                            if
-                            (
-                                (m1 - l1 == m - l)
-                                &&
-                                ( p_counterpart = this->findNode( NodeIndex<2>(n, {m1, l1}) ) ) != nullptr
-                                &&
-                                p_counterpart->isBranchNode()
-                            )
-                            {
-                                for(int i = 0; i < p_counterpart->n_coefs; i++)
-                                    p_rubbish->coefs[i] = p_counterpart->coefs[i];
+void OperatorTree::removeRoughScaleNoise(int trust_scale) {
+    MWNode<2> *p_rubbish;     // possibly inexact end node
+    MWNode<2> *p_counterpart; // exact branch node
+    for (int n = (this->getDepth() - 2 < trust_scale) ? this->getDepth() - 2 : trust_scale; n > this->getRootScale(); n--) {
+        int N = 1 << n;
+        for (int m = 0; m < N; m++)
+            for (int l = 0; l < N; l++) {
+                p_rubbish = this->findNode(NodeIndex<2>(n, {m, l}));
+                if (p_rubbish != nullptr && p_rubbish->isEndNode()) {
+                    for (int m1 = 0; m1 < N; m1++)
+                        for (int l1 = 0; l1 < N; l1++)
+                            if ((m1 - l1 == m - l) && (p_counterpart = this->findNode(NodeIndex<2>(n, {m1, l1}))) != nullptr && p_counterpart->isBranchNode()) {
+                                for (int i = 0; i < p_counterpart->n_coefs; i++) p_rubbish->coefs[i] = p_counterpart->coefs[i];
                             }
                 }
             }
@@ -196,8 +179,6 @@ void OperatorTree::removeRoughScaleNoise(int trust_scale)
     }
 }
 
-
-
 void OperatorTree::getMaxTranslations(VectorXi &maxTransl) {
     int nScales = this->nodesAtDepth.size();
     maxTransl = VectorXi::Zero(nScales);
diff --git a/src/trees/OperatorTree.h b/src/trees/OperatorTree.h
index dcc2e09a8..83be4789a 100644
--- a/src/trees/OperatorTree.h
+++ b/src/trees/OperatorTree.h
@@ -50,8 +50,10 @@ class OperatorTree : public MWTree<2> {
     BandWidth &getBandWidth() { return *this->bandWidth; }
     const BandWidth &getBandWidth() const { return *this->bandWidth; }
 
-    OperatorNode &getNode(int n, int l) { return *nodePtrAccess[n][l]; }        ///< TODO: It has to be specified more.
-                                                                                ///< \b l is distance to the diagonal.
+    OperatorNode &getNode(int n, int l) {
+        return *nodePtrAccess[n][l];
+    } ///< TODO: It has to be specified more.
+      ///< \b l is distance to the diagonal.
     const OperatorNode &getNode(int n, int l) const { return *nodePtrAccess[n][l]; }
 
     void mwTransformDown(bool overwrite) override;
diff --git a/src/trees/TreeIterator.cpp b/src/trees/TreeIterator.cpp
index f7f88e03b..b9d4aee85 100644
--- a/src/trees/TreeIterator.cpp
+++ b/src/trees/TreeIterator.cpp
@@ -29,7 +29,8 @@
 
 namespace mrcpp {
 
-template <int D, typename T> TreeIterator<D, T>::TreeIterator(int traverse, int iterator)
+template <int D, typename T>
+TreeIterator<D, T>::TreeIterator(int traverse, int iterator)
         : root(0)
         , nRoots(0)
         , mode(traverse)
@@ -38,7 +39,8 @@ template <int D, typename T> TreeIterator<D, T>::TreeIterator(int traverse, int
         , state(nullptr)
         , initialState(nullptr) {}
 
-template <int D, typename T> TreeIterator<D, T>::TreeIterator(MWTree<D, T> &tree, int traverse, int iterator)
+template <int D, typename T>
+TreeIterator<D, T>::TreeIterator(MWTree<D, T> &tree, int traverse, int iterator)
         : root(0)
         , nRoots(0)
         , mode(traverse)
@@ -53,7 +55,7 @@ template <int D, typename T> TreeIterator<D, T>::~TreeIterator() {
     if (this->initialState != nullptr) delete this->initialState;
 }
 
-  template<int D, typename T> int TreeIterator<D, T>::getChildIndex(int i) const {
+template <int D, typename T> int TreeIterator<D, T>::getChildIndex(int i) const {
     const MWNode<D, T> &node = *this->state->node;
     const HilbertPath<D> &h = node.getHilbertPath();
     // Legesgue type returns i, Hilbert type returns Hilbert index
diff --git a/src/trees/TreeIterator.h b/src/trees/TreeIterator.h
index a79fe8412..82ea49eb9 100644
--- a/src/trees/TreeIterator.h
+++ b/src/trees/TreeIterator.h
@@ -46,7 +46,7 @@ template <int D, typename T> class TreeIterator {
     bool nextParent();
     MWNode<D, T> &getNode() { return *this->state->node; }
 
-  friend class IteratorNode<D, T>;
+    friend class IteratorNode<D, T>;
 
 protected:
     int root;
diff --git a/src/utils/Bank.cpp b/src/utils/Bank.cpp
index 0c9cc3769..f8c111a53 100644
--- a/src/utils/Bank.cpp
+++ b/src/utils/Bank.cpp
@@ -17,7 +17,7 @@ Bank::~Bank() {
 
 struct Blockdata_struct {
     std::vector<double *> data; // to store the incoming data. One column for each orbital on the same node.
-    int N_rows = 0; // the number of coefficients in one column of the block.
+    int N_rows = 0;             // the number of coefficients in one column of the block.
     std::map<int, int> id2data; // internal index of the data in the block
     std::vector<int> id;        // the id of each column. Either nodeid, or orbid
 };
@@ -29,33 +29,33 @@ struct OrbBlock_struct {
 };
 struct mem_struct {
     std::vector<double *> chunk_p; // vector with allocated chunks
-    int p = -1; // position of next available memory (not allocated if < 0)
-    //on Betzy 1024*1024*4 ok, 1024*1024*2 NOT ok: leads to memory fragmentation (on "Betzy" 2023)
-    int chunk_size = 1024*1024*4; // chunksize (in number of doubles). data_p[i]+chunk_size is end of chunk i
-    int account=-1;
-    double * get_mem(int size){
-        if(p<0 or size > chunk_size or p + size > chunk_size){ //allocate new chunk of memory
-            if(size > 1024*1024){
-                //make a special chunk just for this
-                double * m_p = new double[size];
+    int p = -1;                    // position of next available memory (not allocated if < 0)
+    // on Betzy 1024*1024*4 ok, 1024*1024*2 NOT ok: leads to memory fragmentation (on "Betzy" 2023)
+    int chunk_size = 1024 * 1024 * 4; // chunksize (in number of doubles). data_p[i]+chunk_size is end of chunk i
+    int account = -1;
+    double *get_mem(int size) {
+        if (p < 0 or size > chunk_size or p + size > chunk_size) { // allocate new chunk of memory
+            if (size > 1024 * 1024) {
+                // make a special chunk just for this
+                double *m_p = new double[size];
                 chunk_p.push_back(m_p);
-                p=-1;
+                p = -1;
                 return m_p;
             } else {
-                double * m_p = new double[chunk_size];
+                double *m_p = new double[chunk_size];
                 chunk_p.push_back(m_p);
-                p=0;
+                p = 0;
             }
         }
-        double * m_p =  chunk_p[chunk_p.size()-1] + p;
+        double *m_p = chunk_p[chunk_p.size() - 1] + p;
         p += size;
         return m_p;
     }
 };
 std::map<int, std::map<int, Blockdata_struct> *> get_nodeid2block; // to get block from its nodeid (all coeff for one node)
-std::map<int, std::map<int, OrbBlock_struct> *> get_orbid2block;  // to get block from its orbid
+std::map<int, std::map<int, OrbBlock_struct> *> get_orbid2block;   // to get block from its orbid
 
-std::map<int, mem_struct*> mem;
+std::map<int, mem_struct *> mem;
 
 int const MIN_SCALE = -999; // Smaller than smallest scale
 int naccounts = 0;
@@ -115,7 +115,7 @@ void Bank::open() {
             get_readytasks[account] = new std::map<int, std::vector<int>>;
             currentsize[account] = 0;
             mem[account] = new mem_struct;
-            mem[account]->account=account;
+            mem[account]->account = account;
             MPI_Send(&account, 1, MPI_INT, status.MPI_SOURCE, 1, comm_bank);
             continue;
         }
@@ -153,8 +153,8 @@ void Bank::open() {
             this->clear_bank();
             for (auto const &block : nodeid2block) {
                 if (block.second.data.size() > 0) {
-                    currentsize[account] -= block.second.N_rows * block.second.data.size()/ 128; // converted into kB
-                    totcurrentsize -= block.second.N_rows * block.second.data.size()/ 128;       // converted into kB
+                    currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB
+                    totcurrentsize -= block.second.N_rows * block.second.data.size() / 128;       // converted into kB
                 }
             }
             nodeid2block.clear();
@@ -171,9 +171,9 @@ void Bank::open() {
                 int dataindex = 0; // internal index of the data in the block
                 int size = 0;
                 if (message == GET_NODEDATA) {
-                    int orbid = messages[3];           // which part of the block to fetch
+                    int orbid = messages[3];          // which part of the block to fetch
                     dataindex = block.id2data[orbid]; // column of the data in the block
-                    size = block.N_rows;   // number of doubles to fetch
+                    size = block.N_rows;              // number of doubles to fetch
                     if (size != messages[4]) std::cout << "ERROR nodedata has wrong size" << std::endl;
                     double *data_p = block.data[dataindex];
                     if (size > 0) MPI_Send(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank);
@@ -186,12 +186,12 @@ void Bank::open() {
                     if (printinfo) std::cout << " rewrite into superblock " << block.data.size() << " " << block.N_rows << " nodeid " << nodeid << std::endl;
                     for (int j = 0; j < block.data.size(); j++) {
                         for (int i = 0; i < block.N_rows; i++) { DataBlock(i, j) = block.data[j][i]; }
-                   }
+                    }
                     dataindex = 0; // start from first column
                     // send info about the size of the superblock
-                    metadata_block[0] = nodeid;             // nodeid
+                    metadata_block[0] = nodeid;            // nodeid
                     metadata_block[1] = block.data.size(); // number of columns
-                    metadata_block[2] = size;               // total size = rows*columns
+                    metadata_block[2] = size;              // total size = rows*columns
                     MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank);
                     // send info about the id of each column
                     MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank);
@@ -242,7 +242,7 @@ void Bank::open() {
                 // send info about the size of the superblock
                 metadata_block[0] = orbid;
                 metadata_block[1] = block.data.size(); // number of columns
-                metadata_block[2] = size;               // total size = rows*columns
+                metadata_block[2] = size;              // total size = rows*columns
                 MPI_Send(metadata_block, size_metadata, MPI_INT, status.MPI_SOURCE, 1, comm_bank);
                 MPI_Send(block.id.data(), metadata_block[1], MPI_INT, status.MPI_SOURCE, 2, comm_bank);
                 MPI_Send(coeff.data(), size, MPI_DOUBLE, status.MPI_SOURCE, 3, comm_bank);
@@ -319,20 +319,20 @@ void Bank::open() {
             // append the incoming data
             Blockdata_struct &block = nodeid2block[nodeid];
             block.id2data[orbid] = nodeid2block[nodeid].data.size(); // internal index of the data in the block
-            double *data_p = mem[account]->get_mem(size);//new double[size];
-            currentsize[account] += size / 128; // converted into kB
-            totcurrentsize += size / 128;       // converted into kB
+            double *data_p = mem[account]->get_mem(size);            // new double[size];
+            currentsize[account] += size / 128;                      // converted into kB
+            totcurrentsize += size / 128;                            // converted into kB
             this->maxsize = std::max(totcurrentsize, this->maxsize);
             block.data.push_back(data_p);
             block.id.push_back(orbid);
-            if (block.N_rows > 0 and block.N_rows != size) cout<<" ERROR block size incompatible " <<block.N_rows <<" "<< size<<endl;
+            if (block.N_rows > 0 and block.N_rows != size) cout << " ERROR block size incompatible " << block.N_rows << " " << size << endl;
             block.N_rows = size;
 
             OrbBlock_struct &orbblock = orbid2block[orbid];
             orbblock.id2data[nodeid] = orbblock.data.size(); // internal index of the data in the block
             orbblock.data.push_back(data_p);
             orbblock.id.push_back(nodeid);
-            //orbblock.N_rows.push_back(size);
+            // orbblock.N_rows.push_back(size);
 
             MPI_Recv(data_p, size, MPI_DOUBLE, status.MPI_SOURCE, 1, comm_bank, &status);
             if (printinfo) std::cout << " written block " << nodeid << " id " << orbid << " subblocks " << nodeid2block[nodeid].data.size() << std::endl;
@@ -373,9 +373,9 @@ void Bank::open() {
                 if (message == SAVE_FUNCTION) deposits[ix].orb = new CompFunction<3>(0);
                 if (message == SAVE_DATA) {
                     datasize = messages[3];
-                    deposits[ix].data = mem[account]->get_mem(datasize);//new double[datasize];
-                    currentsize[account] += datasize / 128; // converted into kB
-                    totcurrentsize += datasize / 128;       // converted into kB
+                    deposits[ix].data = mem[account]->get_mem(datasize); // new double[datasize];
+                    currentsize[account] += datasize / 128;              // converted into kB
+                    totcurrentsize += datasize / 128;                    // converted into kB
                     this->maxsize = std::max(totcurrentsize, this->maxsize);
                     deposits[ix].hasdata = true;
                 }
@@ -479,13 +479,13 @@ void Bank::remove_account(int account) {
     }
     std::vector<deposit> &deposits = *get_deposits[account];
     for (int ix = 1; ix < deposits.size(); ix++) {
-       if (deposits[ix].orb != nullptr) deposits[ix].orb->free();
-       if (deposits[ix].hasdata) {
-           currentsize[account] -= deposits[ix].datasize / 128;
-           totcurrentsize -= deposits[ix].datasize / 128;
-       }
-       if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; // indicate that it does not exist
-       deposits[ix].hasdata = false;
+        if (deposits[ix].orb != nullptr) deposits[ix].orb->free();
+        if (deposits[ix].hasdata) {
+            currentsize[account] -= deposits[ix].datasize / 128;
+            totcurrentsize -= deposits[ix].datasize / 128;
+        }
+        if (deposits[ix].hasdata) (*get_id2ix[account])[deposits[ix].id] = 0; // indicate that it does not exist
+        deposits[ix].hasdata = false;
     }
     deposits.clear();
     get_deposits.erase(account);
@@ -502,8 +502,8 @@ void Bank::remove_account(int account) {
     std::map<int, OrbBlock_struct> &orbid2block = *get_orbid2block[account];
 
     for (auto const &block : nodeid2block) {
-        currentsize[account] -= block.second.N_rows * block.second.data.size()/ 128; // converted into kB
-        totcurrentsize -= block.second.N_rows * block.second.data.size()/ 128;       // converted into kB
+        currentsize[account] -= block.second.N_rows * block.second.data.size() / 128; // converted into kB
+        totcurrentsize -= block.second.N_rows * block.second.data.size() / 128;       // converted into kB
     }
     nodeid2block.clear();
     orbid2block.clear();
@@ -511,7 +511,7 @@ void Bank::remove_account(int account) {
     get_nodeid2block.erase(account);
     get_orbid2block.erase(account);
 
-    for (double* c_p : mem[account]->chunk_p) delete [] c_p;
+    for (double *c_p : mem[account]->chunk_p) delete[] c_p;
     mem.erase(account);
     currentsize.erase(account);
 #endif
@@ -729,7 +729,7 @@ int BankAccount::put_data(int id, int size, ComplexDouble *data) {
     messages[0] = SAVE_DATA;
     messages[1] = account_id;
     messages[2] = id;
-    messages[3] = size * 2;//save as twice as many doubles
+    messages[3] = size * 2;  // save as twice as many doubles
     messages[4] = MIN_SCALE; // to indicate that it is defined by id
     MPI_Send(messages, 5, MPI_INT, bankmaster[id % bank_size], 0, comm_bank);
     MPI_Send(data, size, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank);
@@ -764,7 +764,7 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) {
     messages[0] = SAVE_DATA;
     messages[1] = account_id;
     messages[2] = nIdx.getTranslation(0);
-    messages[3] = size * 2; //save as twice as many doubles
+    messages[3] = size * 2; // save as twice as many doubles
     messages[4] = nIdx.getScale();
     messages[5] = nIdx.getTranslation(1);
     messages[6] = nIdx.getTranslation(2);
@@ -775,7 +775,6 @@ int BankAccount::put_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) {
     return 1;
 }
 
-
 // get data with identity id
 int BankAccount::get_data(int id, int size, double *data) {
 #ifdef MRCPP_HAS_MPI
@@ -791,7 +790,6 @@ int BankAccount::get_data(int id, int size, double *data) {
     return 1;
 }
 
-
 // get data with identity id
 int BankAccount::get_data(int id, int size, ComplexDouble *data) {
 #ifdef MRCPP_HAS_MPI
@@ -802,8 +800,8 @@ int BankAccount::get_data(int id, int size, ComplexDouble *data) {
     messages[2] = id;
     messages[3] = MIN_SCALE;
     MPI_Send(messages, 4, MPI_INT, bankmaster[id % bank_size], 0, comm_bank);
-    //fetch as twice as many doubles
-    MPI_Recv(data, size*2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
+    // fetch as twice as many doubles
+    MPI_Recv(data, size * 2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
 #endif
     return 1;
 }
@@ -841,8 +839,8 @@ int BankAccount::get_data(NodeIndex<3> nIdx, int size, ComplexDouble *data) {
     messages[5] = nIdx.getTranslation(1);
     messages[6] = nIdx.getTranslation(2);
     MPI_Send(messages, 7, MPI_INT, bankmaster[id % bank_size], 0, comm_bank);
-    //fetch as twice as many doubles
-    MPI_Recv(data, size*2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
+    // fetch as twice as many doubles
+    MPI_Recv(data, size * 2, MPI_DOUBLE, bankmaster[id % bank_size], 1, comm_bank, &status);
 #endif
     return 1;
 }
@@ -871,11 +869,11 @@ int BankAccount::put_nodedata(int id, int nodeid, int size, ComplexDouble *data)
     int messages[message_size];
     messages[0] = SAVE_NODEDATA;
     messages[1] = account_id;
-    messages[2] = nodeid; // which block
-    messages[3] = id;     // id within block
-    messages[4] = 2*size;   // size of this data
+    messages[2] = nodeid;   // which block
+    messages[3] = id;       // id within block
+    messages[4] = 2 * size; // size of this data
     MPI_Send(messages, 5, MPI_INT, bankmaster[nodeid % bank_size], 0, comm_bank);
-    MPI_Send(data, 2*size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank);
+    MPI_Send(data, 2 * size, MPI_DOUBLE, bankmaster[nodeid % bank_size], 1, comm_bank);
 #endif
     return 1;
 }
@@ -897,7 +895,6 @@ int BankAccount::get_nodedata(int id, int nodeid, int size, double *data, std::v
     return 1;
 }
 
-
 // get data with identity id
 int BankAccount::get_nodedata(int id, int nodeid, int size, ComplexDouble *data, std::vector<int> &idVec) {
 #ifdef MRCPP_HAS_MPI
@@ -935,7 +932,6 @@ int BankAccount::get_nodeblock(int nodeid, double *data, std::vector<int> &idVec
     return 1;
 }
 
-
 // get all data for nodeid (same nodeid, different orbitals)
 int BankAccount::get_nodeblock(int nodeid, ComplexDouble *data, std::vector<int> &idVec) {
 #ifdef MRCPP_HAS_MPI
@@ -977,7 +973,6 @@ int BankAccount::get_orbblock(int orbid, double *&data, std::vector<int> &nodeid
     return 1;
 }
 
-
 // get all data with identity orbid (same orbital, different nodes)
 int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector<int> &nodeidVec, int bankstart) {
 #ifdef MRCPP_HAS_MPI
@@ -993,7 +988,7 @@ int BankAccount::get_orbblock(int orbid, ComplexDouble *&data, std::vector<int>
     nodeidVec.resize(metadata_block[1]);
     int totsize = metadata_block[2];
     if (totsize > 0) MPI_Recv(nodeidVec.data(), metadata_block[1], MPI_INT, bankmaster[nodeid % bank_size], 2, comm_bank, &status);
-    data = new ComplexDouble[totsize/2];
+    data = new ComplexDouble[totsize / 2];
     if (totsize > 0) MPI_Recv(data, totsize, MPI_DOUBLE, bankmaster[nodeid % bank_size], 3, comm_bank, &status);
 #endif
     return 1;
diff --git a/src/utils/CompFunction.cpp b/src/utils/CompFunction.cpp
index d392202e7..a8ce70799 100644
--- a/src/utils/CompFunction.cpp
+++ b/src/utils/CompFunction.cpp
@@ -1,13 +1,13 @@
-#include <fstream>
+#include "CompFunction.h"
+#include "Bank.h"
 #include "Printer.h"
 #include "parallel.h"
-#include "Bank.h"
-#include "treebuilders/grid.h"
-#include "trees/FunctionNode.h"
-#include "treebuilders/project.h"
 #include "treebuilders/add.h"
+#include "treebuilders/grid.h"
 #include "treebuilders/multiply.h"
-#include "CompFunction.h"
+#include "treebuilders/project.h"
+#include "trees/FunctionNode.h"
+#include <fstream>
 
 /* Some rules for CompFunction:
  * NComp is the number of components. If Ncomp>0, the corresponding trees must exist (can be only empty roots).
@@ -22,105 +22,99 @@
 
 namespace mrcpp {
 
-  template <int D>
-  MultiResolutionAnalysis<D> *defaultCompMRA = nullptr; // Global MRA
+template <int D> MultiResolutionAnalysis<D> *defaultCompMRA = nullptr; // Global MRA
 
-  template <int D>
-  CompFunction<D>::CompFunction(MultiResolutionAnalysis<D> &mra)
-  { defaultCompMRA<D> = &mra;
+template <int D> CompFunction<D>::CompFunction(MultiResolutionAnalysis<D> &mra) {
+    defaultCompMRA<D> = &mra;
     func_ptr = std::make_shared<TreePtr<D>>(false);
     CompD = func_ptr->real;
     CompC = func_ptr->cplx;
     for (int i = 0; i < 4; i++) CompD[i] = nullptr;
     for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-  }
+}
 
-  template <int D>
-  CompFunction<D>::CompFunction()
-  { func_ptr = std::make_shared<TreePtr<D>>(false);
+template <int D> CompFunction<D>::CompFunction() {
+    func_ptr = std::make_shared<TreePtr<D>>(false);
     CompD = func_ptr->real;
     CompC = func_ptr->cplx;
     for (int i = 0; i < 4; i++) CompD[i] = nullptr;
     for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-  }
+}
 
 /*
  * Empty functions (no components defined)
  */
-  template <int D>
-  CompFunction<D>::CompFunction(int n1)
-  {   func_ptr = std::make_shared<TreePtr<D>>(false);
-      CompD = func_ptr->real;
-      CompC = func_ptr->cplx;
-      for (int i = 0; i < 4; i++) CompD[i] = nullptr;
-      for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-      func_ptr->data.n1[0] = n1;
-      func_ptr->data.n2[0] = -1;
-      func_ptr->data.n3[0] = 0;
-      func_ptr->rank = 0;
-      func_ptr->isreal = 1;
-      func_ptr->iscomplex = 0;
-      func_ptr->data.shared = false;
-  }
+template <int D> CompFunction<D>::CompFunction(int n1) {
+    func_ptr = std::make_shared<TreePtr<D>>(false);
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+    func_ptr->data.n1[0] = n1;
+    func_ptr->data.n2[0] = -1;
+    func_ptr->data.n3[0] = 0;
+    func_ptr->rank = 0;
+    func_ptr->isreal = 1;
+    func_ptr->iscomplex = 0;
+    func_ptr->data.shared = false;
+}
 
 /*
  * Empty functions (no components defined)
  */
-  template <int D>
-  CompFunction<D>::CompFunction(int n1, bool share)
-  {   func_ptr = std::make_shared<TreePtr<D>>(share);
-      CompD = func_ptr->real;
-      CompC = func_ptr->cplx;
-      for (int i = 0; i < 4; i++) CompD[i] = nullptr;
-      for (int i = 0; i < 4; i++) CompC[i] = nullptr;
-      func_ptr->data.n1[0] = n1;
-      func_ptr->data.n2[0] = -1;
-      func_ptr->data.n3[0] = 0;
-      func_ptr->rank = 0;
-      func_ptr->isreal = 1;
-      func_ptr->iscomplex = 0;
-      func_ptr->data.shared = share;
-  }
+template <int D> CompFunction<D>::CompFunction(int n1, bool share) {
+    func_ptr = std::make_shared<TreePtr<D>>(share);
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+    for (int i = 0; i < 4; i++) CompD[i] = nullptr;
+    for (int i = 0; i < 4; i++) CompC[i] = nullptr;
+    func_ptr->data.n1[0] = n1;
+    func_ptr->data.n2[0] = -1;
+    func_ptr->data.n3[0] = 0;
+    func_ptr->rank = 0;
+    func_ptr->isreal = 1;
+    func_ptr->iscomplex = 0;
+    func_ptr->data.shared = share;
+}
 
 /*
  * Empty functions (trees defined but zero)
  */
-  template <int D>
-  CompFunction<D>::CompFunction(const CompFunctionData<D>& indata, bool alloc)
-  { func_ptr = std::make_shared<TreePtr<D>>(indata.shared);
+template <int D> CompFunction<D>::CompFunction(const CompFunctionData<D> &indata, bool alloc) {
+    func_ptr = std::make_shared<TreePtr<D>>(indata.shared);
     func_ptr->data = indata;
     CompD = func_ptr->real;
     CompC = func_ptr->cplx;
-    if (alloc) this->alloc(Ncomp());
-    else this->free();
-  }
+    if (alloc)
+        this->alloc(Ncomp());
+    else
+        this->free();
+}
 
 /** @brief Copy constructor
  *
  * Shallow copy: meta data is copied along with the component pointers,
  * NO transfer of ownership.
  */
-  template <int D>
-  CompFunction<D>::CompFunction(const CompFunction<D> &compfunc) {
-      func_ptr = compfunc.func_ptr;
-      CompD = func_ptr->real;
-      CompC = func_ptr->cplx;
-  }
+template <int D> CompFunction<D>::CompFunction(const CompFunction<D> &compfunc) {
+    func_ptr = compfunc.func_ptr;
+    CompD = func_ptr->real;
+    CompC = func_ptr->cplx;
+}
 
 /** @brief Copy constructor
  *
  * Shallow copy: meta data is copied along with the component pointers,
  * NO transfer of ownership.
  */
-  template <int D>
-  CompFunction<D> &CompFunction<D>::operator=(const CompFunction<D> &compfunc) {
-      if (this != &compfunc) {
-          func_ptr = compfunc.func_ptr;
-          CompD = func_ptr->real;
-          CompC = func_ptr->cplx;
-      }
-      return *this;
-  }
+template <int D> CompFunction<D> &CompFunction<D>::operator=(const CompFunction<D> &compfunc) {
+    if (this != &compfunc) {
+        func_ptr = compfunc.func_ptr;
+        CompD = func_ptr->real;
+        CompC = func_ptr->cplx;
+    }
+    return *this;
+}
 
 template <int D>
 /** @brief Parameter copy
@@ -131,9 +125,7 @@ CompFunction<D> CompFunction<D>::paramCopy(bool alloc) const {
     return CompFunction<D>(func_ptr->data, alloc);
 }
 
-
-template <int D>
-void CompFunction<D>::flushMRAData() {
+template <int D> void CompFunction<D>::flushMRAData() {
     const auto &box = defaultCompMRA<3>->getWorldBox();
     func_ptr->data.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
     func_ptr->data.order = defaultCompMRA<3>->getOrder();
@@ -147,8 +139,7 @@ void CompFunction<D>::flushMRAData() {
     func_ptr->data.corner[2] = box.getCornerIndex().getTranslation(2);
 }
 
-template <int D>
-void CompFunction<D>::flushFuncData() {
+template <int D> void CompFunction<D>::flushFuncData() {
     if (D == 3) flushMRAData();
     for (int i = 0; i < Ncomp(); i++) {
         if (isreal()) {
@@ -160,8 +151,7 @@ void CompFunction<D>::flushFuncData() {
     for (int i = Ncomp(); i < 4; i++) func_ptr->Nchunks[i] = 0;
 }
 
-template <int D>
-CompFunctionData<D> CompFunction<D>::getFuncData() const {
+template <int D> CompFunctionData<D> CompFunction<D>::getFuncData() const {
     CompFunctionData<D> outdata;
     const auto &box = defaultCompMRA<3>->getWorldBox();
     outdata.type = defaultCompMRA<3>->getScalingBasis().getScalingType();
@@ -185,28 +175,26 @@ CompFunctionData<D> CompFunction<D>::getFuncData() const {
     return outdata;
 }
 
-
-template <int D>
-ComplexDouble CompFunction<D>::integrate() const {
+template <int D> ComplexDouble CompFunction<D>::integrate() const {
     ComplexDouble integral;
-    if (isreal()) integral = CompD[0]->integrate();
-    else integral = CompC[0]->integrate();
+    if (isreal())
+        integral = CompD[0]->integrate();
+    else
+        integral = CompC[0]->integrate();
     return integral;
 }
 
-template <int D>
-double CompFunction<D>::norm() const {
+template <int D> double CompFunction<D>::norm() const {
     double norm = getSquareNorm();
     if (norm > 0.0) norm = std::sqrt(norm);
     return norm;
 }
-template <int D>
-double CompFunction<D>::getSquareNorm() const {
+template <int D> double CompFunction<D>::getSquareNorm() const {
     double norm = 0.0;
     for (int i = 0; i < Ncomp(); i++) {
-        if (isreal() and CompD[i]!= nullptr) {
+        if (isreal() and CompD[i] != nullptr) {
             norm += CompD[i]->getSquareNorm();
-        } else if (iscomplex() and CompC[i]!= nullptr) {
+        } else if (iscomplex() and CompC[i] != nullptr) {
             norm += CompC[i]->getSquareNorm();
         }
     }
@@ -217,63 +205,60 @@ double CompFunction<D>::getSquareNorm() const {
 //  Allocates all ialloc trees, with indices 0,...ialloc-1
 //  nalloc is the number of components allocated. ialloc=1 allocates one tree.
 //  deletes all old trees if found.
-template <int D>
-void CompFunction<D>::alloc(int nalloc, bool zero) {
-      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
-      for (int i = 0; i < nalloc; i++) {
-          delete CompD[i];
-          delete CompC[i];
-          CompD[i] = nullptr;
-          CompC[i] = nullptr;
-          if (isreal()) {
-              CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
-              if (zero) CompD[i]->setZero();
-          }
-          if (iscomplex()) {
-              CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
-              if (zero) CompC[i]->setZero();
-          }
-          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
-      }
-      for (int i = nalloc; i < Ncomp(); i++) {
-          //delete possible remaining components
-          delete CompD[i];
-          delete CompC[i];
-          CompD[i] = nullptr;
-          CompC[i] = nullptr;
-      }
+template <int D> void CompFunction<D>::alloc(int nalloc, bool zero) {
+    if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
+    if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex");
+    for (int i = 0; i < nalloc; i++) {
+        delete CompD[i];
+        delete CompC[i];
+        CompD[i] = nullptr;
+        CompC[i] = nullptr;
+        if (isreal()) {
+            CompD[i] = new FunctionTree<D, double>(*defaultCompMRA<D>, func_ptr->shared_mem_real);
+            if (zero) CompD[i]->setZero();
+        }
+        if (iscomplex()) {
+            CompC[i] = new FunctionTree<D, ComplexDouble>(*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
+            if (zero) CompC[i]->setZero();
+        }
+        func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+    }
+    for (int i = nalloc; i < Ncomp(); i++) {
+        // delete possible remaining components
+        delete CompD[i];
+        delete CompC[i];
+        CompD[i] = nullptr;
+        CompC[i] = nullptr;
+    }
 }
 
 //  Allocate one empty trees for one specific component.
 //  The tree must be defined as real or complex already.
 //  ialloc is index allocated. ialloc=0 allocates the tree with index zero.
 //  deletes old tree if found.
-template <int D>
-void CompFunction<D>::alloc_comp(int ialloc) {
-      if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
-      if (isreal() == 0 and iscomplex() == 0)  MSG_ABORT("Function must be defined either real or complex");
-      int i = ialloc;
-      delete CompD[i];
-      delete CompC[i];
-      CompD[i] = nullptr;
-      CompC[i] = nullptr;
-      if (isreal()) {
-          CompD[i] =  new FunctionTree<D, double> (*defaultCompMRA<D>, func_ptr->shared_mem_real);
-          CompD[i]->setZero();
-      }
-      if (iscomplex()) {
-          CompC[i] = new FunctionTree<D, ComplexDouble> (*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
-          CompC[i]->setZero();
-      }
-      func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+template <int D> void CompFunction<D>::alloc_comp(int ialloc) {
+    if (defaultCompMRA<D> == nullptr) MSG_ABORT("Default MRA not yet defined");
+    if (isreal() == 0 and iscomplex() == 0) MSG_ABORT("Function must be defined either real or complex");
+    int i = ialloc;
+    delete CompD[i];
+    delete CompC[i];
+    CompD[i] = nullptr;
+    CompC[i] = nullptr;
+    if (isreal()) {
+        CompD[i] = new FunctionTree<D, double>(*defaultCompMRA<D>, func_ptr->shared_mem_real);
+        CompD[i]->setZero();
+    }
+    if (iscomplex()) {
+        CompC[i] = new FunctionTree<D, ComplexDouble>(*defaultCompMRA<D>, func_ptr->shared_mem_cplx);
+        CompC[i]->setZero();
+    }
+    func_ptr->Ncomp = std::max(Ncomp(), i + 1);
 }
 
-template <int D>
-void CompFunction<D>::free() {
+template <int D> void CompFunction<D>::free() {
     for (int i = 0; i < Ncomp(); i++) {
-        if(CompD[i] != nullptr) delete CompD[i];
-        if(CompC[i] !=  nullptr) delete CompC[i];
+        if (CompD[i] != nullptr) delete CompD[i];
+        if (CompC[i] != nullptr) delete CompC[i];
         CompD[i] = nullptr;
         CompC[i] = nullptr;
     }
@@ -282,22 +267,20 @@ void CompFunction<D>::free() {
     func_ptr->Ncomp = 0;
 }
 
-template <int D>
-int CompFunction<D>::getSizeNodes() const {
+template <int D> int CompFunction<D>::getSizeNodes() const {
     int size_mb = 0; // Memory size in kB
     for (int i = 0; i < Ncomp(); i++) {
-        if (isreal() and CompD[i]!= nullptr) size_mb +=CompD[i]->getSizeNodes();
-        if (iscomplex() and CompC[i]!= nullptr) size_mb +=CompC[i]->getSizeNodes();
+        if (isreal() and CompD[i] != nullptr) size_mb += CompD[i]->getSizeNodes();
+        if (iscomplex() and CompC[i] != nullptr) size_mb += CompC[i]->getSizeNodes();
     }
     return size_mb;
 }
 
-template <int D>
-int CompFunction<D>::getNNodes() const {
+template <int D> int CompFunction<D>::getNNodes() const {
     int nNodes = 0;
-     for (int i = 0; i < Ncomp(); i++) {
-         if (isreal() and CompD[i]!= nullptr) nNodes +=CompD[i]->getNNodes();
-         if (iscomplex() and CompC[i]!= nullptr) nNodes +=CompC[i]->getNNodes();
+    for (int i = 0; i < Ncomp(); i++) {
+        if (isreal() and CompD[i] != nullptr) nNodes += CompD[i]->getNNodes();
+        if (iscomplex() and CompC[i] != nullptr) nNodes += CompC[i]->getNNodes();
     }
     return nNodes;
 }
@@ -307,70 +290,67 @@ int CompFunction<D>::getNNodes() const {
  * Will use complex conjugate in operations (add, multiply etc.)
  * Does change the state (conj flag), but does not actively change all coefficients.
  */
-template <int D>
-void CompFunction<D>::dagger() {
+template <int D> void CompFunction<D>::dagger() {
     func_ptr->data.conj = not(func_ptr->data.conj);
     for (int i = 0; i < Ncomp(); i++) {
-        if (CompC[i]!= nullptr) CompC[i]->setConjugate(func_ptr->data.conj);
+        if (CompC[i] != nullptr) CompC[i]->setConjugate(func_ptr->data.conj);
     }
 }
 
-template <int D>
-FunctionTree<D, double> &CompFunction<D>::real(int i) {
+template <int D> FunctionTree<D, double> &CompFunction<D>::real(int i) {
     if (!isreal()) MSG_ABORT("not real function");
     if (CompD[i] == nullptr) alloc_comp(i);
     return *CompD[i];
 }
-template <int D> //NB: should return CompC in the future
-FunctionTree<D, double>  &CompFunction<D>::imag(int i) {
+template <int D> // NB: should return CompC in the future
+FunctionTree<D, double> &CompFunction<D>::imag(int i) {
     MSG_ABORT("Must choose real or complex");
     if (!iscomplex()) MSG_ABORT("not complex function");
     return *CompD[i];
 }
 
-template <int D>
-FunctionTree<D, ComplexDouble>  &CompFunction<D>::complex(int i) {
+template <int D> FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) {
     if (!iscomplex()) MSG_ABORT("not marked as a complex function");
     if (CompC[i] == nullptr) alloc_comp(i);
     return *CompC[i];
 }
 
-template <int D>
-const FunctionTree<D, double> &CompFunction<D>::real(int i) const {
+template <int D> const FunctionTree<D, double> &CompFunction<D>::real(int i) const {
     if (!isreal()) MSG_ABORT("not real function");
     return *CompD[i];
 }
-template <int D> //NB: should use complex or real
+template <int D> // NB: should use complex or real
 const FunctionTree<D, double> &CompFunction<D>::imag(int i) const {
     MSG_ABORT("Must choose real or complex");
     if (!iscomplex()) MSG_ABORT("not complex function");
     return *CompD[i];
 }
-template <int D>
-const FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) const {
+template <int D> const FunctionTree<D, ComplexDouble> &CompFunction<D>::complex(int i) const {
     if (!iscomplex()) MSG_ABORT("not marked as a complex function");
     return *CompC[i];
 }
 
- /* for backwards compatibility */
-template <int D>
-void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
-      func_ptr->isreal = 1;
-      //if (CompD[i] != nullptr) delete CompD[i];
-      CompD[i] = tree;
-      if (tree != nullptr) {
-          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
-      } else {func_ptr->Ncomp = std::min(Ncomp(), i);}
+/* for backwards compatibility */
+template <int D> void CompFunction<D>::setReal(FunctionTree<D, double> *tree, int i) {
+    func_ptr->isreal = 1;
+    // if (CompD[i] != nullptr) delete CompD[i];
+    CompD[i] = tree;
+    if (tree != nullptr) {
+        func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+    } else {
+        func_ptr->Ncomp = std::min(Ncomp(), i);
+    }
 }
 
-template <int D>
-void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
-      func_ptr->iscomplex = 1;
-      //if (CompC[i] != nullptr) delete CompC[i];
-      CompC[i] = tree;
-      if (tree != nullptr) {
-          func_ptr->Ncomp = std::max(Ncomp(), i + 1);
-      } else {func_ptr->Ncomp = std::min(Ncomp(), i);}
+template <int D> void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
+    func_ptr->iscomplex = 1;
+    // if (CompC[i] != nullptr) delete CompC[i];
+    CompC[i] = tree;
+    if (tree != nullptr) {
+        func_ptr->Ncomp = std::max(Ncomp(), i + 1);
+    } else {
+        func_ptr->Ncomp = std::min(Ncomp(), i);
+    }
 }
 
 /** @brief In place addition.
@@ -378,32 +358,30 @@ void CompFunction<D>::setCplx(FunctionTree<D, ComplexDouble> *tree, int i) {
  * Output is extended to union grid.
  *
  */
-template <int D>
-void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
+template <int D> void CompFunction<D>::add(ComplexDouble c, CompFunction<D> inp) {
 
-    if (Ncomp()<inp.Ncomp()){
+    if (Ncomp() < inp.Ncomp()) {
         func_ptr->data = inp.func_ptr->data;
         alloc(inp.Ncomp(), true);
     }
 
     for (int i = 0; i < inp.Ncomp(); i++) {
-        if (inp.isreal() and c.imag()<MachineZero) {
-            CompD[i]->add_inplace(c.real(),*inp.CompD[i]);
+        if (inp.isreal() and c.imag() < MachineZero) {
+            CompD[i]->add_inplace(c.real(), *inp.CompD[i]);
         } else {
-            if (this->isreal()){
+            if (this->isreal()) {
                 CompD[i]->CopyTreeToComplex(CompC[i]);
                 delete CompD[i];
                 CompD[i] = nullptr;
                 func_ptr->iscomplex = true;
                 func_ptr->isreal = false;
             }
-            CompC[i]->add_inplace(c,*inp.CompC[i]);
+            CompC[i]->add_inplace(c, *inp.CompC[i]);
         }
     }
 }
 
-template <int D>
-int CompFunction<D>::crop(double prec) {
+template <int D> int CompFunction<D>::crop(double prec) {
     if (prec < 0.0) return 0;
     int nChunksremoved = 0;
     for (int i = 0; i < Ncomp(); i++) {
@@ -417,15 +395,14 @@ int CompFunction<D>::crop(double prec) {
 }
 
 /** @brief In place multiply with scalar. Fully in-place.*/
-template <int D>
-void CompFunction<D>::rescale(ComplexDouble c) {
+template <int D> void CompFunction<D>::rescale(ComplexDouble c) {
     bool need_to_rescale = not(isShared()) or mpi::share_master();
     if (need_to_rescale) {
         for (int i = 0; i < Ncomp(); i++) {
             if (iscomplex()) {
                 CompC[i]->rescale(c);
             } else {
-                if(abs(c.imag())>MachineZero){ //works only only for NComp==1)
+                if (abs(c.imag()) > MachineZero) { // works only for NComp==1
                     CompD[i]->CopyTreeToComplex(CompC[i]);
                     delete CompD[i];
                     CompD[i] = nullptr;
@@ -437,13 +414,13 @@ void CompFunction<D>::rescale(ComplexDouble c) {
                 }
             }
         }
-    } else MSG_ABORT("Not implemented");
+    } else
+        MSG_ABORT("Not implemented");
 }
 
-
-template class  MultiResolutionAnalysis<1>;
-template class  MultiResolutionAnalysis<2>;
-template class  MultiResolutionAnalysis<3>;
+template class MultiResolutionAnalysis<1>;
+template class MultiResolutionAnalysis<2>;
+template class MultiResolutionAnalysis<3>;
 template class CompFunction<1>;
 template class CompFunction<2>;
 template class CompFunction<3>;
@@ -452,44 +429,40 @@ template class CompFunction<3>;
  *
  * Deep copy: meta data is copied along with the content of each component.
  */
-  template <int D>
-  void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp) {
-      out->func_ptr->data = inp.func_ptr->data;
-      out->alloc(inp.Ncomp());
-      for (int i = 0; i < inp.Ncomp(); i++) {
-          if (inp.isreal()) {
-              inp.CompD[i]->deep_copy(out->CompD[i]);
-          } else {
-              inp.CompC[i]->deep_copy(out->CompC[i]);
-          }
-      }
-  }
-
+template <int D> void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp) {
+    out->func_ptr->data = inp.func_ptr->data;
+    out->alloc(inp.Ncomp());
+    for (int i = 0; i < inp.Ncomp(); i++) {
+        if (inp.isreal()) {
+            inp.CompD[i]->deep_copy(out->CompD[i]);
+        } else {
+            inp.CompC[i]->deep_copy(out->CompC[i]);
+        }
+    }
+}
 
 /** @brief Deep copy
  *
  * Deep copy: meta func_ptr->data is copied along with the content of each component.
  */
-  template <int D>
-  void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp) {
-      out.func_ptr->data = inp.func_ptr->data;
-      out.alloc(inp.Ncomp());
-      for (int i = 0; i < inp.Ncomp(); i++) {
-          if (inp.isreal()) {
-              inp.CompD[i]->deep_copy(out.CompD[i]);
-          } else {
-              inp.CompC[i]->deep_copy(out.CompC[i]);
-          }
-      }
-  }
+template <int D> void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp) {
+    out.func_ptr->data = inp.func_ptr->data;
+    out.alloc(inp.Ncomp());
+    for (int i = 0; i < inp.Ncomp(); i++) {
+        if (inp.isreal()) {
+            inp.CompD[i]->deep_copy(out.CompD[i]);
+        } else {
+            inp.CompC[i]->deep_copy(out.CompC[i]);
+        }
+    }
+}
 
 /** @brief out = a*inp_a + b*inp_b
  *
  * Recast into linear_combination.
  *
  */
-template <int D>
-void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate) {
+template <int D> void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate) {
     std::vector<ComplexDouble> coefs(2);
     coefs[0] = a;
     coefs[1] = b;
@@ -505,15 +478,15 @@ void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDo
  *
  * OMP parallel, but not MPI parallel
  */
-template <int D>
-    void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate) {
+template <int D> void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate) {
     double thrs = MachineZero;
     bool need_to_add = not(out.isShared()) or mpi::share_master();
     bool share = out.isShared();
     out.func_ptr->data = inp[0].func_ptr->data;
     out.func_ptr->data.shared = share; // we don' inherit the shareness
     bool iscomplex = false;
-    for (int i = 0; i < inp.size(); i++) if(inp[i].iscomplex() or c[i].imag() > MachineZero) iscomplex = true;
+    for (int i = 0; i < inp.size(); i++)
+        if (inp[i].iscomplex() or c[i].imag() > MachineZero) iscomplex = true;
     if (iscomplex) {
         out.func_ptr->data.iscomplex = 1;
         out.func_ptr->data.isreal = 0;
@@ -524,7 +497,7 @@ template <int D>
             FunctionTreeVector<D, double> fvec; // one component vector
             for (int i = 0; i < inp.size(); i++) {
                 if (std::norm(c[i]) < thrs) continue;
-                 if (inp[i].getNNodes()==0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue;
+                if (inp[i].getNNodes() == 0 or inp[i].CompD[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i].real(), inp[i].CompD[comp]));
             }
             if (need_to_add) {
@@ -548,16 +521,16 @@ template <int D>
                     inp[i].CompD[comp] = nullptr;
                     inp[i].func_ptr->iscomplex = true;
                     inp[i].func_ptr->isreal = false;
-               }
+                }
                 if (std::norm(c[i]) < thrs) continue;
-                if (inp[i].getNNodes()==0 or inp[i].CompC[comp]->getSquareNorm() < thrs) continue;
+                if (inp[i].getNNodes() == 0 or inp[i].CompC[comp]->getSquareNorm() < thrs) continue;
                 fvec.push_back(std::make_tuple(c[i], inp[i].CompC[comp]));
             }
             if (need_to_add) {
                 if (fvec.size() > 0) {
                     if (prec < 0.0) {
                         build_grid(*out.CompC[comp], fvec);
-                      mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
+                        mrcpp::add(prec, *out.CompC[comp], fvec, 0, false, conjugate);
                     } else {
                         mrcpp::add(prec, *out.CompC[comp], fvec, -1, false, conjugate);
                     }
@@ -573,18 +546,15 @@ template <int D>
 /** @brief out = inp_a * inp_b
  *
  */
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate) {
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate) {
     multiply(prec, out, 1.0, inp_a, inp_b, -1, absPrec, useMaxNorms, conjugate);
 }
 
-
 /** @brief out = inp_a * inp_b
  *  Takes conjugate of inp_a if conjugate=true
  *  In case of mixed real/complex inputs, the real functions are converted into complex functions.
  */
-template <int D>
-void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
+template <int D> void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter, bool absPrec, bool useMaxNorms, bool conjugate) {
     if (inp_b.func_ptr->conj) MSG_ABORT("Not implemented");
     if (inp_a.func_ptr->conj) conjugate = (not conjugate);
     bool need_to_multiply = not(out.isShared()) or mpi::share_master();
@@ -593,7 +563,7 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
     bool share = out.isShared();
     out.func_ptr->data = inp_a.func_ptr->data;
     out.func_ptr->data.shared = share; // we don't inherit the shareness
-    out.func_ptr->conj = false; // we don't inherit conjugaison
+    out.func_ptr->conj = false;        // we don't inherit conjugation
     for (int comp = 0; comp < inp_a.Ncomp(); comp++) {
         out.func_ptr->data.c1[comp] = inp_a.func_ptr->data.c1[comp] * inp_b.func_ptr->data.c1[comp]; // we could put this is coef if everything is real?
         if (inp_a.isreal() and inp_b.isreal()) {
@@ -607,18 +577,18 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                 } else {
                     // Adaptive grid
                     mrcpp::multiply(prec, *out.CompD[comp], coef, *inp_a.CompD[comp], *inp_b.CompD[comp], maxIter, absPrec, useMaxNorms, conjugate);
-               }
+                }
             }
         } else {
-           // if one of the input is real, we simply make a new complex copy of it
+            // if one of the input is real, we simply make a new complex copy of it
             bool inp_aisReal = inp_a.isreal();
             bool inp_bisReal = inp_b.isreal();
-            if(inp_aisReal) {
-               inp_a.CompD[comp]->CopyTreeToComplex(inp_a.CompC[comp]);
-               inp_a.func_ptr->iscomplex = true;
-               inp_a.func_ptr->isreal = false;
+            if (inp_aisReal) {
+                inp_a.CompD[comp]->CopyTreeToComplex(inp_a.CompC[comp]);
+                inp_a.func_ptr->iscomplex = true;
+                inp_a.func_ptr->isreal = false;
             }
-            if(inp_bisReal) {
+            if (inp_bisReal) {
                 inp_b.CompD[comp]->CopyTreeToComplex(inp_b.CompC[comp]);
                 inp_b.func_ptr->iscomplex = true;
                 inp_b.func_ptr->isreal = false;
@@ -635,19 +605,19 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                     build_grid(*out.CompC[comp], *inp_a.CompC[comp]);
                     build_grid(*out.CompC[comp], *inp_b.CompC[comp]);
                     mrcpp::multiply(prec, *out.CompC[comp], coef, *inp_a.CompC[comp], *inp_b.CompC[comp], 0, false, false, conjugate);
-                } else {// note that this assumes Ncomp=1
+                } else { // note that this assumes Ncomp=1
                     // Adaptive grid
-                    if (out.CompD[comp] != nullptr) { //NB: func_ptr has alreadybeen overwritten!
-                        if(out.CompD[comp]->getNNodes() > 0){
+                    if (out.CompD[comp] != nullptr) { // NB: func_ptr has alreadybeen overwritten!
+                        if (out.CompD[comp]->getNNodes() > 0) {
                             out.CompD[comp]->CopyTreeToComplex(out.CompC[comp]);
                             out.func_ptr->iscomplex = 1;
                             out.func_ptr->isreal = 0;
                             delete out.CompD[comp];
                             out.CompD[comp] = nullptr;
                         } else {
-                             out.func_ptr->iscomplex = 1;
-                             out.func_ptr->isreal = 0;
-                             out.alloc(out.Ncomp());
+                            out.func_ptr->iscomplex = 1;
+                            out.func_ptr->isreal = 0;
+                            out.alloc(out.Ncomp());
                         }
                     } else {
                         out.func_ptr->iscomplex = 1;
@@ -658,13 +628,13 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
                 }
             }
             // restore original tree
-            if(inp_aisReal) {
+            if (inp_aisReal) {
                 delete inp_a.CompC[comp];
                 inp_a.CompC[comp] = nullptr;
                 inp_a.func_ptr->iscomplex = false;
                 inp_a.func_ptr->isreal = true;
             }
-            if(inp_bisReal) {
+            if (inp_bisReal) {
                 delete inp_b.CompC[comp];
                 inp_b.CompC[comp] = nullptr;
                 inp_b.func_ptr->iscomplex = false;
@@ -679,8 +649,7 @@ void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> in
  *
  *  Only one component is multiplied
  */
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
     if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented");
     if (inp_a.isreal() != 1) MSG_ABORT("Not implemented");
     if (conjugate) MSG_ABORT("Not implemented");
@@ -696,8 +665,7 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
  *
  *  Only one component is multiplied
  */
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
     MSG_ABORT("Not implemented");
     if (inp_a.Ncomp() > 1) MSG_ABORT("Not implemented");
     if (inp_a.iscomplex() != 1) MSG_ABORT("Not implemented");
@@ -707,14 +675,12 @@ void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunctio
     CompFunctionVector CompVecOut;
     // CompVecOut = multiply(CompVec, f, prec, nrefine, true);
     out = CompVecOut[0];
-
 }
 
 /** @brief out = inp_a * f
  *
  */
-template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
+template <int D> void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine, bool conjugate) {
     CompFunction<D> func_a;
     func_a.func_ptr->isreal = 1;
     func_a.func_ptr->iscomplex = 0;
@@ -723,8 +689,7 @@ void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, Representabl
     multiply(out, func_a, f, prec, nrefine, conjugate);
     func_a.CompD[0] = nullptr;
 }
-template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
+template <int D> void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine, bool conjugate) {
     CompFunction<D> func_a(1);
     func_a.func_ptr->isreal = 0;
     func_a.func_ptr->iscomplex = 1;
@@ -733,26 +698,23 @@ void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, Repre
     func_a.CompC[0] = nullptr;
 }
 
-
 /** @brief Compute <bra|ket> = int bra^\dag(r) * ket(r) dr.
  *
  *  Sum of component dots.
  *  Notice that the <bra| position is already complex conjugated.
  *
  */
-template <int D>
-ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
+template <int D> ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
     if (bra.func_ptr->conj or ket.func_ptr->conj) MSG_ABORT("Not implemented");
     ComplexDouble dotprodtot = 0.0;
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
         ComplexDouble dotprod = 0.0;
-        if (bra.func_ptr->data.n1[0] != ket.func_ptr->data.n1[0] and
-            bra.func_ptr->data.n1[0] != 0 and ket.func_ptr->data.n1[0]!= 0) continue;
+        if (bra.func_ptr->data.n1[0] != ket.func_ptr->data.n1[0] and bra.func_ptr->data.n1[0] != 0 and ket.func_ptr->data.n1[0] != 0) continue;
         if (bra.isreal() and ket.isreal()) {
             dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompD[comp]);
-        } else  if (bra.isreal() and ket.iscomplex()) {
+        } else if (bra.isreal() and ket.iscomplex()) {
             dotprod += mrcpp::dot(*bra.CompD[comp], *ket.CompC[comp]);
-        } else  if (bra.iscomplex() and ket.isreal()) {
+        } else if (bra.iscomplex() and ket.isreal()) {
             dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompD[comp]);
         } else {
             dotprod += mrcpp::dot(*bra.CompC[comp], *ket.CompC[comp]);
@@ -767,36 +729,34 @@ ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket) {
     }
 }
 
-
 /** @brief Compute  <bra|ket> = int |bra^\dag(r)| * |ket(r)| dr.
  *
  *  sum of components
  */
-template <int D>
-double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
+template <int D> double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket) {
     double dotprodtot = 0.0;
     for (int comp = 0; comp < bra.Ncomp(); comp++) {
         double dotprod = 0.0;
         if (bra.isreal() and ket.isreal()) {
             dotprod += mrcpp::node_norm_dot(*bra.CompD[comp], *ket.CompD[comp]);
-        } else  if (bra.isreal() and ket.iscomplex()) {
+        } else if (bra.isreal() and ket.iscomplex()) {
             MSG_ABORT("Not implemented");
-        } else  if (bra.iscomplex() and ket.isreal()) {
+        } else if (bra.iscomplex() and ket.isreal()) {
             MSG_ABORT("Not implemented");
         } else {
             dotprod += mrcpp::node_norm_dot(*bra.CompC[comp], *ket.CompC[comp]);
         }
-        dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); //for fully complex values this does not really give the norm
+        dotprod *= std::norm(bra.func_ptr->data.c1[comp]) * std::norm(ket.func_ptr->data.c1[comp]); // for fully complex values this does not really give the norm
         dotprodtot += dotprod;
-   }
+    }
     return dotprodtot;
 }
 
-void project(CompFunction<3> &out, std::function<double(const Coord<3>& r)> f, double prec) {
+void project(CompFunction<3> &out, std::function<double(const Coord<3> &r)> f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 1;
     out.func_ptr->iscomplex = 0;
-    if(out.Ncomp() < 1) out.alloc(1);
+    if (out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
@@ -806,35 +766,32 @@ void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 0;
     out.func_ptr->iscomplex = 1;
-    if(out.Ncomp() < 1) out.alloc(1);
+    if (out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<3>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 123123, mpi::comm_share);
 }
 
-template <int D>
-void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec) {
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 1;
     out.func_ptr->iscomplex = 0;
-    if(out.Ncomp() < 1) out.alloc(1);
+    if (out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<D, double>(prec, *out.CompD[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
 }
-template <int D>
-void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec) {
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec) {
     bool need_to_project = not(out.isShared()) or mpi::share_master();
     out.func_ptr->isreal = 0;
     out.func_ptr->iscomplex = 1;
-    if(out.Ncomp() < 1) out.alloc(1);
+    if (out.Ncomp() < 1) out.alloc(1);
     if (need_to_project) mrcpp::project<D, ComplexDouble>(prec, *out.CompC[0], f);
     mpi::share_function(out, 0, 132231, mpi::comm_share);
- }
+}
 
 // CompFunctionVector
 
-
-CompFunctionVector::CompFunctionVector(int N):
-    std::vector<CompFunction<3>>(N) {
+CompFunctionVector::CompFunctionVector(int N)
+        : std::vector<CompFunction<3>>(N) {
     for (int i = 0; i < N; i++) (*this)[i].func_ptr->rank = i;
     vecMRA = defaultCompMRA<3>;
 }
@@ -842,7 +799,6 @@ void CompFunctionVector::distribute() {
     for (int i = 0; i < this->size(); i++) (*this)[i].func_ptr->rank = i;
 }
 
-
 /** @brief Make a linear combination of functions
  *
  * Uses "local" representation: treats one node at a time.
@@ -860,12 +816,12 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
     bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
     int N = Phi.size();
     int M = Psi.size();
-    for (int i = 0; i < M; i++){
-        for (int j; j< 4; j++) delete Psi[i].CompD[j];
+    for (int i = 0; i < M; i++) {
+        for (int j; j < 4; j++) delete Psi[i].CompD[j];
         Psi[i].func_ptr->isreal = 0;
         Psi[i].func_ptr->iscomplex = 1;
     }
-    for (int i = 0; i < N; i++){
+    for (int i = 0; i < N; i++) {
         if (Phi[i].func_ptr->conj) MSG_ABORT("Conjugaison not implemneted for rotations");
     }
     if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
@@ -898,15 +854,15 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 
     // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
 
-    BankAccount nodesPhi;             // to put the original nodes
-    BankAccount nodesRotated;         // to put the rotated nodes
+    BankAccount nodesPhi;     // to put the original nodes
+    BankAccount nodesRotated; // to put the rotated nodes
 
     // used for serial only:
     std::vector<std::vector<ComplexDouble *>> coeffVec(N);
     std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
+                                                 // orbital given the node index in the reference tree
     if (serial) {
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
         std::vector<int> parindexVec; // serialIx of the parent nodes
@@ -917,9 +873,9 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             // make a map that gives j from indexVec
             int orb_node_ix = 0;
             for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
+                orb2node[j][ix] = orb_node_ix++;
+                if (ix < 0) continue;
+                node2orbVec[ix].push_back(j);
             }
         }
     } else { // MPI case
@@ -929,24 +885,24 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
     }
 
     // 4) rotate all the nodes
-    IntMatrix split_serial;                             // in the serial case all split are stored in one array
+    IntMatrix split_serial;                                 // in the serial case all split are stored in one array
     std::vector<std::vector<ComplexDouble *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
-    std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<ComplexMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
-   // j indices are for unrotated orbitals, i indices are for rotated orbitals
+    std::vector<std::map<int, int>> ix2coef(M);             // to find the index in for example rotCoeffVec[] corresponding to a serialIx
+    int csize;                                              // size of the current coefficients (different for roots and branches)
+    std::vector<ComplexMatrix> rotatedCoeffVec;             // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+                                                            // j indices are for unrotated orbitals, i indices are for rotated orbitals
     if (serial) {
-        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
-        split_serial.resize(M, max_n); // not use in the MPI case
+        std::map<int, int> ix2coef_ref; // to find the index n corresponding to a serialIx
+        split_serial.resize(M, max_n);  // not use in the MPI case
         for (int n = 0; n < max_n; n++) {
             int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
             ix2coef_ref[node_ix] = n;
             for (int i = 0; i < M; i++) split_serial(i, n) = 1;
         }
         std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
-        // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
-        // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
-        // n is finished.
+                                              // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
+                                              // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
+                                              // n is finished.
 #pragma omp parallel for schedule(dynamic)
         for (int n = 0; n < max_n; n++) {
             int csize;
@@ -973,14 +929,14 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             };
 
             std::vector<int> orbiVec;
-            for (int i = 0; i < M; i++) { // loop over all rotated orbitals
+            for (int i = 0; i < M; i++) {                                                                        // loop over all rotated orbitals
                 if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
 
             // 4c) rotate this node
             ComplexMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
-            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
+            for (int i = 0; i < orbiVec.size(); i++) {        // loop over rotated orbitals
                 for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = U(orbjVec[j], orbiVec[i]); }
             }
             ComplexMatrix rotatedCoeff(csize, orbiVec.size());
@@ -1041,9 +997,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             int parentid = parindexVec_ref[n];
             if (parentid == -1) {
                 // root node, split if output needed
-                for (int i = 0; i < M; i++) {
-                    split[i] = 1.0;
-                }
+                for (int i = 0; i < M; i++) { split[i] = 1.0; }
                 csize = sizecoeff;
             } else {
                 // note that it will wait until data is available
@@ -1052,7 +1006,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
             }
             std::vector<int> orbiVec;
             std::vector<int> orbjVec;
-            for (int i = 0; i < M; i++) {  // loop over rotated orbitals
+            for (int i = 0; i < M; i++) {     // loop over rotated orbitals
                 if (split[i] < 0.0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
@@ -1092,7 +1046,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
         mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
     }
 
-     // 5) reconstruct trees using rotated nodes.
+    // 5) reconstruct trees using rotated nodes.
 
     // only serial case can use OMP, because MPI cannot be used by threads
     if (serial) {
@@ -1101,16 +1055,16 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
 
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
-           if (coeffpVec[j].size()==0) continue;
-            Psi[j].alloc(1); //All data is stored in coeffpVec[j]
+            if (coeffpVec[j].size() == 0) continue;
+            Psi[j].alloc(1); // All data is stored in coeffpVec[j]
             Psi[j].complex().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-       }
+        }
     } else { // MPI case
         for (int j = 0; j < M; j++) {
             if (not mpi::my_func(j)) continue;
             // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
             std::vector<ComplexDouble *> coeffpVec; //
-            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
+            std::map<int, int> ix2coef;             // to find the index in coeffVec[] corresponding to a serialIx
             int ix = 0;
             std::vector<ComplexDouble *> pointerstodelete; // list of temporary arrays to clean up
             for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
@@ -1139,8 +1093,6 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
     }
 }
 
-
-
 /** @brief Make a linear combination of functions
  *
  * Uses "local" representation: treats one node at a time.
@@ -1151,7 +1103,7 @@ void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVe
  */
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec) {
 
-    if (Phi[0].iscomplex() ){
+    if (Phi[0].iscomplex()) {
         rotate_cplx(Phi, U, Psi, prec);
         return;
     }
@@ -1193,9 +1145,9 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     // used for serial only:
     std::vector<std::vector<double *>> coeffVec(N);
     std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
+                                                 // orbital given the node index in the reference tree
     if (serial) {
 
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
@@ -1219,15 +1171,15 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
     }
 
     // 4) rotate all the nodes
-    IntMatrix split_serial;                             // in the serial case all split are stored in one array
+    IntMatrix split_serial;                          // in the serial case all split are stored in one array
     std::vector<std::vector<double *>> coeffpVec(M); // to put pointers to the rotated coefficient for each orbital in serial case
     std::vector<std::map<int, int>> ix2coef(M);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
+    int csize;                                       // size of the current coefficients (different for roots and branches)
+    std::vector<DoubleMatrix> rotatedCoeffVec;       // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
     // j indices are for unrotated orbitals, i indices are for rotated orbitals
     if (serial) {
-        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
-        split_serial.resize(M, max_n); // not use in the MPI case
+        std::map<int, int> ix2coef_ref; // to find the index n corresponding to a serialIx
+        split_serial.resize(M, max_n);  // not use in the MPI case
         for (int n = 0; n < max_n; n++) {
             int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
             ix2coef_ref[node_ix] = n;
@@ -1265,7 +1217,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             };
 
             std::vector<int> orbiVec;
-            for (int i = 0; i < M; i++) { // loop over all rotated orbitals
+            for (int i = 0; i < M; i++) {                                                                        // loop over all rotated orbitals
                 if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
@@ -1332,9 +1284,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             int parentid = parindexVec_ref[n];
             if (parentid == -1) {
                 // root node, split if output needed
-                for (int i = 0; i < M; i++) {
-                         split[i] = 1.0;
-                }
+                for (int i = 0; i < M; i++) { split[i] = 1.0; }
                 csize = sizecoeff;
             } else {
                 // note that it will wait until data is available
@@ -1343,7 +1293,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
             }
             std::vector<int> orbiVec;
             std::vector<int> orbjVec;
-            for (int i = 0; i < M; i++) {  // loop over rotated orbitals
+            for (int i = 0; i < M; i++) {     // loop over rotated orbitals
                 if (split[i] < 0.0) continue; // parent node has too small wavelets
                 orbiVec.push_back(i);
             }
@@ -1382,7 +1332,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
         mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
     }
 
-     // 5) reconstruct trees using rotated nodes.
+    // 5) reconstruct trees using rotated nodes.
 
     // only serial case can use OMP, because MPI cannot be used by threads
     if (serial) {
@@ -1391,7 +1341,7 @@ void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector
 
 #pragma omp parallel for schedule(static)
         for (int j = 0; j < M; j++) {
-            if (coeffpVec[j].size()==0) continue;
+            if (coeffpVec[j].size() == 0) continue;
             Psi[j].alloc(1);
             Psi[j].real().clear();
             Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
@@ -1531,9 +1481,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     }
     mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
     CompFunctionVector out(N);
-    for (int i = 0; i < N; i++) {
-        out[0] = Phi[i].paramCopy();
-    }
+    for (int i = 0; i < N; i++) { out[0] = Phi[i].paramCopy(); }
     if (not PsihasReIm[0] and not PsihasReIm[1]) {
         return out; // do nothing
     }
@@ -1557,9 +1505,9 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
     // used for serial only:
     std::vector<std::vector<double *>> coeffVec(N);
     std::vector<std::vector<int>> indexVec(N);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
+                                                 // orbital given the node index in the reference tree
     if (serial) {
         // make list of all coefficients (coeffVec), and their reference indices (indexVec)
         std::vector<int> parindexVec; // serialIx of the parent nodes
@@ -1600,7 +1548,7 @@ CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f
 
     // 3) mutiply for each node
     std::vector<std::vector<double *>> coeffpVec(N); // to put pointers to the multiplied coefficient for each orbital in serial case
-    std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
+    std::vector<DoubleMatrix> multipliedCoeffVec;    // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
     std::vector<std::map<int, int>> ix2coef(N);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
     DoubleVector NODEP = DoubleVector::Zero(nCoefs);
     DoubleVector NODEF = DoubleVector::Zero(nCoefs);
@@ -1889,9 +1837,9 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
 
     // only used for serial case:
     std::vector<std::vector<ComplexDouble *>> coeffVec(N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
+                                                 // the orbital given the node index in the reference tree
 
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
     mrcpp::BankAccount nodesBraKet;
@@ -1923,74 +1871,69 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
     int ibank = 0;
 #pragma omp parallel if (serial)
     {
-    ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
+        ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
 
 #pragma omp for schedule(dynamic)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
+        for (int n = 0; n < max_n; n++) {
+            if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+            int csize;
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            std::vector<int> orbVec;       // identifies which orbitals use this node
+            if (serial and node2orbVec[node_ix].size() <= 0) continue;
+            if (parindexVec_ref[n] < 0)
+                csize = sizecoeff;
+            else
+                csize = sizecoeffW;
 
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+            // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+            if (serial) {
+                int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+                if (parindexVec_ref[n] < 0) shift = 0;
+                ComplexMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+                for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2node[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                    orbVec.push_back(j);
+                }
+                if (orbVec.size() > 0) {
+                    ComplexMatrix S_temp(orbVec.size(), orbVec.size());
+                    S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
+                    for (int i = 0; i < orbVec.size(); i++) {
+                        for (int j = 0; j < orbVec.size(); j++) {
+                            if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                                BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        }
                     }
                 }
-            }
-        } else { // MPI case
-            ComplexMatrix coeffBlock(csize, N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+            } else { // MPI case
+                ComplexMatrix coeffBlock(csize, N);
+                nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+                if (orbVec.size() > 0) {
+                    ComplexMatrix S_temp(orbVec.size(), orbVec.size());
+                    coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                    S_temp.noalias() = coeffBlock.transpose().conjugate() * coeffBlock;
+                    for (int i = 0; i < orbVec.size(); i++) {
+                        for (int j = 0; j < orbVec.size(); j++) {
+                            if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                                BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        }
                     }
                 }
             }
         }
-    }
-    if (serial) {
+        if (serial) {
 #pragma omp critical
-        for (int i = 0; i < N; i++) {
-            for (int j = 0; j < N; j++) {
-                S(i, j) += S_omp(i, j);
+            for (int i = 0; i < N; i++) {
+                for (int j = 0; j < N; j++) { S(i, j) += S_omp(i, j); }
             }
         }
     }
 
-    }
-
     for (int i = 0; i < N; i++) {
         for (int j = 0; j <= i; j++) {
             if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri
@@ -2009,18 +1952,14 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &BraKet) {
 
     mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
     for (int i = 0; i < N; i++) {
-        for (int j = 0; j < N; j++) {
-            S(i, j) *=  std::conj(Fac[i])*Fac[j];
-        }
+        for (int j = 0; j < N; j++) { S(i, j) *= std::conj(Fac[i]) * Fac[j]; }
     }
 
     return S;
 }
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     // NB: should be spinseparated at this point!
-    if (BraKet[0].iscomplex() ){
-        return calc_overlap_matrix_cplx(BraKet);
-   }
+    if (BraKet[0].iscomplex()) { return calc_overlap_matrix_cplx(BraKet); }
 
     int N = BraKet.size();
     ComplexMatrix S = ComplexMatrix::Zero(N, N);
@@ -2046,9 +1985,9 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
 
     // only used for serial case:
     std::vector<std::vector<double *>> coeffVec(N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
+                                                 // the orbital given the node index in the reference tree
 
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
     mrcpp::BankAccount nodesBraKet;
@@ -2080,74 +2019,69 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     int ibank = 0;
 #pragma omp parallel if (serial)
     {
-    ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
+        ComplexMatrix S_omp = ComplexMatrix::Zero(N, N); // copy for each thread
 
 #pragma omp for schedule(dynamic)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
+        for (int n = 0; n < max_n; n++) {
+            if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+            int csize;
+            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
+            std::vector<int> orbVec;       // identifies which orbitals use this node
+            if (serial and node2orbVec[node_ix].size() <= 0) continue;
+            if (parindexVec_ref[n] < 0)
+                csize = sizecoeff;
+            else
+                csize = sizecoeffW;
 
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                ComplexMatrix S_temp(orbVec.size(), orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+            // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
+            if (serial) {
+                int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+                if (parindexVec_ref[n] < 0) shift = 0;
+                DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
+                for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2node[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
+                    orbVec.push_back(j);
+                }
+                if (orbVec.size() > 0) {
+                    ComplexMatrix S_temp(orbVec.size(), orbVec.size());
+                    S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                    for (int i = 0; i < orbVec.size(); i++) {
+                        for (int j = 0; j < orbVec.size(); j++) {
+                            if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                                BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S_omp(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        }
                     }
                 }
-            }
-        } else { // MPI case
-            DoubleMatrix coeffBlock(csize, N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S(orbVec[i], orbVec[j]) += S_temp(i, j);
+            } else { // MPI case
+                DoubleMatrix coeffBlock(csize, N);
+                nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
+
+                if (orbVec.size() > 0) {
+                    DoubleMatrix S_temp(orbVec.size(), orbVec.size());
+                    coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
+                    S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
+                    for (int i = 0; i < orbVec.size(); i++) {
+                        for (int j = 0; j < orbVec.size(); j++) {
+                            if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                                BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S(orbVec[i], orbVec[j]) += S_temp(i, j);
+                        }
                     }
                 }
             }
         }
-    }
-    if (serial) {
+        if (serial) {
 #pragma omp critical
-        for (int i = 0; i < N; i++) {
-            for (int j = 0; j < N; j++) {
-                S(i, j) += S_omp(i, j);
+            for (int i = 0; i < N; i++) {
+                for (int j = 0; j < N; j++) { S(i, j) += S_omp(i, j); }
             }
         }
     }
 
-    }
-
     for (int i = 0; i < N; i++) {
         for (int j = 0; j <= i; j++) {
             if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri
@@ -2165,9 +2099,7 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet) {
     }
     mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
     for (int i = 0; i < N; i++) {
-        for (int j = 0; j < N; j++) {
-            S(i, j) *=  std::conj(Fac[i])*Fac[j];
-        }
+        for (int j = 0; j < N; j++) { S(i, j) *= std::conj(Fac[i]) * Fac[j]; }
     }
 
     return S;
@@ -2183,13 +2115,13 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     bool ketisreal = !Ket[0].iscomplex();
     if (braisreal or ketisreal) {
         // temporary solution: copy as complex trees
-        if(braisreal){
+        if (braisreal) {
             for (int i = 0; i < Bra.size(); i++) {
                 Bra[i].CompD[0]->CopyTreeToComplex(Bra[i].CompC[0]);
                 Bra[i].func_ptr->iscomplex = 1;
             }
         }
-        if(ketisreal){
+        if (ketisreal) {
             for (int i = 0; i < Ket.size(); i++) {
                 Ket[i].CompD[0]->CopyTreeToComplex(Ket[i].CompC[0]);
                 Ket[i].func_ptr->iscomplex = 1;
@@ -2238,13 +2170,13 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
 
     // only used for serial case:
     std::vector<std::vector<ComplexDouble *>> coeffVecBra(N);
-    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
+                                                    // the orbital given the node index in the reference tree
     std::vector<std::vector<ComplexDouble *>> coeffVecKet(M);
-    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
+                                                    // the orbital given the node index in the reference tree
     mrcpp::BankAccount nodesBra;
     mrcpp::BankAccount nodesKet;
 
@@ -2290,105 +2222,102 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     int totget = 0;
     int mxtotsiz = 0;
     int ibank = 0;
-    //the omp crashes sometime for unknown reasons?
+    // NOTE: the OpenMP parallel region below sometimes crashes, for reasons not yet understood
 #pragma omp parallel if (serial)
     {
-    ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread
+        ComplexMatrix S_omp = ComplexMatrix::Zero(N, M); // copy for each thread
 
 #pragma omp for schedule(dynamic)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
-        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        if (serial) {
-            int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            ComplexMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
-            ComplexMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
-            if (parindexVec_ref[n] < 0) shift = 0;
-
-            for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeBra[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
-                orbVecBra.push_back(j);
-            }
-            for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeKet[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
-                orbVecKet.push_back(j);
-            }
+        for (int n = 0; n < max_n; n++) {
+            if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+            int csize;
+            std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
+            std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
+            if (parindexVec_ref[n] < 0)
+                csize = sizecoeff;
+            else
+                csize = sizecoeffW;
+            if (serial) {
+                int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
+                int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+                ComplexMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
+                ComplexMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
+                if (parindexVec_ref[n] < 0) shift = 0;
+
+                for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2nodeBra[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
+                    orbVecBra.push_back(j);
+                }
+                for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2nodeKet[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
+                    orbVecKet.push_back(j);
+                }
 
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                if ( not conjMatBra[0] and not conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet;
-                } else if ( conjMatBra[0] and not conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                } else if (not conjMatBra[0] and conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.transpose();
-                } else if (conjMatBra[0] and conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra * coeffBlockKet.transpose();
-                } else MSG_ABORT("Unexpected case");
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                    ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                    if (not conjMatBra[0] and not conjMatBra[0]) { // NOTE(review): both operands test conjMatBra[0]; the second was presumably meant to be a Ket-side flag - confirm
+                        S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet;
+                    } else if (conjMatBra[0] and not conjMatBra[0]) { // as written this condition is always false
+                        S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                    } else if (not conjMatBra[0] and conjMatBra[0]) { // as written this condition is always false
+                        S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.transpose(); // NOTE(review): operand shapes look inconsistent with S_temp (Bra count x Ket count) - confirm
+                    } else if (conjMatBra[0] and conjMatBra[0]) { // taken whenever conjMatBra[0] is set
+                        S_temp.noalias() = coeffBlockBra * coeffBlockKet.transpose(); // NOTE(review): operand shapes look inconsistent with S_temp (Bra count x Ket count) - confirm
+                    } else
+                        MSG_ABORT("Unexpected case");
+                    for (int i = 0; i < orbVecBra.size(); i++) {
+                        for (int j = 0; j < orbVecKet.size(); j++) {
+                            if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                                Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                        }
                     }
                 }
-
-            }
-        } else { // MPI case
-
-            ComplexMatrix coeffBlockBra(csize, N);
-            ComplexMatrix coeffBlockKet(csize, M);
-            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
-            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
-            totsiz += orbVecBra.size() * orbVecKet.size();
-            mxtotsiz += N * M;
-            totget += orbVecBra.size() + orbVecKet.size();
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
-                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
-                if ( not conjMatBra[0] and not conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet;
-                } else if ( conjMatBra[0] and not conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                } else if (not conjMatBra[0] and conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.transpose();
-                } else if (conjMatBra[0] and conjMatBra[0]) {
-                    S_temp.noalias() = coeffBlockBra * coeffBlockKet.transpose();
-                } else MSG_ABORT("Unexpected case");
-
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+            } else { // MPI case
+
+                ComplexMatrix coeffBlockBra(csize, N);
+                ComplexMatrix coeffBlockKet(csize, M);
+                nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
+                nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
+                totsiz += orbVecBra.size() * orbVecKet.size();
+                mxtotsiz += N * M;
+                totget += orbVecBra.size() + orbVecKet.size();
+                if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                    ComplexMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                    coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
+                    coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
+                    if (not conjMatBra[0] and not conjMatBra[0]) { // NOTE(review): both operands test conjMatBra[0]; the second was presumably meant to be a Ket-side flag - confirm
+                        S_temp.noalias() = coeffBlockBra.transpose().conjugate() * coeffBlockKet;
+                    } else if (conjMatBra[0] and not conjMatBra[0]) { // as written this condition is always false
+                        S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                    } else if (not conjMatBra[0] and conjMatBra[0]) { // as written this condition is always false
+                        S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet.transpose(); // NOTE(review): operand shapes look inconsistent with S_temp (Bra count x Ket count) - confirm
+                    } else if (conjMatBra[0] and conjMatBra[0]) { // taken whenever conjMatBra[0] is set
+                        S_temp.noalias() = coeffBlockBra * coeffBlockKet.transpose(); // NOTE(review): operand shapes look inconsistent with S_temp (Bra count x Ket count) - confirm
+                    } else
+                        MSG_ABORT("Unexpected case");
+
+                    for (int i = 0; i < orbVecBra.size(); i++) {
+                        for (int j = 0; j < orbVecKet.size(); j++) {
+                            if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                                Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                        }
                     }
                 }
             }
         }
-    }
-   if (serial) {
+        if (serial) {
 #pragma omp critical
-        for (int i = 0; i < N; i++) {
-            for (int j = 0; j < M; j++) {
-                S(i, j) += S_omp(i, j);
+            for (int i = 0; i < N; i++) {
+                for (int j = 0; j < M; j++) { S(i, j) += S_omp(i, j); }
             }
         }
     }
-    }
 
     // 4) collect results from all MPI. Linearity: result is sum of all node contributions
 
@@ -2408,13 +2337,11 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
     mrcpp::mpi::allreduce_vector(FacBra, mrcpp::mpi::comm_wrk);
     mrcpp::mpi::allreduce_vector(FacKet, mrcpp::mpi::comm_wrk);
     for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            S(i, j) *=  std::conj(FacBra[i])*FacKet[j];
-        }
+        for (int j = 0; j < M; j++) { S(i, j) *= std::conj(FacBra[i]) * FacKet[j]; }
     }
 
     // restore input
-    if(braisreal){
+    if (braisreal) {
         for (int i = 0; i < Bra.size(); i++) {
             delete Bra[i].CompC[0];
             Bra[i].CompC[0] = nullptr;
@@ -2422,7 +2349,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
             Bra[i].func_ptr->isreal = 1;
         }
     }
-    if(ketisreal){
+    if (ketisreal) {
         for (int i = 0; i < Ket.size(); i++) {
             delete Ket[i].CompC[0];
             Ket[i].CompC[0] = nullptr;
@@ -2438,9 +2365,7 @@ ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVect
  */
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket) {
 
-    if (Bra[0].iscomplex() or Ket[0].iscomplex()){
-         return calc_overlap_matrix_cplx(Bra, Ket);
-    }
+    if (Bra[0].iscomplex() or Ket[0].iscomplex()) { return calc_overlap_matrix_cplx(Bra, Ket); }
 
     mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
 
@@ -2473,13 +2398,13 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
 
     // only used for serial case:
     std::vector<std::vector<double *>> coeffVecBra(N);
-    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVecBra; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2nodeBra(N); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
+                                                    // the orbital given the node index in the reference tree
     std::vector<std::vector<double *>> coeffVecKet(M);
-    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVecKet; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2nodeKet(M); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
+                                                    // the orbital given the node index in the reference tree
     mrcpp::BankAccount nodesBra;
     mrcpp::BankAccount nodesKet;
     // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
@@ -2526,85 +2451,81 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     int ibank = 0;
 #pragma omp parallel if (serial)
     {
-    DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread
-    //NB: dynamic does give strange errors?
+        DoubleMatrix S_omp = DoubleMatrix::Zero(N, M); // copy for each thread
+        // NB: schedule(dynamic) appeared to give strange errors here, so schedule(static) is used
 #pragma omp for schedule(static)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
-        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        if (serial) {
-            int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
-            DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
-            if (parindexVec_ref[n] < 0) shift = 0;
-
-            for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeBra[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
-                orbVecBra.push_back(j);
-            }
-            for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeKet[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
-                orbVecKet.push_back(j);
-            }
+        for (int n = 0; n < max_n; n++) {
+            if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
+            int csize;
+            std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
+            std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
+            if (parindexVec_ref[n] < 0)
+                csize = sizecoeff;
+            else
+                csize = sizecoeffW;
+            if (serial) {
+                int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
+                int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
+                DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
+                DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
+                if (parindexVec_ref[n] < 0) shift = 0;
+
+                for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2nodeBra[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
+                    orbVecBra.push_back(j);
+                }
+                for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
+                    int orb_node_ix = orb2nodeKet[j][node_ix];
+                    for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
+                    orbVecKet.push_back(j);
+                }
 
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                    DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
 
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                    for (int i = 0; i < orbVecBra.size(); i++) {
+                        for (int j = 0; j < orbVecKet.size(); j++) {
+                            if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                                Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S_omp(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                        }
                     }
                 }
-            }
-        } else { // MPI case
-
-            DoubleMatrix coeffBlockBra(csize, N);
-            DoubleMatrix coeffBlockKet(csize, M);
-            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
-            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
-            totsiz += orbVecBra.size() * orbVecKet.size();
-            mxtotsiz += N * M;
-            totget += orbVecBra.size() + orbVecKet.size();
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
-                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and
-                            Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
-                            Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
-                            continue;
-                        S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+            } else { // MPI case
+
+                DoubleMatrix coeffBlockBra(csize, N);
+                DoubleMatrix coeffBlockKet(csize, M);
+                nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
+                nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
+                totsiz += orbVecBra.size() * orbVecKet.size();
+                mxtotsiz += N * M;
+                totget += orbVecBra.size() + orbVecKet.size();
+                if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
+                    DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
+                    coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
+                    coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
+                    S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
+                    for (int i = 0; i < orbVecBra.size(); i++) {
+                        for (int j = 0; j < orbVecKet.size(); j++) {
+                            if (Bra[orbVecBra[i]].func_ptr->data.n1[0] != Ket[orbVecKet[j]].func_ptr->data.n1[0] and Bra[orbVecBra[i]].func_ptr->data.n1[0] != 0 and
+                                Ket[orbVecKet[j]].func_ptr->data.n1[0] != 0)
+                                continue;
+                            S(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
+                        }
                     }
                 }
             }
         }
-    }
-    if (serial) {
+        if (serial) {
 #pragma omp critical
-        for (int i = 0; i < N; i++) {
-            for (int j = 0; j < M; j++) {
-                S(i, j) += S_omp(i, j);
+            for (int i = 0; i < N; i++) {
+                for (int j = 0; j < M; j++) { S(i, j) += S_omp(i, j); }
             }
         }
     }
-    }
 
     // 4) collect results from all MPI. Linearity: result is sum of all node contributions
 
@@ -2624,15 +2545,12 @@ ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &K
     mrcpp::mpi::allreduce_vector(FacBra, mrcpp::mpi::comm_wrk);
     mrcpp::mpi::allreduce_vector(FacKet, mrcpp::mpi::comm_wrk);
     for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            S(i, j) *=  std::conj(FacBra[i])*FacKet[j];
-        }
+        for (int j = 0; j < M; j++) { S(i, j) *= std::conj(FacBra[i]) * FacKet[j]; }
     }
 
     return S;
 }
 
-
 /** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
  *
  */
@@ -2661,9 +2579,9 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
 
     // only used for serial case:
     std::vector<std::vector<double *>> coeffVec(N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
+    std::map<int, std::vector<int>> node2orbVec; // for each node index, gives a vector with the indices of the orbitals using this node
     std::vector<std::map<int, int>> orb2node(N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
+                                                 // the orbital given the node index in the reference tree
 
     bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
     mrcpp::BankAccount nodesBraKet;
@@ -2733,9 +2651,8 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
                             continue;
                         double &Srealij = Sreal(orbVec[i], orbVec[j]);
                         double &Stempij = S_temp(i, j);
@@ -2755,9 +2672,8 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
                 S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
                 for (int i = 0; i < orbVec.size(); i++) {
                     for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and
-                            BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
-                            BraKet[orbVec[j]].func_ptr->data.n1[0]!= 0)
+                        if (BraKet[orbVec[i]].func_ptr->data.n1[0] != BraKet[orbVec[j]].func_ptr->data.n1[0] and BraKet[orbVec[i]].func_ptr->data.n1[0] != 0 and
+                            BraKet[orbVec[j]].func_ptr->data.n1[0] != 0)
                             continue;
                         Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
                     }
@@ -2790,9 +2706,7 @@ DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet) {
     }
     mrcpp::mpi::allreduce_vector(Fac, mrcpp::mpi::comm_wrk);
     for (int i = 0; i < N; i++) {
-        for (int j = 0; j < N; j++) {
-            S(i, j) *=  std::norm(std::conj(Fac[i]))*std::norm(Fac[j]);
-        }
+        for (int j = 0; j < N; j++) { S(i, j) *= std::norm(std::conj(Fac[i])) * std::norm(Fac[j]); }
     }
     return S;
 }
@@ -2807,51 +2721,48 @@ void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket
     int M = Ket.size();
     DoubleVector Ketnorms = DoubleVector::Zero(M);
     for (int i = 0; i < M; i++) {
-        if (mpi::my_func(Ket[i])) Ketnorms(i)  = Ket[i].getSquareNorm();
+        if (mpi::my_func(Ket[i])) Ketnorms(i) = Ket[i].getSquareNorm();
     }
     mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
-    ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
+    ComplexMatrix rmat = ComplexMatrix::Zero(M, N);
     for (int j = 0; j < N; j++) {
-        for (int i = 0; i < M; i++) {
-            rmat(i,j) = 0.0 - S.conjugate()(j,i)/Ketnorms(i);
-        }
+        for (int i = 0; i < M; i++) { rmat(i, j) = 0.0 - S.conjugate()(j, i) / Ketnorms(i); }
     }
     CompFunctionVector rotatedKet(N);
     rotate(Ket, rmat, rotatedKet, prec / M);
     for (int j = 0; j < N; j++) {
-        if(my_func(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
+        if (my_func(Bra[j])) Bra[j].add(1.0, rotatedKet[j]);
     }
 }
 
 /** @brief Orthogonalize the Bra against Ket
  *
  */
-template <int D>
-void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket) {
+template <int D> void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket) {
     ComplexDouble overlap = dot(Bra, Ket);
     double sq_norm = Ket.getSquareNorm();
     for (int i = 0; i < Bra.Ncomp(); i++) {
         if (Bra.isreal()) {
-            if (abs(overlap.imag())>MachineZero) MSG_ABORT("NOT IMPLEMENTED");
-            Bra.CompD[i]->add_inplace(-overlap.real()/sq_norm,*Ket.CompD[i]);
+            if (abs(overlap.imag()) > MachineZero) MSG_ABORT("NOT IMPLEMENTED");
+            Bra.CompD[i]->add_inplace(-overlap.real() / sq_norm, *Ket.CompD[i]);
         } else {
             if (Ket.isreal()) MSG_ABORT("NOT IMPLEMENTED");
-            Bra.CompC[i]->add_inplace(-std::conj(overlap/sq_norm),*Ket.CompC[i]);
+            Bra.CompC[i]->add_inplace(-std::conj(overlap / sq_norm), *Ket.CompC[i]);
             overlap = dot(Bra, Ket);
-       }
+        }
     }
 }
 
 template ComplexDouble dot(CompFunction<3> bra, CompFunction<3> ket);
-template void project(CompFunction<3>& out, RepresentableFunction<3, double>& f, double prec);
-template void project(CompFunction<3>& out, RepresentableFunction<3, ComplexDouble>& f, double prec);
+template void project(CompFunction<3> &out, RepresentableFunction<3, double> &f, double prec);
+template void project(CompFunction<3> &out, RepresentableFunction<3, ComplexDouble> &f, double prec);
 template void multiply(CompFunction<3> &out, CompFunction<3> inp_a, CompFunction<3> inp_b, double prec, bool absPrec, bool useMaxNorms, bool conjugate);
-template void multiply(CompFunction<3>& out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double>& f, double prec, int nrefine = 0, bool conjugate);
-template void multiply(CompFunction<3>& out, FunctionTree<3, ComplexDouble> &inp_a, RepresentableFunction<3, ComplexDouble>& f, double prec, int nrefine = 0, bool conjugate);
+template void multiply(CompFunction<3> &out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0, bool conjugate);
+template void multiply(CompFunction<3> &out, FunctionTree<3, ComplexDouble> &inp_a, RepresentableFunction<3, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate);
 template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0, bool conjugate);
 template void multiply(CompFunction<3> &out, CompFunction<3> &inp_a, RepresentableFunction<3, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate);
-template void deep_copy(CompFunction<3>* out, const CompFunction<3> &inp);
-template void deep_copy(CompFunction<3>& out, const CompFunction<3> &inp);
+template void deep_copy(CompFunction<3> *out, const CompFunction<3> &inp);
+template void deep_copy(CompFunction<3> &out, const CompFunction<3> &inp);
 template void add(CompFunction<3> &out, ComplexDouble a, CompFunction<3> inp_a, ComplexDouble b, CompFunction<3> inp_b, double prec, bool conjugate);
 template void linear_combination(CompFunction<3> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<3>> &inp, double prec, bool conjugate);
 template double node_norm_dot(CompFunction<3> bra, CompFunction<3> ket);
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index c7d930317..17e01ebc2 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -1,38 +1,37 @@
 #pragma once
 
-#include "trees/FunctionTree.h"
 #include "mpi_utils.h"
+#include "trees/FunctionTree.h"
 
 using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D>
-struct CompFunctionData {
+template <int D> struct CompFunctionData {
     // additional data that describe the overall multicomponent function (defined by user):
     // occupancy, quantum number, norm, etc.
     int Ncomp{0}; // number of components defined
     int rank{-1}; // rank (index) if part of a vector
-    int conj{0}; // soft conjugate (all components)
+    int conj{0};  // soft conjugate (all components)
     int CompFn1{0};
     int CompFn2{0};
-    int isreal{0}; // trees are defined for T=double
+    int isreal{0};    // trees are defined for T=double
     int iscomplex{0}; // trees are defined for T=DoubleComplex
     double CompFd1{0.0};
     double CompFd2{0.0};
     double CompFd3{0.0};
     // additional data that describe each component (defined by user):
     // occupancy, quantum number, norm, etc.
-    //Note: defined with fixed size to ease copying and MPI send
-    int n1[4]{0,0,0,0}; // 0: neutral. otherwise different values are orthogonal to each other (product = 0)
-    int n2[4]{0,0,0,0};
-    int n3[4]{0,0,0,0};
-    int n4[4]{0,0,0,0};
-    //multiplicative scalar for the function. So far only actively used to take care of imag factor in momentum operator.
-    ComplexDouble c1[4]{{1.0,0.0},{1.0,0.0},{1.0,0.0},{1.0,0.0}};
-    double d1[4]{0.0,0.0,0.0,0.0};
-    double d2[4]{0.0,0.0,0.0,0.0};
-    double d3[4]{0.0,0.0,0.0,0.0};
+    // Note: defined with fixed size to ease copying and MPI send
+    int n1[4]{0, 0, 0, 0}; // 0: neutral. otherwise different values are orthogonal to each other (product = 0)
+    int n2[4]{0, 0, 0, 0};
+    int n3[4]{0, 0, 0, 0};
+    int n4[4]{0, 0, 0, 0};
+    // multiplicative scalar for the function. So far only actively used to take care of imag factor in momentum operator.
+    ComplexDouble c1[4]{{1.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}, {1.0, 0.0}};
+    double d1[4]{0.0, 0.0, 0.0, 0.0};
+    double d2[4]{0.0, 0.0, 0.0, 0.0};
+    double d3[4]{0.0, 0.0, 0.0, 0.0};
     // used for storage on disk
     int type{0};
     int order{1};
@@ -43,7 +42,7 @@ struct CompFunctionData {
 
     // used internally
     int shared{0};
-    int Nchunks[4]{0,0,0,0}; // number of chunks of each component tree
+    int Nchunks[4]{0, 0, 0, 0}; // number of chunks of each component tree
 };
 
 template <int D> class TreePtr final {
@@ -69,57 +68,57 @@ template <int D> class TreePtr final {
         for (int i = 0; i < 4; i++) {
             if (this->real[i] != nullptr) delete this->real[i];
             if (this->cplx[i] != nullptr) delete this->cplx[i];
-             this->real[i] = nullptr;
-             this->cplx[i] = nullptr;
+            this->real[i] = nullptr;
+            this->cplx[i] = nullptr;
         }
     }
     CompFunctionData<D> data;
-    int& Ncomp = data.Ncomp; //number of components defined
-    int& rank = data.rank; // rank (index) if part of a vector
-    int& conj = data.conj; // soft conjugate
-    int& isreal = data.isreal; // T=double
-    int& iscomplex = data.iscomplex; // T=DoubleComplex
-    int& share = data.shared;
-    int* Nchunks = data.Nchunks;
+    int &Ncomp = data.Ncomp;         // number of components defined
+    int &rank = data.rank;           // rank (index) if part of a vector
+    int &conj = data.conj;           // soft conjugate
+    int &isreal = data.isreal;       // T=double
+    int &iscomplex = data.iscomplex; // T=ComplexDouble
+    int &share = data.shared;
+    int *Nchunks = data.Nchunks;
 
     bool is_shared = false;
     friend class CompFunction<D>;
+
 protected:
-    FunctionTree<D, double> *real[4]; // Real function
+    FunctionTree<D, double> *real[4];        // Real function
     FunctionTree<D, ComplexDouble> *cplx[4]; // Complex function
     SharedMemory<double> *shared_mem_real;
     SharedMemory<ComplexDouble> *shared_mem_cplx;
 };
 
-
 template <int D> class CompFunction {
 public:
     CompFunction(MultiResolutionAnalysis<D> &mra);
     CompFunction();
     CompFunction(int n1);
     CompFunction(int n1, bool share);
-    CompFunction(const CompFunctionData<D>& indata, bool alloc = false);
+    CompFunction(const CompFunctionData<D> &indata, bool alloc = false);
     CompFunction(const CompFunction<D> &compfunc);
-    CompFunction(CompFunction<D> && compfunc);
+    CompFunction(CompFunction<D> &&compfunc);
     CompFunction<D> &operator=(const CompFunction<D> &compfunc);
     virtual ~CompFunction() = default;
 
-    FunctionTree<D, double>** CompD; //  = func_ptr->real so that we can use name CompD instead of func_ptr.real
-    FunctionTree<D, ComplexDouble>** CompC; // = func_ptr->cplx
+    FunctionTree<D, double> **CompD;        // = func_ptr->real, so that the name CompD can be used instead of func_ptr->real
+    FunctionTree<D, ComplexDouble> **CompC; // = func_ptr->cplx
 
     std::string name;
 
     // additional data that describe each component (defined by user):
-    CompFunctionData<D> data() const {return func_ptr->data;}
-    int Ncomp() const {return func_ptr->data.Ncomp;} //number of components defined
-    int rank() const {return func_ptr->data.rank;} // rank (index) if part of a vector
-    int conj() const {return func_ptr->data.conj;} // soft conjugate
-    int isreal() const {return func_ptr->data.isreal;} // T=double
-    int iscomplex() const {return func_ptr->data.iscomplex;} // T=DoubleComplex
-    void defreal() {func_ptr->data.isreal = 1;} // define as real
-    void defcomplex() {func_ptr->data.iscomplex = 1;} // define as complex
-    int share() const {return func_ptr->data.shared;}
-    int* Nchunks() const {return func_ptr->data.Nchunks;} // number of chunks of each component tree
+    CompFunctionData<D> data() const { return func_ptr->data; }
+    int Ncomp() const { return func_ptr->data.Ncomp; }         // number of components defined
+    int rank() const { return func_ptr->data.rank; }           // rank (index) if part of a vector
+    int conj() const { return func_ptr->data.conj; }           // soft conjugate
+    int isreal() const { return func_ptr->data.isreal; }       // T=double
+    int iscomplex() const { return func_ptr->data.iscomplex; } // T=ComplexDouble
+    void defreal() { func_ptr->data.isreal = 1; }              // define as real
+    void defcomplex() { func_ptr->data.iscomplex = 1; }        // define as complex
+    int share() const { return func_ptr->data.shared; }
+    int *Nchunks() const { return func_ptr->data.Nchunks; } // number of chunks of each component tree
 
     CompFunction paramCopy(bool alloc = false) const;
     ComplexDouble integrate() const;
@@ -129,8 +128,8 @@ template <int D> class CompFunction {
     void alloc_comp(int i = 0); // allocate one specific component
     void setReal(FunctionTree<D, double> *tree, int i = 0);
     void setCplx(FunctionTree<D, ComplexDouble> *tree, int i = 0);
-    void setRank(int i) {func_ptr->rank = i;};
-    const int getRank() const {return func_ptr->rank;};
+    void setRank(int i) { func_ptr->rank = i; };
+    const int getRank() const { return func_ptr->rank; };
     void add(ComplexDouble c, CompFunction<D> inp);
 
     int crop(double prec);
@@ -146,54 +145,38 @@ template <int D> class CompFunction {
     const FunctionTree<D, double> &real(int i = 0) const;
     const FunctionTree<D, ComplexDouble> &complex(int i = 0) const;
 
-    //NB: All below should be revised. Now only for backwards compatibility to ComplexFunction class
+    // NB: Everything below should be revised. Kept only for backwards compatibility with the ComplexFunction class
 
-    void free(int type) {free();}
-    bool hasReal()  const {return isreal();}
-    bool hasImag()  const {return iscomplex();}
-    bool isShared() const {return share();}
-    bool conjugate() const {return conj();}
+    void free(int type) { free(); }
+    bool hasReal() const { return isreal(); }
+    bool hasImag() const { return iscomplex(); }
+    bool isShared() const { return share(); }
+    bool conjugate() const { return conj(); }
     void dagger();
-    FunctionTree<D, double> &imag(int i = 0); //does not make sense now
-    const FunctionTree<D, double> &imag(int i = 0) const; //does not make sense now
+    FunctionTree<D, double> &imag(int i = 0);             // does not make sense now
+    const FunctionTree<D, double> &imag(int i = 0) const; // does not make sense now
     std::shared_ptr<mrcpp::TreePtr<D>> func_ptr;
-
 };
 
-template <int D>
-void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp);
-template <int D>
-void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp);
-template <int D>
-void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate = false);
-template <int D>
-void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
+template <int D> void deep_copy(CompFunction<D> *out, const CompFunction<D> &inp);
+template <int D> void deep_copy(CompFunction<D> &out, const CompFunction<D> &inp);
+template <int D> void add(CompFunction<D> &out, ComplexDouble a, CompFunction<D> inp_a, ComplexDouble b, CompFunction<D> inp_b, double prec, bool conjugate = false);
+template <int D> void linear_combination(CompFunction<D> &out, const std::vector<ComplexDouble> &c, std::vector<CompFunction<D>> &inp, double prec, bool conjugate = false);
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, double prec, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
 template <int D>
 void multiply(double prec, CompFunction<D> &out, double coef, CompFunction<D> inp_a, CompFunction<D> inp_b, int maxIter = -1, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
-template <int D>
-void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
-template <int D>
-ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
-template <int D>
-double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket);
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> inp_a, CompFunction<D> inp_b, bool absPrec = false, bool useMaxNorms = false, bool conjugate = false);
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
+template <int D> void multiply(CompFunction<D> &out, CompFunction<D> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
+template <int D> void multiply(CompFunction<D> &out, FunctionTree<D, double> &inp_a, RepresentableFunction<D, double> &f, double prec, int nrefine = 0, bool conjugate = false);
+template <int D> void multiply(CompFunction<D> &out, FunctionTree<D, ComplexDouble> &inp_a, RepresentableFunction<D, ComplexDouble> &f, double prec, int nrefine = 0, bool conjugate = false);
+template <int D> ComplexDouble dot(CompFunction<D> bra, CompFunction<D> ket);
+template <int D> double node_norm_dot(CompFunction<D> bra, CompFunction<D> ket);
 void project(CompFunction<3> &out, std::function<double(const Coord<3> &r)> f, double prec);
 void project(CompFunction<3> &out, std::function<ComplexDouble(const Coord<3> &r)> f, double prec);
-template <int D>
-void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec);
-template <int D>
-void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
-template <int D>
-void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket);
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, double> &f, double prec);
+template <int D> void project(CompFunction<D> &out, RepresentableFunction<D, ComplexDouble> &f, double prec);
+template <int D> void orthogonalize(double prec, CompFunction<D> &Bra, CompFunction<D> &Ket);
 
 class CompFunctionVector : public std::vector<CompFunction<3>> {
 public:
@@ -204,7 +187,7 @@ class CompFunctionVector : public std::vector<CompFunction<3>> {
 
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, double prec = -1.0);
 void rotate(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
-//void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
+// void rotate_cplx(CompFunctionVector &Phi, const ComplexMatrix &U, CompFunctionVector &Psi, double prec = -1.0);
 void save_nodes(CompFunctionVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
 CompFunctionVector multiply(CompFunctionVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, CompFunction<3> *Func = nullptr, int nrefine = 1, bool all = false);
 void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
@@ -212,9 +195,8 @@ ComplexVector dot(CompFunctionVector &Bra, CompFunctionVector &Ket);
 ComplexMatrix calc_lowdin_matrix(CompFunctionVector &Phi);
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &BraKet);
 ComplexMatrix calc_overlap_matrix(CompFunctionVector &Bra, CompFunctionVector &Ket);
-//ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket);
+// ComplexMatrix calc_overlap_matrix_cplx(CompFunctionVector &Bra, CompFunctionVector &Ket);
 DoubleMatrix calc_norm_overlap_matrix(CompFunctionVector &BraKet);
 void orthogonalize(double prec, CompFunctionVector &Bra, CompFunctionVector &Ket);
 
-
 } // namespace mrcpp
diff --git a/src/utils/ComplexFunction.cpp b/src/utils/ComplexFunction.cpp
deleted file mode 100644
index 35e01078d..000000000
--- a/src/utils/ComplexFunction.cpp
+++ /dev/null
@@ -1,2015 +0,0 @@
-#include "ComplexFunction.h"
-#include "Bank.h"
-#include "Printer.h"
-#include "Timer.h"
-#include "parallel.h"
-#include "treebuilders/grid.h"
-#include "treebuilders/multiply.h"
-#include "treebuilders/project.h"
-#include "trees/FunctionNode.h"
-#include "treebuilders/add.h"
-
-using mrcpp::Timer;
-
-namespace mrcpp {
-
-MultiResolutionAnalysis<3> *defaultMRA; // Global MRA
-
-ComplexFunction::ComplexFunction(std::shared_ptr<TreePtr> funcptr)
-        : funcMRA(defaultMRA)
-        , func_ptr(funcptr) {}
-
-ComplexFunction::ComplexFunction(const ComplexFunction &func)
-        : funcMRA(func.funcMRA)
-        , conj(func.conj)
-        , func_ptr(func.func_ptr)
-        , rank(func.rank) {}
-
-ComplexFunction &ComplexFunction::operator=(const ComplexFunction &func) {
-    if (this != &func) {
-        this->conj = func.conj;
-        this->func_ptr = func.func_ptr;
-        this->funcMRA = func.funcMRA;
-        this->rank = func.rank;
-    }
-    return *this;
-}
-
-/** @brief Constructor
- *
- * @param spin: electron spin (SPIN::Alpha/Beta/Paired)
- * @param occ: occupation
- * @param rank: MPI ownership (-1 means all MPI ranks)
- *
- * Initializes the mrcpp::ComplexFunction with NULL pointers for both real and imaginary part.
- */
-ComplexFunction::ComplexFunction(int spin, double occ, int rank, bool share)
-        : funcMRA(defaultMRA)
-        , func_ptr(std::make_shared<TreePtr>(share))
-        , rank(rank) {
-    this->getFunctionData().spin = spin;
-    this->getFunctionData().occ = occ;
-    if (this->spin() < 0) INVALID_ARG_ABORT;
-    if (this->occ() < 0) {
-        if (this->spin() == SPIN::Paired) this->getFunctionData().occ = 2;
-        if (this->spin() == SPIN::Alpha) this->getFunctionData().occ = 1;
-        if (this->spin() == SPIN::Beta) this->getFunctionData().occ = 1;
-    }
-}
-
-/** @brief Parameter copy
- *
- * Returns a new ComplexFunction with the same spin, occupation and rank_id as *this.
- */
-ComplexFunction ComplexFunction::paramCopy() const {
-    return ComplexFunction(this->spin(), this->occ(), this->getRank());
-}
-
-MPI_FuncVector::MPI_FuncVector(int N)
-        : std::vector<ComplexFunction>(N) {
-    for (int i = 0; i < N; i++) (*this)[i].setRank(i);
-    vecMRA = defaultMRA;
-}
-void MPI_FuncVector::distribute() {
-    for (int i = 0; i < this->size(); i++) (*this)[i].setRank(i);
-}
-
-/** @brief Returns the orbital meta data
- *
- * Tree sizes (nChunks) are flushed before return.
- */
-FunctionData &ComplexFunction::getFunctionData() {
-    this->func_ptr->flushFuncData();
-    return this->func_ptr->func_data;
-}
-
-ComplexFunction ComplexFunction::dagger() {
-    ComplexFunction out(*this);
-    out.conj = not(this->conj);
-    return out; // Return shallow copy
-}
-
-void ComplexFunction::setReal(FunctionTree<3> *tree) {
-    if (isShared()) MSG_ABORT("Cannot set in shared function");
-    this->func_ptr->re = tree;
-}
-
-void ComplexFunction::setImag(FunctionTree<3> *tree) {
-    if (isShared()) MSG_ABORT("Cannot set in shared function");
-    this->func_ptr->im = tree;
-}
-
-void ComplexFunction::alloc(int type, MultiResolutionAnalysis<3> *mra) {
-    if (mra == nullptr) mra = funcMRA;
-    if (mra == nullptr) MSG_ABORT("Invalid argument");
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) MSG_ABORT("Real part already allocated");
-        this->func_ptr->re = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_re);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) MSG_ABORT("Imaginary part already allocated");
-        this->func_ptr->im = new FunctionTree<3>(*mra, this->func_ptr->shared_mem_im);
-    }
-}
-
-void ComplexFunction::free(int type) {
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) delete this->func_ptr->re;
-        this->func_ptr->re = nullptr;
-        if (this->func_ptr->shared_mem_re) this->func_ptr->shared_mem_re->clear();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) delete this->func_ptr->im;
-        this->func_ptr->im = nullptr;
-        if (this->func_ptr->shared_mem_im) this->func_ptr->shared_mem_im->clear();
-    }
-}
-
-int ComplexFunction::getSizeNodes(int type) const {
-    int size_mb = 0; // Memory size in kB
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) size_mb += real().getSizeNodes();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) size_mb += imag().getSizeNodes();
-    }
-    return size_mb;
-}
-
-int ComplexFunction::getNNodes(int type) const {
-    int nNodes = 0;
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (hasReal()) nNodes += real().getNNodes();
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (hasImag()) nNodes += imag().getNNodes();
-    }
-    return nNodes;
-}
-
-int ComplexFunction::crop(double prec) {
-    if (prec < 0.0) return 0;
-    bool need_to_crop = not(isShared()) or mpi::share_master();
-    int nChunksremoved = 0;
-    if (need_to_crop) {
-        if (hasReal()) nChunksremoved = real().crop(prec, 1.0, false);
-        if (hasImag()) nChunksremoved += imag().crop(prec, 1.0, false);
-    }
-    mpi::share_function(*this, 0, 7744, mpi::comm_share);
-    return nChunksremoved;
-}
-
-ComplexDouble ComplexFunction::integrate() const {
-    double int_r = 0.0;
-    double int_i = 0.0;
-    if (hasReal()) int_r = real().integrate();
-    if (hasImag()) int_i = imag().integrate();
-    return ComplexDouble(int_r, int_i);
-}
-
-/** @brief Returns the norm of the orbital */
-double ComplexFunction::norm() const {
-    double norm = getSquareNorm();
-    if (norm > 0.0) norm = std::sqrt(norm);
-    return norm;
-}
-
-/** @brief Returns the squared norm of the orbital */
-double ComplexFunction::getSquareNorm() const {
-    double sq_r = -1.0;
-    double sq_i = -1.0;
-    if (hasReal()) sq_r = real().getSquareNorm();
-    if (hasImag()) sq_i = imag().getSquareNorm();
-
-    double sq_norm = 0.0;
-    if (sq_r < 0.0 and sq_i < 0.0) {
-        sq_norm = -1.0;
-    } else {
-        if (sq_r >= 0.0) sq_norm += sq_r;
-        if (sq_i >= 0.0) sq_norm += sq_i;
-    }
-    return sq_norm;
-}
-
-/** @brief In place addition.
- *
- * Output is extended to union grid.
- *
- */
-void ComplexFunction::add(ComplexDouble c, ComplexFunction inp) {
-    double thrs = MachineZero;
-    bool cHasReal = (std::abs(c.real()) > thrs);
-    bool cHasImag = (std::abs(c.imag()) > thrs);
-    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
-    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
-
-    ComplexFunction &out = *this;
-    bool clearReal(false), clearImag(false);
-    if (outNeedsReal and not(out.hasReal())) {
-        out.alloc(NUMBER::Real);
-        clearReal = true;
-    }
-
-    if (outNeedsImag and not(out.hasImag())) {
-        out.alloc(NUMBER::Imag);
-        clearImag = true;
-    }
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (clearReal) out.real().setZero();
-        if (clearImag) out.imag().setZero();
-        if (cHasReal and inp.hasReal()) {
-            while (refine_grid(out.real(), inp.real())) {}
-            out.real().add(c.real(), inp.real());
-        }
-        if (cHasReal and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.imag(), inp.imag())) {}
-            out.imag().add(conj * c.real(), inp.imag());
-        }
-        if (cHasImag and inp.hasReal()) {
-            while (refine_grid(out.imag(), inp.real())) {}
-            out.imag().add(c.imag(), inp.real());
-        }
-        if (cHasImag and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.real(), inp.imag())) {}
-            out.real().add(-1.0 * conj * c.imag(), inp.imag());
-        }
-    }
-    mpi::share_function(out, 0, 9911, mpi::comm_share);
-}
-
-/** @brief In place addition of absolute values.
- *
- * Output is extended to union grid.
- *
- */
-void ComplexFunction::absadd(ComplexDouble c, ComplexFunction inp) {
-    double thrs = MachineZero;
-    bool cHasReal = (std::abs(c.real()) > thrs);
-    bool cHasImag = (std::abs(c.imag()) > thrs);
-    bool outNeedsReal = (cHasReal and inp.hasReal()) or (cHasImag and inp.hasImag());
-    bool outNeedsImag = (cHasReal and inp.hasImag()) or (cHasImag and inp.hasReal());
-
-    ComplexFunction &out = *this;
-    bool clearReal(false), clearImag(false);
-    if (outNeedsReal and not(out.hasReal())) {
-        out.alloc(NUMBER::Real);
-        clearReal = true;
-    }
-
-    if (outNeedsImag and not(out.hasImag())) {
-        out.alloc(NUMBER::Imag);
-        clearImag = true;
-    }
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (clearReal) out.real().setZero();
-        if (clearImag) out.imag().setZero();
-        if (cHasReal and inp.hasReal()) {
-            while (refine_grid(out.real(), inp.real())) {}
-            out.real().absadd(c.real(), inp.real());
-        }
-        if (cHasReal and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.imag(), inp.imag())) {}
-            out.imag().absadd(conj * c.real(), inp.imag());
-        }
-        if (cHasImag and inp.hasReal()) {
-            while (refine_grid(out.imag(), inp.real())) {}
-            out.imag().absadd(c.imag(), inp.real());
-        }
-        if (cHasImag and inp.hasImag()) {
-            double conj = (inp.conjugate()) ? -1.0 : 1.0;
-            while (refine_grid(out.real(), inp.imag())) {}
-            out.real().absadd(-1.0 * conj * c.imag(), inp.imag());
-        }
-    }
-    mpi::share_function(out, 0, 9912, mpi::comm_share);
-}
-
-/** @brief In place multiply with real scalar. Fully in-place.*/
-void ComplexFunction::rescale(double c) {
-    bool need_to_rescale = not(isShared()) or mpi::share_master();
-    if (need_to_rescale) {
-        if (hasReal()) real().rescale(c);
-        if (hasImag()) imag().rescale(c);
-    }
-    mpi::share_function(*this, 0, 5543, mpi::comm_share);
-}
-
-/** @brief In place multiply with complex scalar. Involves a deep copy.*/
-void ComplexFunction::rescale(ComplexDouble c) {
-    ComplexFunction &out = *this;
-    ComplexFunction tmp(spin(), occ(), rank, isShared());
-    cplxfunc::deep_copy(tmp, out);
-    out.free(NUMBER::Total);
-    out.add(c, tmp);
-}
-
-/** @brief Returns a character representing the spin (a/b/p) */
-char ComplexFunction::printSpin() const {
-    char sp = 'u';
-    if (this->spin() == SPIN::Paired) sp = 'p';
-    if (this->spin() == SPIN::Alpha) sp = 'a';
-    if (this->spin() == SPIN::Beta) sp = 'b';
-    return sp;
-}
-
-void cplxfunc::SetdefaultMRA(MultiResolutionAnalysis<3> *MRA) {
-    defaultMRA = MRA;
-}
-
-/** @brief Compute <bra|ket> = int bra^\dag(r) * ket(r) dr.
- *
- *  Notice that the <bra| position is already complex conjugated.
- *
- */
-ComplexDouble cplxfunc::dot(ComplexFunction bra, ComplexFunction ket) {
-    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
-    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::dot(bra.real(), ket.real());
-    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::dot(bra.real(), ket.imag());
-    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::dot(bra.imag(), ket.real());
-    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::dot(bra.imag(), ket.imag());
-
-    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
-    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
-
-    double real_part = rr + bra_conj * ket_conj * ii;
-    double imag_part = ket_conj * ri - bra_conj * ir;
-    return ComplexDouble(real_part, imag_part);
-}
-
-/** @brief Compute <bra|ket> = int |bra^\dag(r)| * |ket(r)| dr.
- *
- */
-ComplexDouble cplxfunc::node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact) {
-    double rr(0.0), ri(0.0), ir(0.0), ii(0.0);
-    if (bra.hasReal() and ket.hasReal()) rr = mrcpp::node_norm_dot(bra.real(), ket.real(), exact);
-    if (bra.hasReal() and ket.hasImag()) ri = mrcpp::node_norm_dot(bra.real(), ket.imag(), exact);
-    if (bra.hasImag() and ket.hasReal()) ir = mrcpp::node_norm_dot(bra.imag(), ket.real(), exact);
-    if (bra.hasImag() and ket.hasImag()) ii = mrcpp::node_norm_dot(bra.imag(), ket.imag(), exact);
-
-    double bra_conj = (bra.conjugate()) ? -1.0 : 1.0;
-    double ket_conj = (ket.conjugate()) ? -1.0 : 1.0;
-
-    double real_part = rr + bra_conj * ket_conj * ii;
-    double imag_part = ket_conj * ri - bra_conj * ir;
-    return ComplexDouble(real_part, imag_part);
-}
-
-/** @brief Deep copy
- *
- * Returns a new function which is a full blueprint copy of the input function.
- * This is achieved by building a new grid for the real and imaginary parts and
- * copying.
- */
-void cplxfunc::deep_copy(ComplexFunction &out, ComplexFunction &inp) {
-    bool need_to_copy = not(out.isShared()) or mpi::share_master();
-    out.funcMRA = inp.funcMRA;
-    out.setRank(inp.getRank());
-    if (inp.hasReal()) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_copy) {
-            copy_grid(out.real(), inp.real());
-            copy_func(out.real(), inp.real());
-        }
-    }
-    if (inp.hasImag()) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_copy) {
-            copy_grid(out.imag(), inp.imag());
-            copy_func(out.imag(), inp.imag());
-            if (out.conjugate()) out.imag().rescale(-1.0);
-        }
-    }
-    mpi::share_function(out, 0, 1324, mpi::comm_share);
-}
-
-void cplxfunc::project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec) {
-    bool need_to_project = not(out.isShared()) or mpi::share_master();
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
-    }
-    mpi::share_function(out, 0, 123123, mpi::comm_share);
-}
-
-void cplxfunc::project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec) {
-    bool need_to_project = not(out.isShared()) or mpi::share_master();
-    if (type == NUMBER::Real or type == NUMBER::Total) {
-        if (not out.hasReal()) out.alloc(NUMBER::Real);
-        if (need_to_project) build_grid(out.real(), f);
-        if (need_to_project) mrcpp::project<3>(prec, out.real(), f);
-    }
-    if (type == NUMBER::Imag or type == NUMBER::Total) {
-        if (not out.hasImag()) out.alloc(NUMBER::Imag);
-        if (need_to_project) build_grid(out.imag(), f);
-        if (need_to_project) mrcpp::project<3>(prec, out.imag(), f);
-    }
-    mpi::share_function(out, 0, 132231, mpi::comm_share);
-}
-
-/** @brief out = a*inp_a + b*inp_b
- *
- * Recast into linear_combination.
- *
- */
-void cplxfunc::add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec) {
-    ComplexVector coefs(2);
-    coefs(0) = a;
-    coefs(1) = b;
-
-    std::vector<ComplexFunction> funcs; // NB: not a ComplexFunctionVector, because not run in parallel!
-    funcs.push_back(inp_a);
-    funcs.push_back(inp_b);
-
-    cplxfunc::linear_combination(out, coefs, funcs, prec);
-}
-
-/** @brief out = inp_a * inp_b
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    multiply_real(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
-    multiply_imag(out, inp_a, inp_b, prec, absPrec, useMaxNorms);
-}
-
-/** @brief out = inp_a * f
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
-    // uses the mpifuncvec multiply
-    MPI_FuncVector mpi_funcvec_a;
-    mpi_funcvec_a.push_back(inp_a);
-    MPI_FuncVector mpi_funcvec_out;
-    mpi_funcvec_out = mpifuncvec::multiply(mpi_funcvec_a, f, prec, nullptr, nrefine, true);
-    out = mpi_funcvec_out[0];
-}
-
-/** @brief out = inp_a * f
- *
- */
-void cplxfunc::multiply(ComplexFunction &out, FunctionTree<3> &inp_a, RepresentableFunction<3> &f, double prec, int nrefine) {
-    ComplexFunction cplxfunc_a;
-    cplxfunc_a.setReal(&inp_a);
-    cplxfunc::multiply(out, cplxfunc_a, f, prec, nrefine);
-    cplxfunc_a.setReal(nullptr); // otherwise inp_a is deleted by cplxfunc_a destructor
-}
-
-/** @brief out = c_0*inp_0 + c_1*inp_1 + ... + c_N*inp_N
- *
- */
-void cplxfunc::linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec) {
-    FunctionTreeVector<3> rvec;
-    FunctionTreeVector<3> ivec;
-
-    double thrs = MachineZero;
-    for (int i = 0; i < inp.size(); i++) {
-        double sign = (inp[i].conjugate()) ? -1.0 : 1.0;
-
-        bool cHasReal = (std::abs(c[i].real()) > thrs);
-        bool cHasImag = (std::abs(c[i].imag()) > thrs);
-
-        if (cHasReal and inp[i].hasReal()) rvec.push_back(std::make_tuple(c[i].real(), &inp[i].real()));
-        if (cHasImag and inp[i].hasImag()) rvec.push_back(std::make_tuple(-sign * c[i].imag(), &inp[i].imag()));
-
-        if (cHasImag and inp[i].hasReal()) ivec.push_back(std::make_tuple(c[i].imag(), &inp[i].real()));
-        if (cHasReal and inp[i].hasImag()) ivec.push_back(std::make_tuple(sign * c[i].real(), &inp[i].imag()));
-    }
-
-    if (rvec.size() > 0 and not out.hasReal()) out.alloc(NUMBER::Real);
-    if (ivec.size() > 0 and not out.hasImag()) out.alloc(NUMBER::Imag);
-
-    bool need_to_add = not(out.isShared()) or mpi::share_master();
-    if (need_to_add) {
-        if (rvec.size() > 0) {
-            if (prec < 0.0) {
-                build_grid(out.real(), rvec);
-                mrcpp::add(prec, out.real(), rvec, 0);
-            } else {
-                mrcpp::add(prec, out.real(), rvec);
-            }
-        } else if (out.hasReal()) {
-            out.real().setZero();
-        }
-        if (ivec.size() > 0) {
-            if (prec < 0.0) {
-                build_grid(out.imag(), ivec);
-                mrcpp::add(prec, out.imag(), ivec, 0);
-            } else {
-                mrcpp::add(prec, out.imag(), ivec);
-            }
-        } else if (out.hasImag()) {
-            out.imag().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9911, mpi::comm_share);
-}
-
-/** @brief out = Re(inp_a * inp_b)
- *
- */
-void cplxfunc::multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
-    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
-
-    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-
-    FunctionTreeVector<3> vec;
-    if (inp_a.hasReal() and inp_b.hasReal()) {
-        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
-        if (need_to_multiply) {
-            double coef = 1.0;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.real());
-                build_grid(*tree, inp_b.real());
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.real(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-    if (inp_a.hasImag() and inp_b.hasImag()) {
-        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
-        if (need_to_multiply) {
-            double coef = -1.0 * conj_a * conj_b;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.imag());
-                build_grid(*tree, inp_b.imag());
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.imag(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-
-    if (vec.size() > 0) {
-        if (out.hasReal()) {
-            if (need_to_multiply) out.real().clear();
-        } else {
-            // All sharing procs must allocate
-            out.alloc(NUMBER::Real);
-        }
-    }
-
-    if (need_to_multiply) {
-        if (vec.size() == 1) {
-            FunctionTree<3> &func_0 = get_func(vec, 0);
-            copy_grid(out.real(), func_0);
-            copy_func(out.real(), func_0);
-            clear(vec, true);
-        } else if (vec.size() == 2) {
-            build_grid(out.real(), vec);
-            mrcpp::add(prec, out.real(), vec, 0);
-            clear(vec, true);
-        } else if (out.hasReal()) {
-            out.real().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9191, mpi::comm_share);
-}
-
-/** @brief out = Im(inp_a * inp_b)
- *
- */
-void cplxfunc::multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec, bool useMaxNorms) {
-    double conj_a = (inp_a.conjugate()) ? -1.0 : 1.0;
-    double conj_b = (inp_b.conjugate()) ? -1.0 : 1.0;
-    bool need_to_multiply = not(out.isShared()) or mpi::share_master();
-
-    FunctionTreeVector<3> vec;
-    if (inp_a.hasReal() and inp_b.hasImag()) {
-        auto *tree = new FunctionTree<3>(inp_a.real().getMRA());
-        if (need_to_multiply) {
-            double coef = conj_b;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.real());
-                build_grid(*tree, inp_b.imag());
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.real(), inp_b.imag(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-    if (inp_a.hasImag() and inp_b.hasReal()) {
-        auto *tree = new FunctionTree<3>(inp_a.imag().getMRA());
-        if (need_to_multiply) {
-            double coef = conj_a;
-            if (prec < 0.0) {
-                // Union grid
-                build_grid(*tree, inp_a.imag());
-                build_grid(*tree, inp_b.real());
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), 0);
-            } else {
-                // Adaptive grid
-                mrcpp::multiply(prec, *tree, coef, inp_a.imag(), inp_b.real(), -1, absPrec, useMaxNorms);
-            }
-        }
-        vec.push_back(std::make_tuple(1.0, tree));
-    }
-
-    if (vec.size() > 0) {
-        if (out.hasImag()) {
-            if (need_to_multiply) out.imag().clear();
-        } else {
-            // All sharing procs must allocate
-            out.alloc(NUMBER::Imag);
-        }
-    }
-
-    if (need_to_multiply) {
-        if (vec.size() == 1) {
-            FunctionTree<3> &func_0 = get_func(vec, 0);
-            copy_grid(out.imag(), func_0);
-            copy_func(out.imag(), func_0);
-            clear(vec, true);
-        } else if (vec.size() == 2) {
-            build_grid(out.imag(), vec);
-            mrcpp::add(prec, out.imag(), vec, 0);
-            clear(vec, true);
-        } else if (out.hasImag()) {
-            out.imag().setZero();
-        }
-    }
-    mpi::share_function(out, 0, 9292, mpi::comm_share);
-}
-
-namespace mpifuncvec {
-
-
-/** @brief Make a linear combination of functions
- *
- * Uses "local" representation: treats one node at a time.
- * For each node, all functions are transformed simultaneously
- * by a dense matrix multiplication.
- * Phi input functions, Psi output functions
- *
- */
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec) {
-
-    // The principle of this routine is that nodes are rotated one by one using matrix multiplication.
-    // The routine does avoid when possible to move data, but uses pointers and indices manipulation.
-    // MPI version does not use OMP yet, Serial version uses OMP
-    // size of input is N, size of output is M
-    int N = Phi.size();
-    int M = Psi.size();
-    if (U.rows() < N) MSG_ABORT("Incompatible number of rows for U matrix");
-    if (U.cols() < M) MSG_ABORT("Incompatible number of columns for U matrix");
-
-    // 1) make union tree without coefficients
-    FunctionTree<3> refTree(*Phi.vecMRA);
-    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    std::vector<double> scalefac_ref;
-    std::vector<double *> coeffVec_ref; // not used!
-    std::vector<int> indexVec_ref;      // serialIx of the nodes
-    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
-    int max_ix;
-    // get a list of all nodes in union tree, identified by their serialIx indices
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-   // 2) We work with real numbers only. Make real blocks for U matrix
-    bool UhasReal = false;
-    bool UhasImag = false;
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            if (std::abs(U(i, j).real()) > 10*MachineZero) UhasReal = true;
-            if (std::abs(U(i, j).imag()) > 10*MachineZero) UhasImag = true;
-        }
-    }
-
-    IntVector PsihasReIm = IntVector::Zero(2);
-    for (int j = 0; j < N; j++) {
-        if (!mpi::my_orb(j)) continue;
-        PsihasReIm[0] = (Phi[j].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[j].hasImag()) ? 1 : 0;
-    }
-    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    if (not PsihasReIm[0] and not PsihasReIm[1]) {
-        return; // do nothing
-    }
-
-    bool makeReal = (UhasReal and PsihasReIm[0]) or (UhasImag and PsihasReIm[1]);
-    bool makeImag = (UhasReal and PsihasReIm[1]) or (UhasImag and PsihasReIm[0]);
-
-    for (int j = 0; j < M; j++) {
-        if (!mpi::my_orb(j)) continue;
-        if (not makeReal and Psi[j].hasReal()) Psi[j].free(NUMBER::Real);
-        if (not makeImag and Psi[j].hasImag()) Psi[j].free(NUMBER::Imag);
-    }
-
-    if (not makeReal and not makeImag) { return; }
-
-    int Neff = N;               // effective number of input orbitals
-    int Meff = M;               // effective number of output orbitals
-    if (makeImag) Neff = 2 * N; // Imag and Real treated independently. We always use real part of U
-    if (makeImag) Meff = 2 * M; // Imag and Real treated independently. We always use real part of U
-
-    IntVector conjMat = IntVector::Zero(Neff);
-    for (int j = 0; j < Neff; j++) {
-        if (!mpi::my_orb(j % N)) continue;
-        conjMat[j] = (Phi[j % N].conjugate()) ? -1 : 1;
-    }
-    mpi::allreduce_vector(conjMat, mpi::comm_wrk);
-
-    // we make a real matrix = U,  but organized as one or four real blocks
-    // out_r = U_rr*in_r - U_ir*in_i*conjMat
-    // out_i = U_ri*in_r - U_ii*in_i*conjMat
-    // the first index of U is the one used on input Phi
-    DoubleMatrix Ureal(Neff, Meff); // four blocks, for rr ri ir ii
-    for (int j = 0; j < Neff; j++) {
-        for (int i = 0; i < Meff; i++) {
-            double sign = 1.0;
-            if (j < N and i < M) {
-                // real U applied on real Phi
-                Ureal(j, i) = U.real()(j % N, i % M);
-            } else if (j >= N and i >= M) {
-                // real U applied on imag Phi
-                Ureal(j, i) = conjMat[j] * U.real()(j % N, i % M);
-            } else if (j < N and i >= M) {
-                // imag U applied on real Phi
-                Ureal(j, i) = U.imag()(j % N, i % M);
-            } else {
-                // imag U applied on imag Phi
-                Ureal(j, i) = -1.0 * conjMat[j] * U.imag()(j % N, i % M);
-            }
-        }
-    }
-
-    // 3) In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
-    BankAccount nodesPhi;             // to put the original nodes
-    BankAccount nodesRotated;         // to put the rotated nodes
-
-    // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
-    if (serial) {
-        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<double> scalefac;
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j + N]) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-
-        // send own nodes to bank, identifying them through the serialIx of refTree
-        mpifuncvec::save_nodes(Phi, refTree, nodesPhi);
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-    }
-
-    // 4) rotate all the nodes
-    IntMatrix split_serial;                             // in the serial case all split are stored in one array
-    std::vector<std::vector<double *>> coeffpVec(Meff); // to put pointers to the rotated coefficient for each orbital in serial case
-    std::vector<std::map<int, int>> ix2coef(Meff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    int csize;                                          // size of the current coefficients (different for roots and branches)
-    std::vector<DoubleMatrix> rotatedCoeffVec;          // just to ensure that the data from rotatedCoeff is not deleted, since we point to it.
-    // j indices are for unrotated orbitals, i indices are for rotated orbitals
-    if (serial) {
-        std::map<int, int> ix2coef_ref;   // to find the index n corresponding to a serialIx
-        split_serial.resize(Meff, max_n); // not use in the MPI case
-        for (int n = 0; n < max_n; n++) {
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-            ix2coef_ref[node_ix] = n;
-            for (int i = 0; i < Meff; i++) split_serial(i, n) = 1;
-        }
-
-        std::vector<int> nodeReady(max_n, 0); // To indicate to OMP threads that the parent is ready (for splits)
-
-        // assumes the nodes are ordered such that parent are treated before children. BFS or DFS ok.
-        // NB: the n must be traversed approximately in right order: Thread n may have to wait until som other preceding
-        // n is finished.
-#pragma omp parallel for schedule(dynamic)
-        for (int n = 0; n < max_n; n++) {
-            int csize;
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-            // 4a) make a dense contiguous matrix with the coefficient from all the orbitals using node n
-            std::vector<int> orbjVec; // to remember which orbital correspond to each orbVec.size();
-            if (node2orbVec[node_ix].size() <= 0) continue;
-            csize = sizecoeffW;
-            if (parindexVec_ref[n] < 0) csize = sizecoeff; // for root nodes we include scaling coeff
-
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbjVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbjVec.push_back(j);
-            }
-
-            // 4b) make a list of rotated orbitals needed for this node
-            // OMP must wait until parent is ready
-            while (parindexVec_ref[n] >= 0 and nodeReady[ix2coef_ref[parindexVec_ref[n]]] == 0) {
-#pragma omp flush
-            };
-
-            std::vector<int> orbiVec;
-            for (int i = 0; i < Meff; i++) { // loop over all rotated orbitals
-                if (not makeReal and i < M) continue;
-                if (not makeImag and i >= M) continue;
-                if (parindexVec_ref[n] >= 0 and split_serial(i, ix2coef_ref[parindexVec_ref[n]]) == 0) continue; // parent node has too small wavelets
-                orbiVec.push_back(i);
-            }
-
-            // 4c) rotate this node
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size()); // chunk of U, with reorganized indices
-            for (int i = 0; i < orbiVec.size(); i++) {       // loop over rotated orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { Un(j, i) = Ureal(orbjVec[j], orbiVec[i]); }
-            }
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
-            // HERE IT HAPPENS!
-            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
-
-            // 4d) store and make rotated node pointers
-            // for now we allocate in buffer, in future could be directly allocated in the final trees
-            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
-            // make all norms:
-            for (int i = 0; i < orbiVec.size(); i++) {
-                // check if parent must be split
-                if (parindexVec_ref[n] == -1 or split_serial(orbiVec[i], ix2coef_ref[parindexVec_ref[n]])) {
-                    // mark this node for this orbital for later split
-#pragma omp critical
-                    {
-                        ix2coef[orbiVec[i]][node_ix] = coeffpVec[orbiVec[i]].size();
-                        coeffpVec[orbiVec[i]].push_back(&(rotatedCoeff(0, i))); // list of coefficient pointers
-                    }
-                    // check norms for split
-                    double wnorm = 0.0; // rotatedCoeff(k, i) is already in cache here
-                    int kstart = 0;
-                    if (parindexVec_ref[n] < 0) kstart = sizecoeff - sizecoeffW; // do not include scaling, even for roots
-                    for (int k = kstart; k < csize; k++) wnorm += rotatedCoeff(k, i) * rotatedCoeff(k, i);
-                    if (thres < wnorm or prec < 0)
-                        split_serial(orbiVec[i], n) = 1;
-                    else
-                        split_serial(orbiVec[i], n) = 0;
-                } else {
-                    ix2coef[orbiVec[i]][node_ix] = max_n + 1; // should not be used
-                    split_serial(orbiVec[i], n) = 0;          // do not split if parent does not need to be split
-                }
-            }
-            nodeReady[n] = 1;
-#pragma omp critical
-            {
-                // this ensures that rotatedCoeff is not deleted, when getting out of scope
-                rotatedCoeffVec.push_back(std::move(rotatedCoeff));
-            }
-        }
-    } else { // MPI case
-
-        // TODO? rotate in bank, so that we do not get and put. Requires clever handling of splits.
-        std::vector<double> split(Meff, -1.0);    // which orbitals need splitting (at a given node). For now double for compatibilty with bank
-        std::vector<double> needsplit(Meff, 1.0); // which orbitals need splitting
-        BankAccount nodeSplits;
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-
-        DoubleMatrix coeffBlock(sizecoeff, Neff);
-        max_ix++; // largest node index + 1. to store rotated orbitals with different id
-        TaskManager tasks(max_n);
-        for (int nn = 0; nn < max_n; nn++) {
-            int n = tasks.next_task();
-            if (n < 0) break;
-            double thres = prec * prec * scalefac_ref[n] * scalefac_ref[n];
-            // 4a) make list of orbitals that should split the parent node, i.e. include this node
-            int parentid = parindexVec_ref[n];
-            if (parentid == -1) {
-                // root node, split if output needed
-                for (int i = 0; i < M; i++) {
-                    if (makeReal)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
-                }
-                for (int i = N; i < Meff; i++) {
-                    if (makeImag)
-                        split[i] = 1.0;
-                    else
-                        split[i] = -1.0;
-                }
-                csize = sizecoeff;
-            } else {
-                // note that it will wait until data is available
-                nodeSplits.get_data(parentid, Meff, split.data());
-                csize = sizecoeffW;
-            }
-            std::vector<int> orbiVec;
-            std::vector<int> orbjVec;
-            for (int i = 0; i < Meff; i++) {  // loop over rotated orbitals
-                if (split[i] < 0.0) continue; // parent node has too small wavelets
-                orbiVec.push_back(i);
-            }
-
-            // 4b) rotate this node
-            DoubleMatrix coeffBlock(csize, Neff); // largest possible used size
-            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
-            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
-
-            // chunk of U, with reorganized indices and separate blocks for real and imag:
-            DoubleMatrix Un(orbjVec.size(), orbiVec.size());
-            DoubleMatrix rotatedCoeff(csize, orbiVec.size());
-
-            for (int i = 0; i < orbiVec.size(); i++) {     // loop over included rotated real and imag part of orbitals
-                for (int j = 0; j < orbjVec.size(); j++) { // loop over input orbital, possibly imaginary parts
-                    Un(j, i) = Ureal(orbjVec[j], orbiVec[i]);
-                }
-            }
-
-            // HERE IT HAPPENS
-            rotatedCoeff.noalias() = coeffBlock * Un; // Matrix mutiplication
-
-            // 3c) find which orbitals need to further refine this node, and store rotated node (after each other while
-            // in cache).
-            for (int i = 0; i < orbiVec.size(); i++) { // loop over rotated orbitals
-                needsplit[orbiVec[i]] = -1.0;          // default, do not split
-                // check if this node/orbital needs further refinement
-                double wnorm = 0.0;
-                int kwstart = csize - sizecoeffW; // do not include scaling
-                for (int k = kwstart; k < csize; k++) wnorm += rotatedCoeff.col(i)[k] * rotatedCoeff.col(i)[k];
-                if (thres < wnorm or prec < 0) needsplit[orbiVec[i]] = 1.0;
-                nodesRotated.put_nodedata(orbiVec[i], indexVec_ref[n] + max_ix, csize, rotatedCoeff.col(i).data());
-            }
-            nodeSplits.put_data(indexVec_ref[n], Meff, needsplit.data());
-        }
-        mpi::barrier(mpi::comm_wrk); // wait until all rotated nodes are ready
-    }
-
-    // 5) reconstruct trees using rotated nodes.
-
-    // only serial case can use OMP, because MPI cannot be used by threads
-    if (serial) {
-        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
-        // operation is writing the coefficient into the tree)
-
-#pragma omp parallel for schedule(static)
-        for (int j = 0; j < Meff; j++) {
-            if (coeffpVec[j].size()==0) continue;
-            if (j < M) {
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-            } else {
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], prec);
-            }
-        }
-
-    } else { // MPI case
-
-        for (int j = 0; j < Meff; j++) {
-            if (not mpi::my_orb(j % M)) continue;
-            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
-            std::vector<double *> coeffpVec; //
-            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx
-            int ix = 0;
-            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
-            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
-                std::vector<int> nodeidVec;
-                double *dataVec; // will be allocated by bank
-                nodesRotated.get_orbblock(j, dataVec, nodeidVec, ibank);
-                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
-                int shift = 0;
-                for (int n = 0; n < nodeidVec.size(); n++) {
-                    assert(nodeidVec[n] - max_ix >= 0);                // unrotated nodes have been deleted
-                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
-                    ix2coef[nodeidVec[n] - max_ix] = ix++;
-                    csize = sizecoeffW;
-                    if (parindexVec_ref[nodeidVec[n] - max_ix] < 0) csize = sizecoeff;
-                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
-                    shift += csize;
-                }
-            }
-            if (j < M) {
-                // Real part
-                if (!Psi[j].hasReal()) Psi[j].alloc(NUMBER::Real);
-                Psi[j].real().clear();
-                Psi[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            } else {
-                // Imag part
-                if (!Psi[j % M].hasImag()) Psi[j % M].alloc(NUMBER::Imag);
-                Psi[j % M].imag().clear();
-                Psi[j % M].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, prec);
-            }
-            for (double *p : pointerstodelete) delete[] p;
-            pointerstodelete.clear();
-        }
-    }
-}
-
-
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec) {
-    rotate(Phi, U, Phi, prec);
-    return;
-}
-
-/** @brief Save all nodes in bank; identify them using serialIx from refTree
- * shift is a shift applied in the id
- */
-void save_nodes(MPI_FuncVector &Phi, FunctionTree<3> &refTree, BankAccount &account, int sizes) {
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-    int max_nNodes = refTree.getNNodes();
-    std::vector<double *> coeffVec;
-    std::vector<double> scalefac;
-    std::vector<int> indexVec;    // SerialIx of the node in refOrb
-    std::vector<int> parindexVec; // SerialIx of the parent node
-    int N = Phi.size();
-    int max_ix;
-    for (int j = 0; j < N; j++) {
-        if (not mpi::my_orb(j)) continue;
-        // make vector with all coef address and their index in the union grid
-        if (Phi[j].hasReal()) {
-            Phi[j].real().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
-            int max_n = indexVec.size();
-            // send node coefs from Phi[j] to bank
-            // except for the root nodes, only wavelets are sent
-            for (int i = 0; i < max_n; i++) {
-                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
-                int csize = sizecoeffW;
-                if (parindexVec[i] < 0) csize = sizecoeff;
-                if (sizes > 0) { // fixed size
-                    account.put_nodedata(j, indexVec[i], sizes, coeffVec[i]);
-                } else {
-                    account.put_nodedata(j, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
-                }
-            }
-        }
-        // Imaginary parts are considered as orbitals with an orbid shifted by N
-        if (Phi[j].hasImag()) {
-            Phi[j].imag().makeCoeffVector(coeffVec, indexVec, parindexVec, scalefac, max_ix, refTree);
-            int max_n = indexVec.size();
-            // send node coefs from Phi[j] to bank
-            for (int i = 0; i < max_n; i++) {
-                if (indexVec[i] < 0) continue; // nodes that are not in refOrb
-                // NB: the identifier (indexVec[i]) must be shifted for not colliding with the nodes from the real part
-                int csize = sizecoeffW;
-                if (parindexVec[i] < 0) csize = sizecoeff;
-                if (sizes > 0) { // fixed size
-                    account.put_nodedata(j + N, indexVec[i], sizes, coeffVec[i]);
-                } else {
-                    account.put_nodedata(j + N, indexVec[i], csize, &(coeffVec[i][sizecoeff - csize]));
-                }
-            }
-        }
-    }
-}
-
-/** @brief Multiply all orbitals with a function
- *
- * @param Phi: orbitals to multiply
- * @param f  : function to multiply
- *
- * Computes the product of each orbital with a function
- * in parallel using a local representation.
- * Input trees are extended by one scale at most.
- */
-MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec, ComplexFunction *Func, int nrefine, bool all) {
-
-    int N = Phi.size();
-    const int D = 3;
-    bool serial = mpi::wrk_size == 1; // flag for serial/MPI switch
-
-    // 1a) extend grid where f is large (around nuclei)
-    // TODO: do it in save_nodes + refTree, only saving the extra nodes, without keeping them permanently. Or refine refTree?
-
-    for (int i = 0; i < N; i++) {
-        if (!mpi::my_orb(i)) continue;
-        int irefine = 0;
-        while (Phi[i].hasReal() and irefine < nrefine and refine_grid(Phi[i].real(), f) > 0) irefine++;
-        irefine = 0;
-        while (Phi[i].hasImag() and irefine < nrefine and refine_grid(Phi[i].imag(), f) > 0) irefine++;
-    }
-
-    // 1b) make union tree without coefficients
-    FunctionTree<D> refTree(*Phi.vecMRA);
-    // refine_grid(refTree, f); //to test
-    mpi::allreduce_Tree_noCoeff(refTree, Phi, mpi::comm_wrk);
-
-    int kp1 = refTree.getKp1();
-    int kp1_d = refTree.getKp1_d();
-    int nCoefs = refTree.getTDim() * kp1_d;
-
-    IntVector PsihasReIm = IntVector::Zero(2);
-    for (int i = 0; i < N; i++) {
-        if (!mpi::my_orb(i)) continue;
-        PsihasReIm[0] = (Phi[i].hasReal()) ? 1 : 0;
-        PsihasReIm[1] = (Phi[i].hasImag()) ? 1 : 0;
-    }
-    mpi::allreduce_vector(PsihasReIm, mpi::comm_wrk);
-    MPI_FuncVector out(N);
-    MPI_FuncVector outtest(N);
-    if (not PsihasReIm[0] and not PsihasReIm[1]) {
-        return out; // do nothing
-    }
-
-    int Neff = N;
-    if (PsihasReIm[1]) Neff = 2 * N; // Imag and Real treated independently. We always treat real part of Psi
-
-    std::vector<double> scalefac_ref;
-    std::vector<double *> coeffVec_ref; // not used!
-    std::vector<int> indexVec_ref;      // serialIx of the nodes
-    std::vector<int> parindexVec_ref;   // serialIx of the parent nodes
-    std::vector<MWNode<D> *> refNodes;  // pointers to nodes
-    int max_ix;
-    // get a list of all nodes in union tree, identified by their serialIx indices
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac_ref, max_ix, refTree, &refNodes);
-    int max_n = indexVec_ref.size();
-    std::map<int, int> ix2n; // for a given serialIx, give index in vectors
-    for (int nn = 0; nn < max_n; nn++) ix2n[indexVec_ref[nn]] = nn;
-
-    // 2a) send own nodes to bank, identifying them through the serialIx of refTree
-    BankAccount nodesPhi;        // to put the original nodes
-    BankAccount nodesMultiplied; // to put the multiplied nodes
-
-    // used for serial only:
-    std::vector<std::vector<double *>> coeffVec(Neff);
-    std::vector<std::vector<int>> indexVec(Neff);   // serialIx of the nodes
-    std::map<int, std::vector<int>> node2orbVec;    // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(Neff); // for a given orbital and a given node, gives the node index in the
-                                                    // orbital given the node index in the reference tree
-    if (serial) {
-        // make list of all coefficients (coeffVec), and their reference indices (indexVec)
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<double> scalefac;
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Phi[j].hasReal()) {
-                Phi[j].real().makeCoeffVector(coeffVec[j], indexVec[j], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j]) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (Phi[j].hasImag()) {
-                Phi[j].imag().makeCoeffVector(coeffVec[j + N], indexVec[j + N], parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec[j + N]) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else {
-        mpifuncvec::save_nodes(Phi, refTree, nodesPhi, nCoefs);
-        mpi::barrier(mpi::comm_wrk); // required for now, as the blockdata functionality has no queue yet.
-    }
-
-    // 2b) save Func in bank and remove its coefficients
-    if (Func != nullptr and !serial) {
-        // put Func in local representation if not already done
-        if (!Func->real().isLocal) { Func->real().saveNodesAndRmCoeff(); }
-    }
-
-    // 3) mutiply for each node
-    std::vector<std::vector<double *>> coeffpVec(Neff); // to put pointers to the multiplied coefficient for each orbital in serial case
-    std::vector<DoubleMatrix> multipliedCoeffVec;       // just to ensure that the data from multipliedCoeff is not deleted, since we point to it.
-    std::vector<std::map<int, int>> ix2coef(Neff);      // to find the index in for example rotCoeffVec[] corresponding to a serialIx
-    DoubleVector NODEP = DoubleVector::Zero(nCoefs);
-    DoubleVector NODEF = DoubleVector::Zero(nCoefs);
-
-    if (serial) {
-#pragma omp parallel for schedule(dynamic)
-        for (int n = 0; n < max_n; n++) {
-            MWNode<D> node(*(refNodes[n]), false);
-            int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-
-            // 3a) make values for f at this node
-            // 3a1) get coordinates of quadrature points for this node
-            Eigen::MatrixXd pts; // Eigen::Zero(D, nCoefs);
-            double fval[nCoefs];
-            Coord<D> r;
-            double *originalCoef = nullptr;
-            MWNode<3> *Fnode = nullptr;
-            if (Func == nullptr) {
-                node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-                for (int j = 0; j < nCoefs; j++) {
-                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                    fval[j] = f.evalf(r);
-                }
-            } else {
-                Fnode = Func->real().findNode(node.getNodeIndex());
-                if (Fnode == nullptr) {
-                    node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-                    for (int j = 0; j < nCoefs; j++) {
-                        for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                        fval[j] = f.evalf(r);
-                    }
-                } else {
-                    originalCoef = Fnode->getCoefs();
-                    for (int j = 0; j < nCoefs; j++) fval[j] = originalCoef[j];
-                    Fnode->attachCoefs(fval); // note that each thread has its own copy
-                    Fnode->mwTransform(Reconstruction);
-                    Fnode->cvTransform(Forward);
-                }
-            }
-            DoubleMatrix multipliedCoeff(nCoefs, node2orbVec[node_ix].size());
-            int i = 0;
-            // 3b) fetch all orbitals at this node
-            std::vector<int> orbjVec;            // to remember which orbital correspond to each orbVec.size();
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                orbjVec.push_back(j);
-                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) = coeffVec[j][orb_node_ix][k];
-                // 3c) transform to grid
-                node.attachCoefs(&(multipliedCoeff(0, i)));
-                node.mwTransform(Reconstruction);
-                node.cvTransform(Forward);
-                // 3d) multiply
-                for (int k = 0; k < nCoefs; k++) multipliedCoeff(k, i) *= fval[k]; // replace by Matrix vector multiplication?
-                // 3e) transform back to mw
-                node.cvTransform(Backward);
-                node.mwTransform(Compression);
-                i++;
-            }
-            if (Func != nullptr and originalCoef != nullptr) {
-                // restablish original values
-                Fnode->attachCoefs(originalCoef);
-            }
-
-            // 3f) save multiplied nodes
-            for (int i = 0; i < orbjVec.size(); i++) {
-#pragma omp critical
-                {
-                    ix2coef[orbjVec[i]][node_ix] = coeffpVec[orbjVec[i]].size();
-                    coeffpVec[orbjVec[i]].push_back(&(multipliedCoeff(0, i))); // list of coefficient pointers
-                }
-            }
-#pragma omp critical
-            {
-                // this ensures that multipliedCoeff is not deleted, when getting out of scope
-                multipliedCoeffVec.push_back(std::move(multipliedCoeff));
-            }
-            node.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
-        }
-    } else {
-        // MPI
-        int count1 = 0;
-        int count2 = 0;
-        TaskManager tasks(max_n);
-        for (int nn = 0; nn < max_n; nn++) {
-            int n = tasks.next_task();
-            if (n < 0) break;
-            MWNode<D> node(*(refNodes[n]), false);
-            // 3a) make values for f
-            // 3a1) get coordinates of quadrature points for this node
-            Eigen::MatrixXd pts;           // Eigen::Zero(D, nCoefs);
-            node.getExpandedChildPts(pts); // TODO: use getPrimitiveChildPts (less cache).
-            double fval[nCoefs];
-            Coord<D> r;
-            MWNode<D> Fnode(*(refNodes[n]), false);
-            if (Func == nullptr) {
-                for (int j = 0; j < nCoefs; j++) {
-                    for (int d = 0; d < D; d++) r[d] = pts(d, j); //*scaling_factor[d]?
-                    fval[j] = f.evalf(r);
-                }
-            } else {
-                int nIdx = Func->real().getIx(node.getNodeIndex());
-                count1++;
-                if (nIdx < 0) {
-                    // use the function f instead of Func
-                    count2++;
-                    for (int j = 0; j < nCoefs; j++) {
-                        for (int d = 0; d < D; d++) r[d] = pts(d, j);
-                        fval[j] = f.evalf(r);
-                    }
-                } else {
-                    Func->real().getNodeCoeff(nIdx, fval); // fetch coef from Bank
-                    Fnode.attachCoefs(fval);
-                    Fnode.mwTransform(Reconstruction);
-                    Fnode.cvTransform(Forward);
-                }
-            }
-
-            // 3b) fetch all orbitals at this node
-            DoubleMatrix coeffBlock(nCoefs, Neff); // largest possible used size
-            std::vector<int> orbjVec;
-            nodesPhi.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbjVec);
-            coeffBlock.conservativeResize(Eigen::NoChange, orbjVec.size()); // keep only used part
-            DoubleMatrix MultipliedCoeff(nCoefs, orbjVec.size());
-            // 3c) transform to grid
-            for (int j = 0; j < orbjVec.size(); j++) { // TODO: transform all j at once ?
-                // TODO: select only nodes that are end nodes?
-                node.attachCoefs(coeffBlock.col(j).data());
-                node.mwTransform(Reconstruction);
-                node.cvTransform(Forward);
-                // 3d) multiply
-                double *coefs = node.getCoefs();
-                for (int i = 0; i < nCoefs; i++) coefs[i] *= fval[i];
-                // 3e) transform back to mw
-                node.cvTransform(Backward);
-                node.mwTransform(Compression);
-                // 3f) save multiplied nodes
-                nodesMultiplied.put_nodedata(orbjVec[j], indexVec_ref[n] + max_ix, nCoefs, coefs);
-            }
-            node.attachCoefs(nullptr);  // to avoid deletion of valid multipliedCoeff by destructor
-            Fnode.attachCoefs(nullptr); // to avoid deletion of valid multipliedCoeff by destructor
-        }
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 5) reconstruct trees using multiplied nodes.
-
-    // only serial case can use OMP, because MPI cannot be used by threads
-    if (serial) {
-        // OMP parallelized, but does not scale well, because the total memory bandwidth is a bottleneck. (the main
-        // operation is writing the coefficient into the tree)
-
-#pragma omp parallel for schedule(static)
-        for (int j = 0; j < Neff; j++) {
-            if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
-                }
-            } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec[j], ix2coef[j], -1.0, "copy");
-                    out[j].imag().mwTransform(BottomUp);
-                    out[j].imag().calcSquareNorm();
-                }
-            }
-        }
-    } else {
-        for (int j = 0; j < Neff; j++) {
-            if (not mpi::my_orb(j % N) and not all) continue;
-            // traverse possible nodes, and stop descending when norm is zero (leaf in out[j])
-            std::vector<double *> coeffpVec; //
-            std::map<int, int> ix2coef;      // to find the index in coeffVec[] corresponding to a serialIx in refTree
-            int ix = 0;
-            std::vector<double *> pointerstodelete; // list of temporary arrays to clean up
-
-            for (int ibank = 0; ibank < mpi::bank_size; ibank++) {
-                std::vector<int> nodeidVec;
-                double *dataVec; // will be allocated by bank
-                nodesMultiplied.get_orbblock(j, dataVec, nodeidVec, ibank);
-                if (nodeidVec.size() > 0) pointerstodelete.push_back(dataVec);
-                int shift = 0;
-                for (int n = 0; n < nodeidVec.size(); n++) {
-                    assert(nodeidVec[n] - max_ix >= 0);                // unmultiplied nodes have been deleted
-                    assert(ix2coef.count(nodeidVec[n] - max_ix) == 0); // each nodeid treated once
-                    ix2coef[nodeidVec[n] - max_ix] = ix++;
-                    coeffpVec.push_back(&dataVec[shift]); // list of coeff pointers
-                    shift += nCoefs;
-                }
-            }
-            if (j < N) {
-                if (Phi[j].hasReal()) {
-                    out[j].alloc(NUMBER::Real);
-                    out[j].real().clear();
-                    out[j].real().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    // 6) reconstruct trees from end nodes
-                    out[j].real().mwTransform(BottomUp);
-                    out[j].real().calcSquareNorm();
-                    out[j].real().resetEndNodeTable();
-                    // out[j].real().crop(prec, 1.0, false); //bad convergence if out is cropped
-                    if (nrefine > 0) Phi[j].real().crop(prec, 1.0, false); // restablishes original Phi
-                }
-            } else {
-                if (Phi[j % N].hasImag()) {
-                    out[j % N].alloc(NUMBER::Imag);
-                    out[j % N].imag().clear();
-                    out[j % N].imag().makeTreefromCoeff(refTree, coeffpVec, ix2coef, -1.0, "copy");
-                    out[j % N].imag().mwTransform(BottomUp);
-                    out[j % N].imag().calcSquareNorm();
-                    // out[j % N].imag().crop(prec, 1.0, false);
-                    if (nrefine > 0) Phi[j % N].imag().crop(prec, 1.0, false);
-                }
-            }
-
-            for (double *p : pointerstodelete) delete[] p;
-            pointerstodelete.clear();
-        }
-    }
-    return out;
-}
-
-ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    int N = Bra.size();
-    ComplexVector result = ComplexVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        // The bra is sent to the owner of the ket
-        if (my_orb(Bra[i]) != my_orb(Ket[i])) { MSG_ABORT("same indices should have same ownership"); }
-        result[i] = cplxfunc::dot(Bra[i], Ket[i]);
-        if (not mrcpp::mpi::my_orb(i)) Bra[i].free(NUMBER::Total);
-    }
-    mrcpp::mpi::allreduce_vector(result, mrcpp::mpi::comm_wrk);
-    return result;
-}
-
-/** @brief Compute Löwdin orthonormalization matrix
- *
- * @param Phi: orbitals to orthonomalize
- *
- * Computes the inverse square root of the orbital overlap matrix S^(-1/2)
- */
-ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi) {
-    ComplexMatrix S_tilde = mpifuncvec::calc_overlap_matrix(Phi);
-    ComplexMatrix S_m12 = math_utils::hermitian_matrix_pow(S_tilde, -1.0 / 2.0);
-    return S_m12;
-}
-
-/** @brief Orbital transformation out_j = sum_i inp_i*U_ij
- *
- * NOTE: OrbitalVector is considered a ROW vector, so rotation
- *       means matrix multiplication from the right
- *
- * MPI: Rank distribution of output vector is the same as input vector
- *
- */
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet) {
-    // NB: must be spinseparated at this point!
-
-    int N = BraKet.size();
-    ComplexMatrix S = ComplexMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
-
-    // 1) make union tree without coefficients
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double> scalefac;
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    int max_ix;                       // largest index value (not used here)
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-    mrcpp::BankAccount nodesBraKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(BraKet, refTree, nodesBraKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-
-    int ibank = 0;
-#pragma omp parallel for schedule(dynamic) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(BraKet[i])) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j <= i; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N);
-            S.imag()(i, j) = conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
-            if (i != j) S(j, i) = std::conj(S(i, j)); // ensure exact symmetri
-        }
-    }
-
-    // Assumes linearity: result is sum of all nodes contributions
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-
-    return S;
-}
-
-/** @brief Compute the overlap matrix S_ij = <bra_i|ket_j>
- *
- */
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // for consistent timings
-
-    MultiResolutionAnalysis<3> *mra = Bra.vecMRA;
-
-    int N = Bra.size();
-    int M = Ket.size();
-    ComplexMatrix S = ComplexMatrix::Zero(N, M);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * M); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-
-    // 1) make union tree without coefficients for Bra (supposed smallest)
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, Bra, mpi::comm_wrk);
-    // note that Ket is not part of union grid: if a node is in ket but not in Bra, the dot product is zero.
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    std::vector<double> scalefac;
-    int max_ix;
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-    max_ix++;
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVecBra(2 * N);
-    std::map<int, std::vector<int>> node2orbVecBra;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeBra(2 * N); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
-    std::vector<std::vector<double *>> coeffVecKet(2 * M);
-    std::map<int, std::vector<int>> node2orbVecKet;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2nodeKet(2 * M); // for a given orbital and a given node, gives the node index in
-                                                        // the orbital given the node index in the reference tree
-    mrcpp::BankAccount nodesBra;
-    mrcpp::BankAccount nodesKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        // TODO? : do not copy coefficients, but use directly the pointers
-        // could OMP parallelize, but is fast anyway
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (Bra[j].hasReal()) {
-                Bra[j].real().makeCoeffVector(coeffVecBra[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j);
-                }
-            }
-            if (Bra[j].hasImag()) {
-                Bra[j].imag().makeCoeffVector(coeffVecBra[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeBra[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecBra[ix].push_back(j + N);
-                }
-            }
-        }
-        for (int j = 0; j < M; j++) {
-            if (Ket[j].hasReal()) {
-                Ket[j].real().makeCoeffVector(coeffVecKet[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j);
-                }
-            }
-            if (Ket[j].hasImag()) {
-                Ket[j].imag().makeCoeffVector(coeffVecKet[j + M], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2nodeKet[j + M][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVecKet[ix].push_back(j + M);
-                }
-            }
-        }
-
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(Bra, refTree, nodesBra);
-        save_nodes(Ket, refTree, nodesKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-    int totsiz = 0;
-    int totget = 0;
-    int mxtotsiz = 0;
-    int ibank = 0;
-    //For some unknown reason the h2_mag_lda test sometimes fails when schedule(dynamic) is chosen
-#pragma omp parallel for schedule(static) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        std::vector<int> orbVecBra; // identifies which Bra orbitals use this node
-        std::vector<int> orbVecKet; // identifies which Ket orbitals use this node
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        if (serial) {
-            int node_ix = indexVec_ref[n];      // SerialIx for this node in the reference tree
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            DoubleMatrix coeffBlockBra(csize, node2orbVecBra[node_ix].size());
-            DoubleMatrix coeffBlockKet(csize, node2orbVecKet[node_ix].size());
-            if (parindexVec_ref[n] < 0) shift = 0;
-
-            for (int j : node2orbVecBra[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeBra[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockBra(k, orbVecBra.size()) = coeffVecBra[j][orb_node_ix][k + shift];
-                orbVecBra.push_back(j);
-            }
-            for (int j : node2orbVecKet[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2nodeKet[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlockKet(k, orbVecKet.size()) = coeffVecKet[j][orb_node_ix][k + shift];
-                orbVecKet.push_back(j);
-            }
-
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
-                            continue;
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
-                            continue;
-                        // must ensure that threads are not competing
-                        double &Srealij = Sreal(orbVecBra[i], orbVecKet[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else {
-
-            DoubleMatrix coeffBlockBra(csize, 2 * N);
-            DoubleMatrix coeffBlockKet(csize, 2 * M);
-            nodesBra.get_nodeblock(indexVec_ref[n], coeffBlockBra.data(), orbVecBra); // get Bra parts
-            nodesKet.get_nodeblock(indexVec_ref[n], coeffBlockKet.data(), orbVecKet); // get Ket parts
-            totsiz += orbVecBra.size() * orbVecKet.size();
-            mxtotsiz += N * M;
-            totget += orbVecBra.size() + orbVecKet.size();
-            if (orbVecBra.size() > 0 and orbVecKet.size() > 0) {
-                DoubleMatrix S_temp(orbVecBra.size(), orbVecKet.size());
-                coeffBlockBra.conservativeResize(Eigen::NoChange, orbVecBra.size());
-                coeffBlockKet.conservativeResize(Eigen::NoChange, orbVecKet.size());
-                S_temp.noalias() = coeffBlockBra.transpose() * coeffBlockKet;
-                for (int i = 0; i < orbVecBra.size(); i++) {
-                    for (int j = 0; j < orbVecKet.size(); j++) {
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Alpha and Ket[orbVecKet[j] % M].spin() == SPIN::Beta)
-                            continue;
-                        if (Bra[orbVecBra[i] % N].spin() == SPIN::Beta and Ket[orbVecKet[j] % M].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVecBra[i], orbVecKet[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-
-    IntVector conjMatBra = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(Bra[i])) continue;
-        conjMatBra[i] = (Bra[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatBra, mrcpp::mpi::comm_wrk);
-    IntVector conjMatKet = IntVector::Zero(M);
-    for (int i = 0; i < M; i++) {
-        if (!mrcpp::mpi::my_orb(Ket[i])) continue;
-        conjMatKet[i] = (Ket[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMatKet, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j < M; j++) {
-            S.real()(i, j) = Sreal(i, j) + conjMatBra[i] * conjMatKet[j] * Sreal(i + N, j + M);
-            S.imag()(i, j) = conjMatKet[j] * Sreal(i, j + M) - conjMatBra[i] * Sreal(i + N, j);
-        }
-    }
-
-    // 4) collect results from all MPI. Linearity: result is sum of all node contributions
-
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-
-    return S;
-}
-
-/** @brief Compute the overlap matrix of the absolute value of the functions S_ij = <|bra_i|||ket_j|>
- *
- */
-DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet) {
-    int N = BraKet.size();
-    DoubleMatrix S = DoubleMatrix::Zero(N, N);
-    DoubleMatrix Sreal = DoubleMatrix::Zero(2 * N, 2 * N); // same as S, but stored as 4 blocks, rr,ri,ir,ii
-    MultiResolutionAnalysis<3> *mra = BraKet.vecMRA;
-
-    // 1) make union tree without coefficients
-    mrcpp::FunctionTree<3> refTree(*mra);
-    mrcpp::mpi::allreduce_Tree_noCoeff(refTree, BraKet, mpi::comm_wrk);
-
-    int sizecoeff = (1 << refTree.getDim()) * refTree.getKp1_d();
-    int sizecoeffW = ((1 << refTree.getDim()) - 1) * refTree.getKp1_d();
-
-    // get a list of all nodes in union grid, as defined by their indices
-    std::vector<double> scalefac;
-    std::vector<double *> coeffVec_ref;
-    std::vector<int> indexVec_ref;    // serialIx of the nodes
-    std::vector<int> parindexVec_ref; // serialIx of the parent nodes
-    int max_ix;                       // largest index value (not used here)
-
-    refTree.makeCoeffVector(coeffVec_ref, indexVec_ref, parindexVec_ref, scalefac, max_ix, refTree);
-    int max_n = indexVec_ref.size();
-
-    // only used for serial case:
-    std::vector<std::vector<double *>> coeffVec(2 * N);
-    std::map<int, std::vector<int>> node2orbVec;     // for each node index, gives a vector with the indices of the orbitals using this node
-    std::vector<std::map<int, int>> orb2node(2 * N); // for a given orbital and a given node, gives the node index in
-                                                     // the orbital given the node index in the reference tree
-
-    bool serial = mrcpp::mpi::wrk_size == 1; // flag for serial/MPI switch
-    mrcpp::BankAccount nodesBraKet;
-
-    // In the serial case we store the coeff pointers in coeffVec. In the mpi case the coeff are stored in the bank
-    if (serial) {
-        // 2) make list of all coefficients, and their reference indices
-        // for different orbitals, indexVec will give the same index for the same node in space
-        std::vector<int> parindexVec; // serialIx of the parent nodes
-        std::vector<int> indexVec;    // serialIx of the nodes
-        for (int j = 0; j < N; j++) {
-            // make vector with all coef pointers and their indices in the union grid
-            if (BraKet[j].hasReal()) {
-                BraKet[j].real().makeCoeffVector(coeffVec[j], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j);
-                }
-            }
-            if (BraKet[j].hasImag()) {
-                BraKet[j].imag().makeCoeffVector(coeffVec[j + N], indexVec, parindexVec, scalefac, max_ix, refTree);
-                // make a map that gives j from indexVec
-                int orb_node_ix = 0;
-                for (int ix : indexVec) {
-                    orb2node[j + N][ix] = orb_node_ix++;
-                    if (ix < 0) continue;
-                    node2orbVec[ix].push_back(j + N);
-                }
-            }
-        }
-    } else { // MPI case
-        // 2) send own nodes to bank, identifying them through the serialIx of refTree
-        save_nodes(BraKet, refTree, nodesBraKet);
-        mrcpp::mpi::barrier(mrcpp::mpi::comm_wrk); // wait until everything is stored before fetching!
-    }
-
-    // 3) make dot product for all the nodes and accumulate into S
-
-    int ibank = 0;
-#pragma omp parallel for schedule(dynamic) if (serial)
-    for (int n = 0; n < max_n; n++) {
-        if (n % mrcpp::mpi::wrk_size != mrcpp::mpi::wrk_rank) continue;
-        int csize;
-        int node_ix = indexVec_ref[n]; // SerialIx for this node in the reference tree
-        std::vector<int> orbVec;       // identifies which orbitals use this node
-        if (serial and node2orbVec[node_ix].size() <= 0) continue;
-        if (parindexVec_ref[n] < 0)
-            csize = sizecoeff;
-        else
-            csize = sizecoeffW;
-        // In the serial case we copy the coeff coeffBlock. In the mpi case coeffBlock is provided by the bank
-        if (serial) {
-            int shift = sizecoeff - sizecoeffW; // to copy only wavelet part
-            if (parindexVec_ref[n] < 0) shift = 0;
-            DoubleMatrix coeffBlock(csize, node2orbVec[node_ix].size());
-            for (int j : node2orbVec[node_ix]) { // loop over indices of the orbitals using this node
-                int orb_node_ix = orb2node[j][node_ix];
-                for (int k = 0; k < csize; k++) coeffBlock(k, orbVec.size()) = coeffVec[j][orb_node_ix][k + shift];
-                orbVec.push_back(j);
-            }
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock = coeffBlock.cwiseAbs();
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        double &Srealij = Sreal(orbVec[i], orbVec[j]);
-                        double &Stempij = S_temp(i, j);
-#pragma omp atomic
-                        Srealij += Stempij;
-                    }
-                }
-            }
-        } else { // MPI case
-            DoubleMatrix coeffBlock(csize, 2 * N);
-            nodesBraKet.get_nodeblock(indexVec_ref[n], coeffBlock.data(), orbVec);
-
-            if (orbVec.size() > 0) {
-                DoubleMatrix S_temp(orbVec.size(), orbVec.size());
-                coeffBlock.conservativeResize(Eigen::NoChange, orbVec.size());
-                coeffBlock = coeffBlock.cwiseAbs();
-                S_temp.noalias() = coeffBlock.transpose() * coeffBlock;
-                for (int i = 0; i < orbVec.size(); i++) {
-                    for (int j = 0; j < orbVec.size(); j++) {
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Alpha and BraKet[orbVec[j] % N].spin() == SPIN::Beta)
-                            continue;
-                        if (BraKet[orbVec[i] % N].spin() == SPIN::Beta and BraKet[orbVec[j] % N].spin() == SPIN::Alpha)
-                            continue;
-                        Sreal(orbVec[i], orbVec[j]) += S_temp(i, j);
-                    }
-                }
-            }
-        }
-    }
-
-    IntVector conjMat = IntVector::Zero(N);
-    for (int i = 0; i < N; i++) {
-        if (!mrcpp::mpi::my_orb(i)) continue;
-        conjMat[i] = (BraKet[i].conjugate()) ? -1 : 1;
-    }
-    mrcpp::mpi::allreduce_vector(conjMat, mrcpp::mpi::comm_wrk);
-
-    for (int i = 0; i < N; i++) {
-        for (int j = 0; j <= i; j++) {
-            S(i, j) = Sreal(i, j) + conjMat[i] * conjMat[j] * Sreal(i + N, j + N) + conjMat[j] * Sreal(i, j + N) - conjMat[i] * Sreal(i + N, j);
-            S(j, i) = S(i, j);
-        }
-    }
-
-    // Assumes linearity: result is sum of all nodes contributions
-    mrcpp::mpi::allreduce_matrix(S, mrcpp::mpi::comm_wrk);
-    return S;
-}
-
-/** @brief Orthogonalize the functions in Bra against all orbitals in Ket
- *
- */
-void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket) {
-    // TODO: generalize for cases where Ket functions are not orthogonal to each other?
-    ComplexMatrix S = mpifuncvec::calc_overlap_matrix(Bra, Ket);
-    int N = Bra.size();
-    int M = Ket.size();
-    DoubleVector Ketnorms = DoubleVector::Zero(M);
-    for (int i = 0; i < M; i++) {
-        if (mpi::my_orb(Ket[i])) Ketnorms(i)  = Ket[i].getSquareNorm();
-    }
-    mrcpp::mpi::allreduce_vector(Ketnorms, mrcpp::mpi::comm_wrk);
-    ComplexMatrix rmat =  ComplexMatrix::Zero(M, N);
-    for (int j = 0; j < N; j++) {
-        for (int i = 0; i < M; i++) {
-            rmat(i,j) = 0.0 - S.conjugate()(j,i)/Ketnorms(i);
-        }
-    }
-    MPI_FuncVector rotatedKet(N);
-    mpifuncvec::rotate(Ket, rmat, rotatedKet, prec / M);
-    for (int j = 0; j < N; j++) {
-        if(my_orb(Bra[j]))Bra[j].add(1.0,rotatedKet[j]);
-    }
-}
-} // namespace mpifuncvec
-} // namespace mrcpp
diff --git a/src/utils/ComplexFunction.h b/src/utils/ComplexFunction.h
deleted file mode 100644
index f6103dc1c..000000000
--- a/src/utils/ComplexFunction.h
+++ /dev/null
@@ -1,199 +0,0 @@
-#pragma once
-
-#include "functions/RepresentableFunction.h"
-#include "math_utils.h"
-#include "mpi_utils.h"
-#include "trees/FunctionTree.h"
-#include "trees/MultiResolutionAnalysis.h"
-#include <Eigen/Core>
-
-using namespace Eigen;
-
-using IntVector = Eigen::VectorXi;
-using DoubleVector = Eigen::VectorXd;
-using ComplexVector = Eigen::VectorXcd;
-
-using IntMatrix = Eigen::MatrixXi;
-using DoubleMatrix = Eigen::MatrixXd;
-using ComplexMatrix = Eigen::MatrixXcd;
-
-class MPI_FuncVector;
-
-namespace mrcpp {
-
-class BankAccount;
-  template <int D, typename T> class FunctionTree;
-template <int D> class MultiResolutionAnalysis;
-
-using ComplexDouble = std::complex<double>;
-namespace NUMBER {
-enum type { Total, Real, Imag };
-}
-namespace SPIN {
-enum type { Paired, Alpha, Beta };
-}
-
-struct FunctionData {
-    int type{0};
-    int order{1};
-    int scale{0};
-    int depth{0};
-    int boxes[3] = {0, 0, 0};
-    int corner[3] = {0, 0, 0};
-    int real_size{0};
-    int imag_size{0};
-    bool is_shared{false};
-    int spin{0};
-    double occ{0};
-};
-
-class TreePtr final {
-public:
-    explicit TreePtr(bool share)
-            : shared_mem_re(nullptr)
-            , shared_mem_im(nullptr)
-            , re(nullptr)
-            , im(nullptr) {
-        this->func_data.is_shared = share;
-        if (this->func_data.is_shared and mpi::share_size > 1) {
-            // Memory size in MB defined in input. Virtual memory, does not cost anything if not used.
-#ifdef MRCPP_HAS_MPI
-            this->shared_mem_re = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
-            this->shared_mem_im = new mrcpp::SharedMemory<double>(mpi::comm_share, mpi::shared_memory_size);
-#endif
-        }
-    }
-
-    ~TreePtr() {
-        if (this->shared_mem_re != nullptr) delete this->shared_mem_re;
-        if (this->shared_mem_im != nullptr) delete this->shared_mem_im;
-        if (this->re != nullptr) delete this->re;
-        if (this->im != nullptr) delete this->im;
-    }
-
-    friend class ComplexFunction;
-
-private:
-    FunctionData func_data;
-    mrcpp::SharedMemory<double> *shared_mem_re;
-    mrcpp::SharedMemory<double> *shared_mem_im;
-    mrcpp::FunctionTree<3, double> *re; ///< Real part of function
-    mrcpp::FunctionTree<3, double> *im; ///< Imaginary part of function
-
-    void flushFuncData() {
-        this->func_data.real_size = 0;
-        this->func_data.imag_size = 0;
-        if (this->re != nullptr) {
-            this->func_data.real_size = this->re->getNChunksUsed();
-            flushMRAData(this->re->getMRA());
-        }
-        if (this->im != nullptr) {
-            this->func_data.imag_size = this->im->getNChunksUsed();
-            flushMRAData(this->im->getMRA());
-        }
-    }
-
-    void flushMRAData(const mrcpp::MultiResolutionAnalysis<3> &mra) {
-        const auto &box = mra.getWorldBox();
-        this->func_data.type = mra.getScalingBasis().getScalingType();
-        this->func_data.order = mra.getOrder();
-        this->func_data.depth = mra.getMaxDepth();
-        this->func_data.scale = box.getScale();
-        this->func_data.boxes[0] = box.size(0);
-        this->func_data.boxes[1] = box.size(1);
-        this->func_data.boxes[2] = box.size(2);
-        this->func_data.corner[0] = box.getCornerIndex().getTranslation(0);
-        this->func_data.corner[1] = box.getCornerIndex().getTranslation(1);
-        this->func_data.corner[2] = box.getCornerIndex().getTranslation(2);
-    }
-};
-
-class ComplexFunction {
-public:
-    ComplexFunction(std::shared_ptr<TreePtr> funcptr);
-    ComplexFunction(const ComplexFunction &func);
-    ComplexFunction(int spin = 0, double occ = -1, int rank = -1, bool share = false);
-    ComplexFunction &operator=(const ComplexFunction &func);
-    ComplexFunction paramCopy() const;
-    bool isShared() const { return this->func_ptr->func_data.is_shared; }
-    bool hasReal() const { return (this->func_ptr->re == nullptr) ? false : true; }
-    bool hasImag() const { return (this->func_ptr->im == nullptr) ? false : true; }
-    FunctionData &getFunctionData();
-    double occ() const { return this->func_ptr->func_data.occ; }
-    int spin() const { return this->func_ptr->func_data.spin; }
-    FunctionTree<3, double> &real() { return *this->func_ptr->re; }
-    FunctionTree<3, double> &imag() { return *this->func_ptr->im; }
-    const FunctionTree<3, double> &real() const { return *this->func_ptr->re; }
-    const FunctionTree<3, double> &imag() const { return *this->func_ptr->im; }
-    void release() { this->func_ptr.reset(); }
-    bool conjugate() const { return this->conj; }
-    MultiResolutionAnalysis<3> *funcMRA = nullptr;
-    int getRank() const { return rank; }
-    void setRank(int rank) { (*this).rank = rank; }
-    void setOcc(double occ) { this->getFunctionData().occ = occ; }
-    void setSpin(int spin) { this->getFunctionData().spin = spin; }
-    ComplexFunction dagger();
-    virtual ~ComplexFunction() = default;
-
-    void alloc(int type, mrcpp::MultiResolutionAnalysis<3> *mra = nullptr);
-    void free(int type);
-
-    int getSizeNodes(int type) const;
-    int getNNodes(int type) const;
-
-    void setReal(mrcpp::FunctionTree<3, double> *tree);
-    void setImag(mrcpp::FunctionTree<3, double> *tree);
-
-    double norm() const;
-    double getSquareNorm() const;
-    ComplexDouble integrate() const;
-
-    int crop(double prec);
-    void rescale(double c);
-    void rescale(ComplexDouble c);
-    void add(ComplexDouble c, ComplexFunction inp);
-    void absadd(ComplexDouble c, ComplexFunction inp);
-    char printSpin() const;
-
-protected:
-    bool conj{false};
-    std::shared_ptr<mrcpp::TreePtr> func_ptr;
-    int rank = -1; // index in vector
-};
-
-namespace cplxfunc {
-void SetdefaultMRA(MultiResolutionAnalysis<3> *MRA);
-ComplexDouble dot(ComplexFunction bra, ComplexFunction ket);
-ComplexDouble node_norm_dot(ComplexFunction bra, ComplexFunction ket, bool exact);
-void deep_copy(ComplexFunction &out, ComplexFunction &inp);
-void add(ComplexFunction &out, ComplexDouble a, ComplexFunction inp_a, ComplexDouble b, ComplexFunction inp_b, double prec);
-void project(ComplexFunction &out, std::function<double(const Coord<3> &r)> f, int type, double prec);
-void project(ComplexFunction &out, RepresentableFunction<3> &f, int type, double prec);
-void multiply(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply_real(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply_imag(ComplexFunction &out, ComplexFunction inp_a, ComplexFunction inp_b, double prec, bool absPrec = false, bool useMaxNorms = false);
-void multiply(ComplexFunction &out, ComplexFunction &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
-void multiply(ComplexFunction &out, FunctionTree<3, double> &inp_a, RepresentableFunction<3, double> &f, double prec, int nrefine = 0);
-void linear_combination(ComplexFunction &out, const ComplexVector &c, std::vector<ComplexFunction> &inp, double prec);
-} // namespace cplxfunc
-
-class MPI_FuncVector : public std::vector<ComplexFunction> {
-public:
-    MPI_FuncVector(int N = 0);
-    MultiResolutionAnalysis<3> *vecMRA;
-    void distribute();
-};
-
-namespace mpifuncvec {
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, double prec = -1.0);
-void rotate(MPI_FuncVector &Phi, const ComplexMatrix &U, MPI_FuncVector &Psi, double prec = -1.0);
-void save_nodes(MPI_FuncVector &Phi, mrcpp::FunctionTree<3, double> &refTree, BankAccount &account, int sizes = -1);
-MPI_FuncVector multiply(MPI_FuncVector &Phi, RepresentableFunction<3> &f, double prec = -1.0, ComplexFunction *Func = nullptr, int nrefine = 1, bool all = false);
-ComplexVector dot(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-ComplexMatrix calc_lowdin_matrix(MPI_FuncVector &Phi);
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &BraKet);
-ComplexMatrix calc_overlap_matrix(MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-DoubleMatrix calc_norm_overlap_matrix(MPI_FuncVector &BraKet);
-void orthogonalize(double prec, MPI_FuncVector &Bra, MPI_FuncVector &Ket);
-} // namespace mpifuncvec
-} // namespace mrcpp
diff --git a/src/utils/Plotter.cpp b/src/utils/Plotter.cpp
index b24f2a643..c29b3ee2e 100644
--- a/src/utils/Plotter.cpp
+++ b/src/utils/Plotter.cpp
@@ -109,16 +109,13 @@ template <int D, typename T> void Plotter<D, T>::gridPlot(const MWTree<D, T> &tr
  *  vector A starting from the origin O to a file named fname + file extension
  *  (".line" as default).
  */
-template <int D, typename T>
-void Plotter<D, T>::linePlot(const std::array<int, 1> &npts,
-                          const RepresentableFunction<D, T> &func,
-                          const std::string &fname) {
+template <int D, typename T> void Plotter<D, T>::linePlot(const std::array<int, 1> &npts, const RepresentableFunction<D, T> &func, const std::string &fname) {
     println(20, "----------Line Plot-----------");
     std::stringstream file;
     file << fname << this->suffix[Plotter<D, T>::Line];
     if (verifyRange(1)) { // Verifies only A vector
         Eigen::MatrixXd coords = calcLineCoordinates(npts[0]);
-        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
+        Eigen::Matrix<T, Eigen::Dynamic, 1> values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeData(coords, values);
         closePlot();
@@ -138,16 +135,13 @@ void Plotter<D, T>::linePlot(const std::array<int, 1> &npts,
  *  vectors A (npts[0] points) and B (npts[1] points), starting from the
  *  origin O, to a file named fname + file extension (".surf" as default).
  */
-template <int D, typename T>
-void Plotter<D, T>::surfPlot(const std::array<int, 2> &npts,
-                          const RepresentableFunction<D, T> &func,
-                          const std::string &fname) {
+template <int D, typename T> void Plotter<D, T>::surfPlot(const std::array<int, 2> &npts, const RepresentableFunction<D, T> &func, const std::string &fname) {
     println(20, "--------Surface Plot----------");
     std::stringstream file;
     file << fname << this->suffix[Plotter<D, T>::Surface];
     if (verifyRange(2)) { // Verifies A and B vectors
         Eigen::MatrixXd coords = calcSurfCoordinates(npts[0], npts[1]);
-        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
+        Eigen::Matrix<T, Eigen::Dynamic, 1> values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeData(coords, values);
         closePlot();
@@ -168,16 +162,13 @@ void Plotter<D, T>::surfPlot(const std::array<int, 2> &npts,
  *  starting from the origin O, to a file named fname + file extension
  *  (".cube" as default).
  */
-template <int D, typename T>
-void Plotter<D, T>::cubePlot(const std::array<int, 3> &npts,
-                          const RepresentableFunction<D, T> &func,
-                          const std::string &fname) {
+template <int D, typename T> void Plotter<D, T>::cubePlot(const std::array<int, 3> &npts, const RepresentableFunction<D, T> &func, const std::string &fname) {
     println(20, "----------Cube Plot-----------");
     std::stringstream file;
     file << fname << this->suffix[Plotter<D, T>::Cube];
     if (verifyRange(3)) { // Verifies A, B and C vectors
         Eigen::MatrixXd coords = calcCubeCoordinates(npts[0], npts[1], npts[2]);
-        Eigen::Matrix< T, Eigen::Dynamic, 1 > values = evaluateFunction(func, coords);
+        Eigen::Matrix<T, Eigen::Dynamic, 1> values = evaluateFunction(func, coords);
         openPlot(file.str());
         writeCube(npts, values);
         closePlot();
@@ -272,12 +263,10 @@ template <int D, typename T> Eigen::MatrixXd Plotter<D, T>::calcCubeCoordinates(
  *  this routine evaluates the function in these points and stores the results
  *  in the vector "values".
  */
-template <int D, typename T>
-Eigen::Matrix< T, Eigen::Dynamic, 1 > Plotter<D, T>::evaluateFunction(const RepresentableFunction<D, T> &func,
-                                             const Eigen::MatrixXd &coords) const {
+template <int D, typename T> Eigen::Matrix<T, Eigen::Dynamic, 1> Plotter<D, T>::evaluateFunction(const RepresentableFunction<D, T> &func, const Eigen::MatrixXd &coords) const {
     auto npts = coords.rows();
     if (npts == 0) MSG_ERROR("Empty coordinates");
-    Eigen::Matrix< T, Eigen::Dynamic, 1 > values = Eigen::Matrix< T, Eigen::Dynamic, 1 >::Zero(npts);
+    Eigen::Matrix<T, Eigen::Dynamic, 1> values = Eigen::Matrix<T, Eigen::Dynamic, 1>::Zero(npts);
 #pragma omp parallel for schedule(static) num_threads(mrcpp_get_num_threads())
     for (auto i = 0; i < npts; i++) {
         Coord<D> r{};
@@ -294,7 +283,7 @@ Eigen::Matrix< T, Eigen::Dynamic, 1 > Plotter<D, T>::evaluateFunction(const Repr
  *  point number (between 0 and nPoints), coordinates 1 through D and the
  *  function value.
  */
-  template <int D, typename T> void Plotter<D, T>::writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values) {
+template <int D, typename T> void Plotter<D, T>::writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix<T, Eigen::Dynamic, 1> &values) {
     if (coords.rows() != values.size()) INVALID_ARG_ABORT;
     std::ofstream &o = *this->fout;
     for (auto i = 0; i < values.size(); i++) {
@@ -308,7 +297,7 @@ Eigen::Matrix< T, Eigen::Dynamic, 1 > Plotter<D, T>::evaluateFunction(const Repr
 }
 
 // Specialized for D=3 below
-  template <int D, typename T> void Plotter<D, T>::writeCube(const std::array<int, 3> &npts, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values) {
+template <int D, typename T> void Plotter<D, T>::writeCube(const std::array<int, 3> &npts, const Eigen::Matrix<T, Eigen::Dynamic, 1> &values) {
     NOT_IMPLEMENTED_ABORT
 }
 
@@ -412,31 +401,22 @@ template <> void Plotter<3>::writeNodeGrid(const MWNode<3> &node, const std::str
 
     for (int d = 0; d < 3; d++) origin[d] = node.getNodeIndex()[d] * length;
 
-    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] << " "
-      << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] + length
-      << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] << color << std::endl;
-
-    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] << " "
-      << origin[2] + length << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] + length
-      << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] << color << std::endl;
-    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] + length
-      << " " << origin[2] << " " << color << origin[0] + length << " " << origin[1] + length << " " << origin[2] << " "
-      << color << origin[0] + length << " " << origin[1] << " " << origin[2] << color << std::endl;
-
-    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color
-      << origin[0] + length << " " << origin[1] + length << " " << origin[2] << " " << color << origin[0] + length
-      << " " << origin[1] << " " << origin[2] << " " << color << origin[0] + length << " " << origin[1] << " "
-      << origin[2] + length << color << std::endl;
-
-    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color
-      << origin[0] + length << " " << origin[1] + length << " " << origin[2] << " " << color << origin[0] << " "
-      << origin[1] + length << " " << origin[2] << " " << color << origin[0] << " " << origin[1] + length << " "
-      << origin[2] + length << color << std::endl;
-
-    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color
-      << origin[0] + length << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " "
-      << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " "
-      << origin[2] + length << color << std::endl;
+    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length
+      << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] << color << std::endl;
+
+    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] + length << " " << origin[1]
+      << " " << origin[2] + length << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] << color << std::endl;
+    o << origin[0] << " " << origin[1] << " " << origin[2] << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] << " " << color << origin[0] + length << " "
+      << origin[1] + length << " " << origin[2] << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] << color << std::endl;
+
+    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color << origin[0] + length << " " << origin[1] + length << " " << origin[2] << " " << color
+      << origin[0] + length << " " << origin[1] << " " << origin[2] << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] + length << color << std::endl;
+
+    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color << origin[0] + length << " " << origin[1] + length << " " << origin[2] << " " << color
+      << origin[0] << " " << origin[1] + length << " " << origin[2] << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] + length << color << std::endl;
+
+    o << origin[0] + length << " " << origin[1] + length << " " << origin[2] + length << " " << color << origin[0] + length << " " << origin[1] << " " << origin[2] + length << " " << color
+      << origin[0] << " " << origin[1] << " " << origin[2] + length << " " << color << origin[0] << " " << origin[1] + length << " " << origin[2] + length << color << std::endl;
 }
 
 /** @brief Writing grid data to file
diff --git a/src/utils/Plotter.h b/src/utils/Plotter.h
index 547150197..9612dedec 100644
--- a/src/utils/Plotter.h
+++ b/src/utils/Plotter.h
@@ -86,10 +86,10 @@ template <int D, typename T> class Plotter {
     Eigen::MatrixXd calcSurfCoordinates(int pts_a, int pts_b) const;
     Eigen::MatrixXd calcCubeCoordinates(int pts_a, int pts_b, int pts_c) const;
 
-    Eigen::Matrix< T, Eigen::Dynamic, 1 > evaluateFunction(const RepresentableFunction<D, T> &func, const Eigen::MatrixXd &coords) const;
+    Eigen::Matrix<T, Eigen::Dynamic, 1> evaluateFunction(const RepresentableFunction<D, T> &func, const Eigen::MatrixXd &coords) const;
 
-  void writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix< T, Eigen::Dynamic, 1 > &values);
-  virtual void writeCube(const std::array<int, 3> &npts, const Eigen::Matrix< T, Eigen::Dynamic, 1 >  &values);
+    void writeData(const Eigen::MatrixXd &coords, const Eigen::Matrix<T, Eigen::Dynamic, 1> &values);
+    virtual void writeCube(const std::array<int, 3> &npts, const Eigen::Matrix<T, Eigen::Dynamic, 1> &values);
 
     void writeGrid(const MWTree<D, T> &tree);
     void writeNodeGrid(const MWNode<D, T> &node, const std::string &color);
diff --git a/src/utils/Printer.cpp b/src/utils/Printer.cpp
index 957d7322b..24585feb3 100644
--- a/src/utils/Printer.cpp
+++ b/src/utils/Printer.cpp
@@ -265,7 +265,7 @@ void print::tree(int level, const std::string &txt, int n, int m, double t) {
  * @param[in] tree: Tree to be printed
  * @param[in] timer: Timer to be evaluated
  */
-  template <int D, typename T> void print::tree(int level, const std::string &txt, const MWTree<D, T> &tree, const Timer &timer) {
+template <int D, typename T> void print::tree(int level, const std::string &txt, const MWTree<D, T> &tree, const Timer &timer) {
     if (level > Printer::getPrintLevel()) return;
 
     auto n = tree.getNNodes();
diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp
index 6c4d9b02b..11e8aa3c2 100644
--- a/src/utils/math_utils.cpp
+++ b/src/utils/math_utils.cpp
@@ -188,9 +188,9 @@ void math_utils::apply_filter(double *out, double *in, const MatrixXd &filter, i
 
 /** Matrix multiplication of the filter with the input coefficient (type complex)*/
 void math_utils::apply_filter(ComplexDouble *out, ComplexDouble *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
-  //#ifdef HAVE_BLAS
-//    cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
-//#else
+    //#ifdef HAVE_BLAS
+    //    cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
+    //#else
     Map<MatrixXcd> f(in, kp1, kp1_dm1);
     Map<MatrixXcd> g(out, kp1_dm1, kp1);
     if (fac < MachineZero) {
@@ -198,7 +198,7 @@ void math_utils::apply_filter(ComplexDouble *out, ComplexDouble *in, const Matri
     } else {
         g.noalias() += f.transpose() * filter;
     }
-//#endif
+    //#endif
 }
 
 /** Make a nD-representation from 1D-representations of separable functions.
@@ -242,7 +242,6 @@ void math_utils::tensor_expand_coords_3D(int kp1, const MatrixXd &primitive, Mat
     }
 }
 
-
 /** @brief Compute the eigenvalues and eigenvectors of a Hermitian matrix
  *
  * @param A: matrix to diagonalize (not modified)
diff --git a/src/utils/mpi_utils.cpp b/src/utils/mpi_utils.cpp
index e193aea3a..77526375b 100644
--- a/src/utils/mpi_utils.cpp
+++ b/src/utils/mpi_utils.cpp
@@ -36,7 +36,8 @@ namespace mrcpp {
  *  @param[in] comm: Communicator sharing resources
  *  @param[in] sh_size: Memory size, in MB
  */
-template <typename T> SharedMemory<T>::SharedMemory(mrcpp::mpi_comm comm, int sh_size)
+template <typename T>
+SharedMemory<T>::SharedMemory(mrcpp::mpi_comm comm, int sh_size)
         : sh_start_ptr(nullptr)
         , sh_end_ptr(nullptr)
         , sh_max_ptr(nullptr)
@@ -101,8 +102,7 @@ template <int D, typename T> void send_tree(FunctionTree<D, T> &tree, int dst, i
     Timer t1;
     for (int iChunk = 0; iChunk < nChunks; iChunk++) {
         MPI_Send(allocator.getNodeChunk(iChunk), allocator.getNodeChunkSize(), MPI_BYTE, dst, tag + iChunk + 1, comm);
-        if (coeff)
-            MPI_Send(allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize(), MPI_BYTE, dst, tag + iChunk + 1001, comm);
+        if (coeff) MPI_Send(allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize(), MPI_BYTE, dst, tag + iChunk + 1001, comm);
     }
     println(10, " Time send                   " << std::setw(30) << t1.elapsed());
 #endif
@@ -136,8 +136,7 @@ template <int D, typename T> void recv_tree(FunctionTree<D, T> &tree, int src, i
     allocator.init(nChunks, coeff);
     for (int iChunk = 0; iChunk < nChunks; iChunk++) {
         MPI_Recv(allocator.getNodeChunk(iChunk), allocator.getNodeChunkSize(), MPI_BYTE, src, tag + iChunk + 1, comm, &status);
-        if (coeff)
-            MPI_Recv(allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize(), MPI_BYTE, src, tag + iChunk + 1001, comm, &status);
+        if (coeff) MPI_Recv(allocator.getCoefChunk(iChunk), allocator.getCoefChunkSize(), MPI_BYTE, src, tag + iChunk + 1001, comm, &status);
     }
     println(10, " Time receive                " << std::setw(30) << t1.elapsed());
 
diff --git a/src/utils/mpi_utils.h b/src/utils/mpi_utils.h
index 0a854580f..062d1affa 100644
--- a/src/utils/mpi_utils.h
+++ b/src/utils/mpi_utils.h
@@ -63,7 +63,7 @@ extern MPI_Comm comm_share;
 extern MPI_Comm comm_sh_group;
 extern MPI_Comm comm_bank;
 
-}// namespace mpi
+} // namespace mpi
 } // namespace mrcpp
 
 namespace mrcpp {
@@ -85,9 +85,9 @@ template <typename T> class SharedMemory {
 
     void clear(); // show shared memory as entirely available
 
-    T *sh_start_ptr;  // start of shared block
-    T *sh_end_ptr;    // end of used part
-    T *sh_max_ptr;    // end of shared block
+    T *sh_start_ptr;       // start of shared block
+    T *sh_end_ptr;         // end of used part
+    T *sh_max_ptr;         // end of shared block
     mrcpp::mpi_win sh_win; // MPI window object
     int rank;              // rank among shared group
 };
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 47fbf226e..ab1000e66 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -1,8 +1,8 @@
 #include <Eigen/Core>
 #include <MRCPP/Printer>
 #include <MRCPP/Timer>
-#include <vector>
 #include <thread>
+#include <vector>
 
 #include "Bank.h"
 #include "omp_utils.h"
@@ -11,7 +11,7 @@
 
 #ifdef MRCPP_HAS_OMP
 #define mrcpp_get_max_threads() omp_get_max_threads()
-#define mrcpp_get_num_procs() omp_get_num_procs()/2
+#define mrcpp_get_num_procs() omp_get_num_procs() / 2
 #define mrcpp_set_dynamic(n) omp_set_dynamic(n)
 #else
 #define mrcpp_get_max_threads() 1
@@ -55,10 +55,10 @@ int is_bankclient = 1;
 int is_bankmaster = 0; // only one bankmaster is_bankmaster
 int bank_size = 0;
 int bank_per_node = 0;
-int omp_threads = -1; // can be set to force number of threads
+int omp_threads = -1;         // can be set to force number of threads
 int use_omp_num_threads = -1; // can be set to use number of threads from env
-int tot_bank_size = 0; // size of bank, including the task manager
-int max_tag = 0;       // max value allowed by MPI
+int tot_bank_size = 0;        // size of bank, including the task manager
+int max_tag = 0;              // max value allowed by MPI
 vector<int> bankmaster;
 int task_bank = -1; // world rank of the task manager
 
@@ -67,7 +67,6 @@ MPI_Comm comm_share;
 MPI_Comm comm_sh_group;
 MPI_Comm comm_bank;
 
-
 int id_shift; // to ensure that nodes, orbitals and functions do not collide
 
 extern int metadata_block[3]; // can add more metadata in future
@@ -95,21 +94,19 @@ void initialize() {
     // define independent group of MPI processes, that are not part of comm_wrk
     // for now the new group does not include comm_share
     comm_bank = MPI_COMM_WORLD; // clients and master
-    MPI_Comm comm_remainder;         // clients only
+    MPI_Comm comm_remainder;    // clients only
 
     // set bank_size automatically if not defined by user
     if (world_size < 2) {
         bank_size = 0;
     } else if (bank_size < 0) {
         if (bank_per_node >= 0) {
-              bank_size = node_size * bank_per_node;
+            bank_size = node_size * bank_per_node;
         } else {
             bank_size = max(world_size / 3, 1);
         }
-    } else if (bank_size >=0 and bank_per_node >= 0) {
-        if (bank_size != node_size * bank_per_node and world_rank == 0)
-            std::cout<<"WARNING: bank_size and bank_per_node are incompatible "<<
-                bank_size<<" "<<bank_per_node<<std::endl;
+    } else if (bank_size >= 0 and bank_per_node >= 0) {
+        if (bank_size != node_size * bank_per_node and world_rank == 0) std::cout << "WARNING: bank_size and bank_per_node are incompatible " << bank_size << " " << bank_per_node << std::endl;
     }
     if (world_size - bank_size < 1) MSG_ABORT("No MPI ranks left for working!");
     if (bank_size < 1 and world_size > 1) MSG_ABORT("Bank size must be at least one when using MPI!");
@@ -173,13 +170,12 @@ void initialize() {
     max_tag = *(int *)val / 2;
     id_shift = max_tag / 2; // half is reserved for non orbital.
 
-
-    MPI_Comm comm_share_world;//all that share the memory
+    MPI_Comm comm_share_world; // all that share the memory
     MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &comm_share_world);
 
-    int n_bank_thisnode; //number of banks on this node
+    int n_bank_thisnode; // number of banks on this node
     MPI_Allreduce(&is_bank, &n_bank_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world);
-    int n_wrk_thisnode; //number of workers on this node
+    int n_wrk_thisnode; // number of workers on this node
     MPI_Allreduce(&is_bankclient, &n_wrk_thisnode, 1, MPI_INT, MPI_SUM, comm_share_world);
 
     int omp_threads_available = thread::hardware_concurrency();
@@ -193,7 +189,7 @@ void initialize() {
         // NB: OMP_NUM_THREADS is the number of threads for all MPI processes on one node.
         // The bank need only one thread, and can give "their" remaining share to workers.
         int total_omp_threads_per_node = my_OMP_NUM_THREADS * (n_bank_thisnode + n_wrk_thisnode);
-        nthreads = (total_omp_threads_per_node - n_bank_thisnode)/n_wrk_thisnode;
+        nthreads = (total_omp_threads_per_node - n_bank_thisnode) / n_wrk_thisnode;
     } else {
         // we determine the number of threads by detecting what is available
         // determine the number of threads we can assign to each mpi worker.
@@ -209,36 +205,35 @@ void initialize() {
         // 3) Bank needs only one thread
         // 4) workers need as many threads as possible (but all workers use same number of threads)
         // 5) at least one thread
-        if (is_bankclient) nthreads = (omp_threads_available/2-n_bank_thisnode)/n_wrk_thisnode; // 1) and 4)
-        //cout<<nthreads<<" after direct calculation"<<endl;
-        // do not exceed total number of cores accessible (assumed to be half the number of logical threads)
-        nthreads = min(nthreads, omp_get_num_procs()/2); // 2)
-        //cout<<nthreads<<" after mrcpp_get_num_procs"<<endl;
+        if (is_bankclient) nthreads = (omp_threads_available / 2 - n_bank_thisnode) / n_wrk_thisnode; // 1) and 4)
+        // cout<<nthreads<<" after direct calculation"<<endl;
+        //  do not exceed total number of cores accessible (assumed to be half the number of logical threads)
+        nthreads = min(nthreads, omp_get_num_procs() / 2); // 2)
+        // cout<<nthreads<<" after mrcpp_get_num_procs"<<endl;
 
         // NB: we do not use OMP_NUM_THREADS. Use all cores accessible.
 
         if (is_bank) nthreads = 1; // 3)
 
-        //        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<" threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<" my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
+        //        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<"
+        //        threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<"
+        //        my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
 
         if (omp_threads > 0) {
             if (omp_threads != nthreads and world_rank == 0) {
-                cout<<"Warning: recommended number of threads is "<<nthreads<<endl;
-                cout<<"setting number of threads to omp_threads, "<<max(1, omp_threads)<<endl;
+                cout << "Warning: recommended number of threads is " << nthreads << endl;
+                cout << "setting number of threads to omp_threads, " << max(1, omp_threads) << endl;
             }
             nthreads = omp_threads;
         }
     }
     nthreads = max(1, nthreads); // 5)
 
-    if (nthreads*n_wrk_thisnode+n_bank_thisnode < omp_threads_available/3 and world_rank == 0) {
-        std::cout<<"WARNING: only "<<nthreads*n_wrk_thisnode+n_bank_thisnode<<" threads used per node while "<<omp_threads_available<<" logical cpus are accessible "<<std::endl;
+    if (nthreads * n_wrk_thisnode + n_bank_thisnode < omp_threads_available / 3 and world_rank == 0) {
+        std::cout << "WARNING: only " << nthreads * n_wrk_thisnode + n_bank_thisnode << " threads used per node while " << omp_threads_available << " logical cpus are accessible " << std::endl;
     }
 
-    if (nthreads > omp_get_num_procs()) {
-        std::cout<<"WARNING: MPI rank "<<world_rank<<" will use "<<nthreads<<" but only "<<
-            omp_get_num_procs()<<" procs are accessible"<<std::endl;
-    }
+    if (nthreads > omp_get_num_procs()) { std::cout << "WARNING: MPI rank " << world_rank << " will use " << nthreads << " but only " << omp_get_num_procs() << " procs are accessible" << std::endl; }
 
     omp::n_threads = nthreads;
     mrcpp::set_max_threads(nthreads);
@@ -290,11 +285,10 @@ bool my_func(int j) {
 }
 
 /** @brief Test if function belongs to this MPI rank */
-bool my_func(const CompFunction<3>& func) {
+bool my_func(const CompFunction<3> &func) {
     return my_func(func.rank());
 }
 
-
 /** @brief Test if function belongs to this MPI rank */
 bool my_func(CompFunction<3> *func) {
     return my_func(func->rank());
@@ -359,14 +353,18 @@ void allreduce_matrix(ComplexMatrix &mat, MPI_Comm comm) {
 void send_function(const CompFunction<3> &func, int dst, int tag, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     for (int i = 0; i < func.Ncomp(); i++) {
-        //make sure that Nchunks is up to date
-        if (func.isreal()) func.Nchunks()[i] = func.CompD[i]->getNChunks();
-        else func.Nchunks()[i] = func.CompC[i]->getNChunks();
+        // make sure that Nchunks is up to date
+        if (func.isreal())
+            func.Nchunks()[i] = func.CompD[i]->getNChunks();
+        else
+            func.Nchunks()[i] = func.CompC[i]->getNChunks();
     }
     MPI_Send(&func.func_ptr->data, sizeof(CompFunctionData<3>), MPI_BYTE, dst, 0, comm);
     for (int i = 0; i < func.Ncomp(); i++) {
-        if (func.isreal()) mrcpp::send_tree(*func.CompD[i], dst, tag, comm, func.Nchunks()[i]);
-        else mrcpp::send_tree(*func.CompC[i], dst, tag, comm, func.Nchunks()[i]);
+        if (func.isreal())
+            mrcpp::send_tree(*func.CompD[i], dst, tag, comm, func.Nchunks()[i]);
+        else
+            mrcpp::send_tree(*func.CompC[i], dst, tag, comm, func.Nchunks()[i]);
     }
 #endif
 }
@@ -378,9 +376,11 @@ void recv_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     int func_ncomp_in = func.Ncomp();
     MPI_Recv(&func.func_ptr->data, sizeof(CompFunctionData<3>), MPI_BYTE, src, 0, comm, &status);
     for (int i = 0; i < func.Ncomp(); i++) {
-        if (func_ncomp_in <= i) func.alloc(i+1);
-        if (func.isreal()) mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks()[i]);
-        else  mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks()[i]);
+        if (func_ncomp_in <= i) func.alloc(i + 1);
+        if (func.isreal())
+            mrcpp::recv_tree(*func.CompD[i], src, tag, comm, func.Nchunks()[i]);
+        else
+            mrcpp::recv_tree(*func.CompC[i], src, tag, comm, func.Nchunks()[i]);
     }
 #endif
 }
@@ -390,8 +390,10 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm) {
     if (func.isShared()) {
 #ifdef MRCPP_HAS_MPI
         for (int comp = 0; comp < func.Ncomp(); comp++) {
-            if (func.isreal()) mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
-            else  mrcpp::share_tree(*func.CompC[comp], src, tag, comm);
+            if (func.isreal())
+                mrcpp::share_tree(*func.CompD[comp], src, tag, comm);
+            else
+                mrcpp::share_tree(*func.CompC[comp], src, tag, comm);
         }
 #endif
     }
@@ -542,7 +544,6 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
 #endif
 }
 
-
 /** @brief make union tree without coeff and send to all
  *  Complex trees
  */
@@ -564,11 +565,10 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
 #endif
 }
 
-
 /** @brief make union tree without coeff and send to all
  *  Include both real and imaginary parts
  */
-    void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
+void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 12f1c0cf8..40b74bc7a 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -41,7 +41,7 @@ bool grand_master();
 bool share_master();
 
 bool my_func(int j);
-bool my_func(const CompFunction<3>& func);
+bool my_func(const CompFunction<3> &func);
 bool my_func(CompFunction<3> *func);
 
 // bool my_unique_orb(const Orbital &orb);
diff --git a/src/utils/tree_utils.cpp b/src/utils/tree_utils.cpp
index f45fcc158..333544f6e 100644
--- a/src/utils/tree_utils.cpp
+++ b/src/utils/tree_utils.cpp
@@ -216,7 +216,7 @@ template <int D, typename T> void tree_utils::mw_transform(const MWTree<D, T> &t
 }
 
 // Specialized for D=3 below.
-//template <int D, typename T> void tree_utils::mw_transform_back(MWTree<D, T> &tree, double *coeff_in, double *coeff_out, int stride) {
+// template <int D, typename T> void tree_utils::mw_transform_back(MWTree<D, T> &tree, double *coeff_in, double *coeff_out, int stride) {
 //    NOT_IMPLEMENTED_ABORT;
 //}
 
@@ -226,7 +226,7 @@ template <int D, typename T> void tree_utils::mw_transform(const MWTree<D, T> &t
  * The output is read directly from the 8 children scaling coefficients.
  * NB: ASSUMES that the children coefficients are separated by Children_Stride!
  */
-template <typename T>  void tree_utils::mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride) {
+template <typename T> void tree_utils::mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride) {
     int operation = Compression;
     int kp1 = tree.getKp1();
     int kp1_d = tree.getKp1_d();
@@ -300,7 +300,6 @@ template <typename T>  void tree_utils::mw_transform_back(MWTree<3, T> &tree, T
     }
 }
 
-
 template void tree_utils::make_node_table<1, double>(MWTree<1, double> &tree, MWNodeVector<1, double> &table);
 template void tree_utils::make_node_table<2, double>(MWTree<2, double> &tree, MWNodeVector<2, double> &table);
 template void tree_utils::make_node_table<3, double>(MWTree<3, double> &tree, MWNodeVector<3, double> &table);
@@ -317,11 +316,10 @@ template void tree_utils::mw_transform<1, double>(const MWTree<1, double> &tree,
 template void tree_utils::mw_transform<2, double>(const MWTree<2, double> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
 template void tree_utils::mw_transform<3, double>(const MWTree<3, double> &tree, double *coeff_in, double *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
 
-//template void tree_utils::mw_transform_back<1, double>(MWTree<1, double> &tree, double *coeff_in, double *coeff_out, int stride);
-//template void tree_utils::mw_transform_back<2, double>(MWTree<2, double> &tree, double *coeff_in, double *coeff_out, int stride);
+// template void tree_utils::mw_transform_back<1, double>(MWTree<1, double> &tree, double *coeff_in, double *coeff_out, int stride);
+// template void tree_utils::mw_transform_back<2, double>(MWTree<2, double> &tree, double *coeff_in, double *coeff_out, int stride);
 template void tree_utils::mw_transform_back<double>(MWTree<3, double> &tree, double *coeff_in, double *coeff_out, int stride);
 
-  
 template void tree_utils::make_node_table<1, ComplexDouble>(MWTree<1, ComplexDouble> &tree, MWNodeVector<1, ComplexDouble> &table);
 template void tree_utils::make_node_table<2, ComplexDouble>(MWTree<2, ComplexDouble> &tree, MWNodeVector<2, ComplexDouble> &table);
 template void tree_utils::make_node_table<3, ComplexDouble>(MWTree<3, ComplexDouble> &tree, MWNodeVector<3, ComplexDouble> &table);
@@ -338,8 +336,8 @@ template void tree_utils::mw_transform<1, ComplexDouble>(const MWTree<1, Complex
 template void tree_utils::mw_transform<2, ComplexDouble>(const MWTree<2, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
 template void tree_utils::mw_transform<3, ComplexDouble>(const MWTree<3, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, bool readOnlyScaling, int stride, bool b_overwrite);
 
-//template void tree_utils::mw_transform_back<1, ComplexDouble>(MWTree<1, ComplexDouble &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
-//template void tree_utils::mw_transform_back<2, ComplexDouble>(MWTree<2, ComplexDouble &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
+// template void tree_utils::mw_transform_back<1, ComplexDouble>(MWTree<1, ComplexDouble &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
+// template void tree_utils::mw_transform_back<2, ComplexDouble>(MWTree<2, ComplexDouble &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
 template void tree_utils::mw_transform_back<ComplexDouble>(MWTree<3, ComplexDouble> &tree, ComplexDouble *coeff_in, ComplexDouble *coeff_out, int stride);
 
 } // namespace mrcpp
diff --git a/src/utils/tree_utils.h b/src/utils/tree_utils.h
index 90ff2a418..56c8c7d79 100644
--- a/src/utils/tree_utils.h
+++ b/src/utils/tree_utils.h
@@ -25,8 +25,8 @@
 
 #pragma once
 
-#include "utils/math_utils.h"
 #include "MRCPP/mrcpp_declarations.h"
+#include "utils/math_utils.h"
 
 namespace mrcpp {
 namespace tree_utils {
@@ -37,9 +37,8 @@ template <int D, typename T> void make_node_table(MWTree<D, T> &tree, MWNodeVect
 template <int D, typename T> void make_node_table(MWTree<D, T> &tree, std::vector<MWNodeVector<D, T>> &table);
 
 template <int D, typename T> void mw_transform(const MWTree<D, T> &tree, T *coeff_in, T *coeff_out, bool readOnlyScaling, int stride, bool overwrite = true);
-//template <int D, typename T> void mw_transform_back(MWTree<D, T> &tree, T *coeff_in, T *coeff_out, int stride);
+// template <int D, typename T> void mw_transform_back(MWTree<D, T> &tree, T *coeff_in, T *coeff_out, int stride);
 template <typename T> void mw_transform_back(MWTree<3, T> &tree, T *coeff_in, T *coeff_out, int stride);
 
-
 } // namespace tree_utils
 } // namespace mrcpp
diff --git a/tests/operators/derivative_operator.cpp b/tests/operators/derivative_operator.cpp
index e4f887c8f..b6d73fd8c 100644
--- a/tests/operators/derivative_operator.cpp
+++ b/tests/operators/derivative_operator.cpp
@@ -153,7 +153,7 @@ template <int D> void testDifferentiationCplxABGV(double a, double b) {
     apply(dg_tree, diff, f_tree, 0);
 
     FunctionTree<D, ComplexDouble> err_tree(*mra);
-    add(-1.0, err_tree, {1.0, 0.0}, df_tree, {-1.0, 0.0}, dg_tree);// difference between analytical and MW derivative of f.
+    add(-1.0, err_tree, {1.0, 0.0}, df_tree, {-1.0, 0.0}, dg_tree); // difference between analytical and MW derivative of f.
 
     double df_norm = std::sqrt(df_tree.getSquareNorm());
     double abs_err = std::sqrt(err_tree.getSquareNorm());
@@ -301,64 +301,117 @@ template <int D> void testDifferentiationBS(int order) {
 
 TEST_CASE("ABGV differentiantion central difference", "[derivative_operator], [central_difference]") {
     // 0.5,0.5 specifies central difference
-    SECTION("1D derivative test") { testDifferentiationABGV<1>(0.5, 0.5); }
-    SECTION("2D derivative test") { testDifferentiationABGV<2>(0.5, 0.5); }
-    SECTION("3D derivative test") { testDifferentiationABGV<3>(0.5, 0.5); }
+    SECTION("1D derivative test") {
+        testDifferentiationABGV<1>(0.5, 0.5);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationABGV<2>(0.5, 0.5);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationABGV<3>(0.5, 0.5);
+    }
 }
 
 TEST_CASE("ABGV differentiantion center difference", "[derivative_operator], [center_difference]") {
     // 0,0 specifies center difference
-    SECTION("1D derivative test") { testDifferentiationABGV<1>(0, 0); }
-    SECTION("2D derivative test") { testDifferentiationABGV<2>(0, 0); }
-    SECTION("3D derivative test") { testDifferentiationABGV<3>(0, 0); }
+    SECTION("1D derivative test") {
+        testDifferentiationABGV<1>(0, 0);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationABGV<2>(0, 0);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationABGV<3>(0, 0);
+    }
 }
 
-
 TEST_CASE("ABGV differentiantion of Complex function", "[derivative_operator], [Complex]") {
     // 0.5,0.5 specifies central difference
-    SECTION("1D derivative test") { testDifferentiationCplxABGV<1>(0.5, 0.5); }
-    SECTION("2D derivative test") { testDifferentiationCplxABGV<2>(0.5, 0.5); }
-    SECTION("3D derivative test") { testDifferentiationCplxABGV<3>(0.5, 0.5); }
+    SECTION("1D derivative test") {
+        testDifferentiationCplxABGV<1>(0.5, 0.5);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationCplxABGV<2>(0.5, 0.5);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationCplxABGV<3>(0.5, 0.5);
+    }
 }
 
 TEST_CASE("PH differentiantion first order", "[derivative_operator], [PH_first_order]") {
-    SECTION("1D derivative test") { testDifferentiationPH<1>(1); }
-    SECTION("2D derivative test") { testDifferentiationPH<2>(1); }
-    SECTION("3D derivative test") { testDifferentiationPH<3>(1); }
+    SECTION("1D derivative test") {
+        testDifferentiationPH<1>(1);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationPH<2>(1);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationPH<3>(1);
+    }
 }
 
 TEST_CASE("PH differentiantion second order", "[derivative_operator], [PH_second_order]") {
-    SECTION("1D second order derivative test") { testDifferentiationPH<1>(2); }
-    SECTION("2D second order derivative test") { testDifferentiationPH<2>(2); }
-    SECTION("3D second order derivative test") { testDifferentiationPH<3>(2); }
+    SECTION("1D second order derivative test") {
+        testDifferentiationPH<1>(2);
+    }
+    SECTION("2D second order derivative test") {
+        testDifferentiationPH<2>(2);
+    }
+    SECTION("3D second order derivative test") {
+        testDifferentiationPH<3>(2);
+    }
 }
 
 TEST_CASE("Periodic ABGV differentiantion central difference", "[periodic_derivative],[derivative_operator], [central_difference], [ABGV_periodic]") {
     // 0.5,0.5 specifies central difference
-    SECTION("3D periodic derivative test") { testDifferentiationPeriodicABGV<3>(0.5, 0.5); }
+    SECTION("3D periodic derivative test") {
+        testDifferentiationPeriodicABGV<3>(0.5, 0.5);
+    }
 }
 
 TEST_CASE("Periodic PH differentiantion", "[periodic_derivative], [derivative_operator], [PH_periodic]") {
-    SECTION("3D first order periodic derivative test") { testDifferentiationPeriodicPH<3>(1); }
-    SECTION("3D first order periodic derivative test") { testDifferentiationPeriodicPH<3>(2); }
+    SECTION("3D first order periodic derivative test") {
+        testDifferentiationPeriodicPH<3>(1);
+    }
+    SECTION("3D second order periodic derivative test") {
+        testDifferentiationPeriodicPH<3>(2); // argument 2: presumably the derivative order, as in the non-periodic PH tests - confirm
+    }
 }
 
 TEST_CASE("BS differentiantion first order", "[derivative_operator], [BS_first_order]") {
-    SECTION("1D derivative test") { testDifferentiationBS<1>(1); }
-    SECTION("2D derivative test") { testDifferentiationBS<2>(1); }
-    SECTION("3D derivative test") { testDifferentiationBS<3>(1); }
+    SECTION("1D derivative test") {
+        testDifferentiationBS<1>(1);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationBS<2>(1);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationBS<3>(1);
+    }
 }
 
 TEST_CASE("BS differentiantion second order", "[derivative_operator], [BS_second_order]") {
-    SECTION("1D derivative test") { testDifferentiationBS<1>(2); }
-    SECTION("2D derivative test") { testDifferentiationBS<2>(2); }
-    SECTION("3D derivative test") { testDifferentiationBS<3>(2); }
+    SECTION("1D derivative test") {
+        testDifferentiationBS<1>(2);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationBS<2>(2);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationBS<3>(2);
+    }
 }
 
 TEST_CASE("BS differentiantion third order", "[derivative_operator], [BS_third_order]") {
-    SECTION("1D derivative test") { testDifferentiationBS<1>(3); }
-    SECTION("2D derivative test") { testDifferentiationBS<2>(3); }
-    SECTION("3D derivative test") { testDifferentiationBS<3>(3); }
+    SECTION("1D derivative test") {
+        testDifferentiationBS<1>(3);
+    }
+    SECTION("2D derivative test") {
+        testDifferentiationBS<2>(3);
+    }
+    SECTION("3D derivative test") {
+        testDifferentiationBS<3>(3);
+    }
 }
 
 TEST_CASE("Gradient operator", "[derivative_operator], [gradient_operator]") {
@@ -439,6 +492,6 @@ TEST_CASE("Divergence operator", "[derivative_operator], [divergence_operator]")
     }
 
     delete mra;
-    }
+}
 
 } // namespace derivative_operator
diff --git a/tests/operators/poisson_operator.cpp b/tests/operators/poisson_operator.cpp
index eab986886..23bb22a06 100644
--- a/tests/operators/poisson_operator.cpp
+++ b/tests/operators/poisson_operator.cpp
@@ -195,6 +195,6 @@ TEST_CASE("Apply Periodic Poisson' operator", "[apply_periodic_Poisson], [poisso
 
     REQUIRE(sol_tree.evalf({0.0, 0.0, 0.0}) == Catch::Approx(1.0).epsilon(apply_prec));
     REQUIRE(sol_tree.evalf({pi, 0.0, 0.0}) == Catch::Approx(-1.0).epsilon(apply_prec));
-    }
+}
 
 } // namespace poisson_operator
diff --git a/tests/operators/schrodinger_evolution_operator.cpp b/tests/operators/schrodinger_evolution_operator.cpp
index 477065062..e6e416f09 100644
--- a/tests/operators/schrodinger_evolution_operator.cpp
+++ b/tests/operators/schrodinger_evolution_operator.cpp
@@ -28,13 +28,12 @@
 #include "factory_functions.h"
 
 #include "functions/GaussFunc.h"
+#include "functions/special_functions.h"
 #include "operators/MWOperator.h"
-#include "treebuilders/project.h"
 #include "operators/TimeEvolutionOperator.h"
-#include "functions/special_functions.h"
-#include "treebuilders/complex_apply.h"
 #include "treebuilders/add.h"
-
+#include "treebuilders/complex_apply.h"
+#include "treebuilders/project.h"
 
 namespace schrodinger_evolution_operator {
 
@@ -45,13 +44,13 @@ TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolutio
     const auto order = 4;
     const auto prec = 1.0e-7;
 
-    int finest_scale = 7; //for time evolution operator construction (not recommended to use more than 10)
-    //int max_Jpower = 20;  //the amount of J integrals to be used in construction (20 should be enough)
+    int finest_scale = 7; // for time evolution operator construction (not recommended to use more than 10)
+    // int max_Jpower = 20;  //the amount of J integrals to be used in construction (20 should be enough)
 
     // Time moments:
-    double t1 = 0.001;         //initial time moment (not recommended to use more than 0.001)
-    double delta_t = 0.03;    //time step (not recommended to use less than 0.001)
-    double t2 = delta_t + t1;  //final time moment
+    double t1 = 0.001;        // initial time moment (not recommended to use more than 0.001)
+    double delta_t = 0.03;    // time step (not recommended to use less than 0.001)
+    double t2 = delta_t + t1; // final time moment
 
     // Initialize world in the unit cube [0,1]
     auto basis = mrcpp::LegendreBasis(order);
@@ -61,28 +60,16 @@ TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolutio
     // Time evolution operatror Exp(delta_t)
     mrcpp::TimeEvolutionOperator<1> ReExp(MRA, prec, delta_t, finest_scale, false);
     mrcpp::TimeEvolutionOperator<1> ImExp(MRA, prec, delta_t, finest_scale, true);
-    
+
     // Analytical solution parameters for psi(x, t)
     double sigma = 0.001;
     double x0 = 0.5;
 
     // Functions f(x) = psi(x, t1) and g(x) = psi(x, t2)
-    auto Re_f = [sigma, x0, t=t1](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real();
-    };
-    auto Im_f = [sigma, x0, t=t1](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag();
-    };
-    auto Re_g = [sigma, x0, t=t2](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real();
-    };
-    auto Im_g = [sigma, x0, t=t2](const mrcpp::Coord<1> &r) -> double
-    {
-        return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag();
-    };
+    auto Re_f = [sigma, x0, t = t1](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real(); };
+    auto Im_f = [sigma, x0, t = t1](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag(); };
+    auto Re_g = [sigma, x0, t = t2](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).real(); };
+    auto Im_g = [sigma, x0, t = t2](const mrcpp::Coord<1> &r) -> double { return mrcpp::free_particle_analytical_solution(r[0], x0, t, sigma).imag(); };
 
     // Projecting functions
     mrcpp::FunctionTree<1> Re_f_tree(MRA);
@@ -97,33 +84,33 @@ TEST_CASE("Apply Schrodinger's evolution operator", "[apply_schrodinger_evolutio
     // Output function trees
     mrcpp::FunctionTree<1> Re_fout_tree(MRA);
     mrcpp::FunctionTree<1> Im_fout_tree(MRA);
-    
+
     // Complex objects for use in apply()
-    mrcpp::ComplexObject< mrcpp::ConvolutionOperator<1> > E(ReExp, ImExp);
-    mrcpp::ComplexObject< mrcpp::FunctionTree<1> > input(Re_f_tree, Im_f_tree);
-    mrcpp::ComplexObject< mrcpp::FunctionTree<1> > output(Re_fout_tree, Im_fout_tree);
+    mrcpp::ComplexObject<mrcpp::ConvolutionOperator<1>> E(ReExp, ImExp);
+    mrcpp::ComplexObject<mrcpp::FunctionTree<1>> input(Re_f_tree, Im_f_tree);
+    mrcpp::ComplexObject<mrcpp::FunctionTree<1>> output(Re_fout_tree, Im_fout_tree);
 
     // Apply operator Exp(delta_t) f(x)
     mrcpp::apply(prec, output, E, input);
-    
+
     // Check g(x) = Exp(delta_t) f(x)
-    mrcpp::FunctionTree<1> Re_error(MRA);  // = Re_fout_tree - Re_g_tree
-    mrcpp::FunctionTree<1> Im_error(MRA);  // = Im_fout_tree - Im_g_tree
-    
+    mrcpp::FunctionTree<1> Re_error(MRA); // = Re_fout_tree - Re_g_tree
+    mrcpp::FunctionTree<1> Im_error(MRA); // = Im_fout_tree - Im_g_tree
+
     // Re_error = Re_fout_tree - Re_g_tree
     mrcpp::add(prec, Re_error, 1.0, Re_fout_tree, -1.0, Re_g_tree);
-    auto Re_sq_norm = Re_error.getSquareNorm();    //1.7e-16
-    
+    auto Re_sq_norm = Re_error.getSquareNorm(); // 1.7e-16
+
     // Im_error = Im_fout_tree - Im_g_tree
     mrcpp::add(prec, Im_error, 1.0, Im_fout_tree, -1.0, Im_g_tree);
-    auto Im_sq_norm = Im_error.getSquareNorm();    //1.7e-17
-    
-    double tolerance = prec * prec / 50.0;         //2.0e-16
-    
-    //std::cout << "Re_sq_norm = " << Re_sq_norm << std::endl;
-    //std::cout << "Im_sq_norm = " << Im_sq_norm << std::endl;
-    //std::cout << "tolerance = " << tolerance << std::endl;
-    
+    auto Im_sq_norm = Im_error.getSquareNorm(); // 1.7e-17
+
+    double tolerance = prec * prec / 50.0; // 2.0e-16
+
+    // std::cout << "Re_sq_norm = " << Re_sq_norm << std::endl;
+    // std::cout << "Im_sq_norm = " << Im_sq_norm << std::endl;
+    // std::cout << "tolerance = " << tolerance << std::endl;
+
     REQUIRE(Re_sq_norm == Catch::Approx(0.0).margin(tolerance));
     REQUIRE(Im_sq_norm == Catch::Approx(0.0).margin(tolerance));
 }
diff --git a/tests/treebuilders/map.cpp b/tests/treebuilders/map.cpp
index c3c333bba..6be2153f0 100644
--- a/tests/treebuilders/map.cpp
+++ b/tests/treebuilders/map.cpp
@@ -40,9 +40,15 @@ namespace mapping {
 template <int D> void testMapping();
 
 SCENARIO("Map a MW tree", "[map], [tree_builder]") {
-    GIVEN("One MW functions in 1D") { testMapping<1>(); }
-    GIVEN("One MW functions in 2D") { testMapping<2>(); }
-    GIVEN("One MW functions in 3D") { testMapping<3>(); }
+    GIVEN("One MW functions in 1D") {
+        testMapping<1>();
+    }
+    GIVEN("One MW functions in 2D") {
+        testMapping<2>();
+    }
+    GIVEN("One MW functions in 3D") {
+        testMapping<3>();
+    }
 }
 
 template <int D> void testMapping() {
@@ -77,7 +83,7 @@ template <int D> void testMapping() {
     const double inp_int = inp_tree.integrate();
     const double inp_norm = inp_tree.getSquareNorm();
 
-    FMap<double,double> fmap = [](double val) { return val * val; };
+    FMap<double, double> fmap = [](double val) { return val * val; };
 
     WHEN("the function is mapped") {
         FunctionTree<D> out_tree(*mra);
diff --git a/tests/treebuilders/multiplication.cpp b/tests/treebuilders/multiplication.cpp
index 7e8437f77..c8d35e723 100644
--- a/tests/treebuilders/multiplication.cpp
+++ b/tests/treebuilders/multiplication.cpp
@@ -41,9 +41,15 @@ template <int D> void testMultiplication();
 template <int D> void testSquare();
 
 SCENARIO("Multiplying MW trees", "[multiplication], [tree_builder]") {
-    GIVEN("Two MW functions in 1D") { testMultiplication<1>(); }
-    GIVEN("Two MW functions in 2D") { testMultiplication<2>(); }
-    GIVEN("Two MW functions in 3D") { testMultiplication<3>(); }
+    GIVEN("Two MW functions in 1D") {
+        testMultiplication<1>();
+    }
+    GIVEN("Two MW functions in 2D") {
+        testMultiplication<2>();
+    }
+    GIVEN("Two MW functions in 3D") {
+        testMultiplication<3>();
+    }
 }
 
 template <int D> void testMultiplication() {
@@ -116,9 +122,15 @@ template <int D> void testMultiplication() {
 }
 
 SCENARIO("Squaring MW trees", "[square], [tree_builder]") {
-    GIVEN("A MW function in 1D") { testSquare<1>(); }
-    GIVEN("A MW function in 2D") { testSquare<2>(); }
-    GIVEN("A MW function in 3D") { testSquare<3>(); }
+    GIVEN("A MW function in 1D") {
+        testSquare<1>();
+    }
+    GIVEN("A MW function in 2D") {
+        testSquare<2>();
+    }
+    GIVEN("A MW function in 3D") {
+        testSquare<3>();
+    }
 }
 
 template <int D> void testSquare() {
@@ -202,7 +214,7 @@ template <int D> void testSquare() {
     }
     finalize(&mra);
 }
-  
+
 TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot]") {
     MultiResolutionAnalysis<3> *mra = nullptr;
     initialize<3>(&mra);
@@ -221,7 +233,7 @@ TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot
         double r2 = (r[0] * r[0] + r[1] * r[1] + r[2] * r[2]);
         return r[0] * r[1] * std::exp(-2.0 * r2);
     };
-    
+
     FunctionTree<3> fx_tree(*mra);
     FunctionTree<3> fy_tree(*mra);
     FunctionTree<3> fz_tree(*mra);
@@ -252,6 +264,6 @@ TEST_CASE("Dot product FunctionTreeVectors", "[multiplication], [tree_vector_dot
     }
 
     finalize(&mra);
-    }
+}
 
 } // namespace multiplication

From 71302e59589cde1dd7d8e2e2d9a026187297529b Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 24 Jan 2025 16:36:38 +0100
Subject: [PATCH 34/38] removed the defaultMetric from apply.cpp

---
 cmake/compiler_flags/CXXFlags.cmake         |  2 +-
 src/treebuilders/apply.cpp                  | 66 ++-------------------
 src/treebuilders/apply.h                    |  5 +-
 src/utils/CompFunction.h                    |  2 +-
 tests/operators/heat_evolution_operator.cpp |  9 ++-
 5 files changed, 16 insertions(+), 68 deletions(-)

diff --git a/cmake/compiler_flags/CXXFlags.cmake b/cmake/compiler_flags/CXXFlags.cmake
index a12df3d12..cbcf32898 100644
--- a/cmake/compiler_flags/CXXFlags.cmake
+++ b/cmake/compiler_flags/CXXFlags.cmake
@@ -13,7 +13,7 @@
 
 option(ENABLE_ARCH_FLAGS "Enable architecture-specific compiler flags" ON)
 
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 set(CMAKE_CXX_EXTENSIONS FALSE)
 set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index 205684cdf..a2610a537 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -38,6 +38,7 @@
 #include "trees/FunctionTree.h"
 #include "utils/Printer.h"
 #include "utils/Timer.h"
+#include <optional>
 
 namespace mrcpp {
 
@@ -117,16 +118,6 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
  */
 template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
 
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
     for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
         for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -260,16 +251,7 @@ template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, Co
 
 template <int D, typename T>
 void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
+
     for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
         for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -312,16 +294,7 @@ template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T
 }
 
 template <int D> void apply_far_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
+
     for (int icomp = 0; icomp < 4; icomp++) {
         if (inp.Comp[icomp] != nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++) {
@@ -366,16 +339,7 @@ template <int D, typename T> void apply_near_field(double prec, FunctionTree<D,
 }
 
 template <int D> void apply_near_field(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, ComplexDouble metric[4][4], int maxIter, bool absPrec) {
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
+
     for (int icomp = 0; icomp < 4; icomp++) {
         if (inp.Comp[icomp] != nullptr) {
             for (int ocomp = 0; ocomp < 4; ocomp++) {
@@ -446,16 +410,7 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
 
 template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
     // TODO: sums and not only each components independently
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
+
     for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
         for (int ocomp = 0; ocomp < 4; ocomp++) {
             if (std::norm(metric[icomp][ocomp]) > MachinePrec) {
@@ -508,16 +463,7 @@ template <int D, typename T> FunctionTreeVector<D, T> gradient(DerivativeOperato
 
 std::vector<CompFunction<3> *> gradient(DerivativeOperator<3> &oper, CompFunction<3> &inp, ComplexDouble metric[4][4]) {
     std::vector<CompFunction<3> *> out;
-    ComplexDouble defaultMetric[4][4];
-    for (int i = 0; i < 4; i++) {
-        for (int j = 0; j < 4; j++) {
-            if (i == j)
-                defaultMetric[i][j] = 1.0;
-            else
-                defaultMetric[i][j] = 0.0;
-        }
-    }
-    if (metric == nullptr) { metric = defaultMetric; }
+
     for (int d = 0; d < 3; d++) {
         CompFunction<3> *grad_d = new CompFunction<3>();
         for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
diff --git a/src/treebuilders/apply.h b/src/treebuilders/apply.h
index 452b17f72..3bc9c8267 100644
--- a/src/treebuilders/apply.h
+++ b/src/treebuilders/apply.h
@@ -31,12 +31,15 @@
 namespace mrcpp {
 
 // clang-format off
+
 template <int D, typename T> class FunctionTree;
 template <int D> class DerivativeOperator;
 template <int D> class ConvolutionOperator;
 
+const ComplexDouble  defaultMetric [4][4] ={{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}; // 4x4 identity matrix, used as the default `metric` argument below. NOTE(review): the array is const while the `metric` parameter is a non-const ComplexDouble[4][4]; confirm the default argument compiles on all supported compilers.
+
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
-template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
+template <int D> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, const CompFunction<D> &inp, ComplexDouble metric[4][4] = defaultMetric, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, FunctionTreeVector<D, T> &precTrees, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply(double prec, CompFunction<D> &out, ConvolutionOperator<D> &oper, CompFunction<D> &inp, FunctionTreeVector<D, T> *precTrees, ComplexDouble metric[4][4] = nullptr, int maxIter = -1, bool absPrec = false);
 template <int D, typename T> void apply_far_field(double prec, FunctionTree<D, T> &out, ConvolutionOperator<D> &oper, FunctionTree<D, T> &inp, int maxIter = -1, bool absPrec = false);
diff --git a/src/utils/CompFunction.h b/src/utils/CompFunction.h
index 17e01ebc2..96ac057ca 100644
--- a/src/utils/CompFunction.h
+++ b/src/utils/CompFunction.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "mpi_utils.h"
-#include "trees/FunctionTree.h"
+#include "trees/FunctionTreeVector.h"
 
 using namespace Eigen;
 
diff --git a/tests/operators/heat_evolution_operator.cpp b/tests/operators/heat_evolution_operator.cpp
index fb6fd7649..09ddcdcbb 100644
--- a/tests/operators/heat_evolution_operator.cpp
+++ b/tests/operators/heat_evolution_operator.cpp
@@ -41,7 +41,6 @@
 #include "trees/BandWidth.h"
 #include "operators/HeatOperator.h"
 #include "functions/special_functions.h"
-#include "treebuilders/complex_apply.h"
 #include "treebuilders/add.h"
 
 //using namespace mrcpp;
@@ -56,7 +55,7 @@ TEST_CASE("Apply heat evolution operator", "[apply_heat_evolution], [heat_evolut
     const auto order = 5;
     const auto prec = 1.0e-8;
 
-    
+
     // Time moment:
     double delta_t = 0.0005;
 
@@ -67,7 +66,7 @@ TEST_CASE("Apply heat evolution operator", "[apply_heat_evolution], [heat_evolut
 
     // Time evolution operatror Exp(delta_t)
     mrcpp::HeatOperator<1> H(MRA, delta_t, prec);
-    
+
     // Analytical solution parameters for psi(x, t)
     double sigma = 0.001;
     double x0 = 0.5;
@@ -81,7 +80,7 @@ TEST_CASE("Apply heat evolution operator", "[apply_heat_evolution], [heat_evolut
     mrcpp::project<1>(prec, f_tree, f);
     mrcpp::FunctionTree<1> g_tree(MRA);
     mrcpp::project<1>(prec, g_tree, g);
-    
+
     // Apply operator H = Exp(delta_t) f(x)
     mrcpp::FunctionTree<1> output(MRA);
     mrcpp::apply(prec, output, H, f_tree);
@@ -97,4 +96,4 @@ TEST_CASE("Apply heat evolution operator", "[apply_heat_evolution], [heat_evolut
 }
 
 
-} // namespace schrodinger_evolution_operator
\ No newline at end of file
+} // namespace schrodinger_evolution_operator

From da5e41ff57b2ec9cb692a747f62cd6a0cd5a2bc4 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 28 Jan 2025 11:45:55 +0100
Subject: [PATCH 35/38] std17 and PR review requests

---
 src/operators/ABGVOperator.cpp                |   3 +-
 src/operators/CartesianConvolution.cpp        |   8 +-
 src/operators/HeatOperator.cpp                |   6 +-
 src/operators/IdentityConvolution.cpp         |   2 +-
 src/operators/MWOperator.cpp                  |  21 ++--
 src/operators/MWOperator.h                    |   8 +-
 src/operators/OperatorStatistics.cpp          |  32 ++---
 src/operators/OperatorStatistics.h            |   6 +-
 src/treebuilders/AdditionCalculator.h         |  36 ++----
 src/treebuilders/ConvolutionCalculator.h      |   2 +-
 .../CrossCorrelationCalculator.cpp            |   5 +-
 src/treebuilders/DerivativeCalculator.cpp     |  40 -------
 src/treebuilders/DerivativeCalculator.h       |   2 +-
 src/treebuilders/MultiplicationCalculator.h   |  40 ++-----
 src/treebuilders/SquareCalculator.h           |  46 +++----
 src/treebuilders/apply.cpp                    |   4 +-
 src/treebuilders/complex_apply.h              |  18 ++-
 src/treebuilders/multiply.cpp                 | 100 +++-------------
 src/treebuilders/multiply.h                   |   7 +-
 src/trees/FunctionNode.cpp                    |  65 ++++++++++
 src/trees/FunctionNode.h                      |   3 +
 src/trees/FunctionTree.cpp                    |  36 +-----
 src/trees/MWNode.cpp                          |   4 -
 src/trees/MultiResolutionAnalysis.cpp         |   1 -
 src/trees/NodeIndex.h                         |   4 +-
 src/trees/OperatorNode.cpp                    |  14 +--
 src/utils/math_utils.cpp                      |  51 ++++----
 src/utils/math_utils.h                        |   3 +-
 src/utils/parallel.cpp                        | 113 +++---------------
 src/utils/parallel.h                          |  13 +-
 30 files changed, 236 insertions(+), 457 deletions(-)

diff --git a/src/operators/ABGVOperator.cpp b/src/operators/ABGVOperator.cpp
index ca7d34580..05525405e 100644
--- a/src/operators/ABGVOperator.cpp
+++ b/src/operators/ABGVOperator.cpp
@@ -49,8 +49,7 @@ ABGVOperator<D>::ABGVOperator(const MultiResolutionAnalysis<D> &mra, double a, d
     initialize(a, b);
 }
 
-template <int D>
-void ABGVOperator<D>::initialize(double a, double b) {
+template <int D> void ABGVOperator<D>::initialize(double a, double b) {
     int bw = 0; // Operator bandwidth
     if (std::abs(a) > MachineZero) bw = 1;
     if (std::abs(b) > MachineZero) bw = 1;
diff --git a/src/operators/CartesianConvolution.cpp b/src/operators/CartesianConvolution.cpp
index 432d3c02b..64ac5491d 100644
--- a/src/operators/CartesianConvolution.cpp
+++ b/src/operators/CartesianConvolution.cpp
@@ -28,8 +28,8 @@
 #include "core/InterpolatingBasis.h"
 #include "core/LegendreBasis.h"
 
-#include "functions/Gaussian.h"
 #include "functions/GaussExp.h"
+#include "functions/Gaussian.h"
 
 #include "treebuilders/CrossCorrelationCalculator.h"
 #include "treebuilders/OperatorAdaptor.h"
@@ -68,9 +68,9 @@ CartesianConvolution::CartesianConvolution(const MultiResolutionAnalysis<3> &mra
 }
 
 void CartesianConvolution::setCartesianComponents(int x, int y, int z) {
-    int x_shift = x*this->sep_rank;
-    int y_shift = y*this->sep_rank;
-    int z_shift = z*this->sep_rank;
+    int x_shift = x * this->sep_rank;
+    int y_shift = y * this->sep_rank;
+    int z_shift = z * this->sep_rank;
 
     for (int i = 0; i < this->sep_rank; i++) this->assign(i, 0, this->raw_exp[x_shift + i].get());
     for (int i = 0; i < this->sep_rank; i++) this->assign(i, 1, this->raw_exp[y_shift + i].get());
diff --git a/src/operators/HeatOperator.cpp b/src/operators/HeatOperator.cpp
index 4cd980c54..cad3d9139 100644
--- a/src/operators/HeatOperator.cpp
+++ b/src/operators/HeatOperator.cpp
@@ -36,7 +36,7 @@ namespace mrcpp {
  *  @param[in] prec: Build precision
  *  @details This will project a kernel of a single gaussian with
  *  exponent \f$ 1/(4t) \f$.
- * 
+ *
  */
 template <int D>
 HeatOperator<D>::HeatOperator(const MultiResolutionAnalysis<D> &mra, double t, double prec)
@@ -64,11 +64,11 @@ HeatOperator<D>::HeatOperator(const MultiResolutionAnalysis<D> &mra, double t, d
  *  @details This will project a kernel of a single gaussian with
  *           exponent \f$ 1/(4t) \f$.
  *           This version of the constructor
- *           is used for calculations within periodic boundary conditions (PBC). 
+ *           is used for calculations within periodic boundary conditions (PBC).
  *           The \a root parameter is the coarsest negative scale at wich the operator
  *           is applied. The \a reach parameter is the bandwidth of the operator at
  *           the root scale. For details see \ref MWOperator
- * 
+ *
  */
 template <int D>
 HeatOperator<D>::HeatOperator(const MultiResolutionAnalysis<D> &mra, double t, double prec, int root, int reach)
diff --git a/src/operators/IdentityConvolution.cpp b/src/operators/IdentityConvolution.cpp
index 5b8bde3af..038d076cc 100644
--- a/src/operators/IdentityConvolution.cpp
+++ b/src/operators/IdentityConvolution.cpp
@@ -60,7 +60,7 @@ IdentityConvolution<D>::IdentityConvolution(const MultiResolutionAnalysis<D> &mr
  *  @param[in] reach: width at root scale (applies to periodic boundary conditions)
  *  @details This will project a kernel of a single gaussian with
  *           exponent sqrt(10/build_prec). This version of the constructor
- *           is used for calculations within periodic boundary conditions (PBC). 
+ *           is used for calculations within periodic boundary conditions (PBC).
  *           The \a root parameter is the coarsest negative scale at wich the operator
  *           is applied. The \a reach parameter is the bandwidth of the operator at
  *           the root scale. For details see \ref MWOperator
diff --git a/src/operators/MWOperator.cpp b/src/operators/MWOperator.cpp
index 428e5fc1f..225108f48 100644
--- a/src/operators/MWOperator.cpp
+++ b/src/operators/MWOperator.cpp
@@ -32,8 +32,7 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template<int D>
-void MWOperator<D>::initOperExp(int M) {
+template <int D> void MWOperator<D>::initOperExp(int M) {
     if (this->raw_exp.size() < M) MSG_ABORT("Incompatible raw expansion");
     this->oper_exp.clear();
     for (int m = 0; m < M; m++) {
@@ -47,24 +46,21 @@ void MWOperator<D>::initOperExp(int M) {
         for (int d = 0; d < D; d++) assign(i, d, this->raw_exp[i].get());
 }
 
-template <int D>
-OperatorTree &MWOperator<D>::getComponent(int i, int d) {
+template <int D> OperatorTree &MWOperator<D>::getComponent(int i, int d) {
     if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds");
     if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds");
     if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component");
     return *this->oper_exp[i][d];
 }
 
-template <int D>
-const OperatorTree &MWOperator<D>::getComponent(int i, int d) const {
+template <int D> const OperatorTree &MWOperator<D>::getComponent(int i, int d) const {
     if (i < 0 or i >= this->oper_exp.size()) MSG_ERROR("Index out of bounds");
     if (d < 0 or d >= D) MSG_ERROR("Dimension out of bounds");
     if (this->oper_exp[i][d] == nullptr) MSG_ERROR("Invalid component");
     return *this->oper_exp[i][d];
 }
 
-template <int D>
-int MWOperator<D>::getMaxBandWidth(int depth) const {
+template <int D> int MWOperator<D>::getMaxBandWidth(int depth) const {
     int maxWidth = -1;
     if (depth < 0) {
         maxWidth = *std::max_element(this->band_max.begin(), this->band_max.end());
@@ -74,14 +70,12 @@ int MWOperator<D>::getMaxBandWidth(int depth) const {
     return maxWidth;
 }
 
-template <int D>
-void MWOperator<D>::clearBandWidths() {
+template <int D> void MWOperator<D>::clearBandWidths() {
     for (auto &i : this->oper_exp)
         for (int d = 0; d < D; d++) i[d]->clearBandWidth();
 }
 
-template <int D>
-void MWOperator<D>::calcBandWidths(double prec) {
+template <int D> void MWOperator<D>::calcBandWidths(double prec) {
     int maxDepth = 0;
     // First compute BandWidths and find depth of the deepest component
     for (auto &i : this->oper_exp) {
@@ -113,8 +107,7 @@ void MWOperator<D>::calcBandWidths(double prec) {
     println(20, std::endl);
 }
 
-template <int D>
-MultiResolutionAnalysis<2> MWOperator<D>::getOperatorMRA() const {
+template <int D> MultiResolutionAnalysis<2> MWOperator<D>::getOperatorMRA() const {
     const BoundingBox<D> &box = this->MRA.getWorldBox();
     const ScalingBasis &basis = this->MRA.getScalingBasis();
 
diff --git a/src/operators/MWOperator.h b/src/operators/MWOperator.h
index 4e3962fdc..2dcad2b32 100644
--- a/src/operators/MWOperator.h
+++ b/src/operators/MWOperator.h
@@ -39,8 +39,7 @@ namespace mrcpp {
  * @details Fixme
  *
  */
-template <int D>
-class MWOperator {
+template <int D> class MWOperator {
 public:
     MWOperator(const MultiResolutionAnalysis<D> &mra, int root, int reach)
             : oper_root(root)
@@ -63,8 +62,8 @@ class MWOperator {
     OperatorTree &getComponent(int i, int d);
     const OperatorTree &getComponent(int i, int d) const;
 
-    std::array<OperatorTree*, D> &operator[](int i) { return this->oper_exp[i]; }
-    const std::array<OperatorTree*, D> &operator[](int i) const { return this->oper_exp[i]; }
+    std::array<OperatorTree *, D> &operator[](int i) { return this->oper_exp[i]; }
+    const std::array<OperatorTree *, D> &operator[](int i) const { return this->oper_exp[i]; }
 
 protected:
     int oper_root;
@@ -78,7 +77,6 @@ class MWOperator {
 
     void initOperExp(int M);
     void assign(int i, int d, OperatorTree *oper) { this->oper_exp[i][d] = oper; }
-
 };
 
 } // namespace mrcpp
diff --git a/src/operators/OperatorStatistics.cpp b/src/operators/OperatorStatistics.cpp
index 4ed0263cc..f58ae2b0d 100644
--- a/src/operators/OperatorStatistics.cpp
+++ b/src/operators/OperatorStatistics.cpp
@@ -30,8 +30,7 @@ using namespace Eigen;
 
 namespace mrcpp {
 
-template <int D, typename T>
-OperatorStatistics<D, T>::OperatorStatistics()
+OperatorStatistics::OperatorStatistics()
         : nThreads(mrcpp_get_max_threads())
         , totFCount(0)
         , totGCount(0)
@@ -58,7 +57,7 @@ OperatorStatistics<D, T>::OperatorStatistics()
     }
 }
 
-template <int D, typename T> OperatorStatistics<D, T>::~OperatorStatistics() {
+OperatorStatistics::~OperatorStatistics() {
     for (int i = 0; i < this->nThreads; i++) { delete this->compCount[i]; }
     delete[] this->compCount;
     delete[] this->fCount;
@@ -68,7 +67,7 @@ template <int D, typename T> OperatorStatistics<D, T>::~OperatorStatistics() {
 }
 
 /** Sum all node counters from all threads. */
-template <int D, typename T> void OperatorStatistics<D, T>::flushNodeCounters() {
+void OperatorStatistics::flushNodeCounters() {
     for (int i = 0; i < this->nThreads; i++) {
         this->totFCount += this->fCount[i];
         this->totGCount += this->gCount[i];
@@ -82,20 +81,20 @@ template <int D, typename T> void OperatorStatistics<D, T>::flushNodeCounters()
 }
 
 /** Increment g-node usage counter. Needed for load balancing. */
-template <int D, typename T> void OperatorStatistics<D, T>::incrementGNodeCounters(const MWNode<D, T> &gNode) {
+template <int D, typename T> void OperatorStatistics::incrementGNodeCounters(const MWNode<D, T> &gNode) {
     int thread = mrcpp_get_thread_num();
     this->gCount[thread]++;
 }
 
 /** Increment operator application counter. */
-template <int D, typename T> void OperatorStatistics<D, T>::incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt) {
+template <int D, typename T> void OperatorStatistics::incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt) {
     int thread = mrcpp_get_thread_num();
     this->fCount[thread]++;
     (*this->compCount[thread])(ft, gt) += 1;
     if (fNode.isGenNode()) { this->genCount[thread]++; }
 }
 
-template <int D, typename T> std::ostream &OperatorStatistics<D, T>::print(std::ostream &o) const {
+std::ostream &OperatorStatistics::print(std::ostream &o) const {
     o << std::setw(8);
     o << "*OperatorFunc statistics: " << std::endl << std::endl;
     o << "  Total calculated gNodes      : " << this->totGCount << std::endl;
@@ -105,12 +104,17 @@ template <int D, typename T> std::ostream &OperatorStatistics<D, T>::print(std::
     return o;
 }
 
-template class OperatorStatistics<1, double>;
-template class OperatorStatistics<2, double>;
-template class OperatorStatistics<3, double>;
-
-template class OperatorStatistics<1, ComplexDouble>;
-template class OperatorStatistics<2, ComplexDouble>;
-template class OperatorStatistics<3, ComplexDouble>;
+template void OperatorStatistics::incrementFNodeCounters<1, double>(const MWNode<1, double> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementFNodeCounters<2, double>(const MWNode<2, double> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementFNodeCounters<3, double>(const MWNode<3, double> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementFNodeCounters<1, ComplexDouble>(const MWNode<1, ComplexDouble> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementFNodeCounters<2, ComplexDouble>(const MWNode<2, ComplexDouble> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementFNodeCounters<3, ComplexDouble>(const MWNode<3, ComplexDouble> &fNode, int ft, int gt);
+template void OperatorStatistics::incrementGNodeCounters<1, double>(const MWNode<1, double> &gNode);
+template void OperatorStatistics::incrementGNodeCounters<2, double>(const MWNode<2, double> &gNode);
+template void OperatorStatistics::incrementGNodeCounters<3, double>(const MWNode<3, double> &gNode);
+template void OperatorStatistics::incrementGNodeCounters<1, ComplexDouble>(const MWNode<1, ComplexDouble> &gNode);
+template void OperatorStatistics::incrementGNodeCounters<2, ComplexDouble>(const MWNode<2, ComplexDouble> &gNode);
+template void OperatorStatistics::incrementGNodeCounters<3, ComplexDouble>(const MWNode<3, ComplexDouble> &gNode);
 
 } // namespace mrcpp
diff --git a/src/operators/OperatorStatistics.h b/src/operators/OperatorStatistics.h
index 883bb653c..9a51728c0 100644
--- a/src/operators/OperatorStatistics.h
+++ b/src/operators/OperatorStatistics.h
@@ -32,14 +32,14 @@
 
 namespace mrcpp {
 
-template <int D, typename T> class OperatorStatistics final {
+class OperatorStatistics final {
 public:
     OperatorStatistics();
     ~OperatorStatistics();
 
     void flushNodeCounters();
-    void incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt);
-    void incrementGNodeCounters(const MWNode<D, T> &gNode);
+    template <int D, typename T> void incrementFNodeCounters(const MWNode<D, T> &fNode, int ft, int gt);
+    template <int D, typename T> void incrementGNodeCounters(const MWNode<D, T> &gNode);
 
     friend std::ostream &operator<<(std::ostream &o, const OperatorStatistics &os) { return os.print(o); }
 
diff --git a/src/treebuilders/AdditionCalculator.h b/src/treebuilders/AdditionCalculator.h
index 45b6b8406..9223f1ae6 100644
--- a/src/treebuilders/AdditionCalculator.h
+++ b/src/treebuilders/AdditionCalculator.h
@@ -40,35 +40,23 @@ template <int D, typename T> class AdditionCalculator final : public TreeCalcula
     FunctionTreeVector<D, T> sum_vec;
     bool conj;
 
-    void calcNode(MWNode<D, double> &node_o) {
+    void calcNode(MWNode<D, T> &node_o) override {
         node_o.zeroCoefs();
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         for (int i = 0; i < this->sum_vec.size(); i++) {
-            double c_i = get_coef(this->sum_vec, i);
-            FunctionTree<D, double> &func_i = get_func(this->sum_vec, i);
+            T c_i = get_coef(this->sum_vec, i);
+            FunctionTree<D, T> &func_i = get_func(this->sum_vec, i);
             // This generates missing nodes
-            const MWNode<D, double> &node_i = func_i.getNode(idx);
-            const double *coefs_i = node_i.getCoefs();
+            const MWNode<D, T> &node_i = func_i.getNode(idx);
+            const T *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
-            for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
-        }
-        node_o.setHasCoefs();
-        node_o.calcNorms();
-    }
-    void calcNode(MWNode<D, ComplexDouble> &node_o) {
-        node_o.zeroCoefs();
-        const NodeIndex<D> &idx = node_o.getNodeIndex();
-        ComplexDouble *coefs_o = node_o.getCoefs();
-        for (int i = 0; i < this->sum_vec.size(); i++) {
-            ComplexDouble c_i = get_coef(this->sum_vec, i);
-            FunctionTree<D, ComplexDouble> &func_i = get_func(this->sum_vec, i);
-            // This generates missing nodes
-            const MWNode<D, ComplexDouble> &node_i = func_i.getNode(idx);
-            const ComplexDouble *coefs_i = node_i.getCoefs();
-            int n_coefs = node_i.getNCoefs();
-            if (func_i.conjugate() xor conj) {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); }
+            if constexpr (std::is_same<T, ComplexDouble>::value) {
+                if (func_i.conjugate() xor conj) {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * std::conj(coefs_i[j]); }
+                } else {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
+                }
             } else {
                 for (int j = 0; j < n_coefs; j++) { coefs_o[j] += c_i * coefs_i[j]; }
             }
diff --git a/src/treebuilders/ConvolutionCalculator.h b/src/treebuilders/ConvolutionCalculator.h
index f114ba976..8ac4b5d34 100644
--- a/src/treebuilders/ConvolutionCalculator.h
+++ b/src/treebuilders/ConvolutionCalculator.h
@@ -57,7 +57,7 @@ template <int D, typename T> class ConvolutionCalculator final : public TreeCalc
     std::vector<Timer *> calc_t;
     std::vector<Timer *> norm_t;
 
-    OperatorStatistics<D, T> operStat;
+    OperatorStatistics operStat;
     std::vector<Eigen::MatrixXi *> bandSizes;
     std::function<double(const NodeIndex<D> &idx)> precFunc = [](const NodeIndex<D> &idx) { return 1.0; };
 
diff --git a/src/treebuilders/CrossCorrelationCalculator.cpp b/src/treebuilders/CrossCorrelationCalculator.cpp
index b4c2fc3ad..a5eef945d 100644
--- a/src/treebuilders/CrossCorrelationCalculator.cpp
+++ b/src/treebuilders/CrossCorrelationCalculator.cpp
@@ -77,8 +77,7 @@ template <int T> void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros
         const MWNode<1> &node_a = this->kernel->getNode(idx_a);
         const MWNode<1> &node_b = this->kernel->getNode(idx_b);
 
-        Eigen::Matrix<double, Eigen::Dynamic, 1> vec_a;
-        VectorXd vec_b;
+        VectorXd vec_a, vec_b;
         node_a.getCoefs(vec_a);
         node_b.getCoefs(vec_b);
 
@@ -91,7 +90,7 @@ template <int T> void CrossCorrelationCalculator::applyCcc(MWNode<2> &node, Cros
     for (int i = 0; i < t_dim * kp1_d; i++) {
         auto scaling_factor = node.getMWTree().getMRA().getWorldBox().getScalingFactor(0);
         // This is only implemented for unifrom scaling factors
-        // hence the zero TODO: make it work for non-unifrom scaling
+        // hence the zero. TODO: make it work for non-uniform scaling
         coefs[i] = std::sqrt(scaling_factor) * two_n * vec_o(i);
     }
 }
diff --git a/src/treebuilders/DerivativeCalculator.cpp b/src/treebuilders/DerivativeCalculator.cpp
index 8426e3d97..b298d1b6e 100644
--- a/src/treebuilders/DerivativeCalculator.cpp
+++ b/src/treebuilders/DerivativeCalculator.cpp
@@ -259,45 +259,6 @@ operator component to a f-node in a n-dimensional tensor space. */
 template <int D, typename T> void DerivativeCalculator<D, T>::tensorApplyOperComp(OperatorState<D, T> &os) {
     T **aux = os.getAuxData();
     double **oData = os.getOperData();
-    /*
-#ifdef HAVE_BLAS
-    double mult = 0.0;
-    for (int i = 0; i < D; i++) {
-        if (oData[i] != 0) {
-            if (i == D - 1) { // Last dir: Add up into g
-                mult = 1.0;
-            }
-            const T *f = aux[i];
-            T *g = const_cast<T *>(aux[i + 1]);
-            cblas_dgemm(CblasColMajor,
-                        CblasTrans,
-                        CblasNoTrans,
-                        os.kp1_dm1,
-                        os.kp1,
-                        os.kp1,
-                        1.0,
-                        f,
-                        os.kp1,
-                        oData[i],
-                        os.kp1,
-                        mult,
-                        g,
-                        os.kp1_dm1);
-        } else {
-            // Identity operator in direction i
-            Eigen::Map<MatrixXd> f(aux[i], os.kp1, os.kp1_dm1);
-            Eigen::Map<MatrixXd> g(aux[i + 1], os.kp1_dm1, os.kp1);
-            if (oData[i] == 0) {
-                if (i == D - 1) { // Last dir: Add up into g
-                    g += f.transpose();
-                } else {
-                    g = f.transpose();
-                }
-            }
-        }
-    }
-#else
-    */
     for (int i = 0; i < D; i++) {
         Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> f(aux[i], os.kp1, os.kp1_dm1);
         Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> g(aux[i + 1], os.kp1_dm1, os.kp1);
@@ -317,7 +278,6 @@ template <int D, typename T> void DerivativeCalculator<D, T>::tensorApplyOperCom
             }
         }
     }
-    //#endif
 }
 
 template <int D, typename T> MWNodeVector<D, T> *DerivativeCalculator<D, T>::getInitialWorkVector(MWTree<D, T> &tree) const {
diff --git a/src/treebuilders/DerivativeCalculator.h b/src/treebuilders/DerivativeCalculator.h
index d9f435528..347554a46 100644
--- a/src/treebuilders/DerivativeCalculator.h
+++ b/src/treebuilders/DerivativeCalculator.h
@@ -46,7 +46,7 @@ template <int D, typename T> class DerivativeCalculator final : public TreeCalcu
     std::vector<Timer> band_t;
     std::vector<Timer> calc_t;
     std::vector<Timer> norm_t;
-    OperatorStatistics<D, T> operStat;
+    OperatorStatistics operStat;
 
     MWNodeVector<D, T> makeOperBand(const MWNode<D, T> &gNode, std::vector<NodeIndex<D>> &idx_band);
 
diff --git a/src/treebuilders/MultiplicationCalculator.h b/src/treebuilders/MultiplicationCalculator.h
index 8b40b5e27..49fa67948 100644
--- a/src/treebuilders/MultiplicationCalculator.h
+++ b/src/treebuilders/MultiplicationCalculator.h
@@ -40,41 +40,25 @@ template <int D, typename T> class MultiplicationCalculator final : public TreeC
     FunctionTreeVector<D, T> prod_vec;
     bool conj;
 
-    void calcNode(MWNode<D, double> &node_o) {
+    void calcNode(MWNode<D, T> &node_o) {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
         for (int i = 0; i < this->prod_vec.size(); i++) {
-            double c_i = get_coef(this->prod_vec, i);
-            FunctionTree<D, double> &func_i = get_func(this->prod_vec, i);
+            T c_i = get_coef(this->prod_vec, i);
+            FunctionTree<D, T> &func_i = get_func(this->prod_vec, i);
             // This generates missing nodes
-            MWNode<D, double> node_i = func_i.getNode(idx); // Copy node
+            MWNode<D, T> node_i = func_i.getNode(idx); // Copy node
             node_i.mwTransform(Reconstruction);
             node_i.cvTransform(Forward);
-            const double *coefs_i = node_i.getCoefs();
+            const T *coefs_i = node_i.getCoefs();
             int n_coefs = node_i.getNCoefs();
-            for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
-        }
-        node_o.cvTransform(Backward);
-        node_o.mwTransform(Compression);
-        node_o.setHasCoefs();
-        node_o.calcNorms();
-    }
-    void calcNode(MWNode<D, ComplexDouble> &node_o) {
-        const NodeIndex<D> &idx = node_o.getNodeIndex();
-        ComplexDouble *coefs_o = node_o.getCoefs();
-        for (int j = 0; j < node_o.getNCoefs(); j++) { coefs_o[j] = 1.0; }
-        for (int i = 0; i < this->prod_vec.size(); i++) {
-            ComplexDouble c_i = get_coef(this->prod_vec, i);
-            FunctionTree<D, ComplexDouble> &func_i = get_func(this->prod_vec, i);
-            // ComplexDoublehis generates missing nodes
-            MWNode<D, ComplexDouble> node_i = func_i.getNode(idx); // Copy node
-            node_i.mwTransform(Reconstruction);
-            node_i.cvTransform(Forward);
-            const ComplexDouble *coefs_i = node_i.getCoefs();
-            int n_coefs = node_i.getNCoefs();
-            if (func_i.conjugate() xor (conj and i == 0)) {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * std::conj(coefs_i[j]); }
+            if constexpr (std::is_same<T, ComplexDouble>::value) {
+                if (func_i.conjugate() xor (conj and i == 0)) { // NB: take complex conjugate of "bra"
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * std::conj(coefs_i[j]); }
+                } else {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
+                }
             } else {
                 for (int j = 0; j < n_coefs; j++) { coefs_o[j] *= c_i * coefs_i[j]; }
             }
diff --git a/src/treebuilders/SquareCalculator.h b/src/treebuilders/SquareCalculator.h
index 8d7be36b7..015b90f82 100644
--- a/src/treebuilders/SquareCalculator.h
+++ b/src/treebuilders/SquareCalculator.h
@@ -39,43 +39,31 @@ template <int D, typename T> class SquareCalculator final : public TreeCalculato
     FunctionTree<D, T> *func;
     bool conj;
 
-    void calcNode(MWNode<D, double> &node_o) {
+    void calcNode(MWNode<D, T> &node_o) {
         const NodeIndex<D> &idx = node_o.getNodeIndex();
         int n_coefs = node_o.getNCoefs();
-        double *coefs_o = node_o.getCoefs();
+        T *coefs_o = node_o.getCoefs();
         // This generates missing nodes
-        MWNode<D, double> node_i = func->getNode(idx); // Copy node
+        MWNode<D, T> node_i = func->getNode(idx); // Copy node
         node_i.mwTransform(Reconstruction);
         node_i.cvTransform(Forward);
-        const double *coefs_i = node_i.getCoefs();
-        for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
-        node_o.cvTransform(Backward);
-        node_o.mwTransform(Compression);
-        node_o.setHasCoefs();
-        node_o.calcNorms();
-    }
-    void calcNode(MWNode<D, ComplexDouble> &node_o) {
-        const NodeIndex<D> &idx = node_o.getNodeIndex();
-        int n_coefs = node_o.getNCoefs();
-        ComplexDouble *coefs_o = node_o.getCoefs();
-        // This generates missing nodes
-        MWNode<D, ComplexDouble> node_i = func->getNode(idx); // Copy node
-        node_i.mwTransform(Reconstruction);
-        node_i.cvTransform(Forward);
-        const ComplexDouble *coefs_i = node_i.getCoefs();
-        for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
-        if (func->conjugate()) {
-            if (conj) {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * coefs_i[j]; }
+        const T *coefs_i = node_i.getCoefs();
+        if constexpr (std::is_same<T, ComplexDouble>::value) {
+            if (func->conjugate()) {
+                if (conj) {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * coefs_i[j]; }
+                } else {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * std::conj(coefs_i[j]); }
+                }
             } else {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = std::conj(coefs_i[j]) * std::conj(coefs_i[j]); }
+                if (conj) {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * std::conj(coefs_i[j]); }
+                } else {
+                    for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
+                }
             }
         } else {
-            if (conj) {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * std::conj(coefs_i[j]); }
-            } else {
-                for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
-            }
+            for (int j = 0; j < n_coefs; j++) { coefs_o[j] = coefs_i[j] * coefs_i[j]; }
         }
         node_o.cvTransform(Backward);
         node_o.mwTransform(Compression);
diff --git a/src/treebuilders/apply.cpp b/src/treebuilders/apply.cpp
index a2610a537..cfe17b86f 100644
--- a/src/treebuilders/apply.cpp
+++ b/src/treebuilders/apply.cpp
@@ -409,7 +409,7 @@ template <int D, typename T> void apply(FunctionTree<D, T> &out, DerivativeOpera
 }
 
 template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, CompFunction<D> &inp, int dir, ComplexDouble metric[4][4]) {
-    // TODO: sums and not only each components independently
+    // TODO: sum over components instead of treating each component independently, once concrete examples with a non-diagonal metric are tested
 
     for (int icomp = 0; icomp < inp.Ncomp(); icomp++) {
         for (int ocomp = 0; ocomp < 4; ocomp++) {
@@ -420,7 +420,7 @@ template <int D> void apply(CompFunction<D> &out, DerivativeOperator<D> &oper, C
                         if (std::imag(metric[icomp][ocomp]) < MachinePrec)
                             out.CompD[ocomp]->rescale(std::real(metric[icomp][ocomp]));
                         else
-                            out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; // TODO: multiply c1 in rescale?
+                            out.func_ptr->data.c1[ocomp] *= metric[icomp][ocomp]; // To consider: multiply c1 in rescale?
                     }
                     out.func_ptr->isreal = 1;
                 } else {
diff --git a/src/treebuilders/complex_apply.h b/src/treebuilders/complex_apply.h
index 88aa96ee5..8ed9a0f17 100644
--- a/src/treebuilders/complex_apply.h
+++ b/src/treebuilders/complex_apply.h
@@ -30,15 +30,14 @@
 namespace mrcpp {
 
 /// @brief Stores pointers to real and imaginary parts of tree objects.
-/// @tparam MWClass 
-template <typename MWClass>
-struct ComplexObject
-{
-    MWClass* real;
-    MWClass* imaginary;
-
-    ComplexObject(MWClass& realPart, MWClass& imaginaryPart)
-        : real(&realPart), imaginary(&imaginaryPart) {}
+/// @tparam MWClass
+template <typename MWClass> struct ComplexObject {
+    MWClass *real;
+    MWClass *imaginary;
+
+    ComplexObject(MWClass &realPart, MWClass &imaginaryPart)
+            : real(&realPart)
+            , imaginary(&imaginaryPart) {}
 };
 
 // clang-format off
@@ -54,5 +53,4 @@ void apply
 );
 // clang-format on
 
-
 } // namespace mrcpp
diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index 84436d1e4..3348599f9 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -76,19 +76,6 @@ void multiply(double prec, FunctionTree<D, T> &out, T c, FunctionTree<D, T> &inp
     tmp_vec.push_back({1.0, &inp_b});
     multiply(prec, out, tmp_vec, maxIter, absPrec, useMaxNorms, conjugate);
 }
-/*
-template <int D> void multiply(double prec,
-                           FunctionTree<D, ComplexDouble> &out,
-                           ComplexDouble c,
-                           FunctionTree<D, ComplexDouble> &inp_a,
-                           FunctionTree<D, double> &inp_b,
-                           int maxIter = -1,
-                           bool absPrec = false,
-                           bool useMaxNorms = false) {
-// we rather make a copy with ComplexDouble type only
-FunctionTree<D, ComplexDouble>* inp_b_CPLX = inp_b.CopyTreeToComplex();
-multiply(prec, out,c,inp_a,inp_b_CPLX,maxIter,absPrec,useMaxNorms);
-} */
 
 /** @brief Multiplication of several MW function representations, adaptive grid
  *
@@ -296,9 +283,8 @@ template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, Func
  * grids overlap.
  *
  */
-template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket) {
+template <int D, typename T, typename U = T, typename V = decltype(std::declval<T>() * std::declval<U>())> V dot(FunctionTree<D, T> &bra, FunctionTree<D, U> &ket) {
     if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible");
-
     MWNodeVector<D, T> nodeTable;
     TreeIterator<D, T> it(bra);
     it.setReturnGenNodes(false);
@@ -307,8 +293,8 @@ template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &
         nodeTable.push_back(&node);
     }
     int nNodes = nodeTable.size();
-    T result = 0.0;
-    T locResult = 0.0;
+    V result = 0.0;
+    V locResult = 0.0;
     // OMP is disabled in order to get EXACT results (to the very last digit), the
     // order of summation makes the result different beyond the 14th digit or so.
     // OMP does improve the performace, but its not worth it for the time being.
@@ -318,71 +304,19 @@ template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &
     //#pragma omp for schedule(guided)
     for (int n = 0; n < nNodes; n++) {
         const auto &braNode = static_cast<const FunctionNode<D, T> &>(*nodeTable[n]);
-        const MWNode<D, T> *mwNode = ket.findNode(braNode.getNodeIndex());
+        const MWNode<D, U> *mwNode = ket.findNode(braNode.getNodeIndex());
         if (mwNode == nullptr) continue;
 
-        const auto &ketNode = static_cast<const FunctionNode<D, T> &>(*mwNode);
+        const auto &ketNode = static_cast<const FunctionNode<D, U> &>(*mwNode);
         if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode);
         locResult += dot_wavelet(braNode, ketNode);
     }
     //#pragma omp critical
     result += locResult;
-    //    }
-    return result;
-}
-
-/** @returns Dot product <bra|ket> of two MW function representations
- *
- * @param[in] bra: Bra side input function
- * @param[in] ket: Ket side input function
- *
- * @details The dot product is computed with the trees in compressed form, i.e.
- * scaling coefs only on root nodes, wavelet coefs on all nodes. Since wavelet
- * functions are orthonormal through ALL scales and the root scaling functions
- * are orthonormal to all finer level wavelet functions, this becomes a rather
- * efficient procedure as you only need to compute the dot product where the
- * grids overlap.
- *
- */
-template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra, FunctionTree<D, double> &ket) {
-    if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible");
-    MWNodeVector<D, ComplexDouble> nodeTable;
-    TreeIterator<D, ComplexDouble> it(bra);
-    it.setReturnGenNodes(false);
-    while (it.next()) {
-        MWNode<D, ComplexDouble> &node = it.getNode();
-        nodeTable.push_back(&node);
-    }
-    int nNodes = nodeTable.size();
-    ComplexDouble result = 0.0;
-    ComplexDouble locResult = 0.0;
-    // OMP is disabled in order to get EXACT results (to the very last digit), the
-    // order of summation makes the result different beyond the 14th digit or so.
-    // OMP does improve the performace, but its not worth it for the time being.
-    //#pragma omp parallel firstprivate(n_nodes, locResult) num_threads(mrcpp_get_num_threads())
-    //		shared(nodeTable,rhs,result)
-    //    {
-    //#pragma omp for schedule(guided)
-    for (int n = 0; n < nNodes; n++) {
-        const auto &braNode = static_cast<const FunctionNode<D, ComplexDouble> &>(*nodeTable[n]);
-        const MWNode<D, double> *mwNode = ket.findNode(braNode.getNodeIndex());
-        if (mwNode == nullptr) continue;
 
-        const auto &ketNode = static_cast<const FunctionNode<D, double> &>(*mwNode);
-        if (braNode.isRootNode()) locResult += dot_scaling(braNode, ketNode);
-        locResult += dot_wavelet(braNode, ketNode);
-    }
-    //#pragma omp critical
-    result += locResult;
-    //    }
     return result;
 }
-template <int D> ComplexDouble dot(FunctionTree<D, double> &bra, FunctionTree<D, ComplexDouble> &ket) {
-    ket.setConjugate(!ket.conjugate());
-    ComplexDouble prod = dot(ket, bra);
-    ket.setConjugate(!ket.conjugate());
-    return prod;
-}
+
 /** @brief abs-dot product of two MW function representations
  *
  * @param[in] bra: Bra side input function
@@ -447,11 +381,6 @@ template void square<3, double>(double prec, FunctionTree<3, double> &out, Funct
 template void dot<1, double>(double prec, FunctionTree<1, double> &out, FunctionTreeVector<1, double> &inp_a, FunctionTreeVector<1, double> &inp_b, int maxIter, bool absPrec);
 template void dot<2, double>(double prec, FunctionTree<2, double> &out, FunctionTreeVector<2, double> &inp_a, FunctionTreeVector<2, double> &inp_b, int maxIter, bool absPrec);
 template void dot<3, double>(double prec, FunctionTree<3, double> &out, FunctionTreeVector<3, double> &inp_a, FunctionTreeVector<3, double> &inp_b, int maxIter, bool absPrec);
-
-template double dot<1, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket);
-template double dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket);
-template double dot<3, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket);
-
 template double node_norm_dot<1, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket, bool exact);
 template double node_norm_dot<2, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket, bool exact);
 template double node_norm_dot<3, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket, bool exact);
@@ -517,11 +446,18 @@ template void dot<3, ComplexDouble>(double prec,
                                     int maxIter,
                                     bool absPrec);
 
-template ComplexDouble dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket);
-template ComplexDouble dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket);
-template ComplexDouble dot<3, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket);
-template ComplexDouble dot<3>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, double> &ket);
-template ComplexDouble dot<3>(FunctionTree<3, double> &bra, FunctionTree<3, ComplexDouble> &ket);
+template double dot<1, double, double>(FunctionTree<1, double> &bra, FunctionTree<1, double> &ket);
+template double dot<2, double, double>(FunctionTree<2, double> &bra, FunctionTree<2, double> &ket);
+template double dot<3, double, double>(FunctionTree<3, double> &bra, FunctionTree<3, double> &ket);
+template ComplexDouble dot<1, ComplexDouble, double>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, double> &ket);
+template ComplexDouble dot<2, ComplexDouble, double>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, double> &ket);
+template ComplexDouble dot<3, ComplexDouble, double>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, double> &ket);
+template ComplexDouble dot<1, double, ComplexDouble>(FunctionTree<1, double> &bra, FunctionTree<1, ComplexDouble> &ket);
+template ComplexDouble dot<2, double, ComplexDouble>(FunctionTree<2, double> &bra, FunctionTree<2, ComplexDouble> &ket);
+template ComplexDouble dot<3, double, ComplexDouble>(FunctionTree<3, double> &bra, FunctionTree<3, ComplexDouble> &ket);
+template ComplexDouble dot<1, ComplexDouble, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket);
+template ComplexDouble dot<2, ComplexDouble, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket);
+template ComplexDouble dot<3, ComplexDouble, ComplexDouble>(FunctionTree<3, ComplexDouble> &bra, FunctionTree<3, ComplexDouble> &ket);
 
 template double node_norm_dot<1, ComplexDouble>(FunctionTree<1, ComplexDouble> &bra, FunctionTree<1, ComplexDouble> &ket, bool exact);
 template double node_norm_dot<2, ComplexDouble>(FunctionTree<2, ComplexDouble> &bra, FunctionTree<2, ComplexDouble> &ket, bool exact);
diff --git a/src/treebuilders/multiply.h b/src/treebuilders/multiply.h
index 65cd44787..316066483 100644
--- a/src/treebuilders/multiply.h
+++ b/src/treebuilders/multiply.h
@@ -31,12 +31,9 @@ namespace mrcpp {
 template <int D, typename T> class RepresentableFunction;
 template <int D, typename T> class FunctionTree;
 
-template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp_a, FunctionTreeVector<D, T> &inp_b, int maxIter = -1, bool absPrec = false);
-
-template <int D, typename T> T dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket);
+template <int D, typename T = double, typename U = T, typename V = decltype(std::declval<T>() * std::declval<U>())> V dot(FunctionTree<D, T> &bra, FunctionTree<D, U> &ket);
 
-template <int D> ComplexDouble dot(FunctionTree<D, ComplexDouble> &bra, FunctionTree<D, double> &ket);
-template <int D> ComplexDouble dot(FunctionTree<D, double> &bra, FunctionTree<D, ComplexDouble> &ket);
+template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, FunctionTreeVector<D, T> &inp_a, FunctionTreeVector<D, T> &inp_b, int maxIter = -1, bool absPrec = false);
 
 template <int D, typename T> double node_norm_dot(FunctionTree<D, T> &bra, FunctionTree<D, T> &ket, bool exact = false);
 
diff --git a/src/trees/FunctionNode.cpp b/src/trees/FunctionNode.cpp
index db0005aa6..ff23fb394 100644
--- a/src/trees/FunctionNode.cpp
+++ b/src/trees/FunctionNode.cpp
@@ -493,6 +493,32 @@ template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble>
     return result;
 }
 
+/** Inner product of a real bra node and a complex ket node in the scaling basis.
+ *
+ * Integrates the product of the functions represented by the scaling basis on
+ * the node on the full support of the nodes. The scaling basis is fully
+ * orthonormal, and the inner product is simply the dot product of the
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: bra is real and is used as is; ket is conjugated only if its tree is flagged as conjugate.
+ */
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const double *a = bra.getCoefs();
+    const ComplexDouble *b = ket.getCoefs();
+
+    int size = bra.getKp1_d(); // only the first getKp1_d() coefficients (the scaling part) are summed
+    ComplexDouble result = 0.0;
+    // bra is real, so conjugating it would be a no-op; only the ket side can need conjugation
+    if (ket.getMWTree().conjugate()) { // ket tree is flagged conjugate: use conj(b[i])
+        for (int i = 0; i < size; i++) result += a[i] * std::conj(b[i]);
+    } else {
+        for (int i = 0; i < size; i++) result += a[i] * b[i];
+    }
+    return result;
+}
+
 /** Inner product of the functions represented by the wavelet basis of the nodes.
  *
  * Integrates the product of the functions represented by the wavelet basis on
@@ -585,6 +611,34 @@ template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble>
     return result;
 }
 
+/** Inner product of a real bra node and a complex ket node in the wavelet basis.
+ *
+ * Integrates the product of the functions represented by the wavelet basis on
+ * the node on the full support of the nodes. The wavelet basis is fully
+ * orthonormal, and the inner product is simply the dot product of the
+ * coefficient vectors. Assumes the nodes have identical support.
+ * NB: bra is real and is used as is; ket is conjugated only if its tree is flagged as conjugate.
+ */
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, ComplexDouble> &ket) {
+    if (bra.isGenNode() or ket.isGenNode()) return 0.0; // a GenNode on either side yields zero (presumably no wavelet part — confirm)
+
+    assert(bra.hasCoefs());
+    assert(ket.hasCoefs());
+
+    const double *a = bra.getCoefs();
+    const ComplexDouble *b = ket.getCoefs();
+
+    int start = bra.getKp1_d();             // skip the leading scaling block of the coefficient array
+    int size = (bra.getTDim() - 1) * start; // remaining (getTDim() - 1) blocks of the same length are summed
+    ComplexDouble result = 0.0;
+    if (ket.getMWTree().conjugate()) { // ket tree is flagged conjugate: use conj(b[...])
+        for (int i = 0; i < size; i++) result += a[start + i] * std::conj(b[start + i]);
+    } else {
+        for (int i = 0; i < size; i++) result += a[start + i] * b[start + i];
+    }
+    return result;
+}
+
 template double dot_scaling(const FunctionNode<1, double> &bra, const FunctionNode<1, double> &ket);
 template double dot_scaling(const FunctionNode<2, double> &bra, const FunctionNode<2, double> &ket);
 template double dot_scaling(const FunctionNode<3, double> &bra, const FunctionNode<3, double> &ket);
@@ -607,6 +661,17 @@ template ComplexDouble dot_wavelet(const FunctionNode<1, ComplexDouble> &bra, co
 template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, ComplexDouble> &ket);
 template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, ComplexDouble> &ket);
 
+template ComplexDouble dot_scaling(const FunctionNode<1, double> &bra, const FunctionNode<1, ComplexDouble> &ket);
+template ComplexDouble dot_scaling(const FunctionNode<2, double> &bra, const FunctionNode<2, ComplexDouble> &ket);
+template ComplexDouble dot_scaling(const FunctionNode<3, double> &bra, const FunctionNode<3, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<1, double> &bra, const FunctionNode<1, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<2, double> &bra, const FunctionNode<2, ComplexDouble> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<3, double> &bra, const FunctionNode<3, ComplexDouble> &ket);
+
+template ComplexDouble dot_scaling(const FunctionNode<1, ComplexDouble> &bra, const FunctionNode<1, double> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<1, ComplexDouble> &bra, const FunctionNode<1, double> &ket);
+template ComplexDouble dot_scaling(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, double> &ket);
+template ComplexDouble dot_wavelet(const FunctionNode<2, ComplexDouble> &bra, const FunctionNode<2, double> &ket);
 template ComplexDouble dot_scaling(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket);
 template ComplexDouble dot_wavelet(const FunctionNode<3, ComplexDouble> &bra, const FunctionNode<3, double> &ket);
 
diff --git a/src/trees/FunctionNode.h b/src/trees/FunctionNode.h
index d1f7c3639..d1bfaaa31 100644
--- a/src/trees/FunctionNode.h
+++ b/src/trees/FunctionNode.h
@@ -88,4 +88,7 @@ template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble>
 template <int D> ComplexDouble dot_scaling(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
 template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, ComplexDouble> &bra, const FunctionNode<D, double> &ket);
 
+template <int D> ComplexDouble dot_scaling(const FunctionNode<D, double> &bra, const FunctionNode<D, ComplexDouble> &ket);
+template <int D> ComplexDouble dot_wavelet(const FunctionNode<D, double> &bra, const FunctionNode<D, ComplexDouble> &ket);
+
 } // namespace mrcpp
diff --git a/src/trees/FunctionTree.cpp b/src/trees/FunctionTree.cpp
index 6cf245f2e..a41581692 100644
--- a/src/trees/FunctionTree.cpp
+++ b/src/trees/FunctionTree.cpp
@@ -107,8 +107,8 @@ template <int D, typename T> FunctionTree<D, T>::~FunctionTree() {
     if (this->getNNodes() > 0) this->deleteRootNodes();
 }
 
-/** @brief Read a previously stusing MADNESS conventions for n, l and index order.ored tree assuming text/ASCII format,
- *   in a representation
+/** @brief Read a previously stored tree assuming text/ASCII format,
+ *   in a representation using MADNESS conventions for n, l and index order.
  * @param[in] file: File name
  * @note This tree must have the exact same MRA the one that was saved(?)
  */
@@ -128,10 +128,8 @@ template <int D, typename T> void FunctionTree<D, T>::loadTreeTXT(const std::str
     double TXT_thres = 1.0e-14; // threshold for differences in scaling factors
     for (int d = 0; d < D; d++) {
         if (std::abs(coord[d][0] + L) > TXT_thres) std::cout << coord[d][0] << " " << L << std::endl;
-        ;
         if (std::abs(coord[d][0] + L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
         if (std::abs(coord[d][1] - L) > TXT_thres) std::cout << coord[d][1] << " " << L << std::endl;
-        ;
         if (std::abs(coord[d][1] - L) > TXT_thres) NOT_IMPLEMENTED_ABORT;
     }
 
@@ -1184,36 +1182,6 @@ template <int D, typename T> FunctionTree<D, double> *FunctionTree<D, T>::Imag()
     return out;
 }
 
-/*
-template<>
-void FunctionTree<3, double>::CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree) {
-//void CopyTreeToComplex(FunctionTree<3, ComplexDouble>* &outTree, FunctionTree<3, double>* inTree) {
-FunctionTree<3, double>* inTree = this;
-delete outTree;
-outTree = new FunctionTree<3, ComplexDouble> (inTree->getMRA());
-int nChunks=inTree->getNChunks();
-outTree->getNodeAllocator().init(nChunks, true); //also allocate coefficients
-int Ncoefperchunk = outTree->getNodeAllocator().getCoefChunkSize()/sizeof(ComplexDouble);
-// real and complex trees have the same Ncoefperchunk.
-for (int iChunk = 0; iChunk < nChunks; iChunk++) {
-    MWNode<3, double> * inNode = inTree->getNodeAllocator().getNodeChunk(iChunk);
-    MWNode<3, ComplexDouble> * outNode = outTree->getNodeAllocator().getNodeChunk(iChunk);
-    //outTree->getNodeAllocator().getNodeChunk(iChunk) = inTree->getNodeAllocator().getNodeChunk(iChunk);
-    int nNodes = std::min(inTree->getNNodes(), inTree->getNodeAllocator().getMaxNodesPerChunk());
-    for (int i = 0; i < nNodes; i++) {
-        outNode[i] = *reinterpret_cast<MWNode<3, std::complex<double>>*>(&inNode[i]); // could be improved
-    }
-    ComplexDouble* Ccoefs;
-    int ncoefs = nNodes * inTree->getNodeAllocator().getNCoefs();
-    Ccoefs = outTree->getNodeAllocator().getCoefChunk(iChunk);
-    auto InCoefs = inTree->getNodeAllocator().getCoefChunk(iChunk);
-    for (int i = 0; i < ncoefs; i++) {
-        Ccoefs[i] = InCoefs[i];
-    }
-}
-outTree->getNodeAllocator().reassemble();
-}*/
-
 /*
  * From real to complex tree. Copy everything, and convert double to ComplexDouble for the coefficents.
  * Should use a deep_copy if generalized in the future.
diff --git a/src/trees/MWNode.cpp b/src/trees/MWNode.cpp
index c8ce03dac..2d521b468 100644
--- a/src/trees/MWNode.cpp
+++ b/src/trees/MWNode.cpp
@@ -654,11 +654,7 @@ template <int D, typename T> double MWNode<D, T>::calcComponentNorm(int i) const
     int start = i * size;
 
     double sq_norm = 0.0;
-    //#ifdef HAVE_BLAS
-    //    sq_norm = cblas_ddot(size, &c[start], 1, &c[start], 1);
-    //#else
     for (int i = start; i < start + size; i++) { sq_norm += std::norm(c[i]); }
-    //#endif
     return std::sqrt(sq_norm);
 }
 
diff --git a/src/trees/MultiResolutionAnalysis.cpp b/src/trees/MultiResolutionAnalysis.cpp
index 2724bacf9..43b39c32d 100644
--- a/src/trees/MultiResolutionAnalysis.cpp
+++ b/src/trees/MultiResolutionAnalysis.cpp
@@ -147,7 +147,6 @@ template <int D> bool MultiResolutionAnalysis<D>::operator==(const MultiResoluti
  * respective classes.
  */
 template <int D> bool MultiResolutionAnalysis<D>::operator!=(const MultiResolutionAnalysis<D> &mra) const {
-    if (this->basis != mra.basis) std::cout << "diff basis " << this->basis << std::endl << "and  " << mra.basis << std::endl;
     if (this->basis != mra.basis) return true;
     if (this->world != mra.world)
         std::cout << "diff world " << this->world << std::endl
diff --git a/src/trees/NodeIndex.h b/src/trees/NodeIndex.h
index 866f3bdb2..f73ded001 100644
--- a/src/trees/NodeIndex.h
+++ b/src/trees/NodeIndex.h
@@ -31,8 +31,8 @@
 
 #pragma once
 
-#include <iostream>
 #include <iomanip>
+#include <iostream>
 
 namespace mrcpp {
 
@@ -92,7 +92,7 @@ template <int D> class NodeIndex final {
     }
 
 private:
-    short int N{0};          ///< Length scale index 2^N
+    short int N{0};         ///< Length scale index 2^N
     std::array<int, D> L{}; ///< Translation index [x,y,z,...]
 };
 
diff --git a/src/trees/OperatorNode.cpp b/src/trees/OperatorNode.cpp
index a0e09aac5..37f576eac 100644
--- a/src/trees/OperatorNode.cpp
+++ b/src/trees/OperatorNode.cpp
@@ -42,16 +42,16 @@ void OperatorNode::dealloc() {
     this->tree->getNodeAllocator().dealloc(sIdx);
 }
 
-/** 
+/**
  * @brief Calculate one specific component norm of the OperatorNode (TODO: needs to be specified more).
- * 
+ *
  * @param[in] i: TODO: deens to be specified
  *
  * @details OperatorNorms are defined as matrix 2-norms that are expensive to calculate.
  * Thus we calculate some cheaper upper bounds for this norm for thresholding.
  * First a simple vector norm, then a product of the 1- and infinity-norm.
  * (TODO: needs to be more presiced).
- * 
+ *
  */
 double OperatorNode::calcComponentNorm(int i) const {
     int depth = getDepth();
@@ -64,7 +64,7 @@ double OperatorNode::calcComponentNorm(int i) const {
     int kp1 = this->getKp1();
     int kp1_d = this->getKp1_d();
     const VectorXd &comp_vec = coef_vec.segment(i * kp1_d, kp1_d);
-    const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1);   //one can use MatrixXd OperatorNode::getComponent(int i)
+    const MatrixXd comp_mat = MatrixXd::Map(comp_vec.data(), kp1, kp1); // one can use MatrixXd OperatorNode::getComponent(int i)
 
     double norm = 0.0;
     double vecNorm = comp_vec.norm();
@@ -79,7 +79,6 @@ double OperatorNode::calcComponentNorm(int i) const {
     return norm;
 }
 
-
 /** @brief Matrix elements of the non-standard form.
  *
  * @param[in] i: Index enumerating the matrix type in the non-standard form.
@@ -92,10 +91,9 @@ double OperatorNode::calcComponentNorm(int i) const {
  * One of these matrices is returned by the method according to the choice of the index parameter
  * \f$ i = 0, 1, 2, 3 \f$, respectively.
  * For example, \f$ \alpha_l^n = \text{getComponent}(3) \f$.
- * 
+ *
  */
-MatrixXd OperatorNode::getComponent(int i)
-{
+MatrixXd OperatorNode::getComponent(int i) {
     int depth = getDepth();
     double prec = getOperTree().getNormPrecision();
     double thrs = std::max(MachinePrec, prec / (8.0 * (1 << depth)));
diff --git a/src/utils/math_utils.cpp b/src/utils/math_utils.cpp
index 11e8aa3c2..69a13f300 100644
--- a/src/utils/math_utils.cpp
+++ b/src/utils/math_utils.cpp
@@ -171,34 +171,26 @@ void math_utils::tensor_self_product(const VectorXd &A, MatrixXd &tprod) {
     for (int i = 0; i < Ar; i++) { tprod.block(i, 0, 1, Ar) = A(i) * A; }
 }
 
-/** Matrix multiplication of the filter with the input coefficient (type double)*/
-void math_utils::apply_filter(double *out, double *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
-#ifdef HAVE_BLAS
-    cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
-#else
-    Map<MatrixXd> f(in, kp1, kp1_dm1);
-    Map<MatrixXd> g(out, kp1_dm1, kp1);
-    if (fac < MachineZero) {
-        g.noalias() = f.transpose() * filter;
-    } else {
-        g.noalias() += f.transpose() * filter;
-    }
-#endif
-}
-
-/** Matrix multiplication of the filter with the input coefficient (type complex)*/
-void math_utils::apply_filter(ComplexDouble *out, ComplexDouble *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
-    //#ifdef HAVE_BLAS
-    //    cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, kp1_dm1, kp1, kp1, 1.0, in, kp1, filter.data(), kp1, fac, out, kp1_dm1);
-    //#else
-    Map<MatrixXcd> f(in, kp1, kp1_dm1);
-    Map<MatrixXcd> g(out, kp1_dm1, kp1);
-    if (fac < MachineZero) {
-        g.noalias() = f.transpose() * filter;
-    } else {
-        g.noalias() += f.transpose() * filter;
-    }
-    //#endif
+/** Multiply the transposed input coefficients by the filter: out = in^T * filter if fac < MachineZero, otherwise out += in^T * filter. */
+template <typename T> void math_utils::apply_filter(T *out, T *in, const MatrixXd &filter, int kp1, int kp1_dm1, double fac) {
+    if constexpr (std::is_same<T, double>::value) {
+        Map<MatrixXd> f(in, kp1, kp1_dm1);  // view of `in` as a kp1 x kp1_dm1 matrix (no copy)
+        Map<MatrixXd> g(out, kp1_dm1, kp1); // view of `out` as a kp1_dm1 x kp1 matrix (no copy)
+        if (fac < MachineZero) { // fac is only a switch here (overwrite vs. accumulate); it is not applied as a scale factor
+            g.noalias() = f.transpose() * filter;
+        } else {
+            g.noalias() += f.transpose() * filter;
+        }
+    } else if constexpr (std::is_same<T, ComplexDouble>::value) {
+        Map<MatrixXcd> f(in, kp1, kp1_dm1);  // complex view of `in`, same shape as in the real branch
+        Map<MatrixXcd> g(out, kp1_dm1, kp1); // complex view of `out`, same shape as in the real branch
+        if (fac < MachineZero) { // same overwrite/accumulate switch as in the real branch
+            g.noalias() = f.transpose() * filter;
+        } else {
+            g.noalias() += f.transpose() * filter;
+        }
+    } else // any T other than double or ComplexDouble is unsupported
+        NOT_IMPLEMENTED_ABORT;
 }
 
 /** Make a nD-representation from 1D-representations of separable functions.
@@ -342,6 +334,9 @@ template <class T> std::vector<std::vector<T>> math_utils::cartesian_product(std
     return output;
 }
 
+template void math_utils::apply_filter<double>(double *out, double *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
+template void math_utils::apply_filter<ComplexDouble>(ComplexDouble *out, ComplexDouble *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
+
 template double math_utils::calc_distance<1>(const Coord<1> &a, const Coord<1> &b);
 template double math_utils::calc_distance<2>(const Coord<2> &a, const Coord<2> &b);
 template double math_utils::calc_distance<3>(const Coord<3> &a, const Coord<3> &b);
diff --git a/src/utils/math_utils.h b/src/utils/math_utils.h
index 9dcdb6956..3eacfa10b 100644
--- a/src/utils/math_utils.h
+++ b/src/utils/math_utils.h
@@ -66,8 +66,7 @@ double matrix_norm_inf(const Eigen::MatrixXd &M);
 double matrix_norm_1(const Eigen::MatrixXd &M);
 double matrix_norm_2(const Eigen::MatrixXd &M);
 
-void apply_filter(double *out, double *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
-void apply_filter(ComplexDouble *out, ComplexDouble *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
+template <typename T> void apply_filter(T *out, T *in, const Eigen::MatrixXd &filter, int kp1, int kp1_dm1, double fac);
 
 void tensor_expand_coefs(int dim, int dir, int kp1, int kp1_d, const Eigen::MatrixXd &primitive, Eigen::VectorXd &expanded);
 
diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index ab1000e66..0afd31e90 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -442,7 +442,7 @@ void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+template <typename T> void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) {
 /* 1) Each odd rank send to the left rank
    2) All odd ranks are "deleted" (can exit routine)
    3) new "effective" ranks are defined within the non-deleted ranks
@@ -462,48 +462,7 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
             int src = comm_rank + fac;
             if (src < comm_size) {
                 int tag = 3333 + src;
-                mrcpp::FunctionTree<3> tree_i(tree.getMRA());
-                mrcpp::recv_tree(tree_i, src, tag, comm, -1, false);
-                tree.appendTreeNoCoeff(tree_i); // make union grid
-            }
-        }
-        if ((comm_rank / fac) % 2 == 1) {
-            // send
-            int dest = comm_rank - fac;
-            if (dest >= 0) {
-                int tag = 3333 + comm_rank;
-                mrcpp::send_tree(tree, dest, tag, comm, -1, false);
-                break; // once data is sent we are done
-            }
-        }
-        fac *= 2;
-    }
-    MPI_Barrier(comm);
-#endif
-}
-
-/** @brief make union tree and send into rank zero */
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
-/* 1) Each odd rank send to the left rank
-   2) All odd ranks are "deleted" (can exit routine)
-   3) new "effective" ranks are defined within the non-deleted ranks
-      effective rank = rank/fac , where fac are powers of 2
-   4) repeat
- */
-#ifdef MRCPP_HAS_MPI
-    int comm_size, comm_rank;
-    MPI_Comm_rank(comm, &comm_rank);
-    MPI_Comm_size(comm, &comm_size);
-    if (comm_size == 1) return;
-
-    int fac = 1; // powers of 2
-    while (fac < comm_size) {
-        if ((comm_rank / fac) % 2 == 0) {
-            // receive
-            int src = comm_rank + fac;
-            if (src < comm_size) {
-                int tag = 3333 + src;
-                mrcpp::FunctionTree<3, ComplexDouble> tree_i(tree.getMRA());
+                mrcpp::FunctionTree<3, T> tree_i(tree.getMRA());
                 mrcpp::recv_tree(tree_i, src, tag, comm, -1, false);
                 tree.appendTreeNoCoeff(tree_i); // make union grid
             }
@@ -524,9 +483,8 @@ void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm c
 }
 
 /** @brief make union tree without coeff and send to all
- *  Real trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+template <typename T> void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector<FunctionTree<3, T>> &Phi, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
@@ -536,8 +494,7 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
     int N = Phi.size();
     for (int j = 0; j < N; j++) {
         if (not my_func(j)) continue;
-        if (Phi[j].isreal()) tree.appendTreeNoCoeff(*Phi[j].CompD[0]);
-        if (Phi[j].iscomplex()) tree.appendTreeNoCoeff(*Phi[j].CompC[0]);
+        tree.appendTreeNoCoeff(Phi[j]);
     }
     mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
     mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
@@ -545,9 +502,8 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, vector<CompFun
 }
 
 /** @brief make union tree without coeff and send to all
- *  Complex trees
  */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
+template <typename T> void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, vector<CompFunction<3>> &Phi, MPI_Comm comm) {
 #ifdef MRCPP_HAS_MPI
     /* 1) make union grid of own orbitals
        2) make union grid with others orbitals (sent to rank zero)
@@ -565,26 +521,6 @@ void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<
 #endif
 }
 
-/** @brief make union tree without coeff and send to all
- *  Include both real and imaginary parts
- */
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm) {
-#ifdef MRCPP_HAS_MPI
-    /* 1) make union grid of own orbitals
-       2) make union grid with others orbitals (sent to rank zero)
-       3) rank zero broadcast func to everybody
-     */
-
-    int N = Phi.size();
-    for (int j = 0; j < N; j++) {
-        if (not my_func(j)) continue;
-        tree.appendTreeNoCoeff(Phi[j]);
-    }
-    mrcpp::mpi::reduce_Tree_noCoeff(tree, comm_wrk);
-    mrcpp::mpi::broadcast_Tree_noCoeff(tree, comm_wrk);
-#endif
-}
-
 /** @brief Distribute rank zero function to all ranks */
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
@@ -618,7 +554,7 @@ void broadcast_function(CompFunction<3> &func, MPI_Comm comm) {
 }
 
 /** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm) {
+template <typename T> void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm) {
 /* use same strategy as a reduce, but in reverse order */
 #ifdef MRCPP_HAS_MPI
     int comm_size, comm_rank;
@@ -649,36 +585,15 @@ void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm)
 #endif
 }
 
-/** @brief Distribute rank zero function to all ranks */
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm) {
-/* use same strategy as a reduce, but in reverse order */
-#ifdef MRCPP_HAS_MPI
-    int comm_size, comm_rank;
-    MPI_Comm_rank(comm, &comm_rank);
-    MPI_Comm_size(comm, &comm_size);
-    if (comm_size == 1) return;
+template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<FunctionTree<3, double>> &Phi, MPI_Comm comm);
+template void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
+template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
 
-    int fac = 1; // powers of 2
-    while (fac < comm_size) fac *= 2;
-    fac /= 2;
+template void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
+template void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
+template void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
 
-    while (fac > 0) {
-        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 1) {
-            // receive
-            int src = comm_rank - fac;
-            int tag = 4334 + comm_rank;
-            mrcpp::recv_tree(tree, src, tag, comm, -1, false);
-        }
-        if (comm_rank % fac == 0 and (comm_rank / fac) % 2 == 0) {
-            // send
-            int dst = comm_rank + fac;
-            int tag = 4334 + dst;
-            if (dst < comm_size) mrcpp::send_tree(tree, dst, tag, comm, -1, false);
-        }
-        fac /= 2;
-    }
-    MPI_Barrier(comm);
-#endif
-}
 } // namespace mpi
 } // namespace mrcpp
diff --git a/src/utils/parallel.h b/src/utils/parallel.h
index 40b74bc7a..395cc1174 100644
--- a/src/utils/parallel.h
+++ b/src/utils/parallel.h
@@ -54,14 +54,11 @@ void share_function(CompFunction<3> &func, int src, int tag, MPI_Comm comm);
 void reduce_function(double prec, CompFunction<3> &func, MPI_Comm comm);
 void broadcast_function(CompFunction<3> &func, MPI_Comm comm);
 
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, MPI_Comm comm);
-void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<FunctionTree<3, ComplexDouble>> &Phi, MPI_Comm comm);
-void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, MPI_Comm comm);
-
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, double> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
-void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, ComplexDouble> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
+template <typename T> void reduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm);
+template <typename T> void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, std::vector<FunctionTree<3, T>> &Phi, MPI_Comm comm);
+template <typename T> void broadcast_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, MPI_Comm comm);
+
+template <typename T> void allreduce_Tree_noCoeff(mrcpp::FunctionTree<3, T> &tree, std::vector<CompFunction<3>> &Phi, MPI_Comm comm);
 
 void allreduce_vector(IntVector &vec, MPI_Comm comm);
 void allreduce_vector(DoubleVector &vec, MPI_Comm comm);

From 71d6e2da9123e3c771739c1abfbc8cdf093be276 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 28 Jan 2025 11:55:45 +0100
Subject: [PATCH 36/38] std17 and PR review requests

---
 src/treebuilders/multiply.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/treebuilders/multiply.cpp b/src/treebuilders/multiply.cpp
index 3348599f9..4e046126e 100644
--- a/src/treebuilders/multiply.cpp
+++ b/src/treebuilders/multiply.cpp
@@ -283,7 +283,7 @@ template <int D, typename T> void dot(double prec, FunctionTree<D, T> &out, Func
  * grids overlap.
  *
  */
-template <int D, typename T, typename U = T, typename V = decltype(std::declval<T>() * std::declval<U>())> V dot(FunctionTree<D, T> &bra, FunctionTree<D, U> &ket) {
+template <int D, typename T, typename U, typename V> V dot(FunctionTree<D, T> &bra, FunctionTree<D, U> &ket) {
     if (bra.getMRA() != ket.getMRA()) MSG_ABORT("Trees not compatible");
     MWNodeVector<D, T> nodeTable;
     TreeIterator<D, T> it(bra);

From f46ac4ea73e737fe20f36d712c8625fb705a4efa Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Tue, 28 Jan 2025 12:11:13 +0100
Subject: [PATCH 37/38] std17 and PR review requests

---
 src/utils/parallel.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/utils/parallel.cpp b/src/utils/parallel.cpp
index 0afd31e90..332d0fb5d 100644
--- a/src/utils/parallel.cpp
+++ b/src/utils/parallel.cpp
@@ -11,7 +11,7 @@
 
 #ifdef MRCPP_HAS_OMP
 #define mrcpp_get_max_threads() omp_get_max_threads()
-#define mrcpp_get_num_procs() omp_get_num_procs() / 2
+#define mrcpp_get_num_procs() omp_get_num_procs()
 #define mrcpp_set_dynamic(n) omp_set_dynamic(n)
 #else
 #define mrcpp_get_max_threads() 1
@@ -181,7 +181,7 @@ void initialize() {
     int omp_threads_available = thread::hardware_concurrency();
 
     int nthreads = 1;
-    int my_OMP_NUM_THREADS = omp_get_max_threads();
+    int my_OMP_NUM_THREADS = mrcpp_get_max_threads();
     MPI_Bcast(&my_OMP_NUM_THREADS, 1, MPI_INT, 0, MPI_COMM_WORLD);
     if (use_omp_num_threads) { // we assume that the user has set the environment variable
         // OMP_NUM_THREADS, such that the total number of threads that can be used on each node is
@@ -208,14 +208,14 @@ void initialize() {
         if (is_bankclient) nthreads = (omp_threads_available / 2 - n_bank_thisnode) / n_wrk_thisnode; // 1) and 4)
         // cout<<nthreads<<" after direct calculation"<<endl;
         //  do not exceed total number of cores accessible (assumed to be half the number of logical threads)
-        nthreads = min(nthreads, omp_get_num_procs() / 2); // 2)
+        nthreads = min(nthreads, mrcpp_get_num_procs() / 2); // 2)
         // cout<<nthreads<<" after mrcpp_get_num_procs"<<endl;
 
         // NB: we do not use OMP_NUM_THREADS. Use all cores accessible.
 
         if (is_bank) nthreads = 1; // 3)
 
-        //        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<omp_get_num_procs()<<" maxthreads"<<omp_get_max_threads()<<" "<<"
+        //        cout<<world_rank<<" found "<<omp_threads_available<<" available threads. omp: procs"<<mrcpp_get_num_procs()<<" maxthreads"<<mrcpp_get_max_threads()<<" "<<"
         //        threads"<<omp_get_num_threads()<<" "<<mrcpp::omp::n_threads<<" On this node: "<<n_bank_thisnode<<" banks "<<n_wrk_thisnode<<" workers"<<" "<<nthreads<<" is bank "<<is_bank<<"
         //        my_OMP_NUM_THREADS "<<my_OMP_NUM_THREADS<<endl;
 
@@ -233,7 +233,7 @@ void initialize() {
         std::cout << "WARNING: only " << nthreads * n_wrk_thisnode + n_bank_thisnode << " threads used per node while " << omp_threads_available << " logical cpus are accessible " << std::endl;
     }
 
-    if (nthreads > omp_get_num_procs()) { std::cout << "WARNING: MPI rank " << world_rank << " will use " << nthreads << " but only " << omp_get_num_procs() << " procs are accessible" << std::endl; }
+    if (nthreads > mrcpp_get_num_procs() / 2) { std::cout << "WARNING: MPI rank " << world_rank << " will use " << nthreads << " but only " << mrcpp_get_num_procs() / 2 << " procs are accessible" << std::endl; }
 
     omp::n_threads = nthreads;
     mrcpp::set_max_threads(nthreads);

From 8f41b4b1ada2529d58b0bac3a36319665e9b43d7 Mon Sep 17 00:00:00 2001
From: gitpeterwind <peter.wind@met.no>
Date: Fri, 7 Feb 2025 13:38:40 +0100
Subject: [PATCH 38/38] docs typo

---
 docs/mrcpp_api/mwfunctions.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/mrcpp_api/mwfunctions.rst b/docs/mrcpp_api/mwfunctions.rst
index cef911f6f..527497f5c 100644
--- a/docs/mrcpp_api/mwfunctions.rst
+++ b/docs/mrcpp_api/mwfunctions.rst
@@ -165,7 +165,7 @@ Constructing an MRA
 
 An MRA is defined in two steps, first the computational domain is given by a
 ``BoundingBox`` (D is the dimension), e.g. for a total domain of
-:math:`[-32,32]^3` in three dimensions (eight root boxes of size :math:`[16]^3`
+:math:`[-32,32]^3` in three dimensions (eight root boxes of size :math:`[32]^3`
 each):
 
 .. code-block:: cpp