diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
index bcc0e52a40ca3..f73bd34e53386 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
@@ -23,10 +23,11 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Max";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0] << ";\n";
+      out << res << " = std::max({ " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
-         out << "\t" << "\t" << res << " = std::max(" << res << ", " << inputs[i] << ");\n";
+         out << ", " << inputs[i];
       }
+      out << "});\n";
       return out.str();
    }
 };
@@ -36,10 +37,11 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Min";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0] << ";\n";
+      out << res << " = std::min({ " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
-         out << "\t" << "\t" << res << " = std::min(" << res << ", " << inputs[i] << ");\n";
+         out << ", " << inputs[i];
       }
+      out << "});\n";
       return out.str();
    }
 };
@@ -52,7 +54,7 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Mean";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = (" << inputs[0];
+      out << res << " = (" << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
          out << " + " << inputs[i];
       }
@@ -66,7 +68,7 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Sum";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0];
+      out << res << " = " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
          out << " + " << inputs[i];
       }
@@ -83,10 +85,11 @@
 private:
 
    std::vector<std::string> fNInputs;
    std::string fNY;
-   std::vector<std::vector<size_t>> fShapeInputs;
+   std::vector<std::vector<Dim>> fShapeInputs;
 
    std::vector<std::string> fNBroadcastedInputs;
    std::vector<size_t> fShapeY;
+   std::vector<Dim> fDimShapeY;
 
    bool fBroadcast = false;
@@ -119,64 +122,164 @@ public:
    }
    void Initialize(RModel& model) override {
+      std::vector<std::vector<size_t>> inputShapes;
       for (auto &it : fNInputs) {
          if (!model.CheckIfTensorAlreadyExist(it)) {
            throw std::runtime_error("TMVA SOFIE BasicNary Op Input Tensor " + it + " is not found in model");
         }
-         fShapeInputs.push_back(model.GetTensorShape(it));
+         fShapeInputs.push_back(model.GetDimTensorShape(it));
+         if (fNInputs.size()> 2) {
+            if (model.IsDimInputTensor(it))
+               throw std::runtime_error("TMVA SOFIE BasicNary : supports only 2 inputs for dynamic tensors");
+            else
+               inputShapes.push_back(model.GetTensorShape(it));
+         }
      }
      // Find the common shape of the input tensors
-      fShapeY = UTILITY::MultidirectionalBroadcastShape(fShapeInputs);
-      model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY);
-      // Broadcasting
-      size_t N = fNInputs.size();
-      fNBroadcastedInputs.reserve(N);
-      for (size_t i = 0; i < N; i++) {
-         if (!UTILITY::AreSameShape(model.GetTensorShape(fNInputs[i]), fShapeY)) {
-            fBroadcast = true;
-            std::string name = "Broadcasted" + fNInputs[i];
-            model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY);
-            fNBroadcastedInputs.emplace_back("tensor_" + name);
-         } else {
-            fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]);
+      if (fShapeInputs.size() > 2 ) {
+         // dynamic tensors are currently supported only for an input list of size 2
+         auto shapeY = UTILITY::MultidirectionalBroadcastShape(inputShapes);
+         fDimShapeY = ConvertShapeToDim(shapeY);
+      } else if (fShapeInputs.size() == 2 ) {
+         auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeInputs[0], fShapeInputs[1]);
+         // use same code as in BinaryOperator (need to extend for input sizes > 2)
+         fBroadcast = ret.first;
+         fDimShapeY = ret.second;
+         // case of all parametric shapes where MultidirectionalBroadcastShape returns the max of the 2
+         // this needs to be done before declaring the output tensor shape and the broadcasted ones
+         if (ret.first & 4) {
+            // check if one of the parameters is an input dimension
+            // define function to find this
+            auto IsInputDimParam = [&](const std::string &p) {
+               auto inputNames = model.GetInputTensorNames();
+               for (auto &input : inputNames) {
+                  for (auto &i_s : model.GetDimTensorShape(input)) {
+                     if (i_s.isParam && i_s.param == p)
+                        return true;
+                  }
+               }
+               return false;
+            };
+            auto & shapeA = fShapeInputs[0];
+            auto & shapeB = fShapeInputs[1];
+            for (size_t i = 0; i < fDimShapeY.size(); i++) {
+               auto &s = fDimShapeY[i];
+               if (s.isParam && s.param.find("std::max") != std::string::npos) {
+                  if (IsInputDimParam(shapeA[i].param)) {
+                     // if the dim is 1 we assume the input parameter is equal to 1
+                     if (shapeA[i].dim != 1)
+                        s = shapeA[i];
+                     else
+                        s = shapeB[i];
+                  } else if (IsInputDimParam(shapeB[i].param)) {
+                     if (shapeB[i].dim != 1)
+                        s = shapeB[i];
+                     else
+                        s = shapeA[i];
+                  }
+               }
+            }
         }
+      } else if (fShapeInputs.size() == 1 ) {
+         fDimShapeY = fShapeInputs[0];
      }
+      if (!fShapeY.empty())
+         model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY);
+      else
+         model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fDimShapeY);
+
+      fType = ConvertTypeToString(model.GetTensorType(fNInputs[0]));
+
+      if (model.Verbose()) {
+         std::cout << NaryOperatorTraits::Name() << " : ";
+         if (fNInputs.size() == 2)
+            std::cout << ConvertShapeToString(fShapeInputs[0]) << " , "
+                      << ConvertShapeToString(fShapeInputs[1]);
+         std::cout << " --> " << ConvertShapeToString(fDimShapeY) << std::endl;
+      }
   }

   std::string Generate(std::string OpName) override {
      OpName = "op_" + OpName;
-      if (fShapeY.empty()) {
+      if (fDimShapeY.empty()) {
         throw std::runtime_error("TMVA SOFIE BasicNary called to Generate without being initialized first");
      }
      std::stringstream out;
-      size_t length = ConvertShapeToLength(fShapeY);
+      auto length = ConvertDimShapeToLength(fDimShapeY);
      out << SP << "\n//------ BasicNary operator\n";
-      if (fBroadcast) {
-         for (size_t i = 0; i < fNInputs.size(); i++) {
-            if (fNBroadcastedInputs[i] != fNInputs[i]) {
-               out << SP << SP << "// Broadcasting " << fNInputs[i] << " to " << ConvertShapeToString(fShapeY) << "\n";
-               out << SP << SP << "{\n";
-               out << SP << SP << SP << fType << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" + fNInputs[i] << ", " << ConvertShapeToString(fShapeInputs[i]);
-               out << ", " << ConvertShapeToString(fShapeY) << ");\n";
-               out << SP << SP << SP << "std::copy(data, data + " << length << ", " << fNBroadcastedInputs[i] << ");\n";
-               out << SP << SP << SP << "delete[] data;\n";
-               out << SP << SP << "}\n";
-            }
-         }
-      }
-      if (fNInputs.size() == 1) {
+      int nInputs = fNInputs.size();
+
+      if (nInputs == 1) {
         out << SP << "std::copy(tensor_" << fNInputs[0] << ", tensor_" << fNInputs[0] << " + ";
         out << length << ", tensor_" << fNY << ");\n";
      } else {
-         std::vector<std::string> inputs(fNBroadcastedInputs.size());
-         for (size_t i = 0; i < fNBroadcastedInputs.size(); i++) {
-            inputs[i] = fNBroadcastedInputs[i] + "[id]";
+
+         // implement the operator without explicit broadcasting, using loops over all output indices
+         std::vector<std::vector<Dim>> inputStrides(nInputs);
+         for (int i = 0; i < nInputs; i++)
+            inputStrides[i] = UTILITY::ComputeStrideFromShape(fShapeInputs[i]);
+
+         auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY);
+
+         // make loop on output indices
+         std::string compute_idx_Y;
+         int nloop = 0;
+         if (fDimShapeY.empty() ||
+             std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
+            compute_idx_Y = "0";
+         } else {
+            for (size_t i = 0; i < fDimShapeY.size(); ++i) {
+               if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") {
+                  nloop++;
+                  for (int j = 0; j < nloop; j++) out << SP;
+                  out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i]
+                      << "; ++idx_" << i << "){\n";
+                  compute_idx_Y += "idx_" + std::to_string(i);
+                  if (stridesY[i].GetVal() != "1")
+                     compute_idx_Y += " * " + stridesY[i].GetVal();
+                  compute_idx_Y += " + ";
+               }
+            }
+            // remove the last 3 characters " + "
+            for (int j = 0; j < 3; j++)
+               compute_idx_Y.pop_back();
+         }
+         // find indices for input tensors
+         std::vector<std::string> inputs(nInputs);
+         for (int ipt = 0; ipt < nInputs; ipt++ ) {
+            std::string compute_idx_X;
+            auto & shape = fShapeInputs[ipt];
+            auto & stride = inputStrides[ipt];
+            if (shape.empty() ||
+                std::all_of(shape.begin(), shape.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
+               compute_idx_X = "0";
+            } else {
+               for (size_t i = 0; i < shape.size(); ++i) {
+                  if (shape[i].dim == 1 || shape[i].GetVal() == "1")
+                     continue;
+                  compute_idx_X += "idx_" + std::to_string(i + (fDimShapeY.size() - shape.size()));
+                  if (stride[i].GetVal() != "1")
+                     compute_idx_X += " * " + stride[i].GetVal();
+                  compute_idx_X += " + ";
+               }
+               // remove the last 3 characters " + "
+               for (int j = 0; j < 3; j++)
+                  compute_idx_X.pop_back();
+            }
+            inputs[ipt] = "tensor_" + fNInputs[ipt] + "[" + compute_idx_X + "]";
+         }
+
+         // perform the operation
+         for (int j = 0; j < nloop + 1; j++) out << SP;
+         std::string output = "tensor_" + fNY + "[" + compute_idx_Y + "]";
+         out << NaryOperatorTraits::Op(output, inputs);
+
+         for (int i = nloop; i > 0; i--) {
+            for (int j = 0; j < i; j++) out << SP;
+            out << "}\n";
         }
-         out << SP << "for (size_t id = 0; id < " << length << "; id++) {\n";
-         out << NaryOperatorTraits::Op("tensor_" + fNY + "[id]", inputs);
-         out << SP << "}\n";
      }
      return out.str();
   }
diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
index f2d31796bbbcd..c37e7fc4b68de 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
@@ -141,8 +141,8 @@ public:
      }
   }

-   std::string Generate(std::string OpName) override {
-      OpName = "op_" + OpName;
+   std::string Generate(std::string opName) override {
+      opName = "op_" + opName;
      if (fShapeX.empty()){
         throw std::runtime_error("TMVA SOFIE Batch Normalization called to Generate without being initialized first");
      }
@@ -158,7 +158,7 @@
         spatial_dim = ConvertDimShapeToLength( spatialShape);
      }

-      out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ? " + ReLU" : "") << "\n";
+      out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ?
" + ReLU " : " ") << opName << "\n"; out << SP << "{\n"; out << SP << " size_t i = 0;\n"; out << SP << " for (size_t n = 0; n < " << batchSize << "; ++n) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx index f48e27ee4f264..8267bb8a7e4f4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx @@ -46,7 +46,7 @@ public: throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model"); } fShape = model.GetDimTensorShape(fNX); - // shoud we add a check if the same type + // should we add a check if the same type auto inputType = model.GetTensorType(fNX); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; @@ -57,29 +57,30 @@ public: } else fIsOutputConstant = false; + } else if (model.IsShapeTensor(fNX) && ConvertStringToType(fAttrType) == ETensorType::INT64) { + auto shapeData = model.GetShapeTensorValues(fNX); + model.AddShapeTensor(fNY, shapeData, fShape.size() == 0); + fIsOutputConstant = true; } if (!fIsOutputConstant) model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape); if (model.Verbose()) { - std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY; + std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY + << " shape " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " (constant) "; std::cout << std::endl; } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; + std::string Generate(std::string opName) override { + + // output shape can be empty if is a scalar - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first"); - } std::stringstream out; auto length = ConvertDimShapeToLength(fShape); - // out << SP << ETensorType << " " << OpName << "_attr = " << fattr << ";\n"; - out << "\n//------ CAST\n"; + out << "\n//------ CAST " << opName << " ---> " << fNY << " " << ConvertDimShapeToString(fShape) << "\n"; // no generated code for constant outputs if (fIsOutputConstant) return out.str(); diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 0d365ae517de5..734434357a149 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -56,7 +56,6 @@ template class ROperator_Comparision final : public ROperator{ private: - bool fIsModelOutput = false; std::string fNX1; std::string fNX2; std::string fNY; @@ -65,11 +64,10 @@ private: std::vector fDimShapeX1; std::vector fDimShapeX2; std::vector fShapeY; - std::string fNBroadcastedX1; - std::string fNBroadcastedX2; + std::vector fDimShapeY; ETensorType fTensorType1 = ETensorType::UNDEFINED; ETensorType fTensorType2 = ETensorType::UNDEFINED; - bool fBroadcast = false; + int fBroadcastFlag = 0; public: @@ -115,184 +113,260 @@ public: } fTensorType1 = model.GetTensorType(fNX1); fTensorType2 = model.GetTensorType(fNX2); - bool broadcast = !UTILITY::AreSameShape(fShapeX1, fShapeX2); - if (broadcast) { - // Y is the common shape of A and B - fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); - bool broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); - bool broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); - // Broadcast A to Y - if (broadcastX1) { - if (model.IsInitializedTensor(fNX1)) { - auto data = 
model.GetInitializedTensorData(fNX1); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX1, fShapeY), - std::default_delete()); - // Update the data and the shape of A - model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData); - fShapeX1 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting A - fNBroadcastedX1 = "Broadcasted" + fNX1; - model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); + // case of non dynamic tensors + if (!fShapeX1.empty() && !fShapeX2.empty()) { + bool broadcastX1 = false; + bool broadcastX2 = false; + if (UTILITY::AreSameShape(fShapeX1, fShapeX2)) { + // no broadcast needed + fShapeY = fShapeX1; + } else { + // Y is the common shape of A and B + fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); + broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); + broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); + } + + + // analyze case of constant tensors or shape tensors (which have known shapes but data as Dim values + // normal case with non-dynamic tensor is also here + T *data1 = nullptr; + T *data2 = nullptr; + std::unique_ptr broadcastedData1; + std::unique_ptr broadcastedData2; + // data for shape tensors + std::vector shapeData1; + std::vector shapeData2; + size_t length = ConvertShapeToLength(fShapeY); + bool *outData = new bool[length]; + if (model.IsInitializedTensor(fNX1)) { + data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); + if (broadcastX1) { + broadcastedData1 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data1, fShapeX1, fShapeY)); + data1 = broadcastedData1.get(); } + + } else if (model.IsShapeTensor(fNX1)) { + shapeData1 = model.GetShapeTensorValues(fNX1); } - // Broadcast B to Y - if (broadcastX2) { - if (model.IsInitializedTensor(fNX2)) { - auto data = model.GetInitializedTensorData(fNX2); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX2, fShapeY), - std::default_delete()); - // Update the data and the shape of B - model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData); - fShapeX2 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - fNBroadcastedX2 = "Broadcasted" + fNX2; - model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); + if (model.IsInitializedTensor(fNX2)) { + data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); + if (broadcastX2) { + broadcastedData2 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data2, fShapeX2, fShapeY)); + data2 = broadcastedData2.get(); } + } else if (model.IsShapeTensor(fNX2)) { + shapeData2 = model.GetShapeTensorValues(fNX2); } - } else { - fShapeY = fShapeX1; - } - // case of constant tensors - T * data1 = nullptr; - T * data2 = nullptr; - std::vector shapeData1; - std::vector shapeData2; - size_t length = ConvertShapeToLength(fShapeY); - bool * outData = new bool[length]; - if (model.IsInitializedTensor(fNX1)) { - data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); - } else if (model.IsShapeTensor(fNX1)) { - shapeData1 = model.GetShapeTensorValues(fNX1); - } - if (model.IsInitializedTensor(fNX2)) { - data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); - } else if (model.IsShapeTensor(fNX2)) { - shapeData2 = model.GetShapeTensorValues(fNX2); - } - if (data1 && data2) { - fIsOutputConstant = true; - for (size_t i = 0; i < length; i++) - outData[i] = 
ComparisionTrait::Result(data1[i], data2[i]); - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << std::endl; - } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { - fIsOutputConstant = true; - if (data1 && !data2) { - // data 1 is constant and data2 is shape - for (size_t i = 0; i < length; i++) { - if (shapeData2[i].isParam) { - if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { - fIsOutputConstant = false; - break; - } else { - // assume a comparison is done with .dim = 0 - shapeData2[i].dim = 0; + if (data1 && data2) { + fIsOutputConstant = true; + for (size_t i = 0; i < length; i++) + outData[i] = ComparisionTrait::Result(data1[i], data2[i]); + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << std::endl; + } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { + fIsOutputConstant = true; + if (data1 && !data2) { + // data 1 is constant and data2 is shape + for (size_t i = 0; i < length; i++) { + if (shapeData2[i].isParam) { + if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData2[i].dim = 0; + } } + outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); } - outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); - } - } else if (!data1 && data2) { - // data 1 is shape and dat2 is constant - for (size_t i = 0; i < length; i++) { - if (shapeData1[i].isParam) { - if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + } else if (!data1 && data2) { + // data 1 is shape and dat2 is constant + for (size_t i = 0; i < length; i++) { + if (shapeData1[i].isParam) { + if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData1[i].dim = 0; + } + } + outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); + } + } else if (!shapeData1.empty() && !shapeData2.empty()) { + // both data1 and data2 are shape tensors + for (size_t i = 0; i < length; i++) { + if (!shapeData1[i].isParam && !shapeData2[i].isParam) { + outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); + } else if (shapeData1[i].isParam && shapeData2[i].isParam) { + if (shapeData1[i].param == shapeData2[i].param) + outData[i] = ComparisionTrait::Result(1, 1); // comparison of two equal value + else { + fIsOutputConstant = false; + break; + } + } else { fIsOutputConstant = false; break; - } else { - // assume a comparison is done with .dim = 0 - shapeData1[i].dim = 0; } } - outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); } - } else if (!shapeData1.empty() && !shapeData2.empty() ) { - // both data1 and data2 are shape tensors - for (size_t i = 0; i < length; i++) { - if (!shapeData1[i].isParam && !shapeData2[i].isParam) { - outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); - } - else if (shapeData1[i].isParam && shapeData2[i].isParam) { - if (shapeData1[i].param == shapeData2[i].param) - outData[i] = ComparisionTrait::Result(1,1); // comparison of two equal value - else 
{ - fIsOutputConstant = false; - break; + if (fIsOutputConstant) { + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << " (constant) " << std::endl; + } + } + delete[] outData; + // case of non constant output (no constant or shape tensors) + if (!fIsOutputConstant && !fShapeY.empty()) { + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); + fDimShapeY = ConvertShapeToDim(fShapeY); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << std::endl; + } + } else { + // case of dynamic tensors + // case A or B have dynamic shapes. We need to broadcast if shape are not same + auto ret = UTILITY::MultidirectionalBroadcastShape(fDimShapeX1, fDimShapeX2); + fBroadcastFlag = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; } } - else { - fIsOutputConstant = false; - break; + return false; + }; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(fDimShapeX1[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (fDimShapeX1[i].dim != 1) + s = fDimShapeX1[i]; + else + s = fDimShapeX2[i]; + } else if (IsInputDimParam(fDimShapeX2[i].param)) { + if (fDimShapeX2[i].dim != 1) + s = fDimShapeX2[i]; + else + s = fDimShapeX1[i]; + } } } } - if (fIsOutputConstant) { - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << " (constant) " << std::endl; + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fDimShapeY); + if (model.Verbose()) { + std::cout << ComparisionTrait::Name() << " : " << fNX1 << " " << ConvertShapeToString(fDimShapeX1) << " , " + << fNX2 << " " << ConvertShapeToString(fDimShapeX2) << " --> " + << fNY << " " << ConvertShapeToString(fDimShapeY) << std::endl; + model.PrintIntermediateTensors(); } } - delete [] outData; - if (!fIsOutputConstant) { - model.AddIntermediateTensor(fNY, ETensorType::BOOL , fShapeY); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; - } - - // check if this is not output operators to add a specific line for definining the tensor_xxx variable - const auto & outputTensorNames = model.GetOutputTensorNames(); - fIsModelOutput = false; - if (std::find(outputTensorNames.begin(), outputTensorNames.end(), fNY) != outputTensorNames.end()) - fIsModelOutput = true; } std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate 
without being initialized first"); } std::stringstream out; out << SP << "\n//------ " << ComparisionTrait::Name() << " " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; - size_t length = ConvertShapeToLength(fShapeY); - // Broadcast A if it's uninitialized - if (!fNBroadcastedX1.empty()) { - std::string type1 = ConvertTypeToString(fTensorType1); - out << SP << "// Broadcasting uninitialized tensor " << fNX1 << "\n"; - out << SP << "{\n"; - out << SP << SP << type1 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type1 << ">(tensor_" << fNX1 << ", " << ConvertShapeToString(fShapeX1) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX1 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + + // need to add check if tensors are compatible as in binary operator + + // use same code as Binary operator + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeX1); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeX2); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_X1, compute_idx_X2, compute_idx_Y; + if (fDimShapeX1.empty() || + std::all_of(fDimShapeX1.begin(), fDimShapeX1.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X1 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX1.size(); ++i) { + if (fDimShapeX1[i].dim == 1 || fDimShapeX1[i].GetVal() == "1") + continue; + compute_idx_X1 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX1.size())); + if (stridesA[i].GetVal() != "1") + compute_idx_X1 += " * " + stridesA[i].GetVal(); + compute_idx_X1 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X1.pop_back(); } - // Broadcast B if it's uninitialized - if (!fNBroadcastedX2.empty()) { - std::string type2 = ConvertTypeToString(fTensorType2); - out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n"; - out << SP << "{\n"; - out << SP << SP << type2 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type2 << ">(tensor_" << fNX2 << ", " << ConvertShapeToString(fShapeX2) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX2 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + if (fDimShapeX2.empty() || + std::all_of(fDimShapeX2.begin(), fDimShapeX2.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X2 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX2.size(); ++i) { + if (fDimShapeX2[i].dim == 1 || fDimShapeX2[i].GetVal() == "1") + continue; + compute_idx_X2 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX2.size())); + if (stridesB[i].GetVal() != "1") + compute_idx_X2 += " * " + stridesB[i].GetVal(); + compute_idx_X2 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X2.pop_back(); } - const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1; - const std::string& nameX2 = fNBroadcastedX2.empty()? 
fNX2 : fNBroadcastedX2; - - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n"; - out << SP << "}\n"; - // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector - if (!fIsModelOutput) - out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "tensor_" << fNY << "[" << compute_idx_Y << "] = " + << ComparisionTrait::Op( "tensor_" + fNX1 + "[" + compute_idx_X1 + "]" , + "tensor_" + fNX2 + "[" + compute_idx_X2 + "]") << " ;\n"; + + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; + } + return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx index ad855341dfc17..d8155195c9f49 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx @@ -123,7 +123,7 @@ concat_dim = inputs[i][iaxis]; else if (inputs[i][iaxis].isParam || concat_dim.isParam) { concat_dim = - Dim{ concat_dim.GetVal() + std::string("+ ") + inputs[i][iaxis].GetVal(), + Dim{ concat_dim.GetVal() + std::string(" + ") + inputs[i][iaxis].GetVal(), static_cast(-1)}; } else { concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; @@ -156,7 +156,7 @@ } // output shape for concatenated axis - ret[fAxis] = Dim{concat_dim}; + ret[fAxis] = concat_dim; } // case of stacking (not supported yet) @@ -205,7 +205,7 @@ size_t inputLength = ConvertShapeToLength(inputShape); std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); offset += inputLength; - // data do not need to be written as a weight + // data do not need to be written in teh generated code model.SetNotWritableInitializedTensor(input); } model.AddConstantTensor(fOutput, outputShape, outputData.data()); @@ -221,15 +221,18 @@ std::vector inputData; auto inputShape = model.GetTensorShape(input); // shape is not dynamic size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) + if (model.IsShapeTensor(input)) { inputData = model.GetShapeTensorValues(input); - else if (model.IsConstantTensor(input)) { + } else if (model.IsInitializedTensor(input)) { inputData.resize(inputLength); auto intData = static_cast(model.GetInitializedTensorData(input).get()); for (size_t i = 0; i < inputData.size(); i++) inputData[i] = Dim{ static_cast(intData[i])}; } - std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + else { + // this should not happen + throw std::runtime_error("TMVA SOFIE Concat Operator- 
invalid input type for shape output type"); + } std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); offset += inputLength; } @@ -251,13 +254,15 @@ } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; opName = "op_" + opName; + std::stringstream out; + out<<"\n//--------- Concat " << opName << " --> " << fOutput << " " << ConvertShapeToString(fOutputShape) << "\n"; + + if (fIsOutputConstant) return out.str(); + if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } - std::stringstream out; - out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; for(int i = 0; i0) - out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis].GetVal() << ";\n"; + out << SP << SP << SP << "idxOut += " << inStrides[j-1][fAxis-1].GetVal() << ";\n"; out << SP << SP << SP << "int idxIn" << j <<" = "; for (int k = 0; k < fAxis; k++) { if (k > 0) out << " + "; out << inStrides[j][k].GetVal() << "*i" << k; } out << ";\n"; - out << SP << SP << SP << "for (size_t iC = 0; iC < " << fInputShapes[j][fAxis].GetVal() << "; ++iC) {\n"; + out << SP << SP << SP << "for (size_t iC = 0; iC < " << inStrides[j][fAxis-1].GetVal() << "; ++iC) {\n"; out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n"; out << SP << SP << SP << "}\n"; // concatenate the axis values diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 1cf5d13f5cd6f..93f3c43feceb9 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -128,6 +128,7 @@ public: } } else { model.AddIntermediateTensor(fNY, ConvertStringToType(TensorType::Name()), fDimOutputShape); + fOutputTensorNames.emplace_back(fNY); } } @@ -136,9 +137,9 @@ public: std::stringstream out; if (fIsOutputConstant) { if (fNX.empty()) - out << "// ---- Constant (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- Constant (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; else - out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; return out.str(); } // Only ConstantOfShape might require generation code @@ -153,9 +154,7 @@ public: } auto length = ConvertDimShapeToLength(fDimOutputShape); // vector is already allocated- fill with values - out << SP << "if (" << length << " > fTensor_" << fNY << ".size())\n"; - out << SP << SP << "fTensor_" << fNY << ".resize(" << length << ");\n"; - out << SP << "std::fill(fTensor_" << fNY << ".begin(), fTensor_" << fNY << ".end(), " << fValues[0] << ");\n"; + out << SP << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << ", " << fValues[0] << ");\n"; return out.str(); } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 95f226ca91d4b..823e7fa04717e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -20,6 +20,8 @@ template class ROperator_Conv final : public ROperator { private: + bool fBroadcastBias = false; + std::string fAttrAutopad; std::vector fAttrDilations; 
size_t fAttrGroup; @@ -30,7 +32,6 @@ private: std::string fNX; std::string fNW; std::string fNB; - std::string fNB2; // bias tensor name after broadcasting std::string fNY; std::string convK; @@ -262,6 +263,9 @@ public: std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); } fShapeB = model.GetTensorShape(fNB); + if (fShapeB.size() != 1) + throw + std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)"); std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); auto shapeDimB = model.GetDimTensorShape(fNB); bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape); @@ -278,7 +282,9 @@ public: if (fType != "float") throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); // here is the actual broadcasting + fBroadcastBias = true; if (!fUseSession) { + // do here broadcasting std::vector shape(fDim + 1, 1); shape[0] = fShapeB[0]; auto intTargetShape = ConvertShapeToInt(targetShape); @@ -287,26 +293,28 @@ public: std::default_delete()); model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr); fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; // use same name - } - else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); } } } - // output channel size can be parametric + // output channel size can be parametric and is an expression std::vector outputDims = std::vector(fShapeY.begin()+2, fShapeY.end()); - auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + //check if shape is not parametric + std::vector outputInts = ConvertShapeToInt(outputDims); + Dim channelDim; + if (outputInts.empty()) { + auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + channelDim = Dim{ outputChannelSize, static_cast(-1)}; + } else { + size_t outputChannelSize = ConvertShapeToLength(outputInts); + channelDim = Dim{ outputChannelSize }; + } size_t kernelSize = fAttrKernelShape[0]; for (size_t i = 1; i < fDim; i++) { kernelSize *= fAttrKernelShape[i]; } std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}}; + std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim }; model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); convK = fNX +"_f"; @@ -325,15 +333,25 @@ public: std::string GenerateInitCode() override { std::stringstream out; // Generate initialization code for broadcasting of bias tensor - if (!fNB2.empty()) { + if (fBroadcastBias) { // include a separate scope to avoid defining unique operator temp variables std::vector shape(fDim + 1, 1); + // bias (is a 1D tensor) shape[0] = fShapeB[0]; std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - out << SP << "{\n"; + out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(targetShape).empty(); + auto length = ConvertDimShapeToLength(targetShape); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; + else + out << SP << "{\n"; out << 
SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n"; + out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNB << ");\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } @@ -553,13 +571,13 @@ public: out << SP << SP << "}\n"; // end of group loop } - if (fNB2 != "") { + if (fNB != "") { out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n"; out << SP << "float " << OpName << "_gamma = 1.0;\n"; out << SP << "int " << OpName << "_incx = 1;\n"; out << SP << "int " << OpName << "_incy = 1;\n"; - out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &" + out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &" << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx index 81411b8ebf71a..1d51c59380dae 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx @@ -153,13 +153,14 @@ public: } std::string Generate(std::string opName) override { + opName = "op_" + opName; + std::stringstream out; + out << "//--------- Gather " << opName << " --> " << fNY << " " << ConvertShapeToString(fShapeY) << "\n"; if (fIsOutputConstant) { // no code to generate here for constant output. 
Tensor output is defined in Session constructor - return "//---------------------------------------\n"; + out << "//--------------------(constant)----------\n"; + return out.str(); } - opName = "op_" + opName; - std::stringstream out; - out << "//--------- Gather " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; // The shape of the output is q + r - 1 size_t r = fShapeX.size(); // Indices of shape q diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index d954720396151..1a0fa7b16868b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -24,6 +24,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; + bool fBroadcastBias = false; float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -33,7 +34,6 @@ namespace SOFIE{ std::string fNA; std::string fNB; std::string fNC = ""; - std::string fNC2; // bias tensor name after broadcasting std::string fNY; std::string fType; EActivationType fActivation; @@ -207,13 +207,7 @@ namespace SOFIE{ } fShapeY = DynamicShapeInference({fShapeA, fShapeB}); - std::vector shapeY; - if (!fIsDynamic) { - shapeY = ConvertShapeToInt(fShapeY); - if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertShapeToString(fShapeY)); - } - } + std::vector shapeY = ConvertShapeToInt(fShapeY); // bias is normally not dynamic (not support it for time being) if (fNC != ""){ @@ -222,14 +216,18 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); } fShapeC = model.GetTensorShape(fNC); - fNC2 = fNC; size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done - bool broadcast_needed = lengthC != lengthY; + bool broadcast_needed = false; + if (fIsDynamic && shapeY.empty()) + broadcast_needed = true; + else + broadcast_needed = lengthC != lengthY; if (broadcast_needed) { + fBroadcastBias = true; if (!model.UseSession()) { // without session dynamic tensors not supported in Gemm if (fIsDynamic) { @@ -246,14 +244,18 @@ namespace SOFIE{ fShapeC = shapeY; } } else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNC2 = fNC + "bcast"; - if (!fIsDynamic) { - model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); - } - else - model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // /d to add a new intermediate tensor for broadcasted bias tensor + // fNC2 = fNC + "bcast"; + // if (!fIsDynamic) { + // model.AddIntermed/ In case of session add broadcasting code in Session constructor and in GenerateInitCode + // // we neeiateTensor(fNC2, model.GetTensorType(fNC), shapeY); + // } + // else + // model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // // do not add to lists of input/output tensors since broadcasted tensors are special + // // and we manage their memory separatly + // //fInputTensorNames.emplace_back(fNC2); + // //fOutputTensorNames.emplace_back(fNC2); } } } @@ -291,18 +293,26 @@ namespace SOFIE{ std::string GenerateInitCode() override { std::stringstream out; // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { + if (fShapeC.size() != fShapeY.size() && fBroadcastBias) { // we broadcast here always C in Y output, so target shape is the one of Y // no need 
to call UTILITY::UnidirectionalBroadcastShape. // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. - auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; auto length = ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; + // include a separate scope to avoid defining unique operator temp variables + out << "//--- broadcast bias tensor " << fNC << "for Gemm op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(fShapeY).empty(); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(fShapeC) << ") {\n"; + else + out << SP << "{\n"; + // here we broadcast + out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; + + out << SP << SP << "fTensor_" << fNC << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNC << " = fTensor_" << fNC << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } @@ -338,7 +348,7 @@ namespace SOFIE{ // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -347,7 +357,7 @@ namespace SOFIE{ + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; } } } else { @@ -381,7 +391,7 @@ namespace SOFIE{ out << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias if (!fNC.empty()) - out << "tensor_" << fNC2; + out << "tensor_" << fNC; else out << "nullptr"; out << ");\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 9cac15a14fc52..16d2cb689d518 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -37,15 +37,10 @@ public: } static_assert( (std::is_same_v || std::is_same_v), "TMVA::SOFIE - Unsupported type by Range operator"); - } - - std::vector TypeInference(std::vector input) override { - return input; - } - - std::vector> ShapeInference(std::vector> input) override { - auto ret = input; //suggest copy to compiler - return ret; + { + fInputTensorNames = { fNStart, fNLimit, fNDelta }; + fOutputTensorNames = { fNOutput }; + } } void Initialize(RModel& model) override { @@ -63,32 +58,94 @@ public: std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); } ETensorType type = ConvertStringToType(fType); - if (model.IsInitializedTensor(fNStart) && model.IsInitializedTensor(fNDelta) && model.IsInitializedTensor(fNLimit)) { 
- T * start = static_cast(model.GetInitializedTensorData(fNStart).get()); - T * limit = static_cast(model.GetInitializedTensorData(fNLimit).get()); - T * delta = static_cast(model.GetInitializedTensorData(fNDelta).get()); - if (!start || !delta || !limit) - std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); - T a = *start; - T b = *limit; - T d = *delta; - int number_of_elements = std::max( static_cast(std::ceil( (b - a) / d )) , 0. ); + + + + auto analyzeInput = [&](const std::string & tName, T & value, Dim & dim) { + int ftype = 0; // type of input (0 intermediate, 1 constant , 2 shape) + if (model.IsInitializedTensor(tName)) { + T * data = static_cast(model.GetInitializedTensorData(tName).get()); + if (!data) + std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); + value = *data; + ftype = 1; + } else if (model.IsShapeTensor(tName)) { + auto data = model.GetShapeTensorValues(tName); + dim = data[0]; + if (!dim.isParam) { + value = static_cast(dim.dim); + ftype = 1; + } else + ftype = 2; + } + return ftype; + }; + + T start_value; + T limit_value; + T delta_value; + Dim start_dim; + Dim limit_dim; + Dim delta_dim; + int res1 = analyzeInput(fNStart, start_value, start_dim); + int res2 = analyzeInput(fNLimit, limit_value, limit_dim); + int res3 = analyzeInput(fNDelta, delta_value, delta_dim); + if (res1 == 0 || res2 == 0 || res3 == 0) { + // cannot know at compile time- need to do fully at run time + // + fShape = {Dim{"range_size_" + fNStart + "_" + fNLimit}}; + model.AddDynamicTensor(fNOutput, type, fShape); + } else if (res1 == 1 && res2 == 1 && res3 == 1) { + size_t number_of_elements = std::max(static_cast(std::ceil((limit_value - start_value) / delta_value )) , 0 ); + fIsOutputConstant = true; + + // compute output std::vector output(number_of_elements); - for (int i=0; i shape = {static_cast(number_of_elements)}; + std::vector shape = {number_of_elements}; model.AddConstantTensor(fNOutput,shape, output.data()); - fIsOutputConstant = true; - // set the input tensor not writable + fShape = ConvertShapeToDim(shape); + + // set the input tensor not writable model.SetNotWritableInitializedTensor(fNStart); model.SetNotWritableInitializedTensor(fNDelta); model.SetNotWritableInitializedTensor(fNLimit); + + } else { // case of a shape tensor + std::string start = (res1 == 1) ? std::to_string(start_value) : start_dim.GetVal(); + std::string limit = (res2 == 1) ? std::to_string(limit_value) : limit_dim.GetVal(); + std::string delta = (res3 == 1) ? 
std::to_string(delta_value) : delta_dim.GetVal(); + std::stringstream s; + if (type == ETensorType::FLOAT ) { + if (delta_value == 1) + s << "std::max(std::ceil("<< limit << " - " << start << "),0.0f)"; + else + s << "std::max(std::ceil(("<< limit << " - " << start << ")/" << delta << "),0.0f)"; + } else if (type == ETensorType::INT64 ) { + if (delta == "1") { + if (start == "0") + s << limit; + else + s << "std::max((" << limit << " - " << start << "),0L)"; + } else { + if (start == "0") + s << "((" << limit << ")/" << delta << ")"; + else + s << "std::max((" << limit << " - " << start << ")/"<< delta << "),0L)"; + } + } else { + throw + std::runtime_error("TMVA SOFIE Range Op Input Tensor " + ConvertTypeToString(type) + "is not supported"); + } + + + fShape = { Dim {s.str(), static_cast(-1)} }; + model.AddDynamicTensor(fNOutput,type, fShape); } - else { - fShape = {Dim{"range_size"}}; - model.AddDynamicTensor(fNOutput, type, fShape); - } + + if (model.Verbose()) { std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); @@ -96,26 +153,32 @@ public: } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { std::stringstream out; - out << "\n//------ Range\n"; + out << "\n//------ Range " << opName << "---> " << ConvertDimShapeToString(fShape) << "\n"; if (fIsOutputConstant) return out.str(); - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); } std::string sizeName = fShape[0].param; - out << SP << "size_t " << sizeName << " = static_cast(std::max(std::ceil((static_cast(*tensor_" << fNLimit << ") - static_cast(*tensor_" << fNStart << ")) / static_cast(*tensor_" << fNDelta << ")), 0.0f));\n"; - out << SP << "if (" << sizeName << " > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP << "fTensor_" << fNOutput << ".resize(" << sizeName << ");\n"; - // need to re-initialized pointer to tensor data - out << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << "}\n"; - out << SP << "for (size_t i = 0; i < " << sizeName << "; i++) {\n"; - out << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + if (sizeName.find("range_size") != std::string::npos) + sizeName = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + + ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; + out << SP << "{\n"; + out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; + if (sizeName != fShape[0].param) { + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + // we should probably resize the tensor here + out << SP << SP << SP << "throw std::runtime_error(\"wrong size allocated for output of range\");\n"; + out << SP << SP << "}\n"; + } + out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; + out << SP << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx index 1204770d3d321..1da588e965a01 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx +++ 
b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx @@ -166,7 +166,7 @@ public: std::string reducedLength; if (fInputDimShape) { reducedLength = "reducedLength_" + opName; - out << SP << "size_t " << reducedLength << " = " << inputLength << " / " << outputLength << ";\n"; + out << SP << "size_t " << reducedLength << " = (" << inputLength << ") / (" << outputLength << ");\n"; } else { int rLength = std::stoi(inputLength) / std::stoi(outputLength); reducedLength = std::to_string(rLength); diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx index 2634b68dbc875..a3ed28c4860bc 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx @@ -108,6 +108,9 @@ public: if (IsInteger(tmp_length) && IsInteger(input_length)) output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else if (IsInteger(tmp_length) && std::stoi(tmp_length) == 1) { + output_shape[i] = Dim{input_length, static_cast(-1)}; + } else { //we can try simplifying expression if tmp_length is integer and part of input_length // contains tmp_length @@ -243,7 +246,7 @@ public: // check if optional tensor exists defining shape or axes if (!fNInput2.empty()) { if (model.CheckIfTensorAlreadyExist(fNInput2)) { - if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + if (model.IsInitializedTensor(fNInput2)) { // assume input shape is an initialized tensor auto dptr = model.GetInitializedTensorData(fNInput2); auto values = static_cast(dptr.get()); @@ -260,6 +263,9 @@ public: fShapeOutput = ShapeInference({fShapeInput})[0]; // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); + } else if (model.IsShapeTensor(fNInput2)) { + auto shapeData = model.GetShapeTensorValues(fNInput2); + fShapeOutput = shapeData; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx index b23e3b0a86d21..3add774b0d8d4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx @@ -235,6 +235,8 @@ public: if (iend < 0) { std::string send = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-iend) +")"; fEnd[fAxes[i]] = Dim{send,size_t(-1)}; + } else if (iend == std::numeric_limits::max()){ + fEnd[fAxes[i]] = fShapeInput[fAxes[i]]; } else { fEnd[fAxes[i]] = Dim{size_t(iend)}; } @@ -332,23 +334,23 @@ public: else { model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) { - std::cout << "Slice ---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << "Slice " << fNData << " " << ConvertShapeToString(fShapeInput) + << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; } } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::string Generate(std::string opName) override { - OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeOutput.empty()){ throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); } std::stringstream out; - //std::string opName = "Slice"; - out << SP << "///------- Slice operator\n" << std::endl; + out << "///------- Slice operator " << opName << "---> " << fNOutput << " " + << 
ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; + if (fIsOutputConstant) return out.str(); //no op for constant tensors // loop on the dimensions depending no the orders size_t ndim = fShapeInput.size(); auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); diff --git a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx index 1086f72eae71c..9b291b40e0854 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx @@ -20,8 +20,8 @@ private: std::string fNRepeats; std::string fNInput; std::string fNY; - std::vectorfShapeInput; - std::vector fShapeY; + std::vectorfShapeInput; + std::vector fShapeY; public: ROperator_Tile(){} @@ -35,13 +35,18 @@ public: return input; } - std::vector> ShapeInference(std::vector> input) override { - std::vector ret = input[0]; - - for(size_t i=0; i < input[1].size(); i++) { - ret[i]=ret[i]*input[1][i]; + std::vector DoShapeInference(const std::vector & input, const std::vector repeat) { + std::vector ret = input; + for(size_t i=0; i < repeat.size(); i++) { + if (repeat[i] != 1) { + if (ret[i].isParam) { + ret[i] = Dim{ std::string(ret[i].GetVal() + "*" + std::to_string(repeat[i])), static_cast(-1) }; + } else { + ret[i]=Dim { ret[i].dim *repeat[i] }; + } + } } - return {ret}; + return ret; } void Initialize(RModel& model) override { @@ -52,7 +57,7 @@ public: if (model.CheckIfTensorAlreadyExist(fNRepeats) == false){ throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); } - fShapeInput=model.GetTensorShape(fNInput); + fShapeInput=model.GetDimTensorShape(fNInput); // if repeats vector is not initialized we cannot deduce shape of output // not support for time being this case @@ -79,12 +84,12 @@ public: std::copy(repeats_data, repeats_data + num_elements, repeats_vector.begin()); - fShapeY = ShapeInference({fShapeInput,repeats_vector})[0]; + fShapeY = DoShapeInference(fShapeInput,repeats_vector); model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); if (model.Verbose()) - std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + std::cout << "Tile: " << fNInput << " " << ConvertDimShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) << " given repeats " << ConvertShapeToString(repeats_vector) << std::endl; } @@ -103,9 +108,9 @@ public: std::string output = "tensor_" + fNY; out << "///-------- Tile operator\n"; out << "{\n"; // add scope to re-use same names - out << "const int input_shape[" << fShapeInput.size() << "] = " << ConvertShapeToString(fShapeInput) << ";\n"; + out << "const size_t input_shape[" << fShapeInput.size() << "] = " << ConvertDimShapeToString(fShapeInput) << ";\n"; - out << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; + out << "int inputLength = " << ConvertDimShapeToLength(fShapeInput) << ";\n"; out << "int s = 1;\n"; // loop from inverse dim order out << "for (int i = " << fShapeInput.size()-1 << "; i >=0; i--) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx index 0869437bb6b0c..edee91de8eb57 100644 --- a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx @@ -19,13 +19,13 @@ private: int fAttrLargest; int fAttrSorted; - size_t fK; + Dim fK; std::string fNK; std::string fNX; std::string fNVal; std::string fNInd; - std::vector fShapeX; - std::vector fShapeY; + std::vector 
fShapeX; + std::vector fShapeY; std::string fType; public: @@ -43,23 +43,10 @@ public: } std::vector TypeInference(std::vector input) override { - ETensorType ret = input[0]; - return {ret, ret}; - } - - std::vector> ShapeInference(std::vector> input) override { - if (input.size() != 2) { - throw std::runtime_error("TMVA SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); - } - - auto shape = input[0]; // Shape format: [ m x n x o x p ... ] - - // set the dimension at the specified axis to k (fAttrAxis is checked before that is in the correct range - shape[fAttrAxis] = fK; // Modified shape: [ m x n x k x p ... ] - return {shape, shape}; + ETensorType ret = input[0]; + return {ret, ret}; } - void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor @@ -70,10 +57,10 @@ public: throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor i.e. K is not found in model"); } - fShapeX = model.GetTensorShape(fNX); + fShapeX = model.GetDimTensorShape(fNX); auto fShapeK = model.GetTensorShape(fNK); auto kptr = static_cast(model.GetInitializedTensorData(fNK).get()); - fK = *kptr; + size_t kval = *kptr; model.SetNotWritableInitializedTensor(fNK); fAttrAxis = fAttrAxis < 0 ? fShapeX.size() + fAttrAxis : fAttrAxis; if(static_cast(fAttrAxis) >= fShapeX.size()){ @@ -81,14 +68,25 @@ public: std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+fShapeX.size()+" ."); } // fK cannot be larger that axis dimension - fK = std::min(fK, fShapeX[fAttrAxis]); + if (fShapeX[fAttrAxis].isParam) + fK = Dim{std::string("std::min(size_t(" + std::to_string(kval) + "), " + fShapeX[fAttrAxis].GetVal() + ")" ), static_cast(-1) }; + else + fK = Dim { std::min(kval, fShapeX[fAttrAxis].dim) }; + + // output shape is equal to input shape apart for value in fAttrAxis + fShapeY = fShapeX; + fShapeY[fAttrAxis] = Dim{fK}; - fShapeY = ShapeInference({fShapeX, fShapeK})[0]; model.AddIntermediateTensor(fNVal, model.GetTensorType(fNX), fShapeY); // output indices should be an int64 tensor model.AddIntermediateTensor(fNInd, ETensorType::INT64, fShapeY); fType = ConvertTypeToString(model.GetTensorType(fNX)); + + if (model.Verbose()) { + std::cout << "TopK " << fNX << " " << ConvertShapeToString(fShapeX) + << "---> " << fNVal << " " << ConvertShapeToString(fShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { @@ -101,19 +99,20 @@ public: size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis; out << "\n" << SP << "//------ TopK\n"; - size_t length=ConvertShapeToLength(fShapeX); + auto length=ConvertDimShapeToLength(fShapeX); auto strideX = UTILITY::ComputeStrideFromShape(fShapeX); auto strideY = UTILITY::ComputeStrideFromShape(fShapeY); // we perform loop on dimension before sorted axis and after sorted axis - size_t n_before = (axis>0) ? length/strideX[axis-1] : 1; - size_t n_after = strideX[axis]; - size_t n_elements = fShapeX[axis]; // number of elements to be sorted + std::vector shape_before(fShapeX.begin(), fShapeX.begin() + axis); // input shape before axis + std::string n_before = (axis>0) ? 
ConvertDimShapeToLength(shape_before) : "1"; + std::string n_after = strideX[axis].GetVal(); + std::string n_elements = fShapeX[axis].GetVal(); // number of elements to be sorted // } out << SP << "{\n"; // to define a separate scope for the operator code out << SP << "std::vector> elements(" << n_elements << ");\n"; // loop on elements before - if (n_before > 1) { + if (n_before != "1") { out << SP << "for (size_t i = 0; i < " << n_before << "; i++) {\n"; out << SP << SP << "size_t xoffset = i*" << strideX[axis-1] << ";\n"; out << SP << SP << "size_t yoffset = i*" << strideY[axis-1] << ";\n"; @@ -122,7 +121,7 @@ public: out << SP << "size_t xoffset = 0;\n"; out << SP << "size_t yoffset = 0;\n"; } - if (n_after > 1) + if (n_after != "1") out << SP << "for (size_t j = 0; j < " << n_after << "; j++) {\n"; else out << SP << "const size_t j = 0;\n"; @@ -149,8 +148,8 @@ public: out << SP << SP << SP << "tensor_" << fNVal << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].first;\n"; out << SP << SP << SP << "tensor_" << fNInd << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].second;\n"; out << SP << SP << "}\n"; - if (n_after > 1) out << SP << SP << "}\n"; - if (n_before> 1) out << SP << "}\n"; + if (n_after != "1") out << SP << SP << "}\n"; + if (n_before != "1") out << SP << "}\n"; out << SP << "}\n"; // end operator scope return out.str(); } diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 2dae4f7d03ce7..7abb7df68d997 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -252,8 +252,14 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} + // check if a Tensor is Writable (need to be written in teh file or in the generated code (e.g. as a costant tensor) + // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in + // the generated code + bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set as constant (needed for non-flot initialized tensors) + void SetConstant() { fConstant = true;} template T const *data() const @@ -805,6 +811,22 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } + +// code for the memory greeding allocations +struct TensorLifeInfo { + int begin; // start time (op index) lifetime + int end; // end time lifetime + size_t size; // size of tensors in bytes +}; + +struct MemoryResult { + std::size_t total_bytes = 0; // total memory needed + std::vector offsets; // resulted offsets for each tensor +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); + } // namespace SOFIE } // namespace Experimental } // namespace TMVA diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 3e2c2d6ed332f..8bc4d4e048a30 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -167,16 +167,15 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { } // storing the last usage of tensors which are input to - // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). 
This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. + // operators (but are not inputs to the model or they are not initialized) + // We call this function during parsing so we don't have yet initialized the operators for(size_t index = 0; index & inputParams, bool fIntermediateTensorInfos.clear(); fDynamicTensorInfos.clear(); + // loop on inputs and see if shape can be full specified // if the batch size is provided it can be used to specify the full shape // Add the full specified tensors in fReadyInputTensors collection @@ -581,7 +582,7 @@ void RModel::Initialize(const std::map & inputParams, bool if (fUseWeightFile) { bool modelHasWeights = false; for (auto &i : fInitializedTensors) { - if (i.second.type() == ETensorType::FLOAT) { + if (i.second.IsWeightTensor()) { modelHasWeights = true; break; } @@ -602,16 +603,24 @@ void RModel::Initialize(const std::map & inputParams, bool fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ std::string name = std::string{it}; + // check if tensor is not an initialized or output tensor and it is not already in the list if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ + fInitializedTensors.find(name) == fInitializedTensors.end()) + { fIntermediateTensorFrequencyLookup[it] = op_idx; } } i++; } + // loop on initialized tensors and make the integers as constant to be + // not written in a weight file + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor() && it.second.type() != ETensorType::FLOAT) + it.second.SetConstant(); + } + fIsInitialized = true; } @@ -684,9 +693,11 @@ std::string GenerateConstantTensorCode(const std::pair(i); @@ -764,16 +775,21 @@ void RModel::GenerateIntermediateTensorInfo() { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { if (i.second.type == ETensorType::FLOAT) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "float * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "double * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::INT64) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; + } else if (i.second.type == ETensorType::BOOL) { + //fGC += "std::vector fTensor_" + i.first + ";\n"; + fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } + fGC += "//--- dynamic tensors pool\n"; + fGC += "std::vector fDynamicMemoryPool;\n"; } } @@ -791,14 +807,81 @@ void RModel::GenerateOperatorDeclarations() { void RModel::GenerateDynamicTensorInfo() { + // generate code for allocating dynamic tensors using the greedy memory allocations + if (fDynamicTensorInfos.empty()) + return; + std::stringstream out; + out << "// dynamic tensor memory management\n"; + out << SP << "std::vector dynamicTensorInfos;\n"; + out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; + + // loop on all the operators to find begin/end 
life of the tensors + int op_index = 0; + std::vector> tensors; + tensors.reserve(fDynamicTensorInfos.size()); + for (auto & op : fOperators) { + // loop on output tensors - + for (auto &it : op->GetOpOutputTensors()) { + if (fVerbose) { + auto op_ptr = op.get(); + std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; + } + // check if is a dynamic tensor + std::string name = std::string(it); + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() ) { + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + auto type = GetTensorType(name); + size_t type_size = GetTypeSize(type); + int begin = op_index; + int end = fOperators.size(); + // look for end + auto it_lookup = fIntermediateTensorFrequencyLookup.find(name); + if (it_lookup != fIntermediateTensorFrequencyLookup.end()) + end = it_lookup->second + 1; // end is last time used + 1 + // // some tensors (like xcol in convolutions) are just used within the operators + // if (end == 0 && begin > 0) end = begin+1; + + if (begin> end) { + std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + } + + // write in code + out << SP << "dynamicTensorInfos.push_back( {" << begin << ", " << end << ", " << type_size << "* (" << tensor_size << ") });" + << " // tensor_" << name << std::endl; + tensors.push_back({name,type}); + } + } + op_index++; // increment operator index + } + out << "\n" << SP << "auto memory_result = OrganizeMemory(dynamicTensorInfos);\n\n"; + out << "// allocating now the memory\n"; + out << SP << "fDynamicMemoryPool = std::vector(memory_result.total_bytes);\n"; + out << SP << "int idx = 0;\n"; + for (auto & it : tensors) { + out << SP << "tensor_" << it.first << " = reinterpret_cast<" << ConvertTypeToString(it.second) << " *>(fDynamicMemoryPool.data() + memory_result.offsets[idx++]);\n"; + } + // check that all dynamic tensors are covered + bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; + if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { + std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; + missingTensor = true; + } } + if (missingTensor) + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + + + + // for (auto &i : fDynamicTensorInfos) { + // auto length = ConvertDynamicShapeToLength(i.second.shape); + // out << SP << "if (" << length << " > 0) {\n"; + // out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + // out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + // out << SP << "}\n"; + // } fGC += out.str(); } @@ -1143,7 +1226,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + 
ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1288,7 +1371,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index c107b489be19e..1ff510842643a 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace TMVA { namespace Experimental { @@ -89,7 +91,7 @@ std::string ConvertTypeToString(ETensorType type){ return "double"; } case ETensorType::BOOL : { - return "bool"; + return "uint8_t"; } default:{ return "other_" + std::to_string( (int) type); @@ -547,6 +549,130 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } +struct FreeBlock { + std::size_t offset; + std::size_t size; + bool operator<(const FreeBlock& other) const { + // order by offset for deterministic coalescing + return offset < other.offset; + } +}; + +struct MemoryEvent { + int t; // time (i.e. operator index) + int type; // 0 = END first, 1 = START + int idx; // tensor index + bool operator<(const MemoryEvent& o) const { + if (t != o.t) return t < o.t; + return type < o.type; // END before START at the same time + } +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) +{ + // Basic validation + for (const auto &t : tensorsInfo) { + if (!(t.end > t.begin)) { + throw std::runtime_error("Each tensor must have end > begin."); + } + } + + // Build events: free before allocate at equal times. + std::vector events; + events.reserve(tensorsInfo.size() * 2); + for (int i = 0; i < (int)tensorsInfo.size(); ++i) { + events.push_back({tensorsInfo[i].end, 0, i}); // END + events.push_back({tensorsInfo[i].begin, 1, i}); // START + } + std::sort(events.begin(), events.end()); + + std::vector tensorsOffset(tensorsInfo.size()); + + // Free list ordered by offset (for O(log n) coalescing) + // and faster insert/erase with respect to a vector + std::set free_list; + + // Bookkeeping: size/offset map for frees. + std::unordered_map live_size; + std::unordered_map live_offset; + + std::size_t total_bytes = 0; + + auto allocate_best_fit = [&](std::size_t need) -> std::size_t { + // Find the *smallest* block whose size >= need (best-fit). + // Since free_list is ordered by offset, we scan to find best by size. + // (For very large sets you could maintain a multimap by size as well.) 
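+      // Best-fit rather than first-fit: choosing the block with the smallest leftover
+      // fragment keeps fragmentation low, so the pool size stays close to the peak of
+      // concurrently live bytes.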
+ auto best = free_list.end(); + for (auto it = free_list.begin(); it != free_list.end(); ++it) { + if (it->size >= need) { + if (best == free_list.end() || it->size < best->size) + best = it; + } + } + if (best != free_list.end()) { + std::size_t off = best->offset; + if (best->size == need) { + free_list.erase(best); + } else { + FreeBlock updated{best->offset + need, best->size - need}; + free_list.erase(best); + free_list.insert(updated); + } + return off; + } + // No free block large enough; grow the heap. + std::size_t off = total_bytes; + total_bytes += need; + return off; + }; + + auto try_coalesce = [&](std::set::iterator it) { + // Coalesce with previous + if (it != free_list.begin()) { + auto prev = std::prev(it); + if (prev->offset + prev->size == it->offset) { + FreeBlock merged{prev->offset, prev->size + it->size}; + free_list.erase(prev); + it = free_list.erase(it); + it = free_list.insert(merged).first; + } + } + // Coalesce with next + auto next = std::next(it); + if (next != free_list.end() && it->offset + it->size == next->offset) { + FreeBlock merged{it->offset, it->size + next->size}; + free_list.erase(next); + it = free_list.erase(it); + free_list.insert(merged); + } + }; + + // Sweep through time. + for (const auto &e : events) { + if (e.type == 0) { // END: free + auto it_sz = live_size.find(e.idx); + auto it_off = live_offset.find(e.idx); + if (it_sz != live_size.end() && it_off != live_offset.end()) { + FreeBlock fb{it_off->second, it_sz->second}; + // Insert and coalesce with neighbors + auto it = free_list.insert(fb).first; + try_coalesce(it); + live_size.erase(it_sz); + live_offset.erase(it_off); + } + } else { // START: allocate + auto &t = tensorsInfo[e.idx]; + std::size_t off = allocate_best_fit(t.size); + tensorsOffset[e.idx] = off; + live_size[e.idx] = t.size; + live_offset[e.idx] = off; + } + } + + return MemoryResult{total_bytes, std::move(tensorsOffset)}; +} + } // namespace SOFIE } // namespace Experimental } // namespace TMVA
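For orientation, a minimal standalone sketch of the allocation pattern that the generated Session code now follows: build one TensorLifeInfo per dynamic tensor, let OrganizeMemory compute byte offsets, then view each tensor as a typed pointer into a single byte pool. The tensor names and sizes below are made up for illustration, and the element types assumed here (std::size_t offsets, a char pool) are consistent with the reinterpret_cast pattern emitted in GenerateDynamicTensorInfo but are not guaranteed by this patch.

#include <cstdint>
#include <iostream>
#include <vector>
#include "TMVA/SOFIE_common.hxx"

using namespace TMVA::Experimental::SOFIE;

int main()
{
   // one entry per dynamic tensor: {first op writing it, last op reading it + 1, size in bytes}
   std::vector<TensorLifeInfo> dynamicTensorInfos;
   dynamicTensorInfos.push_back({0, 2, sizeof(float)   * 1000});  // e.g. tensor_A
   dynamicTensorInfos.push_back({1, 3, sizeof(float)   *  500});  // e.g. tensor_B
   dynamicTensorInfos.push_back({2, 4, sizeof(int64_t) *  100});  // e.g. tensor_C (A is dead by then)

   auto memory_result = OrganizeMemory(dynamicTensorInfos);

   // a single byte pool backs all dynamic tensors; each tensor is a typed view at its offset
   std::vector<char> pool(memory_result.total_bytes);
   float   *tensor_A = reinterpret_cast<float *>(pool.data() + memory_result.offsets[0]);
   float   *tensor_B = reinterpret_cast<float *>(pool.data() + memory_result.offsets[1]);
   int64_t *tensor_C = reinterpret_cast<int64_t *>(pool.data() + memory_result.offsets[2]);

   // A and B overlap in time, so their byte ranges are disjoint; C only starts once A is
   // dead, so the planner is free to hand C the slot previously used by A.
   std::cout << "pool size " << memory_result.total_bytes << " bytes, offsets "
             << memory_result.offsets[0] << " " << memory_result.offsets[1] << " "
             << memory_result.offsets[2] << std::endl;
   (void) tensor_A; (void) tensor_B; (void) tensor_C;
   return 0;
}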
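Going back to the Range operator earlier in the patch: the string expressions stored in fShape encode the ONNX Range output length, number_of_elements = max(ceil((limit - start) / delta), 0). A plain C++ rendering of the same formula is given below for reference; the helper name is hypothetical, and the integer overload assumes a positive delta for the ceiling division.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>

// floating-point inputs: matches the expression emitted by the general FLOAT branch
inline std::size_t RangeLength(float start, float limit, float delta)
{
   return static_cast<std::size_t>(std::max(std::ceil((limit - start) / delta), 0.0f));
}

// integer inputs: ceiling division, assuming delta > 0
inline std::size_t RangeLength(int64_t start, int64_t limit, int64_t delta)
{
   int64_t n = (limit - start + delta - 1) / delta;
   return n > 0 ? static_cast<std::size_t>(n) : 0;
}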