From 15563e2facae18ebf84dbb1b2c92bf4d2e2037ca Mon Sep 17 00:00:00 2001 From: moneta Date: Mon, 10 Nov 2025 17:30:32 +0100 Subject: [PATCH 1/4] [tmva][sofie] Apply fixes for supporting Dynamic tensors Add missing support for Dynamic tensors for some operators. With this commit a full support for dynamic tensor is available for ParticleNet model. Fix also a bug in Concat operator when the concat axis is not the first one --- tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx | 189 ++++++--- tmva/sofie/inc/TMVA/ROperator_Cast.hxx | 21 +- tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 362 +++++++++++------- tmva/sofie/inc/TMVA/ROperator_Concat.hxx | 27 +- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 4 +- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 15 +- tmva/sofie/inc/TMVA/ROperator_Gather.hxx | 9 +- tmva/sofie/inc/TMVA/ROperator_Range.hxx | 134 +++++-- tmva/sofie/inc/TMVA/ROperator_Reduce.hxx | 2 +- tmva/sofie/inc/TMVA/ROperator_Reshape.hxx | 8 +- tmva/sofie/inc/TMVA/ROperator_Slice.hxx | 14 +- tmva/sofie/inc/TMVA/ROperator_Tile.hxx | 31 +- tmva/sofie/inc/TMVA/ROperator_TopK.hxx | 59 ++- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 6 + tmva/sofie/src/RModel.cxx | 22 +- 15 files changed, 598 insertions(+), 305 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx index bcc0e52a40ca3..f73bd34e53386 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx @@ -23,10 +23,11 @@ struct NaryOperatorTraits { static const std::string Name() {return "Max";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0] << ";\n"; + out << res << " = std::max({ " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { - out << "\t" << "\t" << res << " = std::max(" << res << ", " << inputs[i] << ");\n"; + out << ", " << inputs[i]; } + out << "});\n"; return out.str(); } }; @@ -36,10 +37,11 @@ struct NaryOperatorTraits { static const std::string Name() {return "Min";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0] << ";\n"; + out << res << " = std::min({ " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { - out << "\t" << "\t" << res << " = std::min(" << res << ", " << inputs[i] << ");\n"; + out << ", " << inputs[i]; } + out << "});\n"; return out.str(); } }; @@ -52,7 +54,7 @@ struct NaryOperatorTraits { static const std::string Name() {return "Mean";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = (" << inputs[0]; + out << res << " = (" << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { out << " + " << inputs[i]; } @@ -66,7 +68,7 @@ struct NaryOperatorTraits { static const std::string Name() {return "Sum";} static std::string Op(const std::string& res, std::vector& inputs) { std::stringstream out; - out << "\t" << "\t" << res << " = " << inputs[0]; + out << res << " = " << inputs[0]; for (size_t i = 1; i < inputs.size(); i++) { out << " + " << inputs[i]; } @@ -83,10 +85,11 @@ private: std::vector fNInputs; std::string fNY; - std::vector> fShapeInputs; + std::vector> fShapeInputs; std::vector fNBroadcastedInputs; std::vector fShapeY; + std::vector fDimShapeY; bool fBroadcast = false; @@ -119,64 +122,164 @@ public: } void Initialize(RModel& model) override { + std::vector> inputShapes; for (auto &it : fNInputs) { if 
(!model.CheckIfTensorAlreadyExist(it)) { throw std::runtime_error("TMVA SOFIE BasicNary Op Input Tensor " + it + " is not found in model"); } - fShapeInputs.push_back(model.GetTensorShape(it)); + fShapeInputs.push_back(model.GetDimTensorShape(it)); + if (fNInputs.size()> 2) { + if (model.IsDimInputTensor(it)) + throw std::runtime_error("TMVA SOFIE BasicNary : supports only 2 inputs for dynamic tensors"); + else + inputShapes.push_back(model.GetTensorShape(it)); + } } // Find the common shape of the input tensors - fShapeY = UTILITY::MultidirectionalBroadcastShape(fShapeInputs); - model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY); - // Broadcasting - size_t N = fNInputs.size(); - fNBroadcastedInputs.reserve(N); - for (size_t i = 0; i < N; i++) { - if (!UTILITY::AreSameShape(model.GetTensorShape(fNInputs[i]), fShapeY)) { - fBroadcast = true; - std::string name = "Broadcasted" + fNInputs[i]; - model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY); - fNBroadcastedInputs.emplace_back("tensor_" + name); - } else { - fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]); + if (fShapeInputs.size() > 2 ) { + // support dynamic tensors now for input list of size=2 + auto shapeY = UTILITY::MultidirectionalBroadcastShape(inputShapes); + fDimShapeY = ConvertShapeToDim(shapeY); + } else if (fShapeInputs.size() == 2 ) { + auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeInputs[0], fShapeInputs[1]); + // use same code as in BinaryOperator (need to extend for input sizes > 2) + fBroadcast = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; + } + } + return false; + }; + auto & shapeA = fShapeInputs[0]; + auto & shapeB = fShapeInputs[1]; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(shapeA[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (shapeA[i].dim != 1) + s = shapeA[i]; + else + s = shapeB[i]; + } else if (IsInputDimParam(shapeB[i].param)) { + if (shapeB[i].dim != 1) + s = shapeB[i]; + else + s = shapeA[i]; + } + } + } } + } else if (fShapeInputs.size() == 1 ) { + fDimShapeY = fShapeInputs[0]; } + if (!fShapeY.empty()) + model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY); + else + model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fDimShapeY); + + fType = ConvertTypeToString(model.GetTensorType(fNInputs[0])); + + if (model.Verbose()) { + std::cout << NaryOperatorTraits::Name() << " : "; + if (fNInputs.size() == 2) + std::cout << ConvertShapeToString(fShapeInputs[0]) << " , " + << ConvertShapeToString(fShapeInputs[1]); + std::cout << " --> " << ConvertShapeToString(fDimShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { OpName = "op_" + OpName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE BasicNary called to Generate without being initialized first"); } 
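Note on the rewritten Generate below: instead of materializing broadcast copies of the inputs, the operator now emits nested loops over the non-trivial output dimensions and computes each input's flat offset from its own strides, with size-1 axes contributing nothing to the offset. The following standalone sketch shows the same stride-based broadcast indexing on plain std::vector data; the helper names and the element-wise max are illustrative only and are not part of the patch.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Row-major strides: the innermost dimension has stride 1.
std::vector<std::size_t> RowMajorStrides(const std::vector<std::size_t> &shape)
{
   std::vector<std::size_t> strides(shape.size(), 1);
   for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i)
      strides[i] = strides[i + 1] * shape[i + 1];
   return strides;
}

// Element-wise max of two broadcastable tensors whose shapes are already
// right-aligned to the same rank. A size-1 axis contributes nothing to the
// input offset, which is the effect of the index expressions the operator emits.
std::vector<float> BroadcastMax(const std::vector<float> &a, const std::vector<std::size_t> &shapeA,
                                const std::vector<float> &b, const std::vector<std::size_t> &shapeB)
{
   const std::size_t rank = shapeA.size();
   std::vector<std::size_t> shapeY(rank);
   for (std::size_t i = 0; i < rank; ++i)
      shapeY[i] = std::max(shapeA[i], shapeB[i]);
   const auto sA = RowMajorStrides(shapeA);
   const auto sB = RowMajorStrides(shapeB);
   const auto sY = RowMajorStrides(shapeY);
   std::size_t length = 1;
   for (auto d : shapeY)
      length *= d;
   std::vector<float> y(length);
   for (std::size_t id = 0; id < length; ++id) {
      std::size_t ia = 0, ib = 0;
      for (std::size_t k = 0; k < rank; ++k) {
         const std::size_t idx = (id / sY[k]) % shapeY[k]; // multi-index along axis k
         if (shapeA[k] != 1) ia += idx * sA[k];            // broadcast axis -> no offset
         if (shapeB[k] != 1) ib += idx * sB[k];
      }
      y[id] = std::max(a[ia], b[ib]);
   }
   return y;
}

int main()
{
   // shapes {2,3} and {1,3}: the second input is broadcast along the first axis
   const std::vector<float> a{1, 5, 2, 4, 0, 7};
   const std::vector<float> b{3, 3, 3};
   for (float v : BroadcastMax(a, {2, 3}, b, {1, 3}))
      std::cout << v << " "; // prints: 3 5 3 4 3 7
   std::cout << "\n";
}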
std::stringstream out; - size_t length = ConvertShapeToLength(fShapeY); + auto length = ConvertDimShapeToLength(fDimShapeY); out << SP << "\n//------ BasicNary operator\n"; - if (fBroadcast) { - for (size_t i = 0; i < fNInputs.size(); i++) { - if (fNBroadcastedInputs[i] != fNInputs[i]) { - out << SP << SP << "// Broadcasting " << fNInputs[i] << " to " << ConvertShapeToString(fShapeY) << "\n"; - out << SP << SP << "{\n"; - out << SP << SP << SP << fType << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" + fNInputs[i] << ", " << ConvertShapeToString(fShapeInputs[i]); - out << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << SP << "std::copy(data, data + " << length << ", " << fNBroadcastedInputs[i] << ");\n"; - out << SP << SP << SP << "delete[] data;\n"; - out << SP << SP << "}\n"; - } - } - } - if (fNInputs.size() == 1) { + int nInputs = fNInputs.size(); + + if (nInputs == 1) { out << SP << "std::copy(tensor_" << fNInputs[0] << ", tensor_" << fNInputs[0] << " + "; out << length << ", tensor_" << fNY << ");\n"; } else { - std::vector inputs(fNBroadcastedInputs.size()); - for (size_t i = 0; i < fNBroadcastedInputs.size(); i++) { - inputs[i] = fNBroadcastedInputs[i] + "[id]"; + + // implement operator without broadcasting, but using loos on all indices + std::vector> inputStrides(nInputs); + for (int i = 0; i < nInputs; i++) + inputStrides[i] = UTILITY::ComputeStrideFromShape(fShapeInputs[i]); + + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + // make loop on output indices + std::string compute_idx_Y; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + // find indices for input tensors + std::vector inputs(nInputs); + for (int ipt = 0; ipt < nInputs; ipt++ ) { + std::string compute_idx_X; + auto & shape = fShapeInputs[ipt]; + auto & stride = inputStrides[ipt]; + if (shape.empty() || + std::all_of(shape.begin(), shape.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X = "0"; + } else { + for (size_t i = 0; i < shape.size(); ++i) { + if (shape[i].dim == 1 || shape[i].GetVal() == "1") + continue; + compute_idx_X += "idx_" + std::to_string(i + (fDimShapeY.size() - shape.size())); + if (stride[i].GetVal() != "1") + compute_idx_X += " * " + stride[i].GetVal(); + compute_idx_X += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X.pop_back(); + } + inputs[ipt] = "tensor_" + fNInputs[ipt] + "[" + compute_idx_X + "]"; + } + + // perform the operation + for (int j = 0; j < nloop + 1; j++) out << SP; + std::string output = "tensor_" + fNY + "[" + compute_idx_Y + "]"; + out << NaryOperatorTraits::Op(output, inputs); + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } - out << SP << "for (size_t id = 0; id < " << length << "; id++) 
{\n"; - out << NaryOperatorTraits::Op("tensor_" + fNY + "[id]", inputs); - out << SP << "}\n"; } return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx index f48e27ee4f264..8267bb8a7e4f4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx @@ -46,7 +46,7 @@ public: throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model"); } fShape = model.GetDimTensorShape(fNX); - // shoud we add a check if the same type + // should we add a check if the same type auto inputType = model.GetTensorType(fNX); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; @@ -57,29 +57,30 @@ public: } else fIsOutputConstant = false; + } else if (model.IsShapeTensor(fNX) && ConvertStringToType(fAttrType) == ETensorType::INT64) { + auto shapeData = model.GetShapeTensorValues(fNX); + model.AddShapeTensor(fNY, shapeData, fShape.size() == 0); + fIsOutputConstant = true; } if (!fIsOutputConstant) model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape); if (model.Verbose()) { - std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY; + std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY + << " shape " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " (constant) "; std::cout << std::endl; } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; + std::string Generate(std::string opName) override { + + // output shape can be empty if is a scalar - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first"); - } std::stringstream out; auto length = ConvertDimShapeToLength(fShape); - // out << SP << ETensorType << " " << OpName << "_attr = " << fattr << ";\n"; - out << "\n//------ CAST\n"; + out << "\n//------ CAST " << opName << " ---> " << fNY << " " << ConvertDimShapeToString(fShape) << "\n"; // no generated code for constant outputs if (fIsOutputConstant) return out.str(); diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 0d365ae517de5..40c8923676aaf 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -65,11 +65,10 @@ private: std::vector fDimShapeX1; std::vector fDimShapeX2; std::vector fShapeY; - std::string fNBroadcastedX1; - std::string fNBroadcastedX2; + std::vector fDimShapeY; ETensorType fTensorType1 = ETensorType::UNDEFINED; ETensorType fTensorType2 = ETensorType::UNDEFINED; - bool fBroadcast = false; + int fBroadcastFlag = 0; public: @@ -115,136 +114,175 @@ public: } fTensorType1 = model.GetTensorType(fNX1); fTensorType2 = model.GetTensorType(fNX2); - bool broadcast = !UTILITY::AreSameShape(fShapeX1, fShapeX2); - if (broadcast) { - // Y is the common shape of A and B - fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); - bool broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); - bool broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); - // Broadcast A to Y - if (broadcastX1) { - if (model.IsInitializedTensor(fNX1)) { - auto data = model.GetInitializedTensorData(fNX1); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX1, fShapeY), - std::default_delete()); - // Update the data and the shape of A - 
model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData); - fShapeX1 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting A - fNBroadcastedX1 = "Broadcasted" + fNX1; - model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); + // case of non dynamic tensors + if (!fShapeX1.empty() && !fShapeX2.empty()) { + bool broadcastX1 = false; + bool broadcastX2 = false; + if (UTILITY::AreSameShape(fShapeX1, fShapeX2)) { + // no broadcast needed + fShapeY = fShapeX1; + } else { + // Y is the common shape of A and B + fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); + broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); + broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); + } + + + // analyze case of constant tensors or shape tensors (which have known shapes but data as Dim values + // normal case with non-dynamic tensor is also here + T *data1 = nullptr; + T *data2 = nullptr; + std::unique_ptr broadcastedData1; + std::unique_ptr broadcastedData2; + // data for shape tensors + std::vector shapeData1; + std::vector shapeData2; + size_t length = ConvertShapeToLength(fShapeY); + bool *outData = new bool[length]; + if (model.IsInitializedTensor(fNX1)) { + data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); + if (broadcastX1) { + broadcastedData1 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data1, fShapeX1, fShapeY)); + data1 = broadcastedData1.get(); } + + } else if (model.IsShapeTensor(fNX1)) { + shapeData1 = model.GetShapeTensorValues(fNX1); } - // Broadcast B to Y - if (broadcastX2) { - if (model.IsInitializedTensor(fNX2)) { - auto data = model.GetInitializedTensorData(fNX2); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX2, fShapeY), - std::default_delete()); - // Update the data and the shape of B - model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData); - fShapeX2 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - fNBroadcastedX2 = "Broadcasted" + fNX2; - model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); + if (model.IsInitializedTensor(fNX2)) { + data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); + if (broadcastX2) { + broadcastedData2 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data2, fShapeX2, fShapeY)); + data2 = broadcastedData2.get(); } + } else if (model.IsShapeTensor(fNX2)) { + shapeData2 = model.GetShapeTensorValues(fNX2); } - } else { - fShapeY = fShapeX1; - } - // case of constant tensors - T * data1 = nullptr; - T * data2 = nullptr; - std::vector shapeData1; - std::vector shapeData2; - size_t length = ConvertShapeToLength(fShapeY); - bool * outData = new bool[length]; - if (model.IsInitializedTensor(fNX1)) { - data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); - } else if (model.IsShapeTensor(fNX1)) { - shapeData1 = model.GetShapeTensorValues(fNX1); - } - if (model.IsInitializedTensor(fNX2)) { - data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); - } else if (model.IsShapeTensor(fNX2)) { - shapeData2 = model.GetShapeTensorValues(fNX2); - } - if (data1 && data2) { - fIsOutputConstant = true; - for (size_t i = 0; i < length; i++) - outData[i] = ComparisionTrait::Result(data1[i], data2[i]); - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << 
ConvertValuesToString(length,outData) << std::endl; - } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { - fIsOutputConstant = true; - if (data1 && !data2) { - // data 1 is constant and data2 is shape - for (size_t i = 0; i < length; i++) { - if (shapeData2[i].isParam) { - if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { - fIsOutputConstant = false; - break; - } else { - // assume a comparison is done with .dim = 0 - shapeData2[i].dim = 0; + if (data1 && data2) { + fIsOutputConstant = true; + for (size_t i = 0; i < length; i++) + outData[i] = ComparisionTrait::Result(data1[i], data2[i]); + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << std::endl; + } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { + fIsOutputConstant = true; + if (data1 && !data2) { + // data 1 is constant and data2 is shape + for (size_t i = 0; i < length; i++) { + if (shapeData2[i].isParam) { + if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData2[i].dim = 0; + } } + outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); } - outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); - } - } else if (!data1 && data2) { - // data 1 is shape and dat2 is constant - for (size_t i = 0; i < length; i++) { - if (shapeData1[i].isParam) { - if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + } else if (!data1 && data2) { + // data 1 is shape and dat2 is constant + for (size_t i = 0; i < length; i++) { + if (shapeData1[i].isParam) { + if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData1[i].dim = 0; + } + } + outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); + } + } else if (!shapeData1.empty() && !shapeData2.empty()) { + // both data1 and data2 are shape tensors + for (size_t i = 0; i < length; i++) { + if (!shapeData1[i].isParam && !shapeData2[i].isParam) { + outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); + } else if (shapeData1[i].isParam && shapeData2[i].isParam) { + if (shapeData1[i].param == shapeData2[i].param) + outData[i] = ComparisionTrait::Result(1, 1); // comparison of two equal value + else { + fIsOutputConstant = false; + break; + } + } else { fIsOutputConstant = false; break; - } else { - // assume a comparison is done with .dim = 0 - shapeData1[i].dim = 0; } } - outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); } - } else if (!shapeData1.empty() && !shapeData2.empty() ) { - // both data1 and data2 are shape tensors - for (size_t i = 0; i < length; i++) { - if (!shapeData1[i].isParam && !shapeData2[i].isParam) { - outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); - } - else if (shapeData1[i].isParam && shapeData2[i].isParam) { - if (shapeData1[i].param == shapeData2[i].param) - outData[i] = ComparisionTrait::Result(1,1); // comparison of two equal value - else { - fIsOutputConstant = false; - break; + if (fIsOutputConstant) { + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << 
ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << " (constant) " << std::endl; + } + } + delete[] outData; + // case of non constant output (no constant or shape tensors) + if (!fIsOutputConstant && !fShapeY.empty()) { + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); + fDimShapeY = ConvertShapeToDim(fShapeY); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << std::endl; + } + } else { + // case of dynamic tensors + // case A or B have dynamic shapes. We need to broadcast if shape are not same + auto ret = UTILITY::MultidirectionalBroadcastShape(fDimShapeX1, fDimShapeX2); + fBroadcastFlag = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; } } - else { - fIsOutputConstant = false; - break; + return false; + }; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(fDimShapeX1[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (fDimShapeX1[i].dim != 1) + s = fDimShapeX1[i]; + else + s = fDimShapeX2[i]; + } else if (IsInputDimParam(fDimShapeX2[i].param)) { + if (fDimShapeX2[i].dim != 1) + s = fDimShapeX2[i]; + else + s = fDimShapeX1[i]; + } } } } - if (fIsOutputConstant) { - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << " (constant) " << std::endl; + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fDimShapeY); + if (model.Verbose()) { + std::cout << ComparisionTrait::Name() << " : " << fNX1 << " " << ConvertShapeToString(fDimShapeX1) << " , " + << fNX2 << " " << ConvertShapeToString(fDimShapeX2) << " --> " + << fNY << " " << ConvertShapeToString(fDimShapeY) << std::endl; + model.PrintIntermediateTensors(); } } - delete [] outData; - if (!fIsOutputConstant) { - model.AddIntermediateTensor(fNY, ETensorType::BOOL , fShapeY); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; - } // check if this is not output operators to add a specific line for definining the tensor_xxx variable const auto & outputTensorNames = model.GetOutputTensorNames(); @@ -257,39 +295,85 @@ public: if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate without being initialized first"); } std::stringstream out; out << SP << "\n//------ " << ComparisionTrait::Name() << " " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; - size_t length = ConvertShapeToLength(fShapeY); - // Broadcast A if it's uninitialized - if (!fNBroadcastedX1.empty()) { - std::string type1 = ConvertTypeToString(fTensorType1); - out << SP << "// Broadcasting 
uninitialized tensor " << fNX1 << "\n"; - out << SP << "{\n"; - out << SP << SP << type1 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type1 << ">(tensor_" << fNX1 << ", " << ConvertShapeToString(fShapeX1) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX1 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + + // need to add check if tensors are compatible as in binary operator + + // use same code as Binary operator + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeX1); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeX2); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_X1, compute_idx_X2, compute_idx_Y; + if (fDimShapeX1.empty() || + std::all_of(fDimShapeX1.begin(), fDimShapeX1.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X1 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX1.size(); ++i) { + if (fDimShapeX1[i].dim == 1 || fDimShapeX1[i].GetVal() == "1") + continue; + compute_idx_X1 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX1.size())); + if (stridesA[i].GetVal() != "1") + compute_idx_X1 += " * " + stridesA[i].GetVal(); + compute_idx_X1 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X1.pop_back(); + } + if (fDimShapeX2.empty() || + std::all_of(fDimShapeX2.begin(), fDimShapeX2.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X2 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX2.size(); ++i) { + if (fDimShapeX2[i].dim == 1 || fDimShapeX2[i].GetVal() == "1") + continue; + compute_idx_X2 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX2.size())); + if (stridesB[i].GetVal() != "1") + compute_idx_X2 += " * " + stridesB[i].GetVal(); + compute_idx_X2 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X2.pop_back(); } - // Broadcast B if it's uninitialized - if (!fNBroadcastedX2.empty()) { - std::string type2 = ConvertTypeToString(fTensorType2); - out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n"; - out << SP << "{\n"; - out << SP << SP << type2 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type2 << ">(tensor_" << fNX2 << ", " << ConvertShapeToString(fShapeX2) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX2 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "tensor_" << fNY << "[" << compute_idx_Y << "] = " + << ComparisionTrait::Op( "tensor_" + fNX1 + "[" + 
compute_idx_X1 + "]" , + "tensor_" + fNX2 + "[" + compute_idx_X2 + "]") << " ;\n"; + + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; } - const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1; - const std::string& nameX2 = fNBroadcastedX2.empty()? fNX2 : fNBroadcastedX2; - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n"; - out << SP << "}\n"; // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector if (!fIsModelOutput) out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx index ad855341dfc17..d8155195c9f49 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx @@ -123,7 +123,7 @@ concat_dim = inputs[i][iaxis]; else if (inputs[i][iaxis].isParam || concat_dim.isParam) { concat_dim = - Dim{ concat_dim.GetVal() + std::string("+ ") + inputs[i][iaxis].GetVal(), + Dim{ concat_dim.GetVal() + std::string(" + ") + inputs[i][iaxis].GetVal(), static_cast(-1)}; } else { concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; @@ -156,7 +156,7 @@ } // output shape for concatenated axis - ret[fAxis] = Dim{concat_dim}; + ret[fAxis] = concat_dim; } // case of stacking (not supported yet) @@ -205,7 +205,7 @@ size_t inputLength = ConvertShapeToLength(inputShape); std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); offset += inputLength; - // data do not need to be written as a weight + // data do not need to be written in teh generated code model.SetNotWritableInitializedTensor(input); } model.AddConstantTensor(fOutput, outputShape, outputData.data()); @@ -221,15 +221,18 @@ std::vector inputData; auto inputShape = model.GetTensorShape(input); // shape is not dynamic size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) + if (model.IsShapeTensor(input)) { inputData = model.GetShapeTensorValues(input); - else if (model.IsConstantTensor(input)) { + } else if (model.IsInitializedTensor(input)) { inputData.resize(inputLength); auto intData = static_cast(model.GetInitializedTensorData(input).get()); for (size_t i = 0; i < inputData.size(); i++) inputData[i] = Dim{ static_cast(intData[i])}; } - std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + else { + // this should not happen + throw std::runtime_error("TMVA SOFIE Concat Operator- invalid input type for shape output type"); + } std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); offset += inputLength; } @@ -251,13 +254,15 @@ } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; opName = "op_" + opName; + std::stringstream out; + out<<"\n//--------- Concat " << opName << " --> " << fOutput << " " << ConvertShapeToString(fOutputShape) << "\n"; + + if (fIsOutputConstant) return out.str(); + if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } - std::stringstream out; - out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; 
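Note on the Concat fix below, for the case where the concat axis is not the first one: for every combination of indices before the axis, the generated code copies one contiguous block per input and must advance the output offset by that input's block size (its stride just before the concat axis), not by the axis dimension alone. A small standalone sketch of row-major concatenation along an arbitrary axis, with illustrative names, assuming all inputs share the remaining dimensions:

#include <cstddef>
#include <iostream>
#include <vector>

// Concatenate row-major tensors that share all dimensions except `axis`.
// For every combination of indices before `axis`, one contiguous block per input
// is copied; the block size is dim_axis * inner, i.e. the input stride just
// before the concat axis.
std::vector<float> ConcatAxis(const std::vector<std::vector<float>> &inputs,
                              const std::vector<std::vector<std::size_t>> &shapes, std::size_t axis)
{
   std::size_t outer = 1, inner = 1, concatDim = 0;
   for (std::size_t k = 0; k < axis; ++k)
      outer *= shapes[0][k];
   for (std::size_t k = axis + 1; k < shapes[0].size(); ++k)
      inner *= shapes[0][k];
   for (const auto &s : shapes)
      concatDim += s[axis];
   std::vector<float> out(outer * concatDim * inner);
   std::size_t idxOut = 0;
   for (std::size_t i = 0; i < outer; ++i) {
      for (std::size_t j = 0; j < inputs.size(); ++j) {
         const std::size_t block = shapes[j][axis] * inner; // contiguous block of input j
         const std::size_t idxIn = i * block;               // start of that block in input j
         for (std::size_t c = 0; c < block; ++c)
            out[idxOut + c] = inputs[j][idxIn + c];
         idxOut += block; // advance by the block size of input j, not by the output axis dimension
      }
   }
   return out;
}

int main()
{
   // two {2,2} matrices concatenated along axis 1 -> shape {2,4}
   const std::vector<float> a{1, 2, 3, 4};
   const std::vector<float> b{5, 6, 7, 8};
   for (float v : ConcatAxis({a, b}, {{2, 2}, {2, 2}}, 1))
      std::cout << v << " "; // prints: 1 2 5 6 3 4 7 8
   std::cout << "\n";
}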
for(int i = 0; i0) - out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis].GetVal() << ";\n"; + out << SP << SP << SP << "idxOut += " << inStrides[j-1][fAxis-1].GetVal() << ";\n"; out << SP << SP << SP << "int idxIn" << j <<" = "; for (int k = 0; k < fAxis; k++) { if (k > 0) out << " + "; out << inStrides[j][k].GetVal() << "*i" << k; } out << ";\n"; - out << SP << SP << SP << "for (size_t iC = 0; iC < " << fInputShapes[j][fAxis].GetVal() << "; ++iC) {\n"; + out << SP << SP << SP << "for (size_t iC = 0; iC < " << inStrides[j][fAxis-1].GetVal() << "; ++iC) {\n"; out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n"; out << SP << SP << SP << "}\n"; // concatenate the axis values diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 1cf5d13f5cd6f..3b339e3440488 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -136,9 +136,9 @@ public: std::stringstream out; if (fIsOutputConstant) { if (fNX.empty()) - out << "// ---- Constant (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- Constant (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; else - out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; return out.str(); } // Only ConstantOfShape might require generation code diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 95f226ca91d4b..2681eeb2dd84c 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -297,16 +297,25 @@ public: } } } - // output channel size can be parametric + // output channel size can be parametric and is an expression std::vector outputDims = std::vector(fShapeY.begin()+2, fShapeY.end()); - auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + //check if shape is not parametric + std::vector outputInts = ConvertShapeToInt(outputDims); + Dim channelDim; + if (outputInts.empty()) { + auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + channelDim = Dim{ outputChannelSize, static_cast(-1)}; + } else { + size_t outputChannelSize = ConvertShapeToLength(outputInts); + channelDim = Dim{ outputChannelSize }; + } size_t kernelSize = fAttrKernelShape[0]; for (size_t i = 1; i < fDim; i++) { kernelSize *= fAttrKernelShape[i]; } std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}}; + std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim }; model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); convK = fNX +"_f"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx index 81411b8ebf71a..1d51c59380dae 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx @@ -153,13 +153,14 @@ public: } std::string Generate(std::string opName) override { + opName = "op_" + opName; + std::stringstream out; + out << "//--------- Gather " << opName << " --> " << fNY << " " << 
ConvertShapeToString(fShapeY) << "\n"; if (fIsOutputConstant) { // no code to generate here for constant output. Tensor output is defined in Session constructor - return "//---------------------------------------\n"; + out << "//--------------------(constant)----------\n"; + return out.str(); } - opName = "op_" + opName; - std::stringstream out; - out << "//--------- Gather " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; // The shape of the output is q + r - 1 size_t r = fShapeX.size(); // Indices of shape q diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 9cac15a14fc52..7c138c3b3def5 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -39,15 +39,6 @@ public: "TMVA::SOFIE - Unsupported type by Range operator"); } - std::vector TypeInference(std::vector input) override { - return input; - } - - std::vector> ShapeInference(std::vector> input) override { - auto ret = input; //suggest copy to compiler - return ret; - } - void Initialize(RModel& model) override { //input must be a graph input, or already initialized intermediate tensor if (!model.CheckIfTensorAlreadyExist(fNStart)) { @@ -63,32 +54,94 @@ public: std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); } ETensorType type = ConvertStringToType(fType); - if (model.IsInitializedTensor(fNStart) && model.IsInitializedTensor(fNDelta) && model.IsInitializedTensor(fNLimit)) { - T * start = static_cast(model.GetInitializedTensorData(fNStart).get()); - T * limit = static_cast(model.GetInitializedTensorData(fNLimit).get()); - T * delta = static_cast(model.GetInitializedTensorData(fNDelta).get()); - if (!start || !delta || !limit) - std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); - T a = *start; - T b = *limit; - T d = *delta; - int number_of_elements = std::max( static_cast(std::ceil( (b - a) / d )) , 0. 
); + + + + auto analyzeInput = [&](const std::string & tName, T & value, Dim & dim) { + int ftype = 0; // type of input (0 intermediate, 1 constant , 2 shape) + if (model.IsInitializedTensor(tName)) { + T * data = static_cast(model.GetInitializedTensorData(tName).get()); + if (!data) + std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); + value = *data; + ftype = 1; + } else if (model.IsShapeTensor(tName)) { + auto data = model.GetShapeTensorValues(tName); + dim = data[0]; + if (!dim.isParam) { + value = static_cast(dim.dim); + ftype = 1; + } else + ftype = 2; + } + return ftype; + }; + + T start_value; + T limit_value; + T delta_value; + Dim start_dim; + Dim limit_dim; + Dim delta_dim; + int res1 = analyzeInput(fNStart, start_value, start_dim); + int res2 = analyzeInput(fNLimit, limit_value, limit_dim); + int res3 = analyzeInput(fNDelta, delta_value, delta_dim); + if (res1 == 0 || res2 == 0 || res3 == 0) { + // cannot know at compile time- need to do fully at run time + // + fShape = {Dim{"range_size_" + fNStart + "_" + fNLimit}}; + model.AddDynamicTensor(fNOutput, type, fShape); + } else if (res1 == 1 && res2 == 1 && res3 == 1) { + size_t number_of_elements = std::max(static_cast(std::ceil((limit_value - start_value) / delta_value )) , 0 ); + fIsOutputConstant = true; + + // compute output std::vector output(number_of_elements); - for (int i=0; i shape = {static_cast(number_of_elements)}; + std::vector shape = {number_of_elements}; model.AddConstantTensor(fNOutput,shape, output.data()); - fIsOutputConstant = true; - // set the input tensor not writable + fShape = ConvertShapeToDim(shape); + + // set the input tensor not writable model.SetNotWritableInitializedTensor(fNStart); model.SetNotWritableInitializedTensor(fNDelta); model.SetNotWritableInitializedTensor(fNLimit); + + } else { // case of a shape tensor + std::string start = (res1 == 1) ? std::to_string(start_value) : start_dim.GetVal(); + std::string limit = (res2 == 1) ? std::to_string(limit_value) : limit_dim.GetVal(); + std::string delta = (res3 == 1) ? 
std::to_string(delta_value) : delta_dim.GetVal(); + std::stringstream s; + if (type == ETensorType::FLOAT ) { + if (delta_value == 1) + s << "std::max(std::ceil("<< limit << " - " << start << "),0.0f)"; + else + s << "std::max(std::ceil(("<< limit << " - " << start << ")/" << delta << "),0.0f)"; + } else if (type == ETensorType::INT64 ) { + if (delta == "1") { + if (start == "0") + s << limit; + else + s << "std::max((" << limit << " - " << start << "),0L)"; + } else { + if (start == "0") + s << "((" << limit << ")/" << delta << ")"; + else + s << "std::max((" << limit << " - " << start << ")/"<< delta << "),0L)"; + } + } else { + throw + std::runtime_error("TMVA SOFIE Range Op Input Tensor " + ConvertTypeToString(type) + "is not supported"); + } + + + fShape = { Dim {s.str(), static_cast(-1)} }; + model.AddDynamicTensor(fNOutput,type, fShape); } - else { - fShape = {Dim{"range_size"}}; - model.AddDynamicTensor(fNOutput, type, fShape); - } + + if (model.Verbose()) { std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); @@ -96,26 +149,31 @@ public: } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { std::stringstream out; - out << "\n//------ Range\n"; + out << "\n//------ Range " << opName << "---> " << ConvertDimShapeToString(fShape) << "\n"; if (fIsOutputConstant) return out.str(); - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); } std::string sizeName = fShape[0].param; - out << SP << "size_t " << sizeName << " = static_cast(std::max(std::ceil((static_cast(*tensor_" << fNLimit << ") - static_cast(*tensor_" << fNStart << ")) / static_cast(*tensor_" << fNDelta << ")), 0.0f));\n"; - out << SP << "if (" << sizeName << " > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP << "fTensor_" << fNOutput << ".resize(" << sizeName << ");\n"; + if (sizeName.find("range_size") != std::string::npos) + sizeName = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + + ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; + out << SP << "{\n"; + out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + out << SP << SP << SP << "fTensor_" << fNOutput << ".resize(range);\n"; // need to re-initialized pointer to tensor data - out << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << "}\n"; - out << SP << "for (size_t i = 0; i < " << sizeName << "; i++) {\n"; - out << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; + out << SP << SP << "}\n"; + out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; + out << SP << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx index 1204770d3d321..1da588e965a01 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx @@ -166,7 
+166,7 @@ public: std::string reducedLength; if (fInputDimShape) { reducedLength = "reducedLength_" + opName; - out << SP << "size_t " << reducedLength << " = " << inputLength << " / " << outputLength << ";\n"; + out << SP << "size_t " << reducedLength << " = (" << inputLength << ") / (" << outputLength << ");\n"; } else { int rLength = std::stoi(inputLength) / std::stoi(outputLength); reducedLength = std::to_string(rLength); diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx index 2634b68dbc875..a3ed28c4860bc 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx @@ -108,6 +108,9 @@ public: if (IsInteger(tmp_length) && IsInteger(input_length)) output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else if (IsInteger(tmp_length) && std::stoi(tmp_length) == 1) { + output_shape[i] = Dim{input_length, static_cast(-1)}; + } else { //we can try simplifying expression if tmp_length is integer and part of input_length // contains tmp_length @@ -243,7 +246,7 @@ public: // check if optional tensor exists defining shape or axes if (!fNInput2.empty()) { if (model.CheckIfTensorAlreadyExist(fNInput2)) { - if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + if (model.IsInitializedTensor(fNInput2)) { // assume input shape is an initialized tensor auto dptr = model.GetInitializedTensorData(fNInput2); auto values = static_cast(dptr.get()); @@ -260,6 +263,9 @@ public: fShapeOutput = ShapeInference({fShapeInput})[0]; // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); + } else if (model.IsShapeTensor(fNInput2)) { + auto shapeData = model.GetShapeTensorValues(fNInput2); + fShapeOutput = shapeData; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx index b23e3b0a86d21..3add774b0d8d4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx @@ -235,6 +235,8 @@ public: if (iend < 0) { std::string send = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-iend) +")"; fEnd[fAxes[i]] = Dim{send,size_t(-1)}; + } else if (iend == std::numeric_limits::max()){ + fEnd[fAxes[i]] = fShapeInput[fAxes[i]]; } else { fEnd[fAxes[i]] = Dim{size_t(iend)}; } @@ -332,23 +334,23 @@ public: else { model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) { - std::cout << "Slice ---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << "Slice " << fNData << " " << ConvertShapeToString(fShapeInput) + << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; } } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::string Generate(std::string opName) override { - OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeOutput.empty()){ throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); } std::stringstream out; - //std::string opName = "Slice"; - out << SP << "///------- Slice operator\n" << std::endl; + out << "///------- Slice operator " << opName << "---> " << fNOutput << " " + << ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; + if (fIsOutputConstant) 
return out.str(); //no op for constant tensors // loop on the dimensions depending no the orders size_t ndim = fShapeInput.size(); auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); diff --git a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx index 1086f72eae71c..9b291b40e0854 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx @@ -20,8 +20,8 @@ private: std::string fNRepeats; std::string fNInput; std::string fNY; - std::vectorfShapeInput; - std::vector fShapeY; + std::vectorfShapeInput; + std::vector fShapeY; public: ROperator_Tile(){} @@ -35,13 +35,18 @@ public: return input; } - std::vector> ShapeInference(std::vector> input) override { - std::vector ret = input[0]; - - for(size_t i=0; i < input[1].size(); i++) { - ret[i]=ret[i]*input[1][i]; + std::vector DoShapeInference(const std::vector & input, const std::vector repeat) { + std::vector ret = input; + for(size_t i=0; i < repeat.size(); i++) { + if (repeat[i] != 1) { + if (ret[i].isParam) { + ret[i] = Dim{ std::string(ret[i].GetVal() + "*" + std::to_string(repeat[i])), static_cast(-1) }; + } else { + ret[i]=Dim { ret[i].dim *repeat[i] }; + } + } } - return {ret}; + return ret; } void Initialize(RModel& model) override { @@ -52,7 +57,7 @@ public: if (model.CheckIfTensorAlreadyExist(fNRepeats) == false){ throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); } - fShapeInput=model.GetTensorShape(fNInput); + fShapeInput=model.GetDimTensorShape(fNInput); // if repeats vector is not initialized we cannot deduce shape of output // not support for time being this case @@ -79,12 +84,12 @@ public: std::copy(repeats_data, repeats_data + num_elements, repeats_vector.begin()); - fShapeY = ShapeInference({fShapeInput,repeats_vector})[0]; + fShapeY = DoShapeInference(fShapeInput,repeats_vector); model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); if (model.Verbose()) - std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + std::cout << "Tile: " << fNInput << " " << ConvertDimShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) << " given repeats " << ConvertShapeToString(repeats_vector) << std::endl; } @@ -103,9 +108,9 @@ public: std::string output = "tensor_" + fNY; out << "///-------- Tile operator\n"; out << "{\n"; // add scope to re-use same names - out << "const int input_shape[" << fShapeInput.size() << "] = " << ConvertShapeToString(fShapeInput) << ";\n"; + out << "const size_t input_shape[" << fShapeInput.size() << "] = " << ConvertDimShapeToString(fShapeInput) << ";\n"; - out << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; + out << "int inputLength = " << ConvertDimShapeToLength(fShapeInput) << ";\n"; out << "int s = 1;\n"; // loop from inverse dim order out << "for (int i = " << fShapeInput.size()-1 << "; i >=0; i--) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx index 0869437bb6b0c..edee91de8eb57 100644 --- a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx @@ -19,13 +19,13 @@ private: int fAttrLargest; int fAttrSorted; - size_t fK; + Dim fK; std::string fNK; std::string fNX; std::string fNVal; std::string fNInd; - std::vector fShapeX; - std::vector fShapeY; + std::vector fShapeX; + std::vector fShapeY; std::string fType; public: @@ -43,23 +43,10 @@ public: } 
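Note on the TopK changes that follow: fK becomes a Dim so that the clamp k = min(k, axis dimension) can be a runtime expression, while the generated selection logic stays the same: for each slice along the chosen axis the (value, index) pairs are sorted and the first k entries are written to the value and index outputs. A compact standalone sketch of that per-slice selection for a 2D row-major tensor along its last axis; the function name and the driver are illustrative, not the SOFIE generator itself.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Top-k (largest, sorted) along the last axis of a [rows x cols] row-major tensor.
// Writes the selected values and their indices along that axis.
void TopKLastAxis(const std::vector<float> &x, std::size_t rows, std::size_t cols, std::size_t k,
                  std::vector<float> &values, std::vector<int64_t> &indices)
{
   k = std::min(k, cols); // mirrors the clamp fK = min(k, axis dimension) done at initialization
   values.resize(rows * k);
   indices.resize(rows * k);
   std::vector<std::pair<float, std::size_t>> elements(cols);
   for (std::size_t i = 0; i < rows; ++i) {
      for (std::size_t j = 0; j < cols; ++j)
         elements[j] = {x[i * cols + j], j};
      // keep only the k largest entries, in descending order
      std::partial_sort(elements.begin(), elements.begin() + k, elements.end(),
                        [](const std::pair<float, std::size_t> &a, const std::pair<float, std::size_t> &b) {
                           return a.first > b.first;
                        });
      for (std::size_t l = 0; l < k; ++l) {
         values[i * k + l] = elements[l].first;
         indices[i * k + l] = static_cast<int64_t>(elements[l].second);
      }
   }
}

int main()
{
   const std::vector<float> x{0.1f, 0.9f, 0.5f, 0.3f, 2.0f, 1.0f, 3.0f, 0.0f};
   std::vector<float> values;
   std::vector<int64_t> indices;
   TopKLastAxis(x, 2, 4, 2, values, indices);
   for (std::size_t i = 0; i < values.size(); ++i)
      std::cout << values[i] << " (idx " << indices[i] << ") "; // 0.9 (idx 1) 0.5 (idx 2) 3 (idx 2) 2 (idx 0)
   std::cout << "\n";
}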
std::vector TypeInference(std::vector input) override { - ETensorType ret = input[0]; - return {ret, ret}; - } - - std::vector> ShapeInference(std::vector> input) override { - if (input.size() != 2) { - throw std::runtime_error("TMVA SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); - } - - auto shape = input[0]; // Shape format: [ m x n x o x p ... ] - - // set the dimension at the specified axis to k (fAttrAxis is checked before that is in the correct range - shape[fAttrAxis] = fK; // Modified shape: [ m x n x k x p ... ] - return {shape, shape}; + ETensorType ret = input[0]; + return {ret, ret}; } - void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor @@ -70,10 +57,10 @@ public: throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor i.e. K is not found in model"); } - fShapeX = model.GetTensorShape(fNX); + fShapeX = model.GetDimTensorShape(fNX); auto fShapeK = model.GetTensorShape(fNK); auto kptr = static_cast(model.GetInitializedTensorData(fNK).get()); - fK = *kptr; + size_t kval = *kptr; model.SetNotWritableInitializedTensor(fNK); fAttrAxis = fAttrAxis < 0 ? fShapeX.size() + fAttrAxis : fAttrAxis; if(static_cast(fAttrAxis) >= fShapeX.size()){ @@ -81,14 +68,25 @@ public: std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+fShapeX.size()+" ."); } // fK cannot be larger that axis dimension - fK = std::min(fK, fShapeX[fAttrAxis]); + if (fShapeX[fAttrAxis].isParam) + fK = Dim{std::string("std::min(size_t(" + std::to_string(kval) + "), " + fShapeX[fAttrAxis].GetVal() + ")" ), static_cast(-1) }; + else + fK = Dim { std::min(kval, fShapeX[fAttrAxis].dim) }; + + // output shape is equal to input shape apart for value in fAttrAxis + fShapeY = fShapeX; + fShapeY[fAttrAxis] = Dim{fK}; - fShapeY = ShapeInference({fShapeX, fShapeK})[0]; model.AddIntermediateTensor(fNVal, model.GetTensorType(fNX), fShapeY); // output indices should be an int64 tensor model.AddIntermediateTensor(fNInd, ETensorType::INT64, fShapeY); fType = ConvertTypeToString(model.GetTensorType(fNX)); + + if (model.Verbose()) { + std::cout << "TopK " << fNX << " " << ConvertShapeToString(fShapeX) + << "---> " << fNVal << " " << ConvertShapeToString(fShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { @@ -101,19 +99,20 @@ public: size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis; out << "\n" << SP << "//------ TopK\n"; - size_t length=ConvertShapeToLength(fShapeX); + auto length=ConvertDimShapeToLength(fShapeX); auto strideX = UTILITY::ComputeStrideFromShape(fShapeX); auto strideY = UTILITY::ComputeStrideFromShape(fShapeY); // we perform loop on dimension before sorted axis and after sorted axis - size_t n_before = (axis>0) ? length/strideX[axis-1] : 1; - size_t n_after = strideX[axis]; - size_t n_elements = fShapeX[axis]; // number of elements to be sorted + std::vector shape_before(fShapeX.begin(), fShapeX.begin() + axis); // input shape before axis + std::string n_before = (axis>0) ? 
ConvertDimShapeToLength(shape_before) : "1"; + std::string n_after = strideX[axis].GetVal(); + std::string n_elements = fShapeX[axis].GetVal(); // number of elements to be sorted // } out << SP << "{\n"; // to define a separate scope for the operator code out << SP << "std::vector> elements(" << n_elements << ");\n"; // loop on elements before - if (n_before > 1) { + if (n_before != "1") { out << SP << "for (size_t i = 0; i < " << n_before << "; i++) {\n"; out << SP << SP << "size_t xoffset = i*" << strideX[axis-1] << ";\n"; out << SP << SP << "size_t yoffset = i*" << strideY[axis-1] << ";\n"; @@ -122,7 +121,7 @@ public: out << SP << "size_t xoffset = 0;\n"; out << SP << "size_t yoffset = 0;\n"; } - if (n_after > 1) + if (n_after != "1") out << SP << "for (size_t j = 0; j < " << n_after << "; j++) {\n"; else out << SP << "const size_t j = 0;\n"; @@ -149,8 +148,8 @@ public: out << SP << SP << SP << "tensor_" << fNVal << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].first;\n"; out << SP << SP << SP << "tensor_" << fNInd << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].second;\n"; out << SP << SP << "}\n"; - if (n_after > 1) out << SP << SP << "}\n"; - if (n_before> 1) out << SP << "}\n"; + if (n_after != "1") out << SP << SP << "}\n"; + if (n_before != "1") out << SP << "}\n"; out << SP << "}\n"; // end operator scope return out.str(); } diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 2dae4f7d03ce7..dfa46a44c03b0 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -252,8 +252,14 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} + // check if a Tensor is Writable (need to be written in teh file or in the generated code (e.g. as a costant tensor) + // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in + // the generated code + bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. 
tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set as constant (needed for non-flot initialized tensors) + void SetConstant() { fConstant = true;} template T const *data() const diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 3e2c2d6ed332f..85254e0000269 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -222,6 +222,7 @@ bool RModel::IsInitializedTensor(const std::string& tensorName) const { return fInitializedTensors.find(name) != fInitializedTensors.end(); } bool RModel::IsConstantTensor(const std::string& tensorName) const { + // a constant tensor is an initialized tensor but has the constant flag set std::string name = UTILITY::Clean_name(tensorName); auto itr = fInitializedTensors.find(name); if (itr == fInitializedTensors.end()) return false; @@ -522,6 +523,7 @@ void RModel::Initialize(const std::map & inputParams, bool fIntermediateTensorInfos.clear(); fDynamicTensorInfos.clear(); + // loop on inputs and see if shape can be full specified // if the batch size is provided it can be used to specify the full shape // Add the full specified tensors in fReadyInputTensors collection @@ -581,7 +583,7 @@ void RModel::Initialize(const std::map & inputParams, bool if (fUseWeightFile) { bool modelHasWeights = false; for (auto &i : fInitializedTensors) { - if (i.second.type() == ETensorType::FLOAT) { + if (i.second.IsWeightTensor()) { modelHasWeights = true; break; } @@ -612,6 +614,13 @@ void RModel::Initialize(const std::map & inputParams, bool i++; } + // loop on initialized tensors and make the integers as constant to be + // not written in a weight file + for (auto &i : fInitializedTensors) { + if (i.second.IsWeightTensor() && i.second.type() != ETensorType::FLOAT) + i.second.SetConstant(); + } + fIsInitialized = true; } @@ -684,9 +693,11 @@ std::string GenerateConstantTensorCode(const std::pair(i); @@ -772,6 +783,9 @@ void RModel::GenerateIntermediateTensorInfo() { } else if (i.second.type == ETensorType::INT64) { fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; + } else if (i.second.type == ETensorType::BOOL) { + fGC += "std::vector fTensor_" + i.first + ";\n"; + fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } } @@ -1143,7 +1157,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1288,7 +1302,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); From 8de1cfc61f68acbf1a697f33069591d277c39ce8 Mon Sep 17 00:00:00 2001 From: moneta Date: Mon, 10 Nov 2025 23:16:35 +0100 Subject: [PATCH 2/4] 
[tmva][sofie] Remove special-case handling of bool outputs Since boolean tensors are now stored in a std::vector, no special treatment is needed when the output type of the operator is boolean (e.g. in Comparison) --- tmva/sofie/inc/TMVA/ROperator_Comparision.hxx | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 40c8923676aaf..734434357a149 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -56,7 +56,6 @@ template class ROperator_Comparision final : public ROperator{ private: - bool fIsModelOutput = false; std::string fNX1; std::string fNX2; std::string fNY; @@ -283,12 +282,6 @@ public: model.PrintIntermediateTensors(); } } - - // check if this is not output operators to add a specific line for definining the tensor_xxx variable - const auto & outputTensorNames = model.GetOutputTensorNames(); - fIsModelOutput = false; - if (std::find(outputTensorNames.begin(), outputTensorNames.end(), fNY) != outputTensorNames.end()) - fIsModelOutput = true; } std::string Generate(std::string opName) override { @@ -374,9 +367,6 @@ public: out << "}\n"; } - // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector - if (!fIsModelOutput) - out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; return out.str(); } From 8d2342460609fbe56c6b4aa039e227218076c0a5 Mon Sep 17 00:00:00 2001 From: moneta Date: Wed, 12 Nov 2025 09:30:56 +0100 Subject: [PATCH 3/4] [tmva][sofie] Add support for greedy memory allocation for dynamic tensors Add a new function in SOFIE_common, OrganizeMemory, which computes the total memory and the offset of each tensor, given each tensor's begin/end lifetime and size. Also fix some small issues with dynamic tensors. One is for the bias of Gemm and Conv: the broadcasting of the bias is done for dynamic tensors in the Session constructor only if needed. For the broadcasted tensor there is no need to create a new tensor; the existing one is resized to the needed broadcasted size using vector::resize --- .../inc/TMVA/ROperator_BatchNormalization.hxx | 6 +- tmva/sofie/inc/TMVA/ROperator_Constant.hxx | 5 +- tmva/sofie/inc/TMVA/ROperator_Conv.hxx | 35 +++-- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 52 ++++--- tmva/sofie/inc/TMVA/ROperator_Range.hxx | 17 ++- tmva/sofie/inc/TMVA/SOFIE_common.hxx | 16 +++ tmva/sofie/src/RModel.cxx | 115 ++++++++++++---- tmva/sofie/src/SOFIE_common.cxx | 128 +++++++++++++++++- 8 files changed, 305 insertions(+), 69 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx index f2d31796bbbcd..c37e7fc4b68de 100644 --- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx @@ -141,8 +141,8 @@ public: } } - std::string Generate(std::string OpName) override { - OpName = "op_" + OpName; + std::string Generate(std::string opName) override { + opName = "op_" + opName; if (fShapeX.empty()){ throw std::runtime_error("TMVA SOFIE Batch Normalization called to Generate without being initialized first"); } @@ -158,7 +158,7 @@ public: spatial_dim = ConvertDimShapeToLength( spatialShape); } - out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ? " + ReLU" : "") << "\n"; + out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ?
" + ReLU " : " ") << opName << "\n"; out << SP << "{\n"; out << SP << " size_t i = 0;\n"; out << SP << " for (size_t n = 0; n < " << batchSize << "; ++n) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 3b339e3440488..93f3c43feceb9 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -128,6 +128,7 @@ public: } } else { model.AddIntermediateTensor(fNY, ConvertStringToType(TensorType::Name()), fDimOutputShape); + fOutputTensorNames.emplace_back(fNY); } } @@ -153,9 +154,7 @@ public: } auto length = ConvertDimShapeToLength(fDimOutputShape); // vector is already allocated- fill with values - out << SP << "if (" << length << " > fTensor_" << fNY << ".size())\n"; - out << SP << SP << "fTensor_" << fNY << ".resize(" << length << ");\n"; - out << SP << "std::fill(fTensor_" << fNY << ".begin(), fTensor_" << fNY << ".end(), " << fValues[0] << ");\n"; + out << SP << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << ", " << fValues[0] << ");\n"; return out.str(); } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 2681eeb2dd84c..823e7fa04717e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -20,6 +20,8 @@ template class ROperator_Conv final : public ROperator { private: + bool fBroadcastBias = false; + std::string fAttrAutopad; std::vector fAttrDilations; size_t fAttrGroup; @@ -30,7 +32,6 @@ private: std::string fNX; std::string fNW; std::string fNB; - std::string fNB2; // bias tensor name after broadcasting std::string fNY; std::string convK; @@ -262,6 +263,9 @@ public: std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); } fShapeB = model.GetTensorShape(fNB); + if (fShapeB.size() != 1) + throw + std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)"); std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); auto shapeDimB = model.GetDimTensorShape(fNB); bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape); @@ -278,7 +282,9 @@ public: if (fType != "float") throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); // here is the actual broadcasting + fBroadcastBias = true; if (!fUseSession) { + // do here broadcasting std::vector shape(fDim + 1, 1); shape[0] = fShapeB[0]; auto intTargetShape = ConvertShapeToInt(targetShape); @@ -287,13 +293,6 @@ public: std::default_delete()); model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr); fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; // use same name - } - else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); } } } @@ -334,15 +333,25 @@ public: std::string GenerateInitCode() override { std::stringstream out; // Generate initialization code for broadcasting of bias tensor - if (!fNB2.empty()) { + if (fBroadcastBias) { // include a separate scope to avoid defining unique operator temp variables std::vector shape(fDim + 1, 1); + // bias (is a 1D tensor) shape[0] = fShapeB[0]; std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - out << SP << "{\n"; + out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n"; + 
// in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(targetShape).empty(); + auto length = ConvertDimShapeToLength(targetShape); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; + else + out << SP << "{\n"; out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n"; + out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNB << ");\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } @@ -562,13 +571,13 @@ public: out << SP << SP << "}\n"; // end of group loop } - if (fNB2 != "") { + if (fNB != "") { out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n"; out << SP << "float " << OpName << "_gamma = 1.0;\n"; out << SP << "int " << OpName << "_incx = 1;\n"; out << SP << "int " << OpName << "_incy = 1;\n"; - out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &" + out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &" << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index d954720396151..1c8b51d991af2 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -24,6 +24,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; + bool fBroadcastBias = false; float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -33,7 +34,6 @@ namespace SOFIE{ std::string fNA; std::string fNB; std::string fNC = ""; - std::string fNC2; // bias tensor name after broadcasting std::string fNY; std::string fType; EActivationType fActivation; @@ -222,7 +222,6 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); } fShapeC = model.GetTensorShape(fNC); - fNC2 = fNC; size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done @@ -230,6 +229,7 @@ namespace SOFIE{ if (broadcast_needed) { + fBroadcastBias = true; if (!model.UseSession()) { // without session dynamic tensors not supported in Gemm if (fIsDynamic) { @@ -246,14 +246,18 @@ namespace SOFIE{ fShapeC = shapeY; } } else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNC2 = fNC + "bcast"; - if (!fIsDynamic) { - model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); - } - else - model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // /d to add a new intermediate tensor for broadcasted bias tensor + // fNC2 = fNC + "bcast"; + // if (!fIsDynamic) { + // model.AddIntermed/ In case of session add broadcasting code in Session constructor and in GenerateInitCode + // // we neeiateTensor(fNC2, model.GetTensorType(fNC), shapeY); + // } + // else + // model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // // do not add to lists of input/output 
tensors since broadcasted tensors are special + // // and we manage their memory separatly + // //fInputTensorNames.emplace_back(fNC2); + // //fOutputTensorNames.emplace_back(fNC2); } } } @@ -291,18 +295,26 @@ namespace SOFIE{ std::string GenerateInitCode() override { std::stringstream out; // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { + if (fShapeC.size() != fShapeY.size() && fBroadcastBias) { // we broadcast here always C in Y output, so target shape is the one of Y // no need to call UTILITY::UnidirectionalBroadcastShape. // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. - auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; auto length = ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; + // include a separate scope to avoid defining unique operator temp variables + out << "//--- broadcast bias tensor " << fNC << "for Gemm op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(fShapeY).empty(); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(fShapeC) << ") {\n"; + else + out << SP << "{\n"; + // here we broadcast + out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; + + out << SP << SP << "fTensor_" << fNC << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNC << " = fTensor_" << fNC << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } @@ -338,7 +350,7 @@ namespace SOFIE{ // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -381,7 +393,7 @@ namespace SOFIE{ out << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias if (!fNC.empty()) - out << "tensor_" << fNC2; + out << "tensor_" << fNC; else out << "nullptr"; out << ");\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 7c138c3b3def5..16d2cb689d518 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -37,6 +37,10 @@ public: } static_assert( (std::is_same_v || std::is_same_v), "TMVA::SOFIE - Unsupported type by Range operator"); + { + fInputTensorNames = { fNStart, fNLimit, fNDelta }; + fOutputTensorNames = { fNOutput }; + } } void Initialize(RModel& model) override { @@ -166,13 +170,14 @@ public: ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; out << SP << "{\n"; out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; - out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP 
<< SP << "fTensor_" << fNOutput << ".resize(range);\n"; - // need to re-initialized pointer to tensor data - out << SP << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << SP << "}\n"; + if (sizeName != fShape[0].param) { + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + // we should probably resize the tensor here + out << SP << SP << SP << "throw std::runtime_error(\"wrong size allocated for output of range\");\n"; + out << SP << SP << "}\n"; + } out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; - out << SP << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index dfa46a44c03b0..7abb7df68d997 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -811,6 +811,22 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } + +// code for the memory greeding allocations +struct TensorLifeInfo { + int begin; // start time (op index) lifetime + int end; // end time lifetime + size_t size; // size of tensors in bytes +}; + +struct MemoryResult { + std::size_t total_bytes = 0; // total memory needed + std::vector offsets; // resulted offsets for each tensor +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); + } // namespace SOFIE } // namespace Experimental } // namespace TMVA diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 85254e0000269..8bc4d4e048a30 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -167,16 +167,15 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { } // storing the last usage of tensors which are input to - // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. 
+ // operators (but are not inputs to the model or they are not initialized) + // We call this function during parsing so we don't have yet initialized the operators for(size_t index = 0; index & inputParams, bool fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ std::string name = std::string{it}; + // check if tensor is not an initialized or output tensor and it is not already in the list if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ + fInitializedTensors.find(name) == fInitializedTensors.end()) + { fIntermediateTensorFrequencyLookup[it] = op_idx; } } @@ -616,9 +616,9 @@ void RModel::Initialize(const std::map & inputParams, bool // loop on initialized tensors and make the integers as constant to be // not written in a weight file - for (auto &i : fInitializedTensors) { - if (i.second.IsWeightTensor() && i.second.type() != ETensorType::FLOAT) - i.second.SetConstant(); + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor() && it.second.type() != ETensorType::FLOAT) + it.second.SetConstant(); } fIsInitialized = true; @@ -775,19 +775,21 @@ void RModel::GenerateIntermediateTensorInfo() { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { if (i.second.type == ETensorType::FLOAT) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "float * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "double * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::INT64) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::BOOL) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } + fGC += "//--- dynamic tensors pool\n"; + fGC += "std::vector fDynamicMemoryPool;\n"; } } @@ -805,14 +807,81 @@ void RModel::GenerateOperatorDeclarations() { void RModel::GenerateDynamicTensorInfo() { + // generate code for allocating dynamic tensors using the greedy memory allocations + if (fDynamicTensorInfos.empty()) + return; + std::stringstream out; + out << "// dynamic tensor memory management\n"; + out << SP << "std::vector dynamicTensorInfos;\n"; + out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; + + // loop on all the operators to find begin/end life of the tensors + int op_index = 0; + std::vector> tensors; + tensors.reserve(fDynamicTensorInfos.size()); + for (auto & op : fOperators) { + // loop on output tensors - + for (auto &it : op->GetOpOutputTensors()) { + if (fVerbose) { + auto op_ptr = op.get(); + std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; + } + // check if is a dynamic tensor + std::string name = std::string(it); + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() ) { + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + auto type = 
GetTensorType(name); + size_t type_size = GetTypeSize(type); + int begin = op_index; + int end = fOperators.size(); + // look for end + auto it_lookup = fIntermediateTensorFrequencyLookup.find(name); + if (it_lookup != fIntermediateTensorFrequencyLookup.end()) + end = it_lookup->second + 1; // end is last time used + 1 + // // some tensors (like xcol in convolutions) are just used within the operators + // if (end == 0 && begin > 0) end = begin+1; + + if (begin> end) { + std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + } + + // write in code + out << SP << "dynamicTensorInfos.push_back( {" << begin << ", " << end << ", " << type_size << "* (" << tensor_size << ") });" + << " // tensor_" << name << std::endl; + tensors.push_back({name,type}); + } + } + op_index++; // increment operator index + } + out << "\n" << SP << "auto memory_result = OrganizeMemory(dynamicTensorInfos);\n\n"; + out << "// allocating now the memory\n"; + out << SP << "fDynamicMemoryPool = std::vector(memory_result.total_bytes);\n"; + out << SP << "int idx = 0;\n"; + for (auto & it : tensors) { + out << SP << "tensor_" << it.first << " = reinterpret_cast<" << ConvertTypeToString(it.second) << " *>(fDynamicMemoryPool.data() + memory_result.offsets[idx++]);\n"; + } + // check that all dynamic tensors are covered + bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; + if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { + std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; + missingTensor = true; + } } + if (missingTensor) + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + + + + // for (auto &i : fDynamicTensorInfos) { + // auto length = ConvertDynamicShapeToLength(i.second.shape); + // out << SP << "if (" << length << " > 0) {\n"; + // out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + // out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + // out << SP << "}\n"; + // } fGC += out.str(); } diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index c107b489be19e..1ff510842643a 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace TMVA { namespace Experimental { @@ -89,7 +91,7 @@ std::string ConvertTypeToString(ETensorType type){ return "double"; } case ETensorType::BOOL : { - return "bool"; + return "uint8_t"; } default:{ return "other_" + std::to_string( (int) type); @@ -547,6 +549,130 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } +struct FreeBlock { + std::size_t offset; + std::size_t size; + bool operator<(const FreeBlock& other) const { + // order by offset for deterministic coalescing + return offset < other.offset; + } +}; + +struct MemoryEvent { + int t; // time (i.e. 
operator index) + int type; // 0 = END first, 1 = START + int idx; // tensor index + bool operator<(const MemoryEvent& o) const { + if (t != o.t) return t < o.t; + return type < o.type; // END before START at the same time + } +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) +{ + // Basic validation + for (const auto &t : tensorsInfo) { + if (!(t.end > t.begin)) { + throw std::runtime_error("Each tensor must have end > begin."); + } + } + + // Build events: free before allocate at equal times. + std::vector events; + events.reserve(tensorsInfo.size() * 2); + for (int i = 0; i < (int)tensorsInfo.size(); ++i) { + events.push_back({tensorsInfo[i].end, 0, i}); // END + events.push_back({tensorsInfo[i].begin, 1, i}); // START + } + std::sort(events.begin(), events.end()); + + std::vector tensorsOffset(tensorsInfo.size()); + + // Free list ordered by offset (for O(log n) coalescing) + // and faster insert/erase with respect to a vector + std::set free_list; + + // Bookkeeping: size/offset map for frees. + std::unordered_map live_size; + std::unordered_map live_offset; + + std::size_t total_bytes = 0; + + auto allocate_best_fit = [&](std::size_t need) -> std::size_t { + // Find the *smallest* block whose size >= need (best-fit). + // Since free_list is ordered by offset, we scan to find best by size. + // (For very large sets you could maintain a multimap by size as well.) + auto best = free_list.end(); + for (auto it = free_list.begin(); it != free_list.end(); ++it) { + if (it->size >= need) { + if (best == free_list.end() || it->size < best->size) + best = it; + } + } + if (best != free_list.end()) { + std::size_t off = best->offset; + if (best->size == need) { + free_list.erase(best); + } else { + FreeBlock updated{best->offset + need, best->size - need}; + free_list.erase(best); + free_list.insert(updated); + } + return off; + } + // No free block large enough; grow the heap. + std::size_t off = total_bytes; + total_bytes += need; + return off; + }; + + auto try_coalesce = [&](std::set::iterator it) { + // Coalesce with previous + if (it != free_list.begin()) { + auto prev = std::prev(it); + if (prev->offset + prev->size == it->offset) { + FreeBlock merged{prev->offset, prev->size + it->size}; + free_list.erase(prev); + it = free_list.erase(it); + it = free_list.insert(merged).first; + } + } + // Coalesce with next + auto next = std::next(it); + if (next != free_list.end() && it->offset + it->size == next->offset) { + FreeBlock merged{it->offset, it->size + next->size}; + free_list.erase(next); + it = free_list.erase(it); + free_list.insert(merged); + } + }; + + // Sweep through time. 
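+   // Events are processed in time order; at equal times END events come before START events,
+   // so a block freed by a tensor whose lifetime ends at time t can be reused by a tensor starting at t.
+   // END : return the tensor's block to the free list and coalesce it with adjacent free blocks.
+   // START: place the tensor with best-fit in the free list, growing the pool only if no free block is large enough.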
+ for (const auto &e : events) { + if (e.type == 0) { // END: free + auto it_sz = live_size.find(e.idx); + auto it_off = live_offset.find(e.idx); + if (it_sz != live_size.end() && it_off != live_offset.end()) { + FreeBlock fb{it_off->second, it_sz->second}; + // Insert and coalesce with neighbors + auto it = free_list.insert(fb).first; + try_coalesce(it); + live_size.erase(it_sz); + live_offset.erase(it_off); + } + } else { // START: allocate + auto &t = tensorsInfo[e.idx]; + std::size_t off = allocate_best_fit(t.size); + tensorsOffset[e.idx] = off; + live_size[e.idx] = t.size; + live_offset[e.idx] = off; + } + } + + return MemoryResult{total_bytes, std::move(tensorsOffset)}; +} + } // namespace SOFIE } // namespace Experimental } // namespace TMVA From 21f3675cda2fb656aaaed524a7e9e971997a194c Mon Sep 17 00:00:00 2001 From: moneta Date: Fri, 14 Nov 2025 10:41:42 +0100 Subject: [PATCH 4/4] [tmva][sofie] Fix an issue in generating code for dynamic tensors when broadcasting The assert that was generated when broadcasting dynamic tensors was not correct (it asserted inequality instead of equality of the output and bias lengths) --- tmva/sofie/inc/TMVA/ROperator_Gemm.hxx | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index 1c8b51d991af2..1a0fa7b16868b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -207,13 +207,7 @@ namespace SOFIE{ } fShapeY = DynamicShapeInference({fShapeA, fShapeB}); - std::vector shapeY; - if (!fIsDynamic) { - shapeY = ConvertShapeToInt(fShapeY); - if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertShapeToString(fShapeY)); - } - } + std::vector shapeY = ConvertShapeToInt(fShapeY); // bias is normally not dynamic (not support it for time being) if (fNC != ""){ @@ -225,7 +219,11 @@ namespace SOFIE{ size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done - bool broadcast_needed = lengthC != lengthY; + bool broadcast_needed = false; + if (fIsDynamic && shapeY.empty()) + broadcast_needed = true; + else + broadcast_needed = lengthC != lengthY; if (broadcast_needed) { @@ -359,7 +357,7 @@ namespace SOFIE{ + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; } } } else {
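For reference, a minimal standalone sketch of how the new planner is meant to be driven. Only TensorLifeInfo, MemoryResult and OrganizeMemory come from this patch; the include path, the tensor sizes and lifetimes, and the pool variable are illustrative assumptions, mirroring what RModel::GenerateDynamicTensorInfo emits in the generated Session constructor.

    #include "TMVA/SOFIE_common.hxx" // assumed include path for the SOFIE common header

    #include <iostream>
    #include <vector>

    using namespace TMVA::Experimental::SOFIE;

    int main()
    {
       // begin = index of the operator producing the tensor,
       // end   = index of its last consumer + 1 (as computed in RModel::GenerateDynamicTensorInfo),
       // size  = element count * type size, in bytes (hypothetical values here)
       std::vector<TensorLifeInfo> tensors = {
          {0, 2, 4096}, // produced by op 0, last used by op 1
          {1, 4, 8192}, // produced by op 1, last used by op 3
          {2, 4, 4096}  // produced by op 2, can reuse the block of the first tensor
       };
       MemoryResult plan = OrganizeMemory(tensors);
       // a single pool serves all dynamic tensors; each tensor pointer becomes
       // pool.data() + plan.offsets[i], as in the generated Session constructor
       std::vector<char> pool(plan.total_bytes);
       for (std::size_t i = 0; i < tensors.size(); i++)
          std::cout << "tensor " << i << " -> offset " << plan.offsets[i] << "\n";
       std::cout << "total bytes: " << plan.total_bytes << "\n"; // 12288 here: the third tensor reuses the first one's block
       return 0;
    }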