diff --git a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
index bcc0e52a40ca3..f73bd34e53386 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BasicNary.hxx
@@ -23,10 +23,11 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Max";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0] << ";\n";
+      out << res << " = std::max({ " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
-         out << "\t" << "\t" << res << " = std::max(" << res << ", " << inputs[i] << ");\n";
+         out << ", " << inputs[i];
       }
+      out << "});\n";
       return out.str();
    }
 };
@@ -36,10 +37,11 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Min";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0] << ";\n";
+      out << res << " = std::min({ " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
-         out << "\t" << "\t" << res << " = std::min(" << res << ", " << inputs[i] << ");\n";
+         out << ", " << inputs[i];
       }
+      out << "});\n";
       return out.str();
    }
 };
@@ -52,7 +54,7 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Mean";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = (" << inputs[0];
+      out << res << " = (" << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
          out << " + " << inputs[i];
       }
@@ -66,7 +68,7 @@ struct NaryOperatorTraits {
    static const std::string Name() {return "Sum";}
    static std::string Op(const std::string& res, std::vector<std::string>& inputs) {
       std::stringstream out;
-      out << "\t" << "\t" << res << " = " << inputs[0];
+      out << res << " = " << inputs[0];
       for (size_t i = 1; i < inputs.size(); i++) {
          out << " + " << inputs[i];
       }
@@ -83,10 +85,11 @@
 private:
 
    std::vector<std::string> fNInputs;
    std::string fNY;
-   std::vector<std::vector<size_t>> fShapeInputs;
+   std::vector<std::vector<Dim>> fShapeInputs;
 
    std::vector<std::string> fNBroadcastedInputs;
    std::vector<size_t> fShapeY;
+   std::vector<Dim> fDimShapeY;
 
    bool fBroadcast = false;
@@ -119,64 +122,164 @@ public:
    }
    void Initialize(RModel& model) override {
+      std::vector<std::vector<size_t>> inputShapes;
       for (auto &it : fNInputs) {
          if (!model.CheckIfTensorAlreadyExist(it)) {
            throw std::runtime_error("TMVA SOFIE BasicNary Op Input Tensor " + it + " is not found in model");
         }
-         fShapeInputs.push_back(model.GetTensorShape(it));
+         fShapeInputs.push_back(model.GetDimTensorShape(it));
+         if (fNInputs.size()> 2) {
+            if (model.IsDimInputTensor(it))
+               throw std::runtime_error("TMVA SOFIE BasicNary : supports only 2 inputs for dynamic tensors");
+            else
+               inputShapes.push_back(model.GetTensorShape(it));
+         }
      }
      // Find the common shape of the input tensors
-      fShapeY = UTILITY::MultidirectionalBroadcastShape(fShapeInputs);
-      model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY);
-      // Broadcasting
-      size_t N = fNInputs.size();
-      fNBroadcastedInputs.reserve(N);
-      for (size_t i = 0; i < N; i++) {
-         if (!UTILITY::AreSameShape(model.GetTensorShape(fNInputs[i]), fShapeY)) {
-            fBroadcast = true;
-            std::string name = "Broadcasted" + fNInputs[i];
-            model.AddIntermediateTensor(name, model.GetTensorType(fNInputs[0]), fShapeY);
-            fNBroadcastedInputs.emplace_back("tensor_" + name);
-         } else {
-            fNBroadcastedInputs.emplace_back("tensor_" + fNInputs[i]);
+      if (fShapeInputs.size() > 2 ) {
+         // dynamic tensors are currently supported only for an input list of size 2
+         auto shapeY = UTILITY::MultidirectionalBroadcastShape(inputShapes);
+         fDimShapeY = ConvertShapeToDim(shapeY);
+      } else if (fShapeInputs.size() == 2 ) {
+         auto ret = UTILITY::MultidirectionalBroadcastShape(fShapeInputs[0], fShapeInputs[1]);
+         // use same code as in BinaryOperator (need to extend for input sizes > 2)
+         fBroadcast = ret.first;
+         fDimShapeY = ret.second;
+         // case of all parametric shapes where MultidirectionalBroadcastShape returns the max of the 2
+         // this needs to be done before declaring the output tensor shape and the broadcasted ones
+         if (ret.first & 4) {
+            // check if one of the parameters is an input dimension
+            // define function to find this
+            auto IsInputDimParam = [&](const std::string &p) {
+               auto inputNames = model.GetInputTensorNames();
+               for (auto &input : inputNames) {
+                  for (auto &i_s : model.GetDimTensorShape(input)) {
+                     if (i_s.isParam && i_s.param == p)
+                        return true;
+                  }
+               }
+               return false;
+            };
+            auto & shapeA = fShapeInputs[0];
+            auto & shapeB = fShapeInputs[1];
+            for (size_t i = 0; i < fDimShapeY.size(); i++) {
+               auto &s = fDimShapeY[i];
+               if (s.isParam && s.param.find("std::max") != std::string::npos) {
+                  if (IsInputDimParam(shapeA[i].param)) {
+                     // if the dim is 1 we assume the input parameter is equal to 1
+                     if (shapeA[i].dim != 1)
+                        s = shapeA[i];
+                     else
+                        s = shapeB[i];
+                  } else if (IsInputDimParam(shapeB[i].param)) {
+                     if (shapeB[i].dim != 1)
+                        s = shapeB[i];
+                     else
+                        s = shapeA[i];
+                  }
+               }
+            }
         }
+      } else if (fShapeInputs.size() == 1 ) {
+         fDimShapeY = fShapeInputs[0];
      }
+      if (!fShapeY.empty())
+         model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fShapeY);
+      else
+         model.AddIntermediateTensor(fNY, model.GetTensorType(fNInputs[0]), fDimShapeY);
+
+      fType = ConvertTypeToString(model.GetTensorType(fNInputs[0]));
+
+      if (model.Verbose()) {
+         std::cout << NaryOperatorTraits::Name() << " : ";
+         if (fNInputs.size() == 2)
+            std::cout << ConvertShapeToString(fShapeInputs[0]) << " , "
+                      << ConvertShapeToString(fShapeInputs[1]);
+         std::cout << " --> " << ConvertShapeToString(fDimShapeY) << std::endl;
+      }
   }

   std::string Generate(std::string OpName) override {
      OpName = "op_" + OpName;
-      if (fShapeY.empty()) {
+      if (fDimShapeY.empty()) {
         throw std::runtime_error("TMVA SOFIE BasicNary called to Generate without being initialized first");
      }
      std::stringstream out;
-      size_t length = ConvertShapeToLength(fShapeY);
+      auto length = ConvertDimShapeToLength(fDimShapeY);
      out << SP << "\n//------ BasicNary operator\n";
-      if (fBroadcast) {
-         for (size_t i = 0; i < fNInputs.size(); i++) {
-            if (fNBroadcastedInputs[i] != fNInputs[i]) {
-               out << SP << SP << "// Broadcasting " << fNInputs[i] << " to " << ConvertShapeToString(fShapeY) << "\n";
-               out << SP << SP << "{\n";
-               out << SP << SP << SP << fType << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << fType << ">(tensor_" + fNInputs[i] << ", " << ConvertShapeToString(fShapeInputs[i]);
-               out << ", " << ConvertShapeToString(fShapeY) << ");\n";
-               out << SP << SP << SP << "std::copy(data, data + " << length << ", " << fNBroadcastedInputs[i] << ");\n";
-               out << SP << SP << SP << "delete[] data;\n";
-               out << SP << SP << "}\n";
-            }
-         }
-      }
-      if (fNInputs.size() == 1) {
+      int nInputs = fNInputs.size();
+
+      if (nInputs == 1) {
         out << SP << "std::copy(tensor_" << fNInputs[0] << ", tensor_" << fNInputs[0] << " + ";
         out << length << ", tensor_" << fNY << ");\n";
      } else {
-         std::vector<std::string> inputs(fNBroadcastedInputs.size());
-         for (size_t i = 0; i < fNBroadcastedInputs.size(); i++) {
-            inputs[i] = fNBroadcastedInputs[i] + "[id]";
+
+         // implement the operator without explicit broadcasting, using loops over all output indices
+         std::vector<std::vector<Dim>> inputStrides(nInputs);
+         for (int i = 0; i < nInputs; i++)
+            inputStrides[i] = UTILITY::ComputeStrideFromShape(fShapeInputs[i]);
+
+         auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY);
+
+         // make loop on output indices
+         std::string compute_idx_Y;
+         int nloop = 0;
+         if (fDimShapeY.empty() ||
+             std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
+            compute_idx_Y = "0";
+         } else {
+            for (size_t i = 0; i < fDimShapeY.size(); ++i) {
+               if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") {
+                  nloop++;
+                  for (int j = 0; j < nloop; j++) out << SP;
+                  out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i]
+                      << "; ++idx_" << i << "){\n";
+                  compute_idx_Y += "idx_" + std::to_string(i);
+                  if (stridesY[i].GetVal() != "1")
+                     compute_idx_Y += " * " + stridesY[i].GetVal();
+                  compute_idx_Y += " + ";
+               }
+            }
+            // remove the last 3 characters " + "
+            for (int j = 0; j < 3; j++)
+               compute_idx_Y.pop_back();
+         }
+         // find indices for input tensors
+         std::vector<std::string> inputs(nInputs);
+         for (int ipt = 0; ipt < nInputs; ipt++ ) {
+            std::string compute_idx_X;
+            auto & shape = fShapeInputs[ipt];
+            auto & stride = inputStrides[ipt];
+            if (shape.empty() ||
+                std::all_of(shape.begin(), shape.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) {
+               compute_idx_X = "0";
+            } else {
+               for (size_t i = 0; i < shape.size(); ++i) {
+                  if (shape[i].dim == 1 || shape[i].GetVal() == "1")
+                     continue;
+                  compute_idx_X += "idx_" + std::to_string(i + (fDimShapeY.size() - shape.size()));
+                  if (stride[i].GetVal() != "1")
+                     compute_idx_X += " * " + stride[i].GetVal();
+                  compute_idx_X += " + ";
+               }
+               // remove the last 3 characters " + "
+               for (int j = 0; j < 3; j++)
+                  compute_idx_X.pop_back();
+            }
+            inputs[ipt] = "tensor_" + fNInputs[ipt] + "[" + compute_idx_X + "]";
+         }
+
+         // perform the operation
+         for (int j = 0; j < nloop + 1; j++) out << SP;
+         std::string output = "tensor_" + fNY + "[" + compute_idx_Y + "]";
+         out << NaryOperatorTraits::Op(output, inputs);
+
+         for (int i = nloop; i > 0; i--) {
+            for (int j = 0; j < i; j++) out << SP;
+            out << "}\n";
         }
-         out << SP << "for (size_t id = 0; id < " << length << "; id++) {\n";
-         out << NaryOperatorTraits::Op("tensor_" + fNY + "[id]", inputs);
-         out << SP << "}\n";
      }
      return out.str();
   }
diff --git a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
index f2d31796bbbcd..c37e7fc4b68de 100644
--- a/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator_BatchNormalization.hxx
@@ -141,8 +141,8 @@ public:
      }
   }

-   std::string Generate(std::string OpName) override {
-      OpName = "op_" + OpName;
+   std::string Generate(std::string opName) override {
+      opName = "op_" + opName;
      if (fShapeX.empty()){
         throw std::runtime_error("TMVA SOFIE Batch Normalization called to Generate without being initialized first");
      }
@@ -158,7 +158,7 @@
         spatial_dim = ConvertDimShapeToLength( spatialShape);
      }

-      out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ? " + ReLU" : "") << "\n";
+      out << "\n\n//---- BatchNorm" << (fActivation == EActivationType::RELU ?
" + ReLU " : " ") << opName << "\n"; out << SP << "{\n"; out << SP << " size_t i = 0;\n"; out << SP << " for (size_t n = 0; n < " << batchSize << "; ++n) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx index f48e27ee4f264..8267bb8a7e4f4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Cast.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Cast.hxx @@ -46,7 +46,7 @@ public: throw std::runtime_error("TMVA SOFIE Cast Op Input Tensor is not found in model"); } fShape = model.GetDimTensorShape(fNX); - // shoud we add a check if the same type + // should we add a check if the same type auto inputType = model.GetTensorType(fNX); if (model.IsInitializedTensor(fNX)) { fIsOutputConstant = true; @@ -57,29 +57,30 @@ public: } else fIsOutputConstant = false; + } else if (model.IsShapeTensor(fNX) && ConvertStringToType(fAttrType) == ETensorType::INT64) { + auto shapeData = model.GetShapeTensorValues(fNX); + model.AddShapeTensor(fNY, shapeData, fShape.size() == 0); + fIsOutputConstant = true; } if (!fIsOutputConstant) model.AddIntermediateTensor(fNY, ConvertStringToType(fAttrType), fShape); if (model.Verbose()) { - std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY; + std::cout << "Cast : " << ConvertTypeToString(inputType) << " " << fNX << " -> " << fAttrType << " for " << fNY + << " shape " << ConvertDimShapeToString(fShape); if (fIsOutputConstant) std::cout << " (constant) "; std::cout << std::endl; } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; + std::string Generate(std::string opName) override { + + // output shape can be empty if is a scalar - OpName = "op_" + OpName; - if (fShape.empty()) { - throw std::runtime_error("TMVA SOFIE Cast called to Generate without being initialized first"); - } std::stringstream out; auto length = ConvertDimShapeToLength(fShape); - // out << SP << ETensorType << " " << OpName << "_attr = " << fattr << ";\n"; - out << "\n//------ CAST\n"; + out << "\n//------ CAST " << opName << " ---> " << fNY << " " << ConvertDimShapeToString(fShape) << "\n"; // no generated code for constant outputs if (fIsOutputConstant) return out.str(); diff --git a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx index 0d365ae517de5..734434357a149 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Comparision.hxx @@ -56,7 +56,6 @@ template class ROperator_Comparision final : public ROperator{ private: - bool fIsModelOutput = false; std::string fNX1; std::string fNX2; std::string fNY; @@ -65,11 +64,10 @@ private: std::vector fDimShapeX1; std::vector fDimShapeX2; std::vector fShapeY; - std::string fNBroadcastedX1; - std::string fNBroadcastedX2; + std::vector fDimShapeY; ETensorType fTensorType1 = ETensorType::UNDEFINED; ETensorType fTensorType2 = ETensorType::UNDEFINED; - bool fBroadcast = false; + int fBroadcastFlag = 0; public: @@ -115,184 +113,260 @@ public: } fTensorType1 = model.GetTensorType(fNX1); fTensorType2 = model.GetTensorType(fNX2); - bool broadcast = !UTILITY::AreSameShape(fShapeX1, fShapeX2); - if (broadcast) { - // Y is the common shape of A and B - fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); - bool broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); - bool broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); - // Broadcast A to Y - if (broadcastX1) { - if (model.IsInitializedTensor(fNX1)) { - auto data = 
model.GetInitializedTensorData(fNX1); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX1, fShapeY), - std::default_delete()); - // Update the data and the shape of A - model.UpdateInitializedTensor(fNX1, model.GetTensorType(fNX1), fShapeY, broadcastedData); - fShapeX1 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting A - fNBroadcastedX1 = "Broadcasted" + fNX1; - model.AddIntermediateTensor(fNBroadcastedX1, model.GetTensorType(fNX1), fShapeY); + // case of non dynamic tensors + if (!fShapeX1.empty() && !fShapeX2.empty()) { + bool broadcastX1 = false; + bool broadcastX2 = false; + if (UTILITY::AreSameShape(fShapeX1, fShapeX2)) { + // no broadcast needed + fShapeY = fShapeX1; + } else { + // Y is the common shape of A and B + fShapeY = UTILITY::UnidirectionalBroadcastShape(fShapeX1, fShapeX2); + broadcastX1 = !UTILITY::AreSameShape(fShapeX1, fShapeY); + broadcastX2 = !UTILITY::AreSameShape(fShapeX2, fShapeY); + } + + + // analyze case of constant tensors or shape tensors (which have known shapes but data as Dim values + // normal case with non-dynamic tensor is also here + T *data1 = nullptr; + T *data2 = nullptr; + std::unique_ptr broadcastedData1; + std::unique_ptr broadcastedData2; + // data for shape tensors + std::vector shapeData1; + std::vector shapeData2; + size_t length = ConvertShapeToLength(fShapeY); + bool *outData = new bool[length]; + if (model.IsInitializedTensor(fNX1)) { + data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); + if (broadcastX1) { + broadcastedData1 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data1, fShapeX1, fShapeY)); + data1 = broadcastedData1.get(); } + + } else if (model.IsShapeTensor(fNX1)) { + shapeData1 = model.GetShapeTensorValues(fNX1); } - // Broadcast B to Y - if (broadcastX2) { - if (model.IsInitializedTensor(fNX2)) { - auto data = model.GetInitializedTensorData(fNX2); - std::shared_ptr broadcastedData( - UTILITY::UnidirectionalBroadcast(static_cast(data.get()), fShapeX2, fShapeY), - std::default_delete()); - // Update the data and the shape of B - model.UpdateInitializedTensor(fNX2, model.GetTensorType(fNX2), fShapeY, broadcastedData); - fShapeX2 = fShapeY; - } else { - // Add an intermediate tensor for broadcasting B - fNBroadcastedX2 = "Broadcasted" + fNX2; - model.AddIntermediateTensor(fNBroadcastedX2, model.GetTensorType(fNX2), fShapeY); + if (model.IsInitializedTensor(fNX2)) { + data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); + if (broadcastX2) { + broadcastedData2 = std::unique_ptr( + UTILITY::UnidirectionalBroadcast(data2, fShapeX2, fShapeY)); + data2 = broadcastedData2.get(); } + } else if (model.IsShapeTensor(fNX2)) { + shapeData2 = model.GetShapeTensorValues(fNX2); } - } else { - fShapeY = fShapeX1; - } - // case of constant tensors - T * data1 = nullptr; - T * data2 = nullptr; - std::vector shapeData1; - std::vector shapeData2; - size_t length = ConvertShapeToLength(fShapeY); - bool * outData = new bool[length]; - if (model.IsInitializedTensor(fNX1)) { - data1 = static_cast(model.GetInitializedTensorData(fNX1).get()); - } else if (model.IsShapeTensor(fNX1)) { - shapeData1 = model.GetShapeTensorValues(fNX1); - } - if (model.IsInitializedTensor(fNX2)) { - data2 = static_cast(model.GetInitializedTensorData(fNX2).get()); - } else if (model.IsShapeTensor(fNX2)) { - shapeData2 = model.GetShapeTensorValues(fNX2); - } - if (data1 && data2) { - fIsOutputConstant = true; - for (size_t i = 0; i < length; i++) - outData[i] = 
ComparisionTrait::Result(data1[i], data2[i]); - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << std::endl; - } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { - fIsOutputConstant = true; - if (data1 && !data2) { - // data 1 is constant and data2 is shape - for (size_t i = 0; i < length; i++) { - if (shapeData2[i].isParam) { - if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { - fIsOutputConstant = false; - break; - } else { - // assume a comparison is done with .dim = 0 - shapeData2[i].dim = 0; + if (data1 && data2) { + fIsOutputConstant = true; + for (size_t i = 0; i < length; i++) + outData[i] = ComparisionTrait::Result(data1[i], data2[i]); + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << std::endl; + } else if ((data1 || !shapeData1.empty()) && (data2 || !shapeData2.empty())) { + fIsOutputConstant = true; + if (data1 && !data2) { + // data 1 is constant and data2 is shape + for (size_t i = 0; i < length; i++) { + if (shapeData2[i].isParam) { + if (shapeData2[i].dim == size_t(-1) || data1[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData2[i].dim = 0; + } } + outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); } - outData[i] = ComparisionTrait::Result(data1[i], static_cast(shapeData2[i].dim)); - } - } else if (!data1 && data2) { - // data 1 is shape and dat2 is constant - for (size_t i = 0; i < length; i++) { - if (shapeData1[i].isParam) { - if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + } else if (!data1 && data2) { + // data 1 is shape and dat2 is constant + for (size_t i = 0; i < length; i++) { + if (shapeData1[i].isParam) { + if (shapeData1[i].dim == size_t(-1) || data2[i] > 0) { + fIsOutputConstant = false; + break; + } else { + // assume a comparison is done with .dim = 0 + shapeData1[i].dim = 0; + } + } + outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); + } + } else if (!shapeData1.empty() && !shapeData2.empty()) { + // both data1 and data2 are shape tensors + for (size_t i = 0; i < length; i++) { + if (!shapeData1[i].isParam && !shapeData2[i].isParam) { + outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); + } else if (shapeData1[i].isParam && shapeData2[i].isParam) { + if (shapeData1[i].param == shapeData2[i].param) + outData[i] = ComparisionTrait::Result(1, 1); // comparison of two equal value + else { + fIsOutputConstant = false; + break; + } + } else { fIsOutputConstant = false; break; - } else { - // assume a comparison is done with .dim = 0 - shapeData1[i].dim = 0; } } - outData[i] = ComparisionTrait::Result(static_cast(shapeData1[i].dim), data2[i]); } - } else if (!shapeData1.empty() && !shapeData2.empty() ) { - // both data1 and data2 are shape tensors - for (size_t i = 0; i < length; i++) { - if (!shapeData1[i].isParam && !shapeData2[i].isParam) { - outData[i] = ComparisionTrait::Result(shapeData1[i].dim, shapeData2[i].dim); - } - else if (shapeData1[i].isParam && shapeData2[i].isParam) { - if (shapeData1[i].param == shapeData2[i].param) - outData[i] = ComparisionTrait::Result(1,1); // comparison of two equal value - else 
{ - fIsOutputConstant = false; - break; + if (fIsOutputConstant) { + model.AddConstantTensor(fNY, fShapeY, outData); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << " : " << ConvertValuesToString(length, outData) + << " (constant) " << std::endl; + } + } + delete[] outData; + // case of non constant output (no constant or shape tensors) + if (!fIsOutputConstant && !fShapeY.empty()) { + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fShapeY); + fDimShapeY = ConvertShapeToDim(fShapeY); + if (model.Verbose()) + std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " + << ConvertShapeToString(fShapeY) << std::endl; + } + } else { + // case of dynamic tensors + // case A or B have dynamic shapes. We need to broadcast if shape are not same + auto ret = UTILITY::MultidirectionalBroadcastShape(fDimShapeX1, fDimShapeX2); + fBroadcastFlag = ret.first; + fDimShapeY = ret.second; + // case of all parametric shapes and MultiDirectionalBroadcastShape return the max of the 2 + // need to do before we declare the output tensor shape and the broadcasted ones + if (ret.first & 4) { + // check if one of the parameter is an input dimension + // define function to find this + auto IsInputDimParam = [&](const std::string &p) { + auto inputNames = model.GetInputTensorNames(); + for (auto &input : inputNames) { + for (auto &i_s : model.GetDimTensorShape(input)) { + if (i_s.isParam && i_s.param == p) + return true; } } - else { - fIsOutputConstant = false; - break; + return false; + }; + for (size_t i = 0; i < fDimShapeY.size(); i++) { + auto &s = fDimShapeY[i]; + if (s.isParam && s.param.find("std::max") != std::string::npos) { + if (IsInputDimParam(fDimShapeX1[i].param)) { + // case dim is 1 we indicate that the input parameter is equal to 1 + if (fDimShapeX1[i].dim != 1) + s = fDimShapeX1[i]; + else + s = fDimShapeX2[i]; + } else if (IsInputDimParam(fDimShapeX2[i].param)) { + if (fDimShapeX2[i].dim != 1) + s = fDimShapeX2[i]; + else + s = fDimShapeX1[i]; + } } } } - if (fIsOutputConstant) { - model.AddConstantTensor(fNY, fShapeY, outData); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << " : " - << ConvertValuesToString(length,outData) << " (constant) " << std::endl; + model.AddIntermediateTensor(fNY, ETensorType::BOOL, fDimShapeY); + if (model.Verbose()) { + std::cout << ComparisionTrait::Name() << " : " << fNX1 << " " << ConvertShapeToString(fDimShapeX1) << " , " + << fNX2 << " " << ConvertShapeToString(fDimShapeX2) << " --> " + << fNY << " " << ConvertShapeToString(fDimShapeY) << std::endl; + model.PrintIntermediateTensors(); } } - delete [] outData; - if (!fIsOutputConstant) { - model.AddIntermediateTensor(fNY, ETensorType::BOOL , fShapeY); - if (model.Verbose()) - std::cout << ComparisionTrait::Name() << " op ---> " << fNY << " " << ConvertShapeToString(fShapeY) << std::endl; - } - - // check if this is not output operators to add a specific line for definining the tensor_xxx variable - const auto & outputTensorNames = model.GetOutputTensorNames(); - fIsModelOutput = false; - if (std::find(outputTensorNames.begin(), outputTensorNames.end(), fNY) != outputTensorNames.end()) - fIsModelOutput = true; } std::string Generate(std::string opName) override { if (fIsOutputConstant) return ""; opName = "op_" + opName; - if (fShapeY.empty()) { + if (fDimShapeY.empty()) { throw std::runtime_error("TMVA SOFIE Comparision Op called to Generate 
without being initialized first"); } std::stringstream out; out << SP << "\n//------ " << ComparisionTrait::Name() << " " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; - size_t length = ConvertShapeToLength(fShapeY); - // Broadcast A if it's uninitialized - if (!fNBroadcastedX1.empty()) { - std::string type1 = ConvertTypeToString(fTensorType1); - out << SP << "// Broadcasting uninitialized tensor " << fNX1 << "\n"; - out << SP << "{\n"; - out << SP << SP << type1 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type1 << ">(tensor_" << fNX1 << ", " << ConvertShapeToString(fShapeX1) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX1 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + + // need to add check if tensors are compatible as in binary operator + + // use same code as Binary operator + auto stridesA = UTILITY::ComputeStrideFromShape(fDimShapeX1); + auto stridesB = UTILITY::ComputeStrideFromShape(fDimShapeX2); + auto stridesY = UTILITY::ComputeStrideFromShape(fDimShapeY); + + std::string compute_idx_X1, compute_idx_X2, compute_idx_Y; + if (fDimShapeX1.empty() || + std::all_of(fDimShapeX1.begin(), fDimShapeX1.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X1 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX1.size(); ++i) { + if (fDimShapeX1[i].dim == 1 || fDimShapeX1[i].GetVal() == "1") + continue; + compute_idx_X1 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX1.size())); + if (stridesA[i].GetVal() != "1") + compute_idx_X1 += " * " + stridesA[i].GetVal(); + compute_idx_X1 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X1.pop_back(); } - // Broadcast B if it's uninitialized - if (!fNBroadcastedX2.empty()) { - std::string type2 = ConvertTypeToString(fTensorType2); - out << SP << "// Broadcasting uninitialized tensor " << fNX2 << "\n"; - out << SP << "{\n"; - out << SP << SP << type2 << "* data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast<" << type2 << ">(tensor_" << fNX2 << ", " << ConvertShapeToString(fShapeX2) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNBroadcastedX2 << ");\n"; - out << SP << SP << "delete[] data;\n"; - out << SP << "}\n"; + if (fDimShapeX2.empty() || + std::all_of(fDimShapeX2.begin(), fDimShapeX2.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_X2 = "0"; + } else { + for (size_t i = 0; i < fDimShapeX2.size(); ++i) { + if (fDimShapeX2[i].dim == 1 || fDimShapeX2[i].GetVal() == "1") + continue; + compute_idx_X2 += "idx_" + std::to_string(i + (fDimShapeY.size() - fDimShapeX2.size())); + if (stridesB[i].GetVal() != "1") + compute_idx_X2 += " * " + stridesB[i].GetVal(); + compute_idx_X2 += " + "; + } + // remove last 3 character " + " + for (int j = 0; j < 3; j++) + compute_idx_X2.pop_back(); } - const std::string& nameX1 = fNBroadcastedX1.empty()? fNX1 : fNBroadcastedX1; - const std::string& nameX2 = fNBroadcastedX2.empty()? 
fNX2 : fNBroadcastedX2; - - out << SP << "for (size_t id = 0; id < " << length << " ; id++){\n"; - out << SP << SP << "fTensor_" << fNY << "[id] = " << ComparisionTrait::Op( "tensor_" + nameX1 + "[id]" , "tensor_" + nameX2 + "[id]") << " ;\n"; - out << SP << "}\n"; - // since output is a boolean need to add the tensor_xxx variable since it is not defined as a pointer to a boolean std::vector - if (!fIsModelOutput) - out << SP << "const std::vector & tensor_" << fNY << " = fTensor_" << fNY << ";\n"; + int nloop = 0; + if (fDimShapeY.empty() || + std::all_of(fDimShapeY.begin(), fDimShapeY.end(), [](Dim d) { return d.dim == 1 || d.GetVal() == "1"; })) { + compute_idx_Y = "0"; + } else { + for (size_t i = 0; i < fDimShapeY.size(); ++i) { + if (fDimShapeY[i].dim != 1 && fDimShapeY[i].GetVal() != "1") { + nloop++; + for (int j = 0; j < nloop; j++) out << SP; + out << "for (size_t idx_" << i << " = 0; idx_" << i << " < " << fDimShapeY[i] + << "; ++idx_" << i << "){\n"; + compute_idx_Y += "idx_" + std::to_string(i); + if (stridesY[i].GetVal() != "1") + compute_idx_Y += " * " + stridesY[i].GetVal(); + compute_idx_Y += " + "; + } + } + // remove last 3 characters " + " + for (int j = 0; j < 3; j++) + compute_idx_Y.pop_back(); + } + for (int j = 0; j < nloop + 1; j++) out << SP; + out << "tensor_" << fNY << "[" << compute_idx_Y << "] = " + << ComparisionTrait::Op( "tensor_" + fNX1 + "[" + compute_idx_X1 + "]" , + "tensor_" + fNX2 + "[" + compute_idx_X2 + "]") << " ;\n"; + + + for (int i = nloop; i > 0; i--) { + for (int j = 0; j < i; j++) out << SP; + out << "}\n"; + } + return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx index ad855341dfc17..d8155195c9f49 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Concat.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Concat.hxx @@ -123,7 +123,7 @@ concat_dim = inputs[i][iaxis]; else if (inputs[i][iaxis].isParam || concat_dim.isParam) { concat_dim = - Dim{ concat_dim.GetVal() + std::string("+ ") + inputs[i][iaxis].GetVal(), + Dim{ concat_dim.GetVal() + std::string(" + ") + inputs[i][iaxis].GetVal(), static_cast(-1)}; } else { concat_dim = Dim { concat_dim.dim + inputs[i][iaxis].dim }; @@ -156,7 +156,7 @@ } // output shape for concatenated axis - ret[fAxis] = Dim{concat_dim}; + ret[fAxis] = concat_dim; } // case of stacking (not supported yet) @@ -205,7 +205,7 @@ size_t inputLength = ConvertShapeToLength(inputShape); std::copy(inputData, inputData + inputLength, outputData.begin() + offset ); offset += inputLength; - // data do not need to be written as a weight + // data do not need to be written in teh generated code model.SetNotWritableInitializedTensor(input); } model.AddConstantTensor(fOutput, outputShape, outputData.data()); @@ -221,15 +221,18 @@ std::vector inputData; auto inputShape = model.GetTensorShape(input); // shape is not dynamic size_t inputLength = ConvertShapeToLength(inputShape); // shape can be a scalar - if (model.IsShapeTensor(input)) + if (model.IsShapeTensor(input)) { inputData = model.GetShapeTensorValues(input); - else if (model.IsConstantTensor(input)) { + } else if (model.IsInitializedTensor(input)) { inputData.resize(inputLength); auto intData = static_cast(model.GetInitializedTensorData(input).get()); for (size_t i = 0; i < inputData.size(); i++) inputData[i] = Dim{ static_cast(intData[i])}; } - std::cout << "concatenating input data " << inputLength << " " << inputData[0] << std::endl; + else { + // this should not happen + throw std::runtime_error("TMVA SOFIE Concat Operator- 
invalid input type for shape output type"); + } std::copy(inputData.begin(), inputData.end(), outputData.begin() + offset ); offset += inputLength; } @@ -251,13 +254,15 @@ } std::string Generate(std::string opName) override { - if (fIsOutputConstant) return ""; opName = "op_" + opName; + std::stringstream out; + out<<"\n//--------- Concat " << opName << " --> " << fOutput << " " << ConvertShapeToString(fOutputShape) << "\n"; + + if (fIsOutputConstant) return out.str(); + if(fOutputShape.empty()){ throw std::runtime_error("TMVA SOFIE Concat called to Generate without being initialized first"); } - std::stringstream out; - out<<"\n//--------- Concat " << opName << " --> " << ConvertShapeToString(fOutputShape) << "\n"; // special case when memory is contiguous bool hasShapeOnes = true; for(int i = 0; i0) - out << SP << SP << SP << "idxOut += " << fInputShapes[j-1][fAxis].GetVal() << ";\n"; + out << SP << SP << SP << "idxOut += " << inStrides[j-1][fAxis-1].GetVal() << ";\n"; out << SP << SP << SP << "int idxIn" << j <<" = "; for (int k = 0; k < fAxis; k++) { if (k > 0) out << " + "; out << inStrides[j][k].GetVal() << "*i" << k; } out << ";\n"; - out << SP << SP << SP << "for (size_t iC = 0; iC < " << fInputShapes[j][fAxis].GetVal() << "; ++iC) {\n"; + out << SP << SP << SP << "for (size_t iC = 0; iC < " << inStrides[j][fAxis-1].GetVal() << "; ++iC) {\n"; out << SP << SP << SP << SP << "tensor_" << fOutput << "[idxOut+iC] = tensor_" << fInputs[j] << "[idxIn" << j << "+iC];\n"; out << SP << SP << SP << "}\n"; // concatenate the axis values diff --git a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx index 1cf5d13f5cd6f..93f3c43feceb9 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Constant.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Constant.hxx @@ -128,6 +128,7 @@ public: } } else { model.AddIntermediateTensor(fNY, ConvertStringToType(TensorType::Name()), fDimOutputShape); + fOutputTensorNames.emplace_back(fNY); } } @@ -136,9 +137,9 @@ public: std::stringstream out; if (fIsOutputConstant) { if (fNX.empty()) - out << "// ---- Constant (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- Constant (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; else - out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << ConvertShapeToString(fDimOutputShape) << "\n"; + out << "// ---- ConstantOfShape (no-op) " << opName << " --> " << fNY << " " << ConvertShapeToString(fDimOutputShape) << "\n"; return out.str(); } // Only ConstantOfShape might require generation code @@ -153,9 +154,7 @@ public: } auto length = ConvertDimShapeToLength(fDimOutputShape); // vector is already allocated- fill with values - out << SP << "if (" << length << " > fTensor_" << fNY << ".size())\n"; - out << SP << SP << "fTensor_" << fNY << ".resize(" << length << ");\n"; - out << SP << "std::fill(fTensor_" << fNY << ".begin(), fTensor_" << fNY << ".end(), " << fValues[0] << ");\n"; + out << SP << "std::fill(tensor_" << fNY << ", tensor_" << fNY << " + " << length << ", " << fValues[0] << ");\n"; return out.str(); } }; diff --git a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx index 95f226ca91d4b..823e7fa04717e 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Conv.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Conv.hxx @@ -20,6 +20,8 @@ template class ROperator_Conv final : public ROperator { private: + bool fBroadcastBias = false; + std::string fAttrAutopad; std::vector fAttrDilations; 
size_t fAttrGroup; @@ -30,7 +32,6 @@ private: std::string fNX; std::string fNW; std::string fNB; - std::string fNB2; // bias tensor name after broadcasting std::string fNY; std::string convK; @@ -262,6 +263,9 @@ public: std::runtime_error("TMVA SOFIE Conv op Input Tensor " + fNB + " is not found in model"); } fShapeB = model.GetTensorShape(fNB); + if (fShapeB.size() != 1) + throw + std::runtime_error("TMVA SOFIE Conv op : invalid shape for Bias tensor (is not 1D)"); std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); auto shapeDimB = model.GetDimTensorShape(fNB); bool broadcast_needed = !UTILITY::AreSameShape(shapeDimB, targetShape); @@ -278,7 +282,9 @@ public: if (fType != "float") throw std::runtime_error("TMVA SOFIE Conv op: Broadcasting for non-float type tensors is not supported"); // here is the actual broadcasting + fBroadcastBias = true; if (!fUseSession) { + // do here broadcasting std::vector shape(fDim + 1, 1); shape[0] = fShapeB[0]; auto intTargetShape = ConvertShapeToInt(targetShape); @@ -287,26 +293,28 @@ public: std::default_delete()); model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), intTargetShape, new_data_ptr); fShapeB = model.GetTensorShape(fNB); - fNB2 = fNB; // use same name - } - else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNB2 = fNB + "bcast"; - model.AddIntermediateTensor(fNB2, model.GetTensorType(fNB), targetShape); } } } - // output channel size can be parametric + // output channel size can be parametric and is an expression std::vector outputDims = std::vector(fShapeY.begin()+2, fShapeY.end()); - auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + //check if shape is not parametric + std::vector outputInts = ConvertShapeToInt(outputDims); + Dim channelDim; + if (outputInts.empty()) { + auto outputChannelSize = ConvertDimShapeToLength(outputDims); // size/channel = D * H * W + channelDim = Dim{ outputChannelSize, static_cast(-1)}; + } else { + size_t outputChannelSize = ConvertShapeToLength(outputInts); + channelDim = Dim{ outputChannelSize }; + } size_t kernelSize = fAttrKernelShape[0]; for (size_t i = 1; i < fDim; i++) { kernelSize *= fAttrKernelShape[i]; } std::vector shape1 = {fShapeW[0], fShapeW[1], kernelSize}; - std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, Dim{outputChannelSize}}; + std::vector shape2 = {Dim{fShapeW[1]}, Dim{kernelSize}, channelDim }; model.AddIntermediateTensor(fNX +"_f", ConvertStringToType(fType), shape1 ); model.AddIntermediateTensor(fNX +"_xcol", ConvertStringToType(fType), shape2 ); convK = fNX +"_f"; @@ -325,15 +333,25 @@ public: std::string GenerateInitCode() override { std::stringstream out; // Generate initialization code for broadcasting of bias tensor - if (!fNB2.empty()) { + if (fBroadcastBias) { // include a separate scope to avoid defining unique operator temp variables std::vector shape(fDim + 1, 1); + // bias (is a 1D tensor) shape[0] = fShapeB[0]; std::vector targetShape(fShapeY.begin() + 1, fShapeY.end()); - out << SP << "{\n"; + out << "//--- broadcast bias tensor " << fNB << "for Conv op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(targetShape).empty(); + auto length = ConvertDimShapeToLength(targetShape); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(shape) << ") {\n"; + else + out << SP << "{\n"; out << 
SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" << fNB << ", " << ConvertShapeToString(shape) << ", " << ConvertShapeToString(fShapeY) << ");\n"; - out << SP << SP << "std::copy(data, data + " << ConvertDimShapeToLength(targetShape) << ", tensor_" << fNB2 << ");\n"; + out << SP << SP << "fTensor_" << fNB << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNB << " = fTensor_" << fNB << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNB << ");\n"; out << SP << SP << "delete[] data;\n"; out << SP << "}\n"; } @@ -553,13 +571,13 @@ public: out << SP << SP << "}\n"; // end of group loop } - if (fNB2 != "") { + if (fNB != "") { out << SP << "int " << OpName << "_size = " << outputBatchStride << ";\n"; out << SP << "float " << OpName << "_gamma = 1.0;\n"; out << SP << "int " << OpName << "_incx = 1;\n"; out << SP << "int " << OpName << "_incy = 1;\n"; - out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB2 << ", &" + out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNB << ", &" << OpName << "_incx, tensor_" << fNY << " + out_offset, &" << OpName << "_incy);\n"; } diff --git a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx index 81411b8ebf71a..1d51c59380dae 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gather.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gather.hxx @@ -153,13 +153,14 @@ public: } std::string Generate(std::string opName) override { + opName = "op_" + opName; + std::stringstream out; + out << "//--------- Gather " << opName << " --> " << fNY << " " << ConvertShapeToString(fShapeY) << "\n"; if (fIsOutputConstant) { // no code to generate here for constant output. 
Tensor output is defined in Session constructor - return "//---------------------------------------\n"; + out << "//--------------------(constant)----------\n"; + return out.str(); } - opName = "op_" + opName; - std::stringstream out; - out << "//--------- Gather " << opName << " --> " << ConvertShapeToString(fShapeY) << "\n"; // The shape of the output is q + r - 1 size_t r = fShapeX.size(); // Indices of shape q diff --git a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx index d954720396151..1a0fa7b16868b 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Gemm.hxx @@ -24,6 +24,7 @@ namespace SOFIE{ private: bool fIsDynamic = false; + bool fBroadcastBias = false; float fAttrAlpha = 1.0; float fAttrBeta = 1.0; @@ -33,7 +34,6 @@ namespace SOFIE{ std::string fNA; std::string fNB; std::string fNC = ""; - std::string fNC2; // bias tensor name after broadcasting std::string fNY; std::string fType; EActivationType fActivation; @@ -207,13 +207,7 @@ namespace SOFIE{ } fShapeY = DynamicShapeInference({fShapeA, fShapeB}); - std::vector shapeY; - if (!fIsDynamic) { - shapeY = ConvertShapeToInt(fShapeY); - if (shapeY.empty()) { - throw std::runtime_error("TMVA SOFIE Gemm Op " + fNY + " has invalid shape" + ConvertShapeToString(fShapeY)); - } - } + std::vector shapeY = ConvertShapeToInt(fShapeY); // bias is normally not dynamic (not support it for time being) if (fNC != ""){ @@ -222,14 +216,18 @@ namespace SOFIE{ throw std::runtime_error("TMVA SOFIE Gemm Op Input Tensor" + fNC + " is dynamic and is not supported"); } fShapeC = model.GetTensorShape(fNC); - fNC2 = fNC; size_t lengthC = ConvertShapeToLength(fShapeC); size_t lengthY = ConvertShapeToLength(shapeY); // for dynamic outputs broadcasting is always done - bool broadcast_needed = lengthC != lengthY; + bool broadcast_needed = false; + if (fIsDynamic && shapeY.empty()) + broadcast_needed = true; + else + broadcast_needed = lengthC != lengthY; if (broadcast_needed) { + fBroadcastBias = true; if (!model.UseSession()) { // without session dynamic tensors not supported in Gemm if (fIsDynamic) { @@ -246,14 +244,18 @@ namespace SOFIE{ fShapeC = shapeY; } } else { - // In case of session add broadcasting code in Session constructor and in GenerateInitCode - // we need to add a new intermediate tensor for broadcasted bias tensor - fNC2 = fNC + "bcast"; - if (!fIsDynamic) { - model.AddIntermediateTensor(fNC2, model.GetTensorType(fNC), shapeY); - } - else - model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // /d to add a new intermediate tensor for broadcasted bias tensor + // fNC2 = fNC + "bcast"; + // if (!fIsDynamic) { + // model.AddIntermed/ In case of session add broadcasting code in Session constructor and in GenerateInitCode + // // we neeiateTensor(fNC2, model.GetTensorType(fNC), shapeY); + // } + // else + // model.AddDynamicTensor(fNC2,model.GetTensorType(fNC), fShapeY); + // // do not add to lists of input/output tensors since broadcasted tensors are special + // // and we manage their memory separatly + // //fInputTensorNames.emplace_back(fNC2); + // //fOutputTensorNames.emplace_back(fNC2); } } } @@ -291,18 +293,26 @@ namespace SOFIE{ std::string GenerateInitCode() override { std::stringstream out; // generate initialization code for broadcasting of bias tensor - if (fShapeC.size() != fShapeY.size() && fNC != fNC2) { + if (fShapeC.size() != fShapeY.size() && fBroadcastBias) { // we broadcast here always C in Y output, so target shape is the one of Y // no need 
to call UTILITY::UnidirectionalBroadcastShape. // here in case of parametric shape we need to assume that the parameters will be defined in the initialization code. - auto targetShape = fShapeY; - // include a separate scope to avoid defining unique operator temp variables - out << "//--- broadcast bias tensor " << fNC << "for Gemm op\n"; - out << SP << "{\n"; - out << " float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" - << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; auto length = ConvertDimShapeToLength(fShapeY); // output size - out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC2 << ");\n"; + // include a separate scope to avoid defining unique operator temp variables + out << "//--- broadcast bias tensor " << fNC << "for Gemm op if needed \n"; + // in case of dynamic tensors check needs to be done at run time + bool isOutDynamic = ConvertShapeToInt(fShapeY).empty(); + if (isOutDynamic) + out << SP << "if (" << length << " > " << ConvertShapeToLength(fShapeC) << ") {\n"; + else + out << SP << "{\n"; + // here we broadcast + out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::UnidirectionalBroadcast(tensor_" + << fNC << "," << ConvertShapeToString(fShapeC) << ", " << ConvertShapeToString(fShapeY) << ");\n"; + + out << SP << SP << "fTensor_" << fNC << ".resize(" << length << ");\n"; + out << SP << SP << "tensor_" << fNC << " = fTensor_" << fNC << ".data();\n"; + out << SP << SP << "std::copy(data, data + " << length << ", tensor_" << fNC << ");\n"; out << SP << SP << "delete [] data;\n"; out << SP << "}\n"; } @@ -338,7 +348,7 @@ namespace SOFIE{ // case bias is present if (!fNC.empty()){ - if (fNC2 == fNC) { + if (!fBroadcastBias) { // add a check in case broadcasting was not needed or done outside of session // C should have smaller dimension of Y if (!fIsDynamic) { @@ -347,7 +357,7 @@ namespace SOFIE{ + ConvertShapeToString(fShapeC) + " output length " + lengthGemm); } else { // add a dynamic check (C should not be a dynamic tensor) - out << SP << "assert(" << lengthGemm << " != " << ConvertShapeToLength(fShapeC) << ");\n"; + out << SP << "assert(" << lengthGemm << " == " << ConvertShapeToLength(fShapeC) << ");\n"; } } } else { @@ -381,7 +391,7 @@ namespace SOFIE{ out << std::setprecision(std::numeric_limits::max_digits10) << fAttrBeta << ","; // in the case of bias if (!fNC.empty()) - out << "tensor_" << fNC2; + out << "tensor_" << fNC; else out << "nullptr"; out << ");\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_Range.hxx b/tmva/sofie/inc/TMVA/ROperator_Range.hxx index 9cac15a14fc52..16d2cb689d518 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Range.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Range.hxx @@ -37,15 +37,10 @@ public: } static_assert( (std::is_same_v || std::is_same_v), "TMVA::SOFIE - Unsupported type by Range operator"); - } - - std::vector TypeInference(std::vector input) override { - return input; - } - - std::vector> ShapeInference(std::vector> input) override { - auto ret = input; //suggest copy to compiler - return ret; + { + fInputTensorNames = { fNStart, fNLimit, fNDelta }; + fOutputTensorNames = { fNOutput }; + } } void Initialize(RModel& model) override { @@ -63,32 +58,94 @@ public: std::runtime_error("TMVA SOFIE Range Op Input Tensor " + fNDelta + "is not found in model"); } ETensorType type = ConvertStringToType(fType); - if (model.IsInitializedTensor(fNStart) && model.IsInitializedTensor(fNDelta) && model.IsInitializedTensor(fNLimit)) { 
- T * start = static_cast(model.GetInitializedTensorData(fNStart).get()); - T * limit = static_cast(model.GetInitializedTensorData(fNLimit).get()); - T * delta = static_cast(model.GetInitializedTensorData(fNDelta).get()); - if (!start || !delta || !limit) - std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); - T a = *start; - T b = *limit; - T d = *delta; - int number_of_elements = std::max( static_cast(std::ceil( (b - a) / d )) , 0. ); + + + + auto analyzeInput = [&](const std::string & tName, T & value, Dim & dim) { + int ftype = 0; // type of input (0 intermediate, 1 constant , 2 shape) + if (model.IsInitializedTensor(tName)) { + T * data = static_cast(model.GetInitializedTensorData(tName).get()); + if (!data) + std::runtime_error("TMVA SOFIE Range Op Input Tensor has invalid input data"); + value = *data; + ftype = 1; + } else if (model.IsShapeTensor(tName)) { + auto data = model.GetShapeTensorValues(tName); + dim = data[0]; + if (!dim.isParam) { + value = static_cast(dim.dim); + ftype = 1; + } else + ftype = 2; + } + return ftype; + }; + + T start_value; + T limit_value; + T delta_value; + Dim start_dim; + Dim limit_dim; + Dim delta_dim; + int res1 = analyzeInput(fNStart, start_value, start_dim); + int res2 = analyzeInput(fNLimit, limit_value, limit_dim); + int res3 = analyzeInput(fNDelta, delta_value, delta_dim); + if (res1 == 0 || res2 == 0 || res3 == 0) { + // cannot know at compile time- need to do fully at run time + // + fShape = {Dim{"range_size_" + fNStart + "_" + fNLimit}}; + model.AddDynamicTensor(fNOutput, type, fShape); + } else if (res1 == 1 && res2 == 1 && res3 == 1) { + size_t number_of_elements = std::max(static_cast(std::ceil((limit_value - start_value) / delta_value )) , 0 ); + fIsOutputConstant = true; + + // compute output std::vector output(number_of_elements); - for (int i=0; i shape = {static_cast(number_of_elements)}; + std::vector shape = {number_of_elements}; model.AddConstantTensor(fNOutput,shape, output.data()); - fIsOutputConstant = true; - // set the input tensor not writable + fShape = ConvertShapeToDim(shape); + + // set the input tensor not writable model.SetNotWritableInitializedTensor(fNStart); model.SetNotWritableInitializedTensor(fNDelta); model.SetNotWritableInitializedTensor(fNLimit); + + } else { // case of a shape tensor + std::string start = (res1 == 1) ? std::to_string(start_value) : start_dim.GetVal(); + std::string limit = (res2 == 1) ? std::to_string(limit_value) : limit_dim.GetVal(); + std::string delta = (res3 == 1) ? 
std::to_string(delta_value) : delta_dim.GetVal(); + std::stringstream s; + if (type == ETensorType::FLOAT ) { + if (delta_value == 1) + s << "std::max(std::ceil("<< limit << " - " << start << "),0.0f)"; + else + s << "std::max(std::ceil(("<< limit << " - " << start << ")/" << delta << "),0.0f)"; + } else if (type == ETensorType::INT64 ) { + if (delta == "1") { + if (start == "0") + s << limit; + else + s << "std::max((" << limit << " - " << start << "),0L)"; + } else { + if (start == "0") + s << "((" << limit << ")/" << delta << ")"; + else + s << "std::max((" << limit << " - " << start << ")/"<< delta << "),0L)"; + } + } else { + throw + std::runtime_error("TMVA SOFIE Range Op Input Tensor " + ConvertTypeToString(type) + "is not supported"); + } + + + fShape = { Dim {s.str(), static_cast(-1)} }; + model.AddDynamicTensor(fNOutput,type, fShape); } - else { - fShape = {Dim{"range_size"}}; - model.AddDynamicTensor(fNOutput, type, fShape); - } + + if (model.Verbose()) { std::cout << "Range -> output is " << fNOutput << " : " << ConvertShapeToString(fShape); if (fIsOutputConstant) std::cout << " : " << ConvertValuesToString(model.GetTensorData(fNOutput)); @@ -96,26 +153,32 @@ public: } } - std::string Generate(std::string OpName) override { + std::string Generate(std::string opName) override { std::stringstream out; - out << "\n//------ Range\n"; + out << "\n//------ Range " << opName << "---> " << ConvertDimShapeToString(fShape) << "\n"; if (fIsOutputConstant) return out.str(); - OpName = "op_" + OpName; + opName = "op_" + opName; if (fShape.empty()) { throw std::runtime_error("TMVA SOFIE Range operator called to Generate without being initialized first"); } std::string sizeName = fShape[0].param; - out << SP << "size_t " << sizeName << " = static_cast(std::max(std::ceil((static_cast(*tensor_" << fNLimit << ") - static_cast(*tensor_" << fNStart << ")) / static_cast(*tensor_" << fNDelta << ")), 0.0f));\n"; - out << SP << "if (" << sizeName << " > " << "fTensor_" << fNOutput << ".size() ){\n"; - out << SP << SP << "fTensor_" << fNOutput << ".resize(" << sizeName << ");\n"; - // need to re-initialized pointer to tensor data - out << SP << SP << "tensor_" << fNOutput << " = fTensor_" << fNOutput << ".data();\n"; - out << SP << "}\n"; - out << SP << "for (size_t i = 0; i < " << sizeName << "; i++) {\n"; - out << SP << SP << "fTensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + if (sizeName.find("range_size") != std::string::npos) + sizeName = "static_cast(std::max(std::ceil((static_cast(*tensor_" + fNLimit + + ") - static_cast(*tensor_" + fNStart + ")) / static_cast(*tensor_" + fNDelta + ")), 0.0f))"; + out << SP << "{\n"; + out << SP << SP << "size_t range" << " = " << sizeName << ";\n"; + if (sizeName != fShape[0].param) { + out << SP << SP << "if ( range > " << "fTensor_" << fNOutput << ".size() ){\n"; + // we should probably resize the tensor here + out << SP << SP << SP << "throw std::runtime_error(\"wrong size allocated for output of range\");\n"; + out << SP << SP << "}\n"; + } + out << SP << SP << "for (size_t i = 0; i < range; i++) {\n"; + out << SP << SP << SP << "tensor_" << fNOutput << "[i] = *tensor_" << fNStart << " + i * (*tensor_" << fNDelta << ");\n"; + out << SP << SP << "}\n"; out << SP << "}\n"; return out.str(); } diff --git a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx index 1204770d3d321..1da588e965a01 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx +++ 
b/tmva/sofie/inc/TMVA/ROperator_Reduce.hxx @@ -166,7 +166,7 @@ public: std::string reducedLength; if (fInputDimShape) { reducedLength = "reducedLength_" + opName; - out << SP << "size_t " << reducedLength << " = " << inputLength << " / " << outputLength << ";\n"; + out << SP << "size_t " << reducedLength << " = (" << inputLength << ") / (" << outputLength << ");\n"; } else { int rLength = std::stoi(inputLength) / std::stoi(outputLength); reducedLength = std::to_string(rLength); diff --git a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx index 2634b68dbc875..a3ed28c4860bc 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Reshape.hxx @@ -108,6 +108,9 @@ public: if (IsInteger(tmp_length) && IsInteger(input_length)) output_shape[i] = Dim{static_cast(std::stoi(input_length) / std::stoi(tmp_length))}; + else if (IsInteger(tmp_length) && std::stoi(tmp_length) == 1) { + output_shape[i] = Dim{input_length, static_cast(-1)}; + } else { //we can try simplifying expression if tmp_length is integer and part of input_length // contains tmp_length @@ -243,7 +246,7 @@ public: // check if optional tensor exists defining shape or axes if (!fNInput2.empty()) { if (model.CheckIfTensorAlreadyExist(fNInput2)) { - if (model.IsConstantTensor(fNInput2) || model.IsInitializedTensor(fNInput2)) { + if (model.IsInitializedTensor(fNInput2)) { // assume input shape is an initialized tensor auto dptr = model.GetInitializedTensorData(fNInput2); auto values = static_cast(dptr.get()); @@ -260,6 +263,9 @@ public: fShapeOutput = ShapeInference({fShapeInput})[0]; // set flag to not write tensor in weight file. Its data will be hard-coded in way model is constructed model.SetNotWritableInitializedTensor(fNInput2); + } else if (model.IsShapeTensor(fNInput2)) { + auto shapeData = model.GetShapeTensorValues(fNInput2); + fShapeOutput = shapeData; } else { // we cannot get shape at initialization time but at run-time fDynamicShape = true; diff --git a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx index b23e3b0a86d21..3add774b0d8d4 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Slice.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Slice.hxx @@ -235,6 +235,8 @@ public: if (iend < 0) { std::string send = std::string("(") + fShapeInput[fAxes[i]].param + "-" + std::to_string(-iend) +")"; fEnd[fAxes[i]] = Dim{send,size_t(-1)}; + } else if (iend == std::numeric_limits::max()){ + fEnd[fAxes[i]] = fShapeInput[fAxes[i]]; } else { fEnd[fAxes[i]] = Dim{size_t(iend)}; } @@ -332,23 +334,23 @@ public: else { model.AddIntermediateTensor(fNOutput, model.GetTensorType(fNData), fShapeOutput); if (model.Verbose()) { - std::cout << "Slice ---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; + std::cout << "Slice " << fNData << " " << ConvertShapeToString(fShapeInput) + << "---> " << fNOutput << " " << ConvertShapeToString(fShapeOutput) << std::endl; } } } - std::string Generate(std::string OpName) override { - if (fIsOutputConstant) return ""; //no op for constant tensors + std::string Generate(std::string opName) override { - OpName = "op_" + OpName; if (fShapeInput.empty() || fShapeOutput.empty()){ throw std::runtime_error("TMVA SOFIE Slice Op called to Generate without being initialized first"); } std::stringstream out; - //std::string opName = "Slice"; - out << SP << "///------- Slice operator\n" << std::endl; + out << "///------- Slice operator " << opName << "---> " << fNOutput << " " + << 
ConvertDimShapeToString(fShapeOutput) << "\n" << std::endl; + if (fIsOutputConstant) return out.str(); //no op for constant tensors // loop on the dimensions depending no the orders size_t ndim = fShapeInput.size(); auto strides = UTILITY::ComputeStrideFromShape(fShapeInput); diff --git a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx index 1086f72eae71c..9b291b40e0854 100644 --- a/tmva/sofie/inc/TMVA/ROperator_Tile.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_Tile.hxx @@ -20,8 +20,8 @@ private: std::string fNRepeats; std::string fNInput; std::string fNY; - std::vectorfShapeInput; - std::vector fShapeY; + std::vectorfShapeInput; + std::vector fShapeY; public: ROperator_Tile(){} @@ -35,13 +35,18 @@ public: return input; } - std::vector> ShapeInference(std::vector> input) override { - std::vector ret = input[0]; - - for(size_t i=0; i < input[1].size(); i++) { - ret[i]=ret[i]*input[1][i]; + std::vector DoShapeInference(const std::vector & input, const std::vector repeat) { + std::vector ret = input; + for(size_t i=0; i < repeat.size(); i++) { + if (repeat[i] != 1) { + if (ret[i].isParam) { + ret[i] = Dim{ std::string(ret[i].GetVal() + "*" + std::to_string(repeat[i])), static_cast(-1) }; + } else { + ret[i]=Dim { ret[i].dim *repeat[i] }; + } + } } - return {ret}; + return ret; } void Initialize(RModel& model) override { @@ -52,7 +57,7 @@ public: if (model.CheckIfTensorAlreadyExist(fNRepeats) == false){ throw std::runtime_error("TMVA SOFIE Tile Op Input Tensor is not found in model"); } - fShapeInput=model.GetTensorShape(fNInput); + fShapeInput=model.GetDimTensorShape(fNInput); // if repeats vector is not initialized we cannot deduce shape of output // not support for time being this case @@ -79,12 +84,12 @@ public: std::copy(repeats_data, repeats_data + num_elements, repeats_vector.begin()); - fShapeY = ShapeInference({fShapeInput,repeats_vector})[0]; + fShapeY = DoShapeInference(fShapeInput,repeats_vector); model.AddIntermediateTensor(fNY, model.GetTensorType(fNInput), fShapeY); if (model.Verbose()) - std::cout << "Tile: " << fNInput << " " << ConvertShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertShapeToString(fShapeY) + std::cout << "Tile: " << fNInput << " " << ConvertDimShapeToString(fShapeInput) << " -> " << fNY << " with shape " << ConvertDimShapeToString(fShapeY) << " given repeats " << ConvertShapeToString(repeats_vector) << std::endl; } @@ -103,9 +108,9 @@ public: std::string output = "tensor_" + fNY; out << "///-------- Tile operator\n"; out << "{\n"; // add scope to re-use same names - out << "const int input_shape[" << fShapeInput.size() << "] = " << ConvertShapeToString(fShapeInput) << ";\n"; + out << "const size_t input_shape[" << fShapeInput.size() << "] = " << ConvertDimShapeToString(fShapeInput) << ";\n"; - out << "int inputLength = " << ConvertShapeToLength(fShapeInput) << ";\n"; + out << "int inputLength = " << ConvertDimShapeToLength(fShapeInput) << ";\n"; out << "int s = 1;\n"; // loop from inverse dim order out << "for (int i = " << fShapeInput.size()-1 << "; i >=0; i--) {\n"; diff --git a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx index 0869437bb6b0c..edee91de8eb57 100644 --- a/tmva/sofie/inc/TMVA/ROperator_TopK.hxx +++ b/tmva/sofie/inc/TMVA/ROperator_TopK.hxx @@ -19,13 +19,13 @@ private: int fAttrLargest; int fAttrSorted; - size_t fK; + Dim fK; std::string fNK; std::string fNX; std::string fNVal; std::string fNInd; - std::vector fShapeX; - std::vector fShapeY; + std::vector 
fShapeX; + std::vector fShapeY; std::string fType; public: @@ -43,23 +43,10 @@ public: } std::vector TypeInference(std::vector input) override { - ETensorType ret = input[0]; - return {ret, ret}; - } - - std::vector> ShapeInference(std::vector> input) override { - if (input.size() != 2) { - throw std::runtime_error("TMVA SOFIE TopK Op Shape Inference needs exactly 2 input tensors"); - } - - auto shape = input[0]; // Shape format: [ m x n x o x p ... ] - - // set the dimension at the specified axis to k (fAttrAxis is checked before that is in the correct range - shape[fAttrAxis] = fK; // Modified shape: [ m x n x k x p ... ] - return {shape, shape}; + ETensorType ret = input[0]; + return {ret, ret}; } - void Initialize(RModel& model) override { if (model.CheckIfTensorAlreadyExist(fNX) == false) { // input must be a graph input, or already initialized intermediate tensor @@ -70,10 +57,10 @@ public: throw std::runtime_error("TMVA SOFIE TopK Op Input Tensor i.e. K is not found in model"); } - fShapeX = model.GetTensorShape(fNX); + fShapeX = model.GetDimTensorShape(fNX); auto fShapeK = model.GetTensorShape(fNK); auto kptr = static_cast(model.GetInitializedTensorData(fNK).get()); - fK = *kptr; + size_t kval = *kptr; model.SetNotWritableInitializedTensor(fNK); fAttrAxis = fAttrAxis < 0 ? fShapeX.size() + fAttrAxis : fAttrAxis; if(static_cast(fAttrAxis) >= fShapeX.size()){ @@ -81,14 +68,25 @@ public: std::runtime_error("TMVA::SOFIE ONNX TopK op axis = "+ std::to_string(fAttrAxis) +" value exeeds size of tensor " +fNX+" of size "+fShapeX.size()+" ."); } // fK cannot be larger that axis dimension - fK = std::min(fK, fShapeX[fAttrAxis]); + if (fShapeX[fAttrAxis].isParam) + fK = Dim{std::string("std::min(size_t(" + std::to_string(kval) + "), " + fShapeX[fAttrAxis].GetVal() + ")" ), static_cast(-1) }; + else + fK = Dim { std::min(kval, fShapeX[fAttrAxis].dim) }; + + // output shape is equal to input shape apart for value in fAttrAxis + fShapeY = fShapeX; + fShapeY[fAttrAxis] = Dim{fK}; - fShapeY = ShapeInference({fShapeX, fShapeK})[0]; model.AddIntermediateTensor(fNVal, model.GetTensorType(fNX), fShapeY); // output indices should be an int64 tensor model.AddIntermediateTensor(fNInd, ETensorType::INT64, fShapeY); fType = ConvertTypeToString(model.GetTensorType(fNX)); + + if (model.Verbose()) { + std::cout << "TopK " << fNX << " " << ConvertShapeToString(fShapeX) + << "---> " << fNVal << " " << ConvertShapeToString(fShapeY) << std::endl; + } } std::string Generate(std::string OpName) override { @@ -101,19 +99,20 @@ public: size_t axis = fAttrAxis < 0 ? size + fAttrAxis : fAttrAxis; out << "\n" << SP << "//------ TopK\n"; - size_t length=ConvertShapeToLength(fShapeX); + auto length=ConvertDimShapeToLength(fShapeX); auto strideX = UTILITY::ComputeStrideFromShape(fShapeX); auto strideY = UTILITY::ComputeStrideFromShape(fShapeY); // we perform loop on dimension before sorted axis and after sorted axis - size_t n_before = (axis>0) ? length/strideX[axis-1] : 1; - size_t n_after = strideX[axis]; - size_t n_elements = fShapeX[axis]; // number of elements to be sorted + std::vector shape_before(fShapeX.begin(), fShapeX.begin() + axis); // input shape before axis + std::string n_before = (axis>0) ? 
ConvertDimShapeToLength(shape_before) : "1"; + std::string n_after = strideX[axis].GetVal(); + std::string n_elements = fShapeX[axis].GetVal(); // number of elements to be sorted // } out << SP << "{\n"; // to define a separate scope for the operator code out << SP << "std::vector> elements(" << n_elements << ");\n"; // loop on elements before - if (n_before > 1) { + if (n_before != "1") { out << SP << "for (size_t i = 0; i < " << n_before << "; i++) {\n"; out << SP << SP << "size_t xoffset = i*" << strideX[axis-1] << ";\n"; out << SP << SP << "size_t yoffset = i*" << strideY[axis-1] << ";\n"; @@ -122,7 +121,7 @@ public: out << SP << "size_t xoffset = 0;\n"; out << SP << "size_t yoffset = 0;\n"; } - if (n_after > 1) + if (n_after != "1") out << SP << "for (size_t j = 0; j < " << n_after << "; j++) {\n"; else out << SP << "const size_t j = 0;\n"; @@ -149,8 +148,8 @@ public: out << SP << SP << SP << "tensor_" << fNVal << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].first;\n"; out << SP << SP << SP << "tensor_" << fNInd << "[yoffset + " << strideY[axis] << "*l + j] = elements[l].second;\n"; out << SP << SP << "}\n"; - if (n_after > 1) out << SP << SP << "}\n"; - if (n_before> 1) out << SP << "}\n"; + if (n_after != "1") out << SP << SP << "}\n"; + if (n_before != "1") out << SP << "}\n"; out << SP << "}\n"; // end operator scope return out.str(); } diff --git a/tmva/sofie/inc/TMVA/SOFIE_common.hxx b/tmva/sofie/inc/TMVA/SOFIE_common.hxx index 2dae4f7d03ce7..7abb7df68d997 100644 --- a/tmva/sofie/inc/TMVA/SOFIE_common.hxx +++ b/tmva/sofie/inc/TMVA/SOFIE_common.hxx @@ -252,8 +252,14 @@ public: bool IsConstantTensor() const { return fConstant;} // query if tensor needs to be written in a weight file. Constant tensors are not written in a file bool IsWeightTensor() const { return !fConstant && !fIsNotWritable;} + // check if a Tensor is Writable (need to be written in teh file or in the generated code (e.g. as a costant tensor) + // if an initialized tensors is used in a constant operator at compile time does not need to be written and can be omitted in + // the generated code + bool IsNotWritable() const { return fIsNotWritable; } // set not writable initialized tensors - i.e. tensor that must not be written in a file void SetNotWritable() { fIsNotWritable = true;} + // set as constant (needed for non-flot initialized tensors) + void SetConstant() { fConstant = true;} template T const *data() const @@ -805,6 +811,22 @@ void ReadTensorFromStream(std::istream &is, T &target, std::string const &expect } } + +// code for the memory greeding allocations +struct TensorLifeInfo { + int begin; // start time (op index) lifetime + int end; // end time lifetime + size_t size; // size of tensors in bytes +}; + +struct MemoryResult { + std::size_t total_bytes = 0; // total memory needed + std::vector offsets; // resulted offsets for each tensor +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ); + } // namespace SOFIE } // namespace Experimental } // namespace TMVA diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx index 3e2c2d6ed332f..8bc4d4e048a30 100644 --- a/tmva/sofie/src/RModel.cxx +++ b/tmva/sofie/src/RModel.cxx @@ -167,16 +167,15 @@ void RModel::AddOperator(std::unique_ptr op, int order_execution) { } // storing the last usage of tensors which are input to - // operators (but are not inputs to the model, i.e. they are intermediate - // tensors). 
This information is needed to keep a check on when a - // particular intermediate tensor can be flushed to free up memory for reuse. + // operators (but are not inputs to the model or they are not initialized) + // We call this function during parsing so we don't have yet initialized the operators for(size_t index = 0; index & inputParams, bool fIntermediateTensorInfos.clear(); fDynamicTensorInfos.clear(); + // loop on inputs and see if shape can be full specified // if the batch size is provided it can be used to specify the full shape // Add the full specified tensors in fReadyInputTensors collection @@ -581,7 +582,7 @@ void RModel::Initialize(const std::map & inputParams, bool if (fUseWeightFile) { bool modelHasWeights = false; for (auto &i : fInitializedTensors) { - if (i.second.type() == ETensorType::FLOAT) { + if (i.second.IsWeightTensor()) { modelHasWeights = true; break; } @@ -602,16 +603,24 @@ void RModel::Initialize(const std::map & inputParams, bool fOperators[op_idx]->Initialize(*this); for(auto &it:fOperators[op_idx]->GetOpOutputTensors()){ std::string name = std::string{it}; + // check if tensor is not an initialized or output tensor and it is not already in the list if (fIntermediateTensorFrequencyLookup.find(it) == fIntermediateTensorFrequencyLookup.end() && std::find(fOutputTensorNames.begin(), fOutputTensorNames.end(), name) == fOutputTensorNames.end() && - fInitializedTensors.find(name) == fInitializedTensors.end() && - fDynamicTensorInfos.find(name) == fDynamicTensorInfos.end()){ + fInitializedTensors.find(name) == fInitializedTensors.end()) + { fIntermediateTensorFrequencyLookup[it] = op_idx; } } i++; } + // loop on initialized tensors and make the integers as constant to be + // not written in a weight file + for (auto &it : fInitializedTensors) { + if (it.second.IsWeightTensor() && it.second.type() != ETensorType::FLOAT) + it.second.SetConstant(); + } + fIsInitialized = true; } @@ -684,9 +693,11 @@ std::string GenerateConstantTensorCode(const std::pair(i); @@ -764,16 +775,21 @@ void RModel::GenerateIntermediateTensorInfo() { fGC += "//--- declare the dynamic tensors\n"; for (auto &i : fDynamicTensorInfos) { if (i.second.type == ETensorType::FLOAT) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "float * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::DOUBLE) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "double * tensor_" + i.first + " = nullptr;\n"; } else if (i.second.type == ETensorType::INT64) { - fGC += "std::vector fTensor_" + i.first + ";\n"; + //fGC += "std::vector fTensor_" + i.first + ";\n"; fGC += "int64_t * tensor_" + i.first + " = nullptr;\n"; + } else if (i.second.type == ETensorType::BOOL) { + //fGC += "std::vector fTensor_" + i.first + ";\n"; + fGC += "uint8_t * tensor_" + i.first + " = nullptr;\n"; } } + fGC += "//--- dynamic tensors pool\n"; + fGC += "std::vector fDynamicMemoryPool;\n"; } } @@ -791,14 +807,81 @@ void RModel::GenerateOperatorDeclarations() { void RModel::GenerateDynamicTensorInfo() { + // generate code for allocating dynamic tensors using the greedy memory allocations + if (fDynamicTensorInfos.empty()) + return; + std::stringstream out; + out << "// dynamic tensor memory management\n"; + out << SP << "std::vector dynamicTensorInfos;\n"; + out << SP << "dynamicTensorInfos.reserve(" << fDynamicTensorInfos.size() << ");\n"; + + // loop on all the operators to find begin/end 
life of the tensors + int op_index = 0; + std::vector> tensors; + tensors.reserve(fDynamicTensorInfos.size()); + for (auto & op : fOperators) { + // loop on output tensors - + for (auto &it : op->GetOpOutputTensors()) { + if (fVerbose) { + auto op_ptr = op.get(); + std::cout << "Looping on operator " << op_index << " " << typeid(*op_ptr).name() << std::endl; + } + // check if is a dynamic tensor + std::string name = std::string(it); + if ( fDynamicTensorInfos.find(name) != fDynamicTensorInfos.end() ) { + auto tensor_size = ConvertDimShapeToLength(GetDimTensorShape(name)); + auto type = GetTensorType(name); + size_t type_size = GetTypeSize(type); + int begin = op_index; + int end = fOperators.size(); + // look for end + auto it_lookup = fIntermediateTensorFrequencyLookup.find(name); + if (it_lookup != fIntermediateTensorFrequencyLookup.end()) + end = it_lookup->second + 1; // end is last time used + 1 + // // some tensors (like xcol in convolutions) are just used within the operators + // if (end == 0 && begin > 0) end = begin+1; + + if (begin> end) { + std::cout << "op " << op_index << "tensor_" << name << " begin " << begin << " " << " end " << end << std::endl; + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo: tensor_" + name + " has end before begin"); + } + + // write in code + out << SP << "dynamicTensorInfos.push_back( {" << begin << ", " << end << ", " << type_size << "* (" << tensor_size << ") });" + << " // tensor_" << name << std::endl; + tensors.push_back({name,type}); + } + } + op_index++; // increment operator index + } + out << "\n" << SP << "auto memory_result = OrganizeMemory(dynamicTensorInfos);\n\n"; + out << "// allocating now the memory\n"; + out << SP << "fDynamicMemoryPool = std::vector(memory_result.total_bytes);\n"; + out << SP << "int idx = 0;\n"; + for (auto & it : tensors) { + out << SP << "tensor_" << it.first << " = reinterpret_cast<" << ConvertTypeToString(it.second) << " *>(fDynamicMemoryPool.data() + memory_result.offsets[idx++]);\n"; + } + // check that all dynamic tensors are covered + bool missingTensor = false; for (auto &i : fDynamicTensorInfos) { - auto length = ConvertDynamicShapeToLength(i.second.shape); - out << SP << "if (" << length << " > 0) {\n"; - out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; - out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; - out << SP << "}\n"; + if (std::find(tensors.begin(), tensors.end(), std::pair{i.first, i.second.type}) == tensors.end()) { + std::cout << "Dynamic tensors " << i.first << " is not in list of operator input/output " << std::endl; + missingTensor = true; + } } + if (missingTensor) + throw std::runtime_error("TMVA-SOFIE: RModel::GenerateDynamicTensorInfo - some tensors are not in input/output list"); + + + + // for (auto &i : fDynamicTensorInfos) { + // auto length = ConvertDynamicShapeToLength(i.second.shape); + // out << SP << "if (" << length << " > 0) {\n"; + // out << SP << SP << "fTensor_" << i.first << ".resize(" << length << ");\n"; + // out << SP << SP << "tensor_" << i.first << " = fTensor_" << i.first << ".data();\n"; + // out << SP << "}\n"; + // } fGC += out.str(); } @@ -1143,7 +1226,7 @@ void RModel::ReadInitializedTensorsFromFile(long pos) { std::string length = std::to_string(ConvertShapeToLength(i.second.shape())); fGC += " ReadTensorFromStream(f, " + tensor_name + ", \"" + tensor_name + "\", " + length + ");\n"; } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + 
ConvertTypeToString(i.second.type()) + " cannot be read from a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be read from a file"); } } fGC += " f.close();\n"; @@ -1288,7 +1371,7 @@ long RModel::WriteInitializedTensorsToFile(std::string filename) { } } else { - std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); + throw std::runtime_error("tmva-sofie tensor " + tensor_name + " with type " + ConvertTypeToString(i.second.type()) + " cannot be written to a file"); } if (f.fail()) std::runtime_error("tmva-sofie failed to write tensor data to file for " + tensor_name); diff --git a/tmva/sofie/src/SOFIE_common.cxx b/tmva/sofie/src/SOFIE_common.cxx index c107b489be19e..1ff510842643a 100644 --- a/tmva/sofie/src/SOFIE_common.cxx +++ b/tmva/sofie/src/SOFIE_common.cxx @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace TMVA { namespace Experimental { @@ -89,7 +91,7 @@ std::string ConvertTypeToString(ETensorType type){ return "double"; } case ETensorType::BOOL : { - return "bool"; + return "uint8_t"; } default:{ return "other_" + std::to_string( (int) type); @@ -547,6 +549,130 @@ std::vector UTILITY::ComputeStrideFromShape(const std::vector & shape) return strides; } +struct FreeBlock { + std::size_t offset; + std::size_t size; + bool operator<(const FreeBlock& other) const { + // order by offset for deterministic coalescing + return offset < other.offset; + } +}; + +struct MemoryEvent { + int t; // time (i.e. operator index) + int type; // 0 = END first, 1 = START + int idx; // tensor index + bool operator<(const MemoryEvent& o) const { + if (t != o.t) return t < o.t; + return type < o.type; // END before START at the same time + } +}; + +/// Greedy best-fit planner with coalescing free list. +MemoryResult OrganizeMemory(const std::vector & tensorsInfo ) +{ + // Basic validation + for (const auto &t : tensorsInfo) { + if (!(t.end > t.begin)) { + throw std::runtime_error("Each tensor must have end > begin."); + } + } + + // Build events: free before allocate at equal times. + std::vector events; + events.reserve(tensorsInfo.size() * 2); + for (int i = 0; i < (int)tensorsInfo.size(); ++i) { + events.push_back({tensorsInfo[i].end, 0, i}); // END + events.push_back({tensorsInfo[i].begin, 1, i}); // START + } + std::sort(events.begin(), events.end()); + + std::vector tensorsOffset(tensorsInfo.size()); + + // Free list ordered by offset (for O(log n) coalescing) + // and faster insert/erase with respect to a vector + std::set free_list; + + // Bookkeeping: size/offset map for frees. + std::unordered_map live_size; + std::unordered_map live_offset; + + std::size_t total_bytes = 0; + + auto allocate_best_fit = [&](std::size_t need) -> std::size_t { + // Find the *smallest* block whose size >= need (best-fit). + // Since free_list is ordered by offset, we scan to find best by size. + // (For very large sets you could maintain a multimap by size as well.) 
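+      // Best-fit rather than first-fit: choosing the block with the smallest leftover
+      // fragment keeps fragmentation low, so the pool size stays close to the peak of
+      // concurrently live bytes.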
+ auto best = free_list.end(); + for (auto it = free_list.begin(); it != free_list.end(); ++it) { + if (it->size >= need) { + if (best == free_list.end() || it->size < best->size) + best = it; + } + } + if (best != free_list.end()) { + std::size_t off = best->offset; + if (best->size == need) { + free_list.erase(best); + } else { + FreeBlock updated{best->offset + need, best->size - need}; + free_list.erase(best); + free_list.insert(updated); + } + return off; + } + // No free block large enough; grow the heap. + std::size_t off = total_bytes; + total_bytes += need; + return off; + }; + + auto try_coalesce = [&](std::set::iterator it) { + // Coalesce with previous + if (it != free_list.begin()) { + auto prev = std::prev(it); + if (prev->offset + prev->size == it->offset) { + FreeBlock merged{prev->offset, prev->size + it->size}; + free_list.erase(prev); + it = free_list.erase(it); + it = free_list.insert(merged).first; + } + } + // Coalesce with next + auto next = std::next(it); + if (next != free_list.end() && it->offset + it->size == next->offset) { + FreeBlock merged{it->offset, it->size + next->size}; + free_list.erase(next); + it = free_list.erase(it); + free_list.insert(merged); + } + }; + + // Sweep through time. + for (const auto &e : events) { + if (e.type == 0) { // END: free + auto it_sz = live_size.find(e.idx); + auto it_off = live_offset.find(e.idx); + if (it_sz != live_size.end() && it_off != live_offset.end()) { + FreeBlock fb{it_off->second, it_sz->second}; + // Insert and coalesce with neighbors + auto it = free_list.insert(fb).first; + try_coalesce(it); + live_size.erase(it_sz); + live_offset.erase(it_off); + } + } else { // START: allocate + auto &t = tensorsInfo[e.idx]; + std::size_t off = allocate_best_fit(t.size); + tensorsOffset[e.idx] = off; + live_size[e.idx] = t.size; + live_offset[e.idx] = off; + } + } + + return MemoryResult{total_bytes, std::move(tensorsOffset)}; +} + } // namespace SOFIE } // namespace Experimental } // namespace TMVA
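For orientation, a minimal standalone sketch of the allocation pattern that the generated Session code now follows: build one TensorLifeInfo per dynamic tensor, let OrganizeMemory compute byte offsets, then view each tensor as a typed pointer into a single byte pool. The tensor names and sizes below are made up for illustration, and the element types assumed here (std::size_t offsets, a char pool) are consistent with the reinterpret_cast pattern emitted in GenerateDynamicTensorInfo but are not guaranteed by this patch.

#include <cstdint>
#include <iostream>
#include <vector>
#include "TMVA/SOFIE_common.hxx"

using namespace TMVA::Experimental::SOFIE;

int main()
{
   // one entry per dynamic tensor: {first op writing it, last op reading it + 1, size in bytes}
   std::vector<TensorLifeInfo> dynamicTensorInfos;
   dynamicTensorInfos.push_back({0, 2, sizeof(float)   * 1000});  // e.g. tensor_A
   dynamicTensorInfos.push_back({1, 3, sizeof(float)   *  500});  // e.g. tensor_B
   dynamicTensorInfos.push_back({2, 4, sizeof(int64_t) *  100});  // e.g. tensor_C (A is dead by then)

   auto memory_result = OrganizeMemory(dynamicTensorInfos);

   // a single byte pool backs all dynamic tensors; each tensor is a typed view at its offset
   std::vector<char> pool(memory_result.total_bytes);
   float   *tensor_A = reinterpret_cast<float *>(pool.data() + memory_result.offsets[0]);
   float   *tensor_B = reinterpret_cast<float *>(pool.data() + memory_result.offsets[1]);
   int64_t *tensor_C = reinterpret_cast<int64_t *>(pool.data() + memory_result.offsets[2]);

   // A and B overlap in time, so their byte ranges are disjoint; C only starts once A is
   // dead, so the planner is free to hand C the slot previously used by A.
   std::cout << "pool size " << memory_result.total_bytes << " bytes, offsets "
             << memory_result.offsets[0] << " " << memory_result.offsets[1] << " "
             << memory_result.offsets[2] << std::endl;
   (void) tensor_A; (void) tensor_B; (void) tensor_C;
   return 0;
}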
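Going back to the Range operator earlier in the patch: the string expressions stored in fShape encode the ONNX Range output length, number_of_elements = max(ceil((limit - start) / delta), 0). A plain C++ rendering of the same formula is given below for reference; the helper name is hypothetical, and the integer overload assumes a positive delta for the ceiling division.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>

// floating-point inputs: matches the expression emitted by the general FLOAT branch
inline std::size_t RangeLength(float start, float limit, float delta)
{
   return static_cast<std::size_t>(std::max(std::ceil((limit - start) / delta), 0.0f));
}

// integer inputs: ceiling division, assuming delta > 0
inline std::size_t RangeLength(int64_t start, int64_t limit, int64_t delta)
{
   int64_t n = (limit - start + delta - 1) / delta;
   return n > 0 ? static_cast<std::size_t>(n) : 0;
}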