Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
e6c84bf
add pipeline. test=develop
heavengate Oct 19, 2021
8f19ea2
fixing compile error.
heavengate Oct 19, 2021
928e2ce
compile success
heavengate Oct 20, 2021
7cdecdd
run success
heavengate Oct 24, 2021
0de3d70
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
heavengate Nov 5, 2021
548bf84
add DataScope
heavengate Nov 9, 2021
9bd247b
refine pipeline manager
heavengate Nov 9, 2021
efec91b
add map_op and map_runner, compile success
heavengate Nov 9, 2021
798331b
add python API and VarType LOD_TENSOR_BLOCKING_QUEUE
heavengate Nov 10, 2021
d66ed31
add Shutdown for MapRunner
heavengate Nov 10, 2021
5ad0af5
add file reader
LielinJiang Oct 28, 2021
8ba9417
add decode
LielinJiang Nov 3, 2021
88b7809
[data op] add random_crop_and_resize_op
ghostxsl Nov 9, 2021
1080a73
fix compile error
heavengate Nov 12, 2021
7e1da1e
output tensor
LielinJiang Nov 12, 2021
ef9b44f
add debug log
heavengate Nov 15, 2021
1d94fb9
add threads pool
LielinJiang Nov 16, 2021
5c1316a
multi-phase nvjpeg decode single thread success
heavengate Nov 21, 2021
61c85ee
multi-phase decode with thread pool
heavengate Nov 22, 2021
c5043dc
polish code
heavengate Nov 23, 2021
7ab1889
why 2nd op not run
heavengate Nov 30, 2021
256d7b8
run success
heavengate Dec 1, 2021
41cebd6
map success
heavengate Dec 6, 2021
b14d92a
fix typo and clean log
heavengate Dec 6, 2021
acc731d
polish code
heavengate Dec 7, 2021
78824a8
polish code
heavengate Dec 7, 2021
e9dd9ed
queue + loop success
heavengate Dec 8, 2021
2e89ad3
fix map input type
heavengate Dec 9, 2021
ff3a7cd
use new CUDADeviceContext in map op
heavengate Dec 15, 2021
18cd907
simplify log
heavengate Dec 15, 2021
8c74403
random flip success
heavengate Dec 21, 2021
6e4b45f
polish log
heavengate Dec 21, 2021
a06c26d
add SetROI in nvjpeg decoder
heavengate Dec 30, 2021
cec2758
fix typo
heavengate Jan 3, 2022
3eddb61
fix exit segmentfault, need update
heavengate Jan 4, 2022
887e749
add label and multi-gpu
LielinJiang Jan 5, 2022
37a502a
Merge branch 'add_pipeline' of https://github.com/heavengate/Paddle i…
LielinJiang Jan 5, 2022
632e8b0
add batch_resize/batch_decode
heavengate Jan 5, 2022
d3e5f9e
Merge branch 'add_pipeline' of https://github.com/heavengate/Paddle i…
heavengate Jan 5, 2022
1fc2a58
add local_rank for batch_decode
heavengate Jan 5, 2022
6a74949
refine shutdown
heavengate Jan 9, 2022
ec2ff93
remove prefetch thread/queue in pipeline
heavengate Jan 9, 2022
b5fef20
map support multi inputs/outputs
heavengate Jan 9, 2022
c5860c9
add SIGSEGV handler for map_runner
heavengate Jan 10, 2022
9e03339
add end of epoch process
heavengate Jan 11, 2022
bd93dab
fix nvjpeg hw bug
heavengate Jan 12, 2022
29f670b
use NVJPEG_OUTPUT_RGBI
heavengate Jan 12, 2022
84684b5
add reader manager
LielinJiang Jan 13, 2022
582e851
pull upstream
LielinJiang Jan 13, 2022
47aa4fc
mv file label reader to data/ , add reader manager
LielinJiang Jan 13, 2022
bf7cb1c
Merge pull request #12 from LielinJiang/add_pipeline
heavengate Jan 13, 2022
2e1a4db
add loader & data_reader, compile success
heavengate Jan 19, 2022
4dd29d5
run success, hang to fix
heavengate Jan 19, 2022
b6c2e1f
fix speed
heavengate Jan 20, 2022
fd14988
support dygraph running
heavengate Jan 24, 2022
f0d9d95
fix import _C_ops
heavengate Jan 24, 2022
9adaeab
refine api
heavengate Jan 25, 2022
6183815
fix drop_last=False hang
heavengate Jan 25, 2022
8668ac8
support program shutdown
heavengate Jan 26, 2022
71ee11c
fix dygraph error
heavengate Jan 26, 2022
72922e6
refine shutdown
heavengate Jan 26, 2022
6ed135c
opencv
LielinJiang Jan 27, 2022
6527936
add opencv decode
LielinJiang Jan 28, 2022
002915e
Merge pull request #13 from LielinJiang/io_add_opencv
LielinJiang Jan 28, 2022
b459b82
fix train speed
heavengate Jan 28, 2022
a30b9fb
add random_flip op
heavengate Feb 13, 2022
f482564
fix decode error and add layout for decode op
LielinJiang Feb 14, 2022
a30d38f
clean code
LielinJiang Feb 14, 2022
19b3a08
Merge pull request #14 from LielinJiang/fix-decode-channel-error
heavengate Feb 14, 2022
baf2f55
add mirror_normalize
heavengate Feb 14, 2022
11592c7
Merge branch 'add_pipeline' of https://github.com/heavengate/Paddle i…
heavengate Feb 14, 2022
da718cd
revert pipeline debug
heavengate Feb 14, 2022
a764fca
fix flip_normalize output error
heavengate Feb 14, 2022
a130086
fix index
LielinJiang Feb 17, 2022
7fcd89c
Merge pull request #15 from LielinJiang/fix_index
heavengate Feb 17, 2022
f867fca
fix memory leak
heavengate Feb 28, 2022
141af90
Merge branch 'add_pipeline' of https://github.com/heavengate/Paddle i…
heavengate Feb 28, 2022
8c59fa8
fix memory leak
heavengate Mar 1, 2022
b5213fe
add barrier
heavengate Mar 3, 2022
b0d88ee
fix training hang
heavengate Mar 23, 2022
018451a
change label data type to int64
heavengate Mar 23, 2022
e396470
add opencv cvtColor
heavengate Mar 25, 2022
7cecb18
merge develop
heavengate Mar 25, 2022
e5bed90
add Pipeline.reset()
heavengate Mar 27, 2022
0065a28
unique shuffle seed in data_reader on multi-device
heavengate Mar 27, 2022
c91e76e
merge develop
heavengate Mar 28, 2022
be35dbb
clean code. test=develop
heavengate Mar 28, 2022
2a2eb8e
merge develop
heavengate Mar 28, 2022
a27a9ce
rename data_io_queue -> dataloader_pass. test=develop
heavengate Mar 28, 2022
71900f4
refine API and add file_label_loader unittest. test=develop
heavengate Mar 29, 2022
d608b84
add mirror normalize unittests. test=develop
heavengate Mar 29, 2022
6281d28
add unittest for random_flip. test=develop
heavengate Mar 29, 2022
cc51bbf
add test_ops_crop_resize. test=develop
heavengate Mar 30, 2022
948e035
add unittest for random_crop_and_resize. test=develop
heavengate Mar 30, 2022
a738cfb
add image decode unittest. test=develop
heavengate Apr 1, 2022
e1bf5f1
lod_tensor_array to list[lod_tensor]
LielinJiang Apr 1, 2022
1eea1d2
pull upstream
LielinJiang Apr 1, 2022
7806cf6
Merge pull request #16 from LielinJiang/tensor_array2list_tensor
heavengate Apr 1, 2022
59d0242
refine map API. test=develop
heavengate Apr 1, 2022
8680a98
merge develop. test=develop
heavengate Apr 1, 2022
52771e6
fix ci compile. test=develop
heavengate Apr 2, 2022
353f759
fix ci compile. test=develop
heavengate Apr 2, 2022
ce4610b
fix ci compile. test=develop
heavengate Apr 2, 2022
a08e487
add test_data_pipeline. test=develop
heavengate Apr 2, 2022
0206f3f
add dynamic unittest for all data pipeline ops. test=develop
heavengate Apr 2, 2022
4c91cd9
fix ci compile. test=develop
heavengate Apr 3, 2022
d4bd597
merge develop
heavengate Apr 5, 2022
676335c
merge develop
heavengate Apr 5, 2022
210f5b5
fix unittest. test=develop
heavengate Apr 5, 2022
a559fb2
complete docs. test=develop
heavengate Apr 5, 2022
a01f870
add C++ docs. test=develop
heavengate Apr 5, 2022
03783f2
add test_data_pipeline dynamic test. test=develop
heavengate Apr 5, 2022
6d20a9f
add NVJPEG error message spider to fix ci build. test=develop
heavengate Apr 6, 2022
b4571db
fix ci. test=develop
heavengate Apr 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ option(NEW_RELEASE_ALL "PaddlePaddle next-level release strategy for all arche
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON)
option(WITH_OPENCV "Compile with opencv" OFF)
option(WITH_RECORD_BUILDTIME "Compile PaddlePaddle with record all targets build time" OFF)
option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)

Expand Down Expand Up @@ -393,6 +394,18 @@ include(third_party) # download, build, install third_party, Contains about 20+

include(flags) # set paddle compile flags

if(WITH_OPENCV)
  # Try OpenCV 4.x first; quietly fall through to a REQUIRED 3.x lookup,
  # so configuration fails only if neither major version is available.
  find_package(OpenCV 4.0 QUIET COMPONENTS core imgproc imgcodecs)
  if(NOT OpenCV_FOUND)
    find_package(OpenCV 3.0 REQUIRED COMPONENTS core imgproc imgcodecs)
  endif()
  message(STATUS "Found OpenCV: ${OpenCV_INCLUDE_DIRS} (found suitable version \"${OpenCV_VERSION}\", minimum required is \"3.0\")")
  # NOTE(review): the SYSTEM and non-SYSTEM include_directories calls below
  # add the same directories twice — the second call looks redundant; confirm
  # and drop one.
  include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
  include_directories(${OpenCV_INCLUDE_DIRS})
  # NOTE(review): OpenCV_LIBS holds library names/targets, not directories —
  # link_directories() is probably not the intended command here; verify that
  # consuming targets actually link against ${OpenCV_LIBS}.
  link_directories(${OpenCV_LIBS})
  # Enables the PADDLE_WITH_OPENCV code paths (e.g. the OpenCV decoder).
  add_definitions(-DPADDLE_WITH_OPENCV)
endif()

if(WITH_PROFILER)
find_package(Gperftools REQUIRED)
include_directories(${GPERFTOOLS_INCLUDE_DIR})
Expand Down
2 changes: 2 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ function(common_link TARGET_NAME)
if (WITH_PROFILER)
target_link_libraries(${TARGET_NAME} gperftools::profiler)
endif()


endfunction()

# find all third_party modules is used for paddle static library
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ set(IR_PASS_DEPS graph_viz_pass multi_devices_graph_pass
coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass
sync_batch_norm_pass runtime_context_cache_pass graph_to_program_pass
fix_op_run_order_pass fuse_gemm_epilogue_pass)
fix_op_run_order_pass fuse_gemm_epilogue_pass dataloader_queue_pass)

if (WITH_CINN)
set(IR_PASS_DEPS ${IR_PASS_DEPS} build_cinn_pass)
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/details/build_strategy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
// Note: This pass is used to check whether the multi_device_graph is right.
AppendPass("multi_devices_check_pass");

AppendPass("dataloader_queue_pass");

SetCollectiveContext();
}

Expand Down Expand Up @@ -503,6 +505,7 @@ USE_PASS(fuse_momentum_op_pass);
USE_PASS(fuse_all_reduce_op_pass);
USE_PASS(runtime_context_cache_pass);
USE_PASS(add_reader_dependency_pass);
USE_PASS(dataloader_queue_pass);
#ifdef PADDLE_WITH_CINN
USE_PASS(build_cinn_pass);
#endif
Expand Down
3 changes: 2 additions & 1 deletion paddle/fluid/framework/executor_gc_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ static bool VarCanBeDeleted(const std::string &name, const BlockDesc &block,

return type == proto::VarType::LOD_TENSOR ||
type == proto::VarType::SELECTED_ROWS ||
type == proto::VarType::LOD_TENSOR_ARRAY;
type == proto::VarType::LOD_TENSOR_ARRAY ||
type == proto::VarType::LOD_TENSOR_BLOCKING_QUEUE;
}

std::unordered_map<const OperatorBase *, std::vector<std::string>>
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/framework.proto
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,11 @@ message VarType {
STRINGS = 26;
VOCAB = 27;
FEED_LIST = 28;

// The data type of phi::StringTensor
PSTRING = 29;

LOD_TENSOR_BLOCKING_QUEUE = 31;
}

required Type type = 1;
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/framework/ir/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pass_library(matmul_scale_fuse_pass inference)
pass_library(gpu_cpu_map_matmul_to_mul_pass inference)
pass_library(mixed_precision_configure_pass inference)
pass_library(generate_pass DEPS pass_desc_proto)
pass_library(dataloader_queue_pass base)
target_link_libraries(generate_pass pass_desc_proto)

if(WITH_TENSORRT)
Expand Down
109 changes: 109 additions & 0 deletions paddle/fluid/framework/ir/dataloader_queue_pass.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <map>
#include <set>

#include "glog/logging.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Graph;

// Op types whose output variables hold a LoDTensorBlockingQueue rather than
// a plain LoDTensor; the pass retypes their outputs accordingly.
// (const + static: these tables are file-local and never mutated.)
static const std::set<std::string> output_queue_holder_ops = {
    "file_label_reader", "map", "data_reader",
};

// Op types whose input variables are consumed as LoDTensorArray.
static const std::set<std::string> input_array_ops = {
    "random_crop_and_resize", "batch_decode",
};

// Returns true if `op_type` produces queue-holder outputs.
// Takes the type by const reference to avoid copying the string per call.
static bool IsOutputQueueHolderOp(const std::string& op_type) {
  return output_queue_holder_ops.count(op_type) > 0;
}

// Returns true if `op_type` consumes LoDTensorArray inputs.
static bool IsInputArrayOp(const std::string& op_type) {
  return input_array_ops.count(op_type) > 0;
}

// Retypes every output variable of a queue-holder op (file_label_reader /
// map / data_reader) to LOD_TENSOR_BLOCKING_QUEUE and marks it persistable
// so the queue is not garbage-collected between executor steps.
static void ProcessOutputQueueHolderOp(ir::Graph *graph) {
  // Pass 1: collect the names of all output variables of queue-holder ops.
  std::set<std::string> var_names;
  for (const Node *n : graph->Nodes()) {
    if (n->IsOp() && n->Op()) {
      auto *op = n->Op();
      if (IsOutputQueueHolderOp(op->Type())) {
        // Iterate by const reference: the original copied each variable
        // name string on every iteration.
        for (const auto &output : op->Outputs()) {
          for (const auto &var : output.second) var_names.insert(var);
        }
      }
    }
  }

  // Pass 2: rewrite the type of every variable node collected above.
  for (const Node *n : graph->Nodes()) {
    if (n->IsVar() && n->Var()) {
      auto *var = n->Var();
      if (var_names.count(var->Name()) > 0) {
        VLOG(3) << "Change output variable type of " << var->Name()
                << " to queue holder";
        var->SetType(framework::proto::VarType::LOD_TENSOR_BLOCKING_QUEUE);
        var->SetPersistable(true);
      }
    }
  }
}

// Retypes every input variable of an array-consuming op
// (random_crop_and_resize / batch_decode) to LOD_TENSOR_ARRAY.
static void ProcessInputArrayOp(ir::Graph *graph) {
  // Pass 1: collect the names of all input variables of array ops.
  std::set<std::string> var_names;
  for (const Node *n : graph->Nodes()) {
    if (n->IsOp() && n->Op()) {
      auto *op = n->Op();
      if (IsInputArrayOp(op->Type())) {
        // Iterate by const reference to avoid copying each name string.
        for (const auto &input : op->Inputs()) {
          for (const auto &var : input.second) var_names.insert(var);
        }
      }
    }
  }

  // Pass 2: rewrite the type of every variable node collected above.
  for (const Node *n : graph->Nodes()) {
    if (n->IsVar() && n->Var()) {
      auto *var = n->Var();
      if (var_names.count(var->Name()) > 0) {
        // Fixed log text: it previously said "to queue holder" (copy-pasted
        // from ProcessOutputQueueHolderOp) but this path sets
        // LOD_TENSOR_ARRAY.
        VLOG(3) << "Change input variable type of " << var->Name()
                << " to LoDTensorArray";
        var->SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
      }
    }
  }
}

// Graph pass for the data pipeline: rewrites variable types so that
// outputs of reader/map ops become LOD_TENSOR_BLOCKING_QUEUE holders and
// inputs of array-consuming ops become LOD_TENSOR_ARRAY.
class DataLoaderQueuePass : public Pass {
 protected:
  void ApplyImpl(ir::Graph *graph) const override {
    ProcessOutputQueueHolderOp(graph);
    ProcessInputArrayOp(graph);
  }
};

} // namespace ir
} // namespace framework
} // namespace paddle

REGISTER_PASS(dataloader_queue_pass,
paddle::framework::ir::DataLoaderQueuePass);
8 changes: 7 additions & 1 deletion paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,13 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
const platform::Place& place,
RuntimeContext* runtime_ctx) const {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
auto* dev_ctx = HasAttr("_stream_id")
? platform::AsyncDeviceContextPool::Instance().Get(
place, Attr<int>("_stream_id"))
: nullptr;
if (dev_ctx == nullptr) {
dev_ctx = pool.Get(place);
}

#ifdef PADDLE_WITH_ASCEND_CL
// NOTE(wangxi): nan/inf cannot be detected on NPU by checking the variable
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/var_type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
REG_PROTO_VAR_TYPE_TRAIT(operators::reader::LoDTensorBlockingQueueHolder,
proto::VarType::LOD_TENSOR_BLOCKING_QUEUE);
REG_PROTO_VAR_TYPE_TRAIT(FeedList, proto::VarType::FEED_LIST);
REG_PROTO_VAR_TYPE_TRAIT(FetchList, proto::VarType::FETCH_LIST);
REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/variable_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
Expand All @@ -42,6 +43,8 @@ void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
var->GetMutable<LoDRankTable>();
} else if (var_type == proto::VarType::LOD_TENSOR_ARRAY) {
var->GetMutable<LoDTensorArray>();
} else if (var_type == proto::VarType::LOD_TENSOR_BLOCKING_QUEUE) {
var->GetMutable<operators::reader::LoDTensorBlockingQueueHolder>();
} else if (var_type == proto::VarType::STRINGS) {
var->GetMutable<Strings>();
} else if (var_type == proto::VarType::VOCAB) {
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ add_subdirectory(reader)

if (NOT WIN32)
add_subdirectory(nccl)
add_subdirectory(data)
endif()

if (WITH_GPU AND TENSORRT_FOUND)
Expand Down
29 changes: 29 additions & 0 deletions paddle/fluid/operators/data/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
include(operators)

if(WITH_UNITY_BUILD)
  # Load Unity Build rules for operators in paddle/fluid/operators/data/
  include(unity_build_rule.cmake)
endif()

# Pipeline executor and its dataloader entry op.
cc_library(pipeline SRCS pipeline.cc DEPS parallel_executor simple_threadpool scope)
op_library(dataloader_op SRCS dataloader_op.cc dataloader_op.cu.cc DEPS pipeline ${OP_HEADER_DEPS})

op_library(data_reader_op SRCS data_reader_op.cc DEPS ${OP_HEADER_DEPS})

# map op runs user programs on worker threads via map_runner.
cc_library(map_runner SRCS map_runner.cc DEPS parallel_executor simple_threadpool scope)
op_library(map_op SRCS map_op.cc map_op.cu.cc DEPS map_runner ${OP_HEADER_DEPS})

# The ops below have CUDA kernels (nvJPEG decode etc.), so they are only
# built for GPU builds on non-Windows platforms.
if (WITH_GPU AND NOT WIN32)
  op_library(file_label_loader_op SRCS file_label_loader_op.cc DEPS ${OP_HEADER_DEPS})

  cc_library(random_roi_generator SRCS random_roi_generator.cc DEPS ${OP_HEADER_DEPS})
  cc_library(image_decoder SRCS image_decoder.cc DEPS random_roi_generator ${OP_HEADER_DEPS})

  op_library(batch_decode_random_crop_op SRCS batch_decode_random_crop_op.cc batch_decode_random_crop_op.cu DEPS image_decoder ${OP_HEADER_DEPS})
  op_library(batch_decode_op SRCS batch_decode_op.cc batch_decode_op.cu DEPS image_decoder ${OP_HEADER_DEPS})

  op_library(batch_random_crop_and_resize_op SRCS batch_random_crop_and_resize_op.cc batch_random_crop_and_resize_op.cu DEPS ${OP_HEADER_DEPS})
  op_library(batch_resize_op SRCS batch_resize_op.cc batch_resize_op.cu DEPS ${OP_HEADER_DEPS})

  op_library(mirror_normalize_op SRCS mirror_normalize_op.cc mirror_normalize_op.cu DEPS ${OP_HEADER_DEPS})
endif()
98 changes: 98 additions & 0 deletions paddle/fluid/operators/data/batch_decode_op.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/data/batch_decode_op.h"

namespace paddle {
namespace operators {
namespace data {

// Static-graph operator that decodes a batch of raw JPEG byte tensors
// into uint8 image tensors.
class BatchDecodeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    // Output shapes are data-dependent (each JPEG carries its own
    // dimensions), so only validate that the duplicable slots are non-empty.
    // Fixed error text: it previously referred to "DecodeJpeg", which is a
    // different op.
    PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL,
                      platform::errors::InvalidArgument(
                          "Inputs(X) of BatchDecodeOp should not be empty."));
    PADDLE_ENFORCE_GE(
        ctx->Outputs("Out").size(), 1UL,
        platform::errors::InvalidArgument(
            "Outputs(Out) of BatchDecodeOp should not be empty."));
  }

 protected:
  // Decoded images are always uint8; the kernel runs on the op's place.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::UINT8,
                                   ctx.GetPlace());
  }

  // For "X" (the raw JPEG bytes) keep the expected kernel type so the
  // framework does not insert a device transform; other variables keep
  // their current place. Marked `override` (missing in the original).
  framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name, const framework::Tensor& tensor,
      const framework::OpKernelType& expected_kernel_type) const override {
    if (var_name == "X") {
      return expected_kernel_type;
    }

    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place());
  }
};

// Declares the inputs, outputs and attributes of batch_decode for the
// op registry.
class BatchDecodeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(List[Tensor]) A one dimensional uint8 tensor containing "
             "the raw bytes of the JPEG image. It is a tensor with rank "
             "1.")
        .AsDuplicable();
    AddOutput("Out", "The output tensor of BatchDecodeOp").AsDuplicable();
    AddComment(R"DOC(
This operator decodes a JPEG image into a 3 dimensional RGB Tensor.
The values of the output tensor are uint8 between 0 and 255.
)DOC");
    // Fixed description: it previously read "Path of the file to be
    // readed.", copy-pasted from a file-reader op.
    AddAttr<int>("num_threads",
                 "(int, default 2) "
                 "The number of worker threads used for decoding.")
        .SetDefault(2);
    AddAttr<int>("local_rank",
                 "(int)"
                 "The index of the op to start execution");
    AddAttr<int64_t>("program_id",
                     "(int64_t)"
                     "The unique hash id used as cache key for "
                     "decode thread pool");
    AddAttr<int64_t>(
        "host_memory_padding",
        "(int64, default 0),"
        "pinned memory allocation padding number for Nvjpeg decoding")
        .SetDefault(0);
    AddAttr<int64_t>(
        "device_memory_padding",
        "(int64, default 0),"
        "device memory allocation padding number for Nvjpeg decoding")
        .SetDefault(0);
  }
};

} // namespace data
} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(
batch_decode, ops::data::BatchDecodeOp, ops::data::BatchDecodeOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>)

REGISTER_OP_CPU_KERNEL(batch_decode, ops::data::CPUBatchDecodeKernel<uint8_t>)
Loading