diff --git a/paddle/fluid/operators/data/batch_decode_op.cc b/paddle/fluid/operators/data/batch_decode_op.cc index d0f8dae02e5d17..648f2033e9a037 100644 --- a/paddle/fluid/operators/data/batch_decode_op.cc +++ b/paddle/fluid/operators/data/batch_decode_op.cc @@ -30,8 +30,8 @@ class BatchDecodeOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(framework::proto::VarType::UINT8, - ctx.GetPlace()); + return framework::OpKernelType( + framework::proto::VarType::UINT8, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( @@ -67,7 +67,8 @@ or 1 dimensional Gray Tensor. Optionally converts the image to the desired format. The values of the output tensor are uint8 between 0 and 255. )DOC"); - AddAttr("num_threads", "Path of the file to be readed.").SetDefault(2); + AddAttr("num_threads", "Path of the file to be readed.") + .SetDefault(2); AddAttr("local_rank", "(int)" "The index of the op to start execution"); @@ -77,13 +78,11 @@ and 255. "decode thread pool"); AddAttr( "host_memory_padding", - "(int64, default 0)," - "pinned memory allocation padding number for Nvjpeg decoding") + "(int64, default 0), pinned memory allocation padding number for Nvjpeg decoding") .SetDefault(0); AddAttr( "device_memory_padding", - "(int64, default 0)," - "device memory allocation padding number for Nvjpeg decoding") + "(int64, default 0), device memory allocation padding number for Nvjpeg decoding") .SetDefault(0); } }; diff --git a/paddle/fluid/operators/data/batch_decode_op.cu b/paddle/fluid/operators/data/batch_decode_op.cu index 0b640bbb3b986f..93b93b9c674f87 100644 --- a/paddle/fluid/operators/data/batch_decode_op.cu +++ b/paddle/fluid/operators/data/batch_decode_op.cu @@ -14,7 +14,6 @@ #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_HIP) -#include "paddle/fluid/operators/data/batch_decode_op.h" #include "paddle/fluid/operators/data/batch_decode_random_crop_op.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" @@ -22,8 +21,7 @@ namespace paddle { namespace operators { namespace data { -using LoDTensorBlockingQueueHolder = - operators::reader::LoDTensorBlockingQueueHolder; +using LoDTensorBlockingQueueHolder = operators::reader::LoDTensorBlockingQueueHolder; template class GPUBatchDecodeKernel : public framework::OpKernel { @@ -36,12 +34,12 @@ class GPUBatchDecodeKernel : public framework::OpKernel { auto device_memory_padding = ctx.Attr("device_memory_padding"); // multi-phrase decode thread pool - auto* decode_pool = - ImageDecoderThreadPoolManager::Instance()->GetDecoderThreadPool( - program_id, num_threads, local_rank, - static_cast(host_memory_padding), - static_cast(device_memory_padding)); - + auto* decode_pool = + ImageDecoderThreadPoolManager::Instance()->GetDecoderThreadPool( + program_id, num_threads, local_rank, + static_cast(host_memory_padding), + static_cast(device_memory_padding)); + const framework::LoDTensorArray* inputs = ctx.Input("X"); @@ -54,11 +52,13 @@ class GPUBatchDecodeKernel : public framework::OpKernel { auto* x_data = x.data(); size_t x_numel = static_cast(x.numel()); - ImageDecodeTask task = {.bit_stream = x_data, - .bit_len = x_numel, - .tensor = &out_array[i], - .roi_generator = nullptr, - .place = ctx.GetPlace()}; + ImageDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &out_array[i], + .roi_generator = nullptr, + .place = ctx.GetPlace() + }; decode_pool->AddTask(std::make_shared(task)); } diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc index 508802154d25f7..3e981a201fd477 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cc +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cc @@ -23,29 +23,21 @@ class BatchDecodeRandomCropOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( - "Inputs(X) of DecodeJpeg should not be empty.")); - PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL, - platform::errors::InvalidArgument( - "Outputs(Out) of DecodeJpeg should not be empty.")); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DecodeJpeg"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DecodeJpeg"); + auto aspect_ratio_min = ctx->Attrs().Get("aspect_ratio_min"); auto aspect_ratio_max = ctx->Attrs().Get("aspect_ratio_max"); - PADDLE_ENFORCE_GT( - aspect_ratio_min, 0., - platform::errors::InvalidArgument( + PADDLE_ENFORCE_GT(aspect_ratio_min, 0., + platform::errors::InvalidArgument( "aspect_ratio_min should be greater than 0, but received " - "%f", - aspect_ratio_min)); - PADDLE_ENFORCE_GT( - aspect_ratio_max, 0., - platform::errors::InvalidArgument( + "%f", aspect_ratio_min)); + PADDLE_ENFORCE_GT(aspect_ratio_max, 0., + platform::errors::InvalidArgument( "aspect_ratio_max should be greater than 0, but received " - "%f", - aspect_ratio_max)); - PADDLE_ENFORCE_GE( - aspect_ratio_max, aspect_ratio_min, - platform::errors::InvalidArgument( + "%f", aspect_ratio_max)); + PADDLE_ENFORCE_GE(aspect_ratio_max, aspect_ratio_min, + platform::errors::InvalidArgument( "aspect_ratio_max should be greater than aspect_ratio_min, " "but received aspect_ratio_max(%d) < aspect_ratio_min(%d)", aspect_ratio_max, aspect_ratio_min)); @@ -53,34 +45,31 @@ class BatchDecodeRandomCropOp : public framework::OperatorWithKernel { auto area_min = ctx->Attrs().Get("area_min"); auto area_max = ctx->Attrs().Get("area_max"); PADDLE_ENFORCE_GT(area_min, 0., - platform::errors::InvalidArgument( - "area_minshould be greater than 0, but received " - "%f", - area_min)); + platform::errors::InvalidArgument( + "area_minshould be greater than 0, but received " + "%f", area_min)); PADDLE_ENFORCE_GT(area_max, 0., - platform::errors::InvalidArgument( - "area_max should be greater than 0, but received " - "%f", - area_max)); + platform::errors::InvalidArgument( + "area_max should be greater than 0, but received " + "%f", area_max)); PADDLE_ENFORCE_GE(area_max, area_min, - platform::errors::InvalidArgument( - "area_max should be greater than area_min, " - "but received area_max(%f) < area_min(%f)", - area_max, area_min)); + platform::errors::InvalidArgument( + "area_max should be greater than area_min, " + "but received area_max(%f) < area_min(%f)", + area_max, area_min)); - auto num_attempts = ctx->Attrs().Get("num_attempts"); + auto num_attempts= ctx->Attrs().Get("num_attempts"); PADDLE_ENFORCE_GT(num_attempts, 0, - platform::errors::InvalidArgument( - "num_attempts should be a positive integerm, but " - "received %d", - num_attempts)); + platform::errors::InvalidArgument( + "num_attempts should be a positive integerm, but " + "received %d", num_attempts)); } protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(framework::proto::VarType::UINT8, - ctx.GetPlace()); + return framework::OpKernelType( + framework::proto::VarType::UINT8, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( @@ -108,9 +97,8 @@ class BatchDecodeRandomCropOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "A one dimensional uint8 tensor containing the raw bytes " - "of the JPEG image. It is a tensor with rank 1.") - .AsDuplicable(); - AddOutput("Out", "The output tensor of DecodeJpeg op").AsDuplicable(); + "of the JPEG image. It is a tensor with rank 1."); + AddOutput("Out", "The output tensor of DecodeJpeg op"); AddComment(R"DOC( This operator decodes a JPEG image into a 3 dimensional RGB Tensor or 1 dimensional Gray Tensor. Optionally converts the image to the @@ -120,14 +108,15 @@ and 255. AddAttr("local_rank", "(int64_t)" "The index of the op to start execution"); - AddAttr("num_threads", "Path of the file to be readed.").SetDefault(2); - AddAttr("host_memory_padding", - "(int64, default 0), pinned memory allocation padding " - "number for Nvjpeg decoding") + AddAttr("num_threads", "Path of the file to be readed.") + .SetDefault(2); + AddAttr( + "host_memory_padding", + "(int64, default 0), pinned memory allocation padding number for Nvjpeg decoding") .SetDefault(0); - AddAttr("device_memory_padding", - "(int64, default 0), device memory allocation padding " - "number for Nvjpeg decoding") + AddAttr( + "device_memory_padding", + "(int64, default 0), device memory allocation padding number for Nvjpeg decoding") .SetDefault(0); AddAttr( "data_format", @@ -136,8 +125,8 @@ and 255. "Specify that the data format of the input and output data is " "channel_first or channel_last.") .SetDefault("NCHW"); - AddAttr("aspect_ratio_min", "").SetDefault(3. / 4.); - AddAttr("aspect_ratio_max", "").SetDefault(4. / 3.); + AddAttr("aspect_ratio_min", "").SetDefault(3./4.); + AddAttr("aspect_ratio_max", "").SetDefault(4./3.); AddAttr("area_min", "").SetDefault(0.08); AddAttr("area_max", "").SetDefault(1.); AddAttr("num_attempts", "").SetDefault(10); @@ -155,10 +144,8 @@ and 255. namespace ops = paddle::operators; REGISTER_OPERATOR( - batch_decode_random_crop, ops::data::BatchDecodeRandomCropOp, - ops::data::BatchDecodeRandomCropOpMaker, + batch_decode_random_crop, ops::data::BatchDecodeRandomCropOp, ops::data::BatchDecodeRandomCropOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker) -REGISTER_OP_CPU_KERNEL(batch_decode_random_crop, - ops::data::CPUBatchDecodeRandomCropKernel) +REGISTER_OP_CPU_KERNEL(batch_decode_random_crop, ops::data::CPUBatchDecodeRandomCropKernel) diff --git a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu index 9e882ac8eac88c..e65bce62e2536b 100644 --- a/paddle/fluid/operators/data/batch_decode_random_crop_op.cu +++ b/paddle/fluid/operators/data/batch_decode_random_crop_op.cu @@ -25,8 +25,7 @@ namespace paddle { namespace operators { namespace data { -using LoDTensorBlockingQueueHolder = - operators::reader::LoDTensorBlockingQueueHolder; +using LoDTensorBlockingQueueHolder = operators::reader::LoDTensorBlockingQueueHolder; using DataLayout = framework::DataLayout; ImageDecoderThreadPool* decode_pool = nullptr; @@ -42,17 +41,21 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { auto device_memory_padding = ctx.Attr("device_memory_padding"); // multi-phrase decode thread pool - auto* decode_pool = - ImageDecoderThreadPoolManager::Instance()->GetDecoderThreadPool( - program_id, num_threads, local_rank, - static_cast(host_memory_padding), - static_cast(device_memory_padding)); + auto* decode_pool = + ImageDecoderThreadPoolManager::Instance()->GetDecoderThreadPool( + program_id, num_threads, local_rank, + static_cast(host_memory_padding), + static_cast(device_memory_padding)); - auto inputs = ctx.MultiInput("X"); - int batch_size = inputs.size(); + const framework::LoDTensorArray* inputs = + ctx.Input("X"); + int batch_size = inputs->size(); - auto out_array = ctx.MultiOutput("Out"); + auto* out = ctx.OutputVar("Out"); auto dev = platform::CUDAPlace(local_rank); + + auto& out_array = *out->GetMutable(); + out_array.resize(batch_size); const std::string data_format_str = ctx.Attr("data_format"); const DataLayout data_format = @@ -72,46 +75,52 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { AreaRange area_range{area_min, area_max}; auto* generators = GeneratorManager::Instance()->GetGenerators( - program_id, batch_size, aspect_ratio_range, area_range); - - for (size_t i = 0; i < inputs.size(); i++) { - const framework::LoDTensor* x = inputs.at(i); - auto* x_data = x->data(); - size_t x_numel = static_cast(x->numel()); - - if (data_format == DataLayout::kNCHW) { - ImageDecodeTask task = {.bit_stream = x_data, - .bit_len = x_numel, - .tensor = &temp_array[i], - .roi_generator = generators->at(i).get(), - .place = dev}; + program_id, batch_size, aspect_ratio_range, + area_range); + + for (size_t i = 0; i < inputs->size(); i++) { + const framework::LoDTensor x = inputs->at(i); + auto* x_data = x.data(); + size_t x_numel = static_cast(x.numel()); + + if (data_format == DataLayout::kNCHW){ + ImageDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &temp_array[i], + .roi_generator = generators->at(i).get(), + .place = dev + }; decode_pool->AddTask(std::make_shared(task)); - } else { - ImageDecodeTask task = {.bit_stream = x_data, - .bit_len = x_numel, - .tensor = out_array[i], - .roi_generator = generators->at(i).get(), - .place = dev}; + } + else{ + ImageDecodeTask task = { + .bit_stream = x_data, + .bit_len = x_numel, + .tensor = &out_array[i], + .roi_generator = generators->at(i).get(), + .place = dev + }; decode_pool->AddTask(std::make_shared(task)); } + } decode_pool->RunAll(true); - if (data_format == DataLayout::kNCHW) { + if (data_format == DataLayout::kNCHW){ const auto& dev_ctx = ctx.cuda_device_context(); phi::funcs::Transpose trans; std::vector axis = {2, 0, 1}; - for (size_t i = 0; i < inputs.size(); i++) { + for (size_t i = 0; i < inputs->size(); i++) { // Do transpose const framework::DDim& in_sizes = temp_array[i].dims(); framework::DDim transposed_input_shape = in_sizes.transpose(axis); std::vector transposed_input_shape_ = phi::vectorize(transposed_input_shape); - - out_array[i]->Resize(transposed_input_shape); - out_array[i]->mutable_data(dev_ctx.GetPlace()); - trans(dev_ctx, temp_array[i], out_array[i], axis); + out_array[i].Resize(transposed_input_shape); + out_array[i].mutable_data(dev_ctx.GetPlace()); + trans(dev_ctx, temp_array[i], &out_array[i], axis); } } } @@ -122,7 +131,6 @@ class GPUBatchDecodeRandomCropKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(batch_decode_random_crop, - ops::data::GPUBatchDecodeRandomCropKernel) +REGISTER_OP_CUDA_KERNEL(batch_decode_random_crop, ops::data::GPUBatchDecodeRandomCropKernel) #endif diff --git a/paddle/fluid/operators/data/batch_resize_op.cc b/paddle/fluid/operators/data/batch_resize_op.cc index e46f12cb6b23ed..d3fbbfd17f58ad 100644 --- a/paddle/fluid/operators/data/batch_resize_op.cc +++ b/paddle/fluid/operators/data/batch_resize_op.cc @@ -24,27 +24,28 @@ class BatchResizeOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_GE(ctx->Inputs("X").size(), 1UL, - platform::errors::InvalidArgument( - "Inputs(X) of BatchResize should not be empty.")); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "BatchResize"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchResize"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + "BatchResize"); auto size = ctx->Attrs().Get>("size"); PADDLE_ENFORCE_EQ(size.size(), 2, platform::errors::InvalidArgument( "The length of Attrs(size) should be 2.")); - PADDLE_ENFORCE_GT(size[0], 0, platform::errors::InvalidArgument( - "h in Attr(size) of Op(BatchResize) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(size[1], 0, platform::errors::InvalidArgument( - "w in Attr(size) of Op(BatchResize) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(size[0], 0, + platform::errors::InvalidArgument( + "h in Attr(size) of Op(BatchResize) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(size[1], 0, + platform::errors::InvalidArgument( + "w in Attr(size) of Op(BatchResize) " + "should be greater than 0.")); } framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(framework::proto::VarType::UINT8, - ctx.GetPlace()); + return framework::OpKernelType( + framework::proto::VarType::UINT8, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( @@ -61,8 +62,7 @@ class BatchResizeOp : public framework::OperatorWithKernel { class BatchResizeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(List[LoDTensor]). A batch of instances to random crop.") - .AsDuplicable(); + AddInput("X", "(LoDTensorArray). A batch of instances to random crop."); AddOutput("Out", "(Tensor). The cropped instance batch."); AddAttr>( "size", "expected output size of the crop, for each edge."); @@ -103,8 +103,10 @@ class BatchResizeOpMaker : public framework::OpProtoAndCheckerMaker { namespace ops = paddle::operators; REGISTER_OPERATOR( - batch_resize, ops::data::BatchResizeOp, ops::data::BatchResizeOpMaker, + batch_resize, ops::data::BatchResizeOp, + ops::data::BatchResizeOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL(batch_resize, ops::data::BatchResizeCPUKernel) +REGISTER_OP_CPU_KERNEL(batch_resize, + ops::data::BatchResizeCPUKernel) diff --git a/paddle/fluid/operators/data/batch_resize_op.cu b/paddle/fluid/operators/data/batch_resize_op.cu index e1164043b8c117..4953e39801d3de 100644 --- a/paddle/fluid/operators/data/batch_resize_op.cu +++ b/paddle/fluid/operators/data/batch_resize_op.cu @@ -76,10 +76,10 @@ __global__ void KeNearestNeighborInterpFw( template __global__ void KeBilinearInterpFw( const T* in, const size_t in_img_h, const size_t in_img_w, - const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, - const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w, - const bool align_corners, const int align_mode, + const size_t input_h, const size_t input_w, T* out, + const size_t out_img_h, const size_t out_img_w, const size_t output_h, + const size_t output_w, const size_t num_channels, const float ratio_h, + const float ratio_w, const bool align_corners, const int align_mode, const DataLayout data_format) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; @@ -114,8 +114,8 @@ __global__ void KeBilinearInterpFw( int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0; float src_h = ratio_h * (out_img_idy + 0.5) - 0.5; src_h = src_h > 0 ? src_h : 0; - float h1lambda = - align_flag ? src_h - in_img_idy : ratio_h * out_img_idy - in_img_idy; + float h1lambda = align_flag ? src_h - in_img_idy + : ratio_h * out_img_idy - in_img_idy; float h2lambda = 1.f - h1lambda; // get input w index with offset @@ -126,8 +126,8 @@ __global__ void KeBilinearInterpFw( int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0; float src_w = ratio_w * (out_img_idx + 0.5) - 0.5; src_w = src_w > 0 ? src_w : 0; - float w1lambda = - align_flag ? src_w - in_img_idx : ratio_w * out_img_idx - in_img_idx; + float w1lambda = align_flag ? src_w - in_img_idx + : ratio_w * out_img_idx - in_img_idx; float w2lambda = 1.f - w1lambda; if (data_format == DataLayout::kNCHW) { @@ -135,34 +135,33 @@ __global__ void KeBilinearInterpFw( in_img_idy * in_img_w + in_img_idx]; // bilinear interpolation - out[out_id_h * output_w + out_id_w] = - (T)(h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[w_id]) + - h1lambda * (w2lambda * in_pos[h_id * in_img_w] + - w1lambda * in_pos[h_id * in_img_w + w_id])); + out[out_id_h * output_w + out_id_w] = (T)( + h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[w_id]) + + h1lambda * (w2lambda * in_pos[h_id * in_img_w] + + w1lambda * in_pos[h_id * in_img_w + w_id])); } else { const T* in_pos = &in[out_id_h * input_w + in_img_idy * in_img_w * num_channels + in_img_idx * num_channels + channel_id]; // bilinear interpolation - out[out_id_h * output_w + out_id_w] = - (T)(h2lambda * (w2lambda * in_pos[0] + - w1lambda * in_pos[w_id * num_channels]) + - h1lambda * (w2lambda * in_pos[h_id * in_img_w * num_channels] + - w1lambda * in_pos[h_id * in_img_w * num_channels + - w_id * num_channels])); + out[out_id_h * output_w + out_id_w] = (T)( + h2lambda * + (w2lambda * in_pos[0] + w1lambda * in_pos[w_id * num_channels]) + + h1lambda * (w2lambda * in_pos[h_id * in_img_w * num_channels] + + w1lambda * in_pos[h_id * in_img_w * num_channels + + w_id * num_channels])); } } } template -static void ResizeFwd(const framework::ExecutionContext& ctx, - const framework::LoDTensor& input, - framework::Tensor* output, - const std::vector out_size, - const std::string interp_method, const bool align_corners, - const int align_mode, const int img_h, const int img_w, - const int c, const DataLayout data_format) { +static void ResizeFwd( + const framework::ExecutionContext& ctx, const framework::LoDTensor& input, + framework::Tensor* output, const std::vector out_size, + const std::string interp_method, const bool align_corners, + const int align_mode, const int img_h, const int img_w, const int c, + const DataLayout data_format) { auto input_data = input.template data(); int out_h = static_cast(out_size[0]); int out_w = static_cast(out_size[1]); @@ -202,7 +201,8 @@ static void ResizeFwd(const framework::ExecutionContext& ctx, KeBilinearInterpFw<<>>( input_data, img_h, img_w, 1, in_chw, output_data, out_h, out_w, 1, - out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_format); + out_chw, c, ratio_h, ratio_w, align_corners, align_mode, + data_format); } } @@ -214,8 +214,8 @@ class BatchResizeCUDAKernel : public framework::OpKernel { platform::is_gpu_place(ctx.GetPlace()), true, platform::errors::NotFound("This kernel only runs on GPU device.")); // get input, output - auto x = ctx.MultiInput("X"); - PADDLE_ENFORCE_GT(x.size(), 0, + auto* x = ctx.Input("X"); + PADDLE_ENFORCE_GT(x->size(), 0, platform::errors::InvalidArgument( "The size of X must be greater than 0.")); auto* out = ctx.Output("Out"); @@ -231,28 +231,30 @@ class BatchResizeCUDAKernel : public framework::OpKernel { bool align_corners = ctx.Attr("align_corners"); int align_mode = ctx.Attr("align_mode"); - auto* img = x.at(0); - int64_t img_c = - data_format == DataLayout::kNCHW ? img->dims()[0] : img->dims()[2]; + auto* img = &x->at(0); + int64_t img_c = data_format == DataLayout::kNCHW ? \ + img->dims()[0] : img->dims()[2]; - std::vector out_dim = {static_cast(x.size()), size[0], - size[1], img_c}; + std::vector out_dim = {static_cast(x->size()), + size[0], size[1], img_c}; if (data_format == DataLayout::kNCHW) { - out_dim = {static_cast(x.size()), img_c, size[0], size[1]}; + out_dim = {static_cast(x->size()), + img_c, size[0], size[1]}; } out->Resize(phi::make_ddim(out_dim)); out->mutable_data(ctx.GetPlace()); int img_h, img_w, idx_h, idx_w, crop_h, crop_w; - for (int i = 0; i < x.size(); i++) { - img = x.at(i); + for (int i = 0; i < x->size(); i++) { + img = &x->at(i); img_h = data_format == DataLayout::kNCHW ? img->dims()[1] : img->dims()[0]; img_w = data_format == DataLayout::kNCHW ? img->dims()[2] : img->dims()[1]; auto out_tensor = out->Slice(i, i + 1); - ResizeFwd(ctx, *img, &out_tensor, size, interp_method, align_corners, - align_mode, img_h, img_w, img_c, data_format); + ResizeFwd(ctx, *img, &out_tensor, size, interp_method, + align_corners, align_mode, img_h, img_w, img_c, + data_format); } } }; @@ -262,5 +264,6 @@ class BatchResizeCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(batch_resize, ops::data::BatchResizeCUDAKernel, +REGISTER_OP_CUDA_KERNEL(batch_resize, + ops::data::BatchResizeCUDAKernel, ops::data::BatchResizeCUDAKernel); diff --git a/paddle/fluid/operators/data/file_label_loader_op.cc b/paddle/fluid/operators/data/file_label_loader_op.cc index 530d51ec35d358..3b26438db00d7f 100644 --- a/paddle/fluid/operators/data/file_label_loader_op.cc +++ b/paddle/fluid/operators/data/file_label_loader_op.cc @@ -27,9 +27,9 @@ class FileLabelLoaderOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx->HasInput("Indices"), true, platform::errors::InvalidArgument( "Input(Indices) of ReadFileLoaderOp is null.")); - // PADDLE_ENFORCE_EQ(ctx->HasOutput("Image"), true, - // platform::errors::InvalidArgument( - // "Output(Image) of ReadFileLoaderOp is null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Image"), true, + platform::errors::InvalidArgument( + "Output(Image) of ReadFileLoaderOp is null.")); PADDLE_ENFORCE_EQ(ctx->HasOutput("Label"), true, platform::errors::InvalidArgument( "Output(Label) of ReadFileLoaderOp is null.")); @@ -51,8 +51,7 @@ class FileLabelLoaderOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Indices", "The batch indices of input samples"); - AddOutput("Image", "The output image tensor of ReadFileLoader op") - .AsDuplicable(); + AddOutput("Image", "The output image tensor of ReadFileLoader op"); AddOutput("Label", "The output label tensor of ReadFileLoader op"); AddAttr("data_root", "Path of root directory of dataset"); AddComment(R"DOC( @@ -72,5 +71,4 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker) -REGISTER_OP_CPU_KERNEL(file_label_loader, - ops::FileLabelLoaderCPUKernel) +REGISTER_OP_CPU_KERNEL(file_label_loader, ops::FileLabelLoaderCPUKernel) diff --git a/paddle/fluid/operators/data/file_label_loader_op.h b/paddle/fluid/operators/data/file_label_loader_op.h index 995c410b7966ed..7e6b0a555acafe 100644 --- a/paddle/fluid/operators/data/file_label_loader_op.h +++ b/paddle/fluid/operators/data/file_label_loader_op.h @@ -13,19 +13,19 @@ // limitations under the License. #pragma once -#include -#include -#include #include #include +#include #include +#include +#include #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" namespace paddle { namespace operators { @@ -39,7 +39,8 @@ constexpr char DIR_SEP = '\\'; constexpr char DIR_SEP = '/'; #endif -static std::string JoinPath(const std::string path1, const std::string path2) { +static std::string JoinPath(const std::string path1, + const std::string path2) { // empty check if (path1.empty()) return path2; if (path1.empty()) return path1; @@ -55,12 +56,11 @@ static std::string JoinPath(const std::string path1, const std::string path2) { return path1 + DIR_SEP + path2; } -static void ParseFilesAndLabels( - const std::string data_root, - std::vector>* samples) { +static void ParseFilesAndLabels(const std::string data_root, + std::vector>* samples) { auto* dir = opendir(data_root.c_str()); PADDLE_ENFORCE_NE(dir, nullptr, platform::errors::InvalidArgument( - "Cannot open directory %s", data_root)); + "Cannot open directory %s", data_root)); // Step 1: parse classes info std::vector classes; @@ -69,13 +69,13 @@ static void ParseFilesAndLabels( if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { entry = readdir(dir); continue; - } + } auto cls_path = JoinPath(data_root, entry->d_name); struct stat s; int ret = stat(cls_path.c_str(), &s); PADDLE_ENFORCE_EQ(ret, 0, platform::errors::InvalidArgument( - "Directory %s is unaccessiable.", cls_path)); + "Directory %s is unaccessiable.", cls_path)); if (S_ISDIR(s.st_mode)) classes.emplace_back(entry->d_name); @@ -89,12 +89,13 @@ static void ParseFilesAndLabels( // Step 2: traverse directory to generate samples for (int class_id = 0; class_id < static_cast(classes.size()); - class_id++) { - auto cur_dir = data_root + DIR_SEP + classes[class_id]; + class_id++) { + auto cur_dir = data_root + DIR_SEP + classes[class_id]; dir = opendir(cur_dir.c_str()); entry = readdir(dir); while (entry) { - if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { + if (strcmp(entry->d_name, ".") == 0 + || strcmp(entry->d_name, "..") == 0) { entry = readdir(dir); continue; } @@ -106,13 +107,12 @@ static void ParseFilesAndLabels( } closedir(dir); } + } -std::map>> - root_to_samples_; +std::map>> root_to_samples_; -static std::vector>* GetFilesAndLabelsFromCache( - const std::string data_root) { +static std::vector>* GetFilesAndLabelsFromCache(const std::string data_root) { auto iter = root_to_samples_.find(data_root); if (iter == root_to_samples_.end()) { std::vector> samples; @@ -120,16 +120,16 @@ static std::vector>* GetFilesAndLabelsFromCache( VLOG(4) << "Init sample number: " << samples.size(); root_to_samples_[data_root] = samples; } - + return &(root_to_samples_[data_root]); } template -class FileLabelLoaderCPUKernel : public framework::OpKernel { +class FileLabelLoaderCPUKernel: public framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { + void Compute(const framework::ExecutionContext& ctx) const override { auto* indices = ctx.Input("Indices"); - auto image_arr = ctx.MultiOutput("Image"); + auto* image_arr = ctx.Output("Image"); auto* label_tensor = ctx.Output("Label"); auto data_root = ctx.Attr("data_root"); @@ -138,9 +138,11 @@ class FileLabelLoaderCPUKernel : public framework::OpKernel { auto batch_size = indices->dims()[0]; const int64_t* indices_data = indices->data(); - label_tensor->Resize(phi::make_ddim({static_cast(batch_size)})); - auto* label_data = - label_tensor->mutable_data(platform::CPUPlace()); + image_arr->clear(); + image_arr->reserve(batch_size); + label_tensor->Resize( + phi::make_ddim({static_cast(batch_size)})); + auto* label_data = label_tensor->mutable_data(platform::CPUPlace()); for (int64_t i = 0; i < batch_size; i++) { int64_t index = static_cast(indices_data[i]); auto file = samples->at(index).first; @@ -151,14 +153,15 @@ class FileLabelLoaderCPUKernel : public framework::OpKernel { input.seekg(0, std::ios::beg); - auto image = image_arr[i]; + framework::LoDTensor image; std::vector image_len = {file_size}; - image->Resize(phi::make_ddim(image_len)); + image.Resize(phi::make_ddim(image_len)); - uint8_t* data = image->mutable_data(platform::CPUPlace()); + uint8_t* data = image.mutable_data(platform::CPUPlace()); input.read(reinterpret_cast(data), file_size); + image_arr->emplace_back(image); label_data[i] = static_cast(label); } } @@ -171,6 +174,7 @@ class FileLabelLoaderCPUKernel : public framework::OpKernel { framework::TensorCopy(lod_tensor, lod_tensor.place(), &out_tensor); out_tensor.set_lod(lod_tensor.lod()); } + }; } // namespace data diff --git a/paddle/fluid/operators/data/image_decoder.cc b/paddle/fluid/operators/data/image_decoder.cc index 37fec0433f9ca4..4c39030d1ad4f2 100644 --- a/paddle/fluid/operators/data/image_decoder.cc +++ b/paddle/fluid/operators/data/image_decoder.cc @@ -18,89 +18,74 @@ namespace paddle { namespace operators { namespace data { -ImageDecoder::ImageDecoder(int dev_id, size_t host_memory_padding, - size_t device_memory_padding) - : nvjpeg_streams_(2), pinned_buffers_(2), page_id_(0) { +ImageDecoder::ImageDecoder(int dev_id, + size_t host_memory_padding, + size_t device_memory_padding) + : nvjpeg_streams_(2), + pinned_buffers_(2), + page_id_(0) { platform::SetDeviceId(dev_id); // create nvjpeg handle and stream - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegCreateEx( - NVJPEG_BACKEND_HYBRID, &device_allocator_, &pinned_allocator_, 0, - &handle_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS( + platform::dynload::nvjpegCreateEx(NVJPEG_BACKEND_HYBRID, &device_allocator_, + &pinned_allocator_, 0, &handle_)); // set pinned/device memory padding if (host_memory_padding > 0) { - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegSetPinnedMemoryPadding(host_memory_padding, - handle_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegSetPinnedMemoryPadding(host_memory_padding, handle_)); } if (device_memory_padding > 0) { - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegSetDeviceMemoryPadding(device_memory_padding, - handle_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegSetDeviceMemoryPadding(device_memory_padding, handle_)); } // create nvjpeg stream for (size_t i = 0; i < nvjpeg_streams_.size(); i++) { - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamCreate( - handle_, &nvjpeg_streams_[i])); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamCreate(handle_, &nvjpeg_streams_[i])); } // create decode params, decoder and state - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecodeParamsCreate(handle_, &decode_params_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecoderCreate( - handle_, NVJPEG_BACKEND_HYBRID, &decoder_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecoderStateCreate(handle_, decoder_, &state_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsCreate(handle_, &decode_params_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecoderCreate(handle_, NVJPEG_BACKEND_HYBRID, &decoder_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecoderStateCreate(handle_, decoder_, &state_)); // create device & pinned buffer - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferDeviceCreate( - handle_, &device_allocator_, &device_buffer_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferDeviceCreate(handle_, &device_allocator_, &device_buffer_)); for (size_t i = 0; i < pinned_buffers_.size(); i++) { - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferPinnedCreate( - handle_, &pinned_allocator_, &pinned_buffers_[i])); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferPinnedCreate(handle_, &pinned_allocator_, &pinned_buffers_[i])); } } ImageDecoder::~ImageDecoder() { // destroy nvjpeg streams for (size_t i = 0; i < nvjpeg_streams_.size(); i++) { - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegJpegStreamDestroy(nvjpeg_streams_[i])); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamDestroy(nvjpeg_streams_[i])); } // destroy decode params, decoder and state - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecodeParamsDestroy(decode_params_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecoderDestroy(decoder_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegJpegStateDestroy(state_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsDestroy(decode_params_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecoderDestroy(decoder_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStateDestroy(state_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegBufferDeviceDestroy(device_buffer_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferDeviceDestroy(device_buffer_)); for (size_t i = 0; i < pinned_buffers_.size(); i++) { - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegBufferPinnedDestroy(pinned_buffers_[i])); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegBufferPinnedDestroy(pinned_buffers_[i])); } // destroy nvjpeg handle at last PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDestroy(handle_)); } -void ImageDecoder::CPUDecodeRandomCrop(const uint8_t* data, size_t length, - RandomROIGenerator* roi_generator, - unsigned char* workspace, - size_t workspace_size, - framework::LoDTensor* out, - platform::Place place) { +void ImageDecoder::CPUDecodeRandomCrop( + const uint8_t* data, size_t length, + RandomROIGenerator* roi_generator, + unsigned char* workspace, size_t workspace_size, + framework::LoDTensor* out, platform::Place place) { VLOG(4) << "CPUDecodeRandomCropResize enter"; #ifdef PADDLE_WITH_OPENCV - cv::Mat image = cv::imdecode( - cv::Mat(1, length, CV_8UC1, const_cast(data)), - cv::IMREAD_COLOR); - + cv::Mat image = + cv::imdecode(cv::Mat(1, length, CV_8UC1, const_cast(data)), cv::IMREAD_COLOR); + cv::Mat cropped; int height = image.rows; int width = image.cols; @@ -126,8 +111,7 @@ void ImageDecoder::CPUDecodeRandomCrop(const uint8_t* data, size_t length, cpu_tensor.Resize(phi::make_ddim(out_shape)); auto* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); - cv::Mat cpu_mat(height, width, CV_8UC3, const_cast(cpu_data), - cv::Mat::AUTO_STEP); + cv::Mat cpu_mat(height, width, CV_8UC3, const_cast(cpu_data), cv::Mat::AUTO_STEP); cv::cvtColor(cropped, cpu_mat, cv::COLOR_BGR2RGB); // copy cpu tensor to output gpu tensor @@ -147,24 +131,22 @@ nvjpegStatus_t ImageDecoder::ParseDecodeParams( int widths[NVJPEG_MAX_COMPONENT]; int heights[NVJPEG_MAX_COMPONENT]; - nvjpegStatus_t status = platform::dynload::nvjpegGetImageInfo( - handle_, bit_stream, bit_len, &components, &subsampling, widths, heights); + + nvjpegStatus_t status = platform::dynload::nvjpegGetImageInfo(handle_, bit_stream, bit_len, + &components, &subsampling, widths, heights); if (status != NVJPEG_STATUS_SUCCESS) return status; int64_t width = static_cast(widths[0]); int64_t height = static_cast(heights[0]); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecodeParamsSetOutputFormat(decode_params_, - NVJPEG_OUTPUT_RGBI)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetOutputFormat(decode_params_, NVJPEG_OUTPUT_RGBI)); if (roi_generator) { ROI roi; roi_generator->GenerateRandomROI(width, height, &roi); - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetROI( - decode_params_, roi.x, roi.y, roi.w, roi.h)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeParamsSetROI(decode_params_, roi.x, roi.y, roi.w, roi.h)); height = roi.h; width = roi.w; } @@ -180,72 +162,55 @@ nvjpegStatus_t ImageDecoder::ParseDecodeParams( return NVJPEG_STATUS_SUCCESS; } -nvjpegStatus_t ImageDecoder::GPUDecodeRandomCrop(const uint8_t* bit_stream, - size_t bit_len, - nvjpegImage_t* out_image) { +nvjpegStatus_t ImageDecoder::GPUDecodeRandomCrop(const uint8_t* bit_stream, size_t bit_len, nvjpegImage_t* out_image) { auto buffer = pinned_buffers_[page_id_]; auto stream = nvjpeg_streams_[page_id_]; page_id_ ^= 1; // decode jpeg in host to pinned buffer - PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamParse( - handle_, bit_stream, bit_len, false, false, stream)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegStateAttachPinnedBuffer(state_, buffer)); - nvjpegStatus_t status = platform::dynload::nvjpegDecodeJpegHost( - handle_, decoder_, state_, decode_params_, stream); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegJpegStreamParse(handle_, bit_stream, bit_len, false, false, stream)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegStateAttachPinnedBuffer(state_, buffer)); + nvjpegStatus_t status = platform::dynload::nvjpegDecodeJpegHost(handle_, decoder_, state_, decode_params_, stream); if (status != NVJPEG_STATUS_SUCCESS) return status; // transfer and decode to device buffer - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegStateAttachDeviceBuffer(state_, device_buffer_)); - PADDLE_ENFORCE_NVJPEG_SUCCESS( - platform::dynload::nvjpegDecodeJpegTransferToDevice( - handle_, decoder_, state_, stream, cuda_stream_)); - status = platform::dynload::nvjpegDecodeJpegDevice(handle_, decoder_, state_, - out_image, nullptr); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegStateAttachDeviceBuffer(state_, device_buffer_)); + PADDLE_ENFORCE_NVJPEG_SUCCESS(platform::dynload::nvjpegDecodeJpegTransferToDevice(handle_, decoder_, state_, stream, cuda_stream_)); + status = platform::dynload::nvjpegDecodeJpegDevice(handle_, decoder_, state_, out_image, nullptr); return status; } -void ImageDecoder::Run(const uint8_t* bit_stream, size_t bit_len, - framework::LoDTensor* out, - RandomROIGenerator* roi_generator, - platform::Place& place) { +void ImageDecoder::Run( + const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out, + RandomROIGenerator* roi_generator, platform::Place& place) { nvjpegImage_t image; - - nvjpegStatus_t status = - ParseDecodeParams(bit_stream, bit_len, out, roi_generator, &image, place); + nvjpegStatus_t status = ParseDecodeParams(bit_stream, bit_len, out, roi_generator, &image, place); if (status != NVJPEG_STATUS_SUCCESS) { - CPUDecodeRandomCrop(bit_stream, bit_len, roi_generator, nullptr, 0, out, - place); + CPUDecodeRandomCrop(bit_stream, bit_len, roi_generator, nullptr, 0, out, place); return; } - status = GPUDecodeRandomCrop(bit_stream, bit_len, &image); if (status != NVJPEG_STATUS_SUCCESS) { - CPUDecodeRandomCrop(bit_stream, bit_len, roi_generator, nullptr, 0, out, - place); + CPUDecodeRandomCrop(bit_stream, bit_len, roi_generator, nullptr, 0, out, place); } } ImageDecoderThreadPool::ImageDecoderThreadPool( - const int num_threads, const int dev_id, const size_t host_memory_padding, - const size_t device_memory_padding) - : threads_(num_threads), - dev_id_(dev_id), - shutdown_(false), - running_(false), - completed_(false), - outstand_tasks_(0) { - PADDLE_ENFORCE_GT(num_threads, 0, - platform::errors::InvalidArgument( - "num_threads shoule be a positive interger, " - "but got %d", - num_threads)); + const int num_threads, const int dev_id, + const size_t host_memory_padding, const size_t device_memory_padding) + : threads_(num_threads), + dev_id_(dev_id), + shutdown_(false), + running_(false), + completed_(false), + outstand_tasks_(0) { + PADDLE_ENFORCE_GT(num_threads, 0, platform::errors::InvalidArgument( + "num_threads shoule be a positive interger, " + "but got %d", num_threads)); for (int i = 0; i < num_threads; i++) { threads_.emplace_back( - std::thread(std::bind(&ImageDecoderThreadPool::ThreadLoop, this, i, - host_memory_padding, device_memory_padding))); + std::thread(std::bind(&ImageDecoderThreadPool::ThreadLoop, + this, i, host_memory_padding, device_memory_padding))); } } @@ -285,29 +250,29 @@ void ImageDecoderThreadPool::ShutDown() { task_queue_.clear(); for (auto& thread : threads_) { - if (thread.joinable()) thread.join(); + if (thread.joinable()) thread.join(); } } void ImageDecoderThreadPool::SortTaskByLengthDescend() { std::lock_guard lock(mutex_); std::sort(task_queue_.begin(), task_queue_.end(), - [](const std::shared_ptr a, - const std::shared_ptr b) { - return b->bit_len < a->bit_len; - }); + [](const std::shared_ptr a, + const std::shared_ptr b) { + return b->bit_len < a->bit_len; + }); } -void ImageDecoderThreadPool::ThreadLoop(const int thread_idx, - const size_t host_memory_padding, - const size_t device_memory_padding) { - ImageDecoder* decoder = - new ImageDecoder(dev_id_, host_memory_padding, device_memory_padding); +void ImageDecoderThreadPool::ThreadLoop( + const int thread_idx, const size_t host_memory_padding, + const size_t device_memory_padding) { + ImageDecoder* decoder = new ImageDecoder(dev_id_, + host_memory_padding, + device_memory_padding); + while (!shutdown_) { std::unique_lock lock(mutex_); - running_cond_.wait(lock, [this] { - return (running_ && !task_queue_.empty()) || shutdown_; - }); + running_cond_.wait(lock, [this] { return (running_ && !task_queue_.empty()) || shutdown_; }); if (shutdown_) break; auto task = task_queue_.front(); @@ -329,8 +294,7 @@ void ImageDecoderThreadPool::ThreadLoop(const int thread_idx, } // initialization static variables out of ImageDecoderThreadPoolManager -ImageDecoderThreadPoolManager* ImageDecoderThreadPoolManager::pm_instance_ptr_ = - nullptr; +ImageDecoderThreadPoolManager* ImageDecoderThreadPoolManager::pm_instance_ptr_ = nullptr; std::mutex ImageDecoderThreadPoolManager::m_; } // namespace data diff --git a/paddle/fluid/operators/data/image_decoder.h b/paddle/fluid/operators/data/image_decoder.h index dd2513e8358abb..de332f0a2b963f 100644 --- a/paddle/fluid/operators/data/image_decoder.h +++ b/paddle/fluid/operators/data/image_decoder.h @@ -14,17 +14,17 @@ limitations under the License. */ #pragma once -#include #include +#include #ifdef PADDLE_WITH_OPENCV -#include + #include #endif -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/platform/dynload/nvjpeg.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/dynload/nvjpeg.h" #include "paddle/fluid/platform/stream/cuda_stream.h" #include "paddle/fluid/operators/data/random_roi_generator.h" @@ -33,17 +33,17 @@ namespace paddle { namespace operators { namespace data { -static int dev_malloc(void** p, size_t s) { - return static_cast(cudaMalloc(p, s)); -} +static int dev_malloc(void **p, size_t s) { return (int)cudaMalloc(p, s); } -static int dev_free(void* p) { return static_cast(cudaFree(p)); } +static int dev_free(void *p) { return (int)cudaFree(p); } static int host_malloc(void** p, size_t s, unsigned int f) { - return static_cast(cudaHostAlloc(p, s, f)); + return (int)cudaHostAlloc(p, s, f); } -static int host_free(void* p) { return static_cast(cudaFreeHost(p)); } +static int host_free(void* p) { + return (int)cudaFreeHost(p); +} struct ImageDecodeTask { const uint8_t* bit_stream; @@ -54,92 +54,92 @@ struct ImageDecodeTask { }; class ImageDecoder { - public: - ImageDecoder(int dev_id, size_t host_memory_padding = 0, - size_t device_memory_padding = 0); + public: + ImageDecoder(int dev_id, + size_t host_memory_padding=0, + size_t device_memory_padding=0); - ~ImageDecoder(); + ~ImageDecoder(); - void Run(const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out, - RandomROIGenerator* roi_generator, const platform::Place& place); + void Run(const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out, + RandomROIGenerator* roi_generator, platform::Place& place); - private: - DISABLE_COPY_AND_ASSIGN(ImageDecoder); + private: + DISABLE_COPY_AND_ASSIGN(ImageDecoder); + + void CPUDecodeRandomCrop(const uint8_t* data, size_t length, + RandomROIGenerator* roi_generator, + unsigned char* workspace, size_t workspace_size, + framework::LoDTensor* out, platform::Place place); - void CPUDecodeRandomCrop(const uint8_t* data, size_t length, - RandomROIGenerator* roi_generator, - unsigned char* workspace, size_t workspace_size, - framework::LoDTensor* out, platform::Place place); + nvjpegStatus_t ParseDecodeParams( + const uint8_t* bit_stream, size_t bit_len, framework::LoDTensor* out, + RandomROIGenerator* roi_generator, nvjpegImage_t* out_image, + platform::Place place); - nvjpegStatus_t ParseDecodeParams(const uint8_t* bit_stream, size_t bit_len, - framework::LoDTensor* out, - RandomROIGenerator* roi_generator, - nvjpegImage_t* out_image, - platform::Place place); + nvjpegStatus_t GPUDecodeRandomCrop(const uint8_t* bit_stream, size_t bit_len, nvjpegImage_t* out_image); - nvjpegStatus_t GPUDecodeRandomCrop(const uint8_t* bit_stream, size_t bit_len, - nvjpegImage_t* out_image); - cudaStream_t cuda_stream_ = nullptr; - std::vector nvjpeg_streams_; + cudaStream_t cuda_stream_ = nullptr; + std::vector nvjpeg_streams_; - nvjpegHandle_t handle_ = nullptr; - nvjpegJpegState_t state_ = nullptr; - nvjpegJpegDecoder_t decoder_ = nullptr; - nvjpegDecodeParams_t decode_params_ = nullptr; + nvjpegHandle_t handle_ = nullptr; + nvjpegJpegState_t state_ = nullptr; + nvjpegJpegDecoder_t decoder_ = nullptr; + nvjpegDecodeParams_t decode_params_ = nullptr; - nvjpegPinnedAllocator_t pinned_allocator_ = {&host_malloc, &host_free}; - nvjpegDevAllocator_t device_allocator_ = {&dev_malloc, &dev_free}; - std::vector pinned_buffers_; - nvjpegBufferDevice_t device_buffer_ = nullptr; + nvjpegPinnedAllocator_t pinned_allocator_ = {&host_malloc, &host_free}; + nvjpegDevAllocator_t device_allocator_ = {&dev_malloc, &dev_free}; + std::vector pinned_buffers_; + nvjpegBufferDevice_t device_buffer_ = nullptr; - int page_id_; + int page_id_; }; class ImageDecoderThreadPool { - public: - ImageDecoderThreadPool(const int num_threads, const int dev_id, - size_t host_memory_padding, - size_t device_memory_padding); + public: + ImageDecoderThreadPool(const int num_threads, const int dev_id, + size_t host_memory_padding, + size_t device_memory_padding); - ~ImageDecoderThreadPool(); + ~ImageDecoderThreadPool(); - void AddTask(std::shared_ptr task); + void AddTask(std::shared_ptr task); - void RunAll(const bool wait, const bool sort = true); + void RunAll(const bool wait, const bool sort = true); - void WaitTillTasksCompleted(); + void WaitTillTasksCompleted(); - void ShutDown(); + void ShutDown(); - private: - DISABLE_COPY_AND_ASSIGN(ImageDecoderThreadPool); + private: + DISABLE_COPY_AND_ASSIGN(ImageDecoderThreadPool); - void SortTaskByLengthDescend(); + void SortTaskByLengthDescend(); - void ThreadLoop(const int thread_idx, const size_t host_memory_padding, - const size_t device_memory_padding); + void ThreadLoop(const int thread_idx, const size_t host_memory_padding, + const size_t device_memory_padding); - std::vector threads_; - int dev_id_; + std::vector threads_; + int dev_id_; - std::deque> task_queue_; - std::mutex mutex_; + std::deque> task_queue_; + std::mutex mutex_; - bool shutdown_; - std::condition_variable running_cond_; - bool running_; - std::condition_variable completed_cond_; - bool completed_; + bool shutdown_; + std::condition_variable running_cond_; + bool running_; + std::condition_variable completed_cond_; + bool completed_; - int outstand_tasks_; + int outstand_tasks_; }; class ImageDecoderThreadPoolManager { private: DISABLE_COPY_AND_ASSIGN(ImageDecoderThreadPoolManager); - static ImageDecoderThreadPoolManager* pm_instance_ptr_; + static ImageDecoderThreadPoolManager *pm_instance_ptr_; static std::mutex m_; std::map> prog_id_to_pool_; @@ -157,12 +157,15 @@ class ImageDecoderThreadPoolManager { ImageDecoderThreadPool* GetDecoderThreadPool( const int64_t program_id, const int num_threads, const int dev_id, - const size_t host_memory_padding, const size_t device_memory_padding) { + const size_t host_memory_padding, + const size_t device_memory_padding) { auto iter = prog_id_to_pool_.find(program_id); if (iter == prog_id_to_pool_.end()) { - prog_id_to_pool_[program_id] = - std::unique_ptr(new ImageDecoderThreadPool( - num_threads, dev_id, host_memory_padding, device_memory_padding)); + prog_id_to_pool_[program_id] = + std::unique_ptr( + new ImageDecoderThreadPool(num_threads, dev_id, + host_memory_padding, + device_memory_padding)); } return prog_id_to_pool_[program_id].get(); } @@ -177,7 +180,7 @@ class ImageDecoderThreadPoolManager { void ShutDown() { if (prog_id_to_pool_.empty()) return; - + std::lock_guard lk(m_); auto iter = prog_id_to_pool_.begin(); for (; iter != prog_id_to_pool_.end(); iter++) { diff --git a/python/paddle/fluid/dataloader/ops.py b/python/paddle/fluid/dataloader/ops.py index 5658852f1836f0..adfb0a8f23e07b 100755 --- a/python/paddle/fluid/dataloader/ops.py +++ b/python/paddle/fluid/dataloader/ops.py @@ -21,6 +21,7 @@ from ...fluid.framework import in_dygraph_mode from ...common_ops_import import * + __all__ = ["map", "data_reader"] @@ -61,26 +62,9 @@ def _generate_stream_id(): def map(map_func, inputs=[]): - def _build_program_inputs(x, map_block): - assert isinstance(x, (list, tuple)) - assert len(x) > 0, "map function must have inputs" - outputs = [] - if isinstance(x[0], (list, tuple)): - for item in x: - outputs.append(_build_program_inputs(item, map_block)) - else: - for item in x: - outputs.append( - map_block.create_var( - name=unique_name.generate("map_sub"), - type=item.desc.type(), - dtype=item.desc.dtype(), - persistable=False)) - return outputs - inputs = _to_list(inputs) if in_dygraph_mode(): - return map_func(inputs) + return map_func(*inputs) helper = LayerHelper("map", **locals()) @@ -90,19 +74,16 @@ def _build_program_inputs(x, map_block): program_id = _hash_with_id(main_program, map_func) map_block = main_program.current_block() - program_inputs = _build_program_inputs(inputs, map_block) - + program_inputs = [ + map_block.create_var( + name=unique_name.generate("map_sub"), + type=inp.desc.type(), + dtype=inp.desc.dtype(), + persistable=False) for inp in inputs] program_outputs = map_func(*program_inputs) program_outputs = _to_list(program_outputs) - input_var_names = [] - for variables in program_inputs: - if isinstance(variables, (list, tuple)): - inputs = inputs[0] - for v in variables: - input_var_names.append(v.name) - else: - input_var_names.append(variables.name) - + + input_var_names = [v.name for v in program_inputs] output_var_names = [v.name for v in program_outputs] outputs = \ @@ -147,39 +128,22 @@ def data_reader(reader_func, reader_block = main_program.current_block() indices_var = reader_block.create_var( - name=unique_name.generate("data_reader_sub"), - type=core.VarDesc.VarType.LOD_TENSOR, - dtype="int64", - persistable=False) + name=unique_name.generate("data_reader_sub"), + type=core.VarDesc.VarType.LOD_TENSOR, + dtype="int64", + persistable=False) program_outputs = reader_func(indices_var) program_outputs = _to_list(program_outputs) - + indices_var_name = indices_var.name - output_var_names = [] - for outs in program_outputs: - if isinstance(outs, (list, tuple)): - for out in outs: - output_var_names.append(out.name) - else: - output_var_names.append(outs.name) - - outputs = [] - for outps in program_outputs: - if isinstance(outps, (list, tuple)): - for outp in outps: - outputs.append( - helper.create_variable( - name=unique_name.generate("data_reader"), - type=outp.desc.type(), - dtype=outp.desc.dtype(), - persistable=True)) - else: - outputs.append( - helper.create_variable( - name=unique_name.generate("data_reader"), - type=outps.desc.type(), - dtype=outps.desc.dtype(), - persistable=True)) + output_var_names = [v.name for v in program_outputs] + + outputs = \ + [helper.create_variable( + name=unique_name.generate("data_reader"), + type=outp.desc.type(), + dtype=outp.desc.dtype(), + persistable=True) for outp in program_outputs] attrs = { "reader_id": _hash_with_id(main_program), @@ -196,6 +160,9 @@ def data_reader(reader_func, } helper.append_op( - type="data_reader", inputs={}, outputs={"Out": outputs}, attrs=attrs) + type="data_reader", + inputs={}, + outputs={"Out": outputs}, + attrs=attrs) return outputs diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index d35a79c964ceb5..7b6fd81a57869f 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -868,10 +868,8 @@ def read_file(filename, name=None): return out -def image_decode(x, - num_threads=2, - host_memory_padding=0, - device_memory_padding=0, +def image_decode(x, num_threads=2, + host_memory_padding=0, device_memory_padding=0, name=None): """ Decodes a JPEG image into a 3 dimensional RGB Tensor or 1 dimensional Gray Tensor. @@ -914,18 +912,17 @@ def image_decode(x, core.VarDesc.VarType.LOD_TENSOR_ARRAY, False) program_id = utils._hash_with_id(mode, num_threads, name, local_rank) return _C_ops.batch_decode( - x, out, "num_threads", num_threads, "local_rank", local_rank, - "program_id", program_id, "host_memory_padding", - host_memory_padding, "device_memory_padding", device_memory_padding) + x, out, "num_threads", num_threads, + "local_rank", local_rank, "program_id", program_id, + "host_memory_padding", host_memory_padding, + "device_memory_padding", device_memory_padding) inputs = {'X': x} - attrs = { - "num_threads": num_threads, - "local_rank": local_rank, - "program_id": utils._hash_with_id(default_main_program()), - "host_memory_padding": host_memory_padding, - "device_memory_padding": device_memory_padding - } + attrs = {"num_threads": num_threads, + "local_rank": local_rank, + "program_id": utils._hash_with_id(default_main_program()), + "host_memory_padding": host_memory_padding, + "device_memory_padding": device_memory_padding} helper = LayerHelper("batch_decode", **locals()) out = helper.create_variable( @@ -943,8 +940,8 @@ def image_decode_random_crop(x, host_memory_padding=0, device_memory_padding=0, data_format='NCHW', - aspect_ratio_min=3. / 4., - aspect_ratio_max=4. / 3., + aspect_ratio_min=3./4., + aspect_ratio_max=4./3., area_min=0.08, area_max=1., num_attempts=10, @@ -987,48 +984,39 @@ def image_decode_random_crop(x, local_rank = paddle.distributed.get_rank() if in_dygraph_mode(): out = core.VarBase(core.VarDesc.VarType.UINT8, [], - unique_name.generate("image_decode_random_crop"), - core.VarDesc.VarType.LOD_TENSOR_ARRAY, False) + unique_name.generate("image_decode_random_crop"), + core.VarDesc.VarType.LOD_TENSOR_ARRAY, False) program_id = utils._hash_with_id(mode, num_threads, name, local_rank) return _C_ops.batch_decode_random_crop( - x, out, "num_threads", num_threads, "data_format", data_format, - "aspect_ratio_min", aspect_ratio_min, "aspect_ratio_max", - aspect_ratio_max, "area_min", area_min, "area_max", area_max, - "num_attempts", num_attempts, "local_rank", local_rank, - "program_id", program_id, "host_memory_padding", - host_memory_padding, "device_memory_padding", device_memory_padding) + x, out, "num_threads", num_threads, + "data_format", data_format, "aspect_ratio_min", + aspect_ratio_min, "aspect_ratio_max", aspect_ratio_max, + "area_min", area_min, "area_max", area_max, + "num_attempts", num_attempts, "local_rank", local_rank, + "program_id", program_id, + "host_memory_padding", host_memory_padding, + "device_memory_padding", device_memory_padding) inputs = {'X': x} - attrs = { - "num_threads": num_threads, - "host_memory_padding": host_memory_padding, - "device_memory_padding": device_memory_padding, - "data_format": data_format, - "aspect_ratio_min": aspect_ratio_min, - "aspect_ratio_max": aspect_ratio_max, - "area_min": area_min, - "area_max": area_max, - "num_attempts": num_attempts, - "local_rank": local_rank, - "program_id": utils._hash_with_id(default_main_program()) - } + attrs = {"num_threads": num_threads, + "host_memory_padding": host_memory_padding, + "device_memory_padding": device_memory_padding, + "data_format": data_format, + "aspect_ratio_min": aspect_ratio_min, + "aspect_ratio_max": aspect_ratio_max, + "area_min": area_min, + "area_max": area_max, + "num_attempts": num_attempts, + "local_rank": local_rank, + "program_id": utils._hash_with_id(default_main_program())} helper = LayerHelper("batch_decode_random_crop", **locals()) - # out = helper.create_variable( - # name=unique_name.generate("image_decode_random_crop"), - # type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - # dtype=x.dtype) - out = [ - helper.create_variable( - name=unique_name.generate("file_label_loader"), - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='uint8') for i in range(len(x)) - ] + out = helper.create_variable( + name=unique_name.generate("image_decode_random_crop"), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + dtype=x.dtype) helper.append_op( - type="batch_decode_random_crop", - inputs=inputs, - attrs=attrs, - outputs={"Out": out}) + type="batch_decode_random_crop", inputs=inputs, attrs=attrs, outputs={"Out": out}) return out @@ -1037,12 +1025,12 @@ def random_flip(x, prob=0.5, name=None): if prob < 0. or prob > 1.: raise ValueError("prob should in (0, 1) in random_flip") - rand_vec = layers.uniform_random_batch_size_like(x, [1, 1], min=0., max=1.) + rand_vec = layers.uniform_random_batch_size_like( + x, [1, 1], min=0., max=1.) return rand_vec < prob -def mirror_normalize(x, - mirror, +def mirror_normalize(x, mirror, mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375], name=None): @@ -1061,18 +1049,17 @@ def _to_list_3(l): std = _to_list_3(std) if _non_static_mode(): - return _C_ops.mirror_normalize(x, mirror, "mean", mean, "std", std) + return _C_ops.mirror_normalize(x, mirror, "mean", mean, + "std", std) helper = LayerHelper("mirror_normalize", **locals()) dtype = helper.input_dtype() out = helper.create_variable_for_type_inference(dtype) helper.append_op( type="mirror_normalize", - inputs={"X": x, - "Mirror": mirror}, + inputs={"X": x, "Mirror": mirror}, outputs={"Out": out}, - attrs={"mean": mean, - "std": std}) + attrs={"mean": mean, "std": std}) return out @@ -1516,8 +1503,8 @@ def forward(self, x, boxes, boxes_num, aligned=True): def random_crop_and_resize(x, size, - aspect_ratio_min=3. / 4., - aspect_ratio_max=4. / 3., + aspect_ratio_min=3./4., + aspect_ratio_max=4./3., area_min=0.08, area_max=1., num_attempts=10, @@ -1582,9 +1569,10 @@ def random_crop_and_resize(x, out = _C_ops.batch_random_crop_and_resize( x, "size", size, "aspect_ratio_min", aspect_ratio_min, "aspect_ratio_max", aspect_ratio_max, "area_max", area_max, - "area_min", area_min, "num_attempts", num_attempts, "interp_method", - interp_method, "align_corners", align_corners, "align_mode", - align_mode, "data_format", data_format, "seed", seed) + "area_min", area_min, "num_attempts", num_attempts, + "interp_method", interp_method, "align_corners", + align_corners, "align_mode", align_mode, + "data_format", data_format, "seed", seed) return out helper = LayerHelper('batch_random_crop_and_resize', **locals()) @@ -1669,10 +1657,10 @@ def image_resize(x, size = (size, size) if in_dygraph_mode(): - out = _C_ops.batch_resize(x, "size", size, "interp_method", - interp_method, "align_corners", align_corners, - "align_mode", align_mode, "data_format", - data_format, "seed", seed) + out = _C_ops.batch_resize( + x, "size", size, "interp_method", interp_method, + "align_corners", align_corners, "align_mode", + align_mode, "data_format", data_format, "seed", seed) return out helper = LayerHelper('batch_resize', **locals()) @@ -1688,7 +1676,10 @@ def image_resize(x, "seed": seed, } helper.append_op( - type="batch_resize", inputs=inputs, outputs={"Out": out}, attrs=attrs) + type="batch_resize", + inputs=inputs, + outputs={"Out": out}, + attrs=attrs) return out diff --git a/python/paddle/vision/reader.py b/python/paddle/vision/reader.py index 7c8c22e2f676b0..4009b787890413 100644 --- a/python/paddle/vision/reader.py +++ b/python/paddle/vision/reader.py @@ -22,14 +22,15 @@ from paddle.common_ops_import import * from paddle import _C_ops -__all__ = [ #noqa +__all__ = [ #noqa 'file_label_loader', 'file_label_reader', ] class _Sampler(object): - def __init__(self, batch_size, num_samples, shuffle=False, drop_last=False): + def __init__(self, batch_size, num_samples, + shuffle=False, drop_last=False): self.batch_size = batch_size self.num_samples = num_samples self.shuffle = shuffle @@ -48,7 +49,7 @@ def __next__(self): batch_len = min(self.batch_size, self.num_samples - self.start_idx) indices = self.sample_ids[self.start_idx:self.start_idx + batch_len] self.start_idx += batch_len - + if self.drop_last and len(indices) < self.batch_size: self.reset() return self.__next__() @@ -65,16 +66,13 @@ class _SamplerManager(object): def __init__(self): self.samplers = {} - def get(self, - sample_id, - batch_size, - num_samples, - shuffle=False, - drop_last=False): + def get(self, sample_id, batch_size, num_samples, + shuffle=False, drop_last=False): if sample_id in self.samplers: return self.samplers[sample_id] - sampler = _Sampler(batch_size, num_samples, shuffle, drop_last) + sampler = _Sampler(batch_size, num_samples, + shuffle, drop_last) self.samplers[sample_id] = sampler return sampler @@ -82,7 +80,7 @@ def get(self, _sampler_manager = _SamplerManager() -def file_label_loader(data_root, indices, batch_size, name=None): +def file_label_loader(data_root, indices, name=None): """ Reads a batch of data, outputs the bytes contents of a file as a uint8 Tensor with one dimension. @@ -96,25 +94,23 @@ def file_label_loader(data_root, indices, batch_size, name=None): """ if in_dygraph_mode(): - image = [ - core.VarBase(core.VarDesc.VarType.UINT8, [], - unique_name.generate("file_label_loader"), - core.VarDesc.VarType.LOD_TENSOR, False) - for i in range(batch_size) - ] - return _C_ops.file_label_loader(indices, image, 'data_root', data_root) + image = core.VarBase(core.VarDesc.VarType.UINT8, [], + unique_name.generate("file_label_loader"), + core.VarDesc.VarType.LOD_TENSOR_ARRAY, False) + return _C_ops.file_label_loader(indices, image, 'data_root', + data_root) inputs = {"Indices": indices} - attrs = {'data_root': data_root, } + attrs = { + 'data_root': data_root, + } helper = LayerHelper("file_label_loader", **locals()) - image = [ - helper.create_variable( - name=unique_name.generate("file_label_loader"), - type=core.VarDesc.VarType.LOD_TENSOR, - dtype='uint8') for i in range(batch_size) - ] - + image = helper.create_variable( + name=unique_name.generate("file_label_loader"), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + dtype='uint8') + label = helper.create_variable( name=unique_name.generate("file_label_loader"), type=core.VarDesc.VarType.LOD_TENSOR, @@ -163,25 +159,23 @@ def file_label_reader(file_root, targets = [s[1] for s in data_folder.samples] if in_dygraph_mode(): - sample_id = utils._hash_with_id(file_root, batch_size, shuffle, - drop_last) + sample_id = utils._hash_with_id(file_root, batch_size, + shuffle, drop_last) sampler = _sampler_manager.get(sample_id, batch_size=batch_size, num_samples=len(samples), shuffle=shuffle, drop_last=drop_last) indices = paddle.to_tensor(next(sampler), dtype='int64') - outs = file_label_loader(file_root, indices, batch_size) - return outs[:-1], outs[-1] + return file_label_loader(file_root, indices) def _reader(indices): - return file_label_loader(file_root, indices, batch_size) - - outs = paddle.io.data_reader( - _reader, - batch_size=batch_size, - num_samples=len(samples), - shuffle=shuffle, - drop_last=drop_last, - seed=seed) - return outs[:-1], outs[-1] + return file_label_loader(file_root, indices) + + return paddle.io.data_reader(_reader, + batch_size=batch_size, + num_samples=len(samples), + shuffle=shuffle, + drop_last=drop_last, + seed=seed) +