From 1dc8aab478c9eedc773896f888240c3660728243 Mon Sep 17 00:00:00 2001
From: dimitri009
Date: Fri, 30 May 2025 18:03:58 +0200
Subject: [PATCH 1/3] Add "SLANet-1M" code (train, eval, eval_table)

---
 docling_ibm_models/slanet_1m/.gitignore       |  35 +
 docling_ibm_models/slanet_1m/README.md        |  19 +
 docling_ibm_models/slanet_1m/__init__.py      |   0
 .../slanet_1m/configs/SLANet_1M.yml           | 145 +++
 .../slanet_1m/dict/table_structure_dict.txt   |  28 +
 docling_ibm_models/slanet_1m/export_model.py  | 295 +++++++
 .../slanet_1m/kubernetes/train-job.yaml       |  88 ++
 .../slanet_1m/losses/__init__.py              |  36 +
 .../slanet_1m/losses/table_att_loss.py        | 100 +++
 .../slanet_1m/metrics/__init__.py             |  54 ++
 .../slanet_1m/metrics/det_metric.py           | 153 ++++
 .../slanet_1m/metrics/eval_det_iou.py         | 257 ++++++
 .../slanet_1m/metrics/rec_metric.py           | 179 ++++
 .../slanet_1m/metrics/table_metric.py         | 161 ++++
 .../modeling/architectures/__init__.py        | 115 +++
 .../modeling/architectures/base_model.py      | 109 +++
 .../architectures/distillation_model.py       |  59 ++
 .../slanet_1m/modeling/backbones/__init__.py  |  39 +
 .../modeling/backbones/det_mobilenet_v3.py    | 285 ++++++
 .../modeling/backbones/det_pp_lcnet.py        | 274 ++++++
 .../modeling/backbones/det_pp_lcnet_v2.py     | 358 ++++++++
 .../modeling/backbones/rec_lcnetv3.py         | 554 ++++++++++++
 .../modeling/backbones/rec_resnet_fpn.py      | 317 +++++++
 .../modeling/backbones/rec_svtrnet.py         | 642 ++++++++++++++
 .../slanet_1m/modeling/heads/__init__.py      |  40 +
 .../slanet_1m/modeling/heads/rec_att_head.py  | 215 +++++
 .../slanet_1m/modeling/heads/rec_ctc_head.py  |  92 ++
 .../modeling/heads/rec_multi_head.py          | 152 ++++
 .../slanet_1m/modeling/heads/rec_nrtr_head.py | 704 +++++++++++++++
 .../modeling/heads/table_att_head.py          | 413 +++++++++
 .../slanet_1m/modeling/necks/__init__.py      |  31 +
 .../slanet_1m/modeling/necks/csp_pan.py       | 337 +++++++
 .../slanet_1m/modeling/necks/rnn.py           | 284 ++++++
 .../slanet_1m/optimizer/__init__.py           |  66 ++
 .../slanet_1m/optimizer/learning_rate.py      | 454 ++++++++++
 .../slanet_1m/optimizer/lr_scheduler.py       | 240 +++++
 .../slanet_1m/optimizer/optimizer.py          | 292 +++++++
 .../slanet_1m/optimizer/regularizer.py        |  51 ++
 docling_ibm_models/slanet_1m/program.py       | 826 ++++++++++++++++++
 docling_ibm_models/slanet_1m/requirements.txt |  21 +
 docling_ibm_models/slanet_1m/src/eval.py      | 174 ++++
 .../slanet_1m/src/eval_table.py               | 112 +++
 docling_ibm_models/slanet_1m/src/train.py     | 256 ++++++
 43 files changed, 9062 insertions(+)
 create mode 100644 docling_ibm_models/slanet_1m/.gitignore
 create mode 100644 docling_ibm_models/slanet_1m/README.md
 create mode 100644 docling_ibm_models/slanet_1m/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
 create mode 100644 docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
 create mode 100644 docling_ibm_models/slanet_1m/export_model.py
 create mode 100644 docling_ibm_models/slanet_1m/kubernetes/train-job.yaml
 create mode 100644 docling_ibm_models/slanet_1m/losses/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/losses/table_att_loss.py
 create mode 100644 docling_ibm_models/slanet_1m/metrics/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/metrics/det_metric.py
 create mode 100644 docling_ibm_models/slanet_1m/metrics/eval_det_iou.py
 create mode 100644 docling_ibm_models/slanet_1m/metrics/rec_metric.py
 create mode 100644 docling_ibm_models/slanet_1m/metrics/table_metric.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/architectures/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/architectures/base_model.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/necks/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py
 create mode 100644 docling_ibm_models/slanet_1m/modeling/necks/rnn.py
 create mode 100644 docling_ibm_models/slanet_1m/optimizer/__init__.py
 create mode 100644 docling_ibm_models/slanet_1m/optimizer/learning_rate.py
 create mode 100644 docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py
 create mode 100644 docling_ibm_models/slanet_1m/optimizer/optimizer.py
 create mode 100644 docling_ibm_models/slanet_1m/optimizer/regularizer.py
 create mode 100644 docling_ibm_models/slanet_1m/program.py
 create mode 100644 docling_ibm_models/slanet_1m/requirements.txt
 create mode 100644 docling_ibm_models/slanet_1m/src/eval.py
 create mode 100644 docling_ibm_models/slanet_1m/src/eval_table.py
 create mode 100644 docling_ibm_models/slanet_1m/src/train.py

diff --git a/docling_ibm_models/slanet_1m/.gitignore b/docling_ibm_models/slanet_1m/.gitignore
new file mode 100644
index 0000000..c12799e
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/.gitignore
@@ -0,0 +1,35 @@
+## Python
+
+# Environments
+.venv
+venv
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+
+# Pytest cache
+.pytest_cache
+
+# Pytest Coverage
+.coverage
+
+## IntelliJ's IDEs
+
+.idea
+
+## Visual Studio Code
+
+.vscode
+
+## macOS
+
+.DS_Store
+
+
+inference/
+inference_results/
+output/
+data/
+/data
+/output
+evaluation/
diff --git a/docling_ibm_models/slanet_1m/README.md b/docling_ibm_models/slanet_1m/README.md
new file mode 100644
index 0000000..d1db72b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/README.md
@@ -0,0 +1,19 @@
+# SLANet_1M
+
+- Install PaddlePaddle with CUDA 12.3:
+
+  ```bash linenums="1"
+  python -m pip install paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
+  ```
+
+- Then install the remaining dependencies:
+  ```bash linenums="1"
+  pip install -r requirements.txt
+  ```
+
+- To train:
+  ```bash linenums="1"
+  python train.py -c configs/SLANet_1M.yml -o Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
+  ```
+
+A model pre-trained on PubTabNet + SynthTabNet can be found [here](https://drive.google.com/drive/folders/1aIzP3a3Ci0n9hXD2j57Dq4uCfQlt8yoW?usp=drive_link)
\ No newline at end of file
diff --git a/docling_ibm_models/slanet_1m/__init__.py b/docling_ibm_models/slanet_1m/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml b/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
new file mode 100644
index 0000000..946daf8
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/configs/SLANet_1M.yml
@@ -0,0 +1,145 @@
+Global:
+  use_gpu: true
+  epoch_num: 50
+  log_smooth_window: 20
+  print_batch_step: 20
+  save_model_dir: ./output/SLANet_1M
+  save_epoch_step: 400
+  # evaluation is run every 2000 iterations after the 0th iteration
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir: ./output/SLANet_1M/infer
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: dict/table_structure_dict.txt
+  character_type: en
+  max_text_length: &max_text_length 500
+  box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
+  infer_mode: False
+  use_sync_bn: True
+  save_res_path: 'output/infer'
+  d2s_train_image_shape: [3, -1, -1]
+  amp_custom_white_list: ['concat', 'elementwise_sub', 'set_value']
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  clip_norm: 5.0
+  lr:
+    name: Piecewise
+    learning_rate: 0.001
+    decay_epochs : [29, 39]
+    values : [0.001, 0.0001, 0.00005]
+  regularizer:
+    name: 'L2'
+    factor: 0.00000
+
+Architecture:
+  model_type: table
+  algorithm: SLANet
+  Backbone:
+    name: PPLCNet
+    scale: 1.0
+    pretrained: true
+    use_ssld: true
+  Neck:
+    name: CSPPAN
+    out_channels: 96
+  Head:
+    name: SLAHead
+    hidden_size: 256
+    max_text_length: *max_text_length
+    loc_reg_num: &loc_reg_num 4
+
+Loss:
+  name: SLALoss
+  structure_weight: 1.0
+  loc_weight: 2.0
+  loc_loss: smooth_l1
+
+PostProcess:
+  name: TableLabelDecode
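+  # With merge_no_span_structure enabled (next line), adjacent "<td>" and
+  # "</td>" tokens are merged into a single "<td></td>" token, while cells
+  # that carry a colspan/rowspan attribute keep the split "<td" ... ">" form
+  # (a sketch of PaddleOCR's TableLabelEncode/Decode behaviour, from which
+  # this code is derived).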
+  merge_no_span_structure: &merge_no_span_structure True
+
+Metric:
+  name: TableMetric
+  main_indicator: acc
+  compute_bbox_metric: False
+  loc_reg_num: *loc_reg_num
+  box_format: *box_format
+
+Train:
+  dataset:
+    name: PubTabDataSet
+    data_dir: data/final_merged/train/
+    label_file_list: [data/final_merged/train_annotations.jsonl]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: *merge_no_span_structure
+          replace_empty_cell_token: False
+          loc_reg_num: *loc_reg_num
+          max_text_length: *max_text_length
+      - TableBoxEncode:
+          in_box_format: *box_format
+          out_box_format: *box_format
+      - ResizeTableImage:
+          max_len: 488
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+          size: [488, 488]
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
+  loader:
+    shuffle: True
+    batch_size_per_card: 72
+    drop_last: True
+    num_workers: 1
+
+Eval:
+  dataset:
+    name: PubTabDataSet
+    data_dir: data/final_merged/val/
+    label_file_list: [data/final_merged/val_annotations.jsonl]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - TableLabelEncode:
+          learn_empty_box: False
+          merge_no_span_structure: *merge_no_span_structure
+          replace_empty_cell_token: False
+          loc_reg_num: *loc_reg_num
+          max_text_length: *max_text_length
+      - TableBoxEncode:
+          in_box_format: *box_format
+          out_box_format: *box_format
+      - ResizeTableImage:
+          max_len: 488
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+          size: [488, 488]
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'length', 'shape']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 72
+    num_workers: 1
diff --git a/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt b/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
new file mode 100644
index 0000000..fec6f7d
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/dict/table_structure_dict.txt
@@ -0,0 +1,28 @@
+<thead>
+</thead>
+<tbody>
+</tbody>
+<tr>
+</tr>
+<td>
+</td>
+<td
+>
+<td></td>
+ colspan="2"
+ colspan="3"
+ rowspan="2"
+ colspan="4"
+ colspan="6"
+ rowspan="3"
+ colspan="9"
+ colspan="10"
+ colspan="7"
+ rowspan="4"
+ rowspan="5"
+ rowspan="9"
+ colspan="8"
+ rowspan="8"
+ rowspan="6"
+ rowspan="7"
+ rowspan="10"
diff --git a/docling_ibm_models/slanet_1m/export_model.py b/docling_ibm_models/slanet_1m/export_model.py
new file mode 100644
index 0000000..7e88ee6
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/export_model.py
@@ -0,0 +1,295 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
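+# Usage sketch (the checkpoint path below is illustrative, not a file shipped
+# with this PR). The script reuses ArgsParser from program.py, so the -c/-o
+# options behave exactly as in the training entry point:
+#
+#   python export_model.py -c configs/SLANet_1M.yml \
+#       -o Global.checkpoints=./output/SLANet_1M/best_accuracy \
+#          Global.save_inference_dir=./output/SLANet_1M/infer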
+ +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.insert(0, os.path.abspath(os.path.join(__dir__, ".."))) + +import argparse + +import paddle +from paddle.jit import to_static + +from modeling.architectures import build_model +from paddleocr.ppocr.postprocess import build_post_process +from paddleocr.ppocr.utils.save_load import load_model +from paddleocr.ppocr.utils.logging import get_logger +from program import load_config, merge_config, ArgsParser + + +def export_single_model( + model, arch_config, save_path, logger, input_shape=None, quanter=None +): + if arch_config["algorithm"] == "SRN": + max_text_length = arch_config["Head"]["max_text_length"] + other_shape = [ + paddle.static.InputSpec(shape=[None, 1, 64, 256], dtype="float32"), + [ + paddle.static.InputSpec(shape=[None, 256, 1], dtype="int64"), + paddle.static.InputSpec( + shape=[None, max_text_length, 1], dtype="int64" + ), + paddle.static.InputSpec( + shape=[None, 8, max_text_length, max_text_length], dtype="int64" + ), + paddle.static.InputSpec( + shape=[None, 8, max_text_length, max_text_length], dtype="int64" + ), + ], + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "SAR": + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"), + [paddle.static.InputSpec(shape=[None], dtype="float32")], + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] in ["SVTR_LCNet", "SVTR_HGNet"]: + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 48, -1], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] in ["SVTR", "CPPD"]: + other_shape = [ + paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "PREN": + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["model_type"] == "sr": + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 16, 64], dtype="float32") + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "ViTSTR": + other_shape = [ + paddle.static.InputSpec(shape=[None, 1, 224, 224], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "ABINet": + if not input_shape: + input_shape = [3, 32, 128] + other_shape = [ + paddle.static.InputSpec(shape=[None] + input_shape, dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] in ["NRTR", "SPIN", "RFL"]: + other_shape = [ + paddle.static.InputSpec(shape=[None, 1, 32, 100], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] in ["SATRN"]: + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 32, 100], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "VisionLAN": + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32"), + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "RobustScanner": + max_text_length = arch_config["Head"]["max_text_length"] + other_shape = [ + paddle.static.InputSpec(shape=[None, 3, 48, 160], dtype="float32"), + [ + paddle.static.InputSpec( + shape=[ + None, + ], + dtype="float32", + ), + 
paddle.static.InputSpec(shape=[None, max_text_length], dtype="int64"), + ], + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] == "CAN": + other_shape = [ + [ + paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"), + paddle.static.InputSpec(shape=[None, 1, None, None], dtype="float32"), + paddle.static.InputSpec( + shape=[None, arch_config["Head"]["max_text_length"]], dtype="int64" + ), + ] + ] + model = to_static(model, input_spec=other_shape) + elif arch_config["algorithm"] in ["LayoutLM", "LayoutLMv2", "LayoutXLM"]: + input_spec = [ + paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # input_ids + paddle.static.InputSpec(shape=[None, 512, 4], dtype="int64"), # bbox + paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # attention_mask + paddle.static.InputSpec(shape=[None, 512], dtype="int64"), # token_type_ids + paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype="int64"), # image + ] + if "Re" in arch_config["Backbone"]["name"]: + input_spec.extend( + [ + paddle.static.InputSpec( + shape=[None, 512, 3], dtype="int64" + ), # entities + paddle.static.InputSpec( + shape=[None, None, 2], dtype="int64" + ), # relations + ] + ) + if model.backbone.use_visual_backbone is False: + input_spec.pop(4) + model = to_static(model, input_spec=[input_spec]) + else: + infer_shape = [3, -1, -1] + if arch_config["model_type"] == "rec": + infer_shape = [3, 32, -1] # for rec model, H must be 32 + if ( + "Transform" in arch_config + and arch_config["Transform"] is not None + and arch_config["Transform"]["name"] == "TPS" + ): + logger.info( + "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training" + ) + infer_shape[-1] = 100 + elif arch_config["model_type"] == "table": + infer_shape = [3, 488, 488] + if arch_config["algorithm"] == "TableMaster": + infer_shape = [3, 480, 480] + if arch_config["algorithm"] == "SLANet": + infer_shape = [3, -1, -1] + model = to_static( + model, + input_spec=[ + paddle.static.InputSpec(shape=[None] + infer_shape, dtype="float32") + ], + ) + + if ( + arch_config["model_type"] != "sr" + and arch_config["Backbone"]["name"] == "PPLCNetV3" + ): + # for rep lcnetv3 + for layer in model.sublayers(): + if hasattr(layer, "rep") and not getattr(layer, "is_repped"): + layer.rep() + + if quanter is None: + paddle.jit.save(model, save_path) + else: + quanter.save_quantized_model(model, save_path) + logger.info("inference model is saved to {}".format(save_path)) + return + + +def main(): + FLAGS = ArgsParser().parse_args() + config = load_config(FLAGS.config) + config = merge_config(config, FLAGS.opt) + logger = get_logger() + # build post process + + post_process_class = build_post_process(config["PostProcess"], config["Global"]) + + # build model + # for rec algorithm + if hasattr(post_process_class, "character"): + char_num = len(getattr(post_process_class, "character")) + if config["Architecture"]["algorithm"] in [ + "Distillation", + ]: # distillation model + for key in config["Architecture"]["Models"]: + if ( + config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead" + ): # multi head + out_channels_list = {} + if config["PostProcess"]["name"] == "DistillationSARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode": + char_num = char_num - 3 + out_channels_list["CTCLabelDecode"] = char_num + out_channels_list["SARLabelDecode"] = char_num + 2 + 
out_channels_list["NRTRLabelDecode"] = char_num + 3 + config["Architecture"]["Models"][key]["Head"][ + "out_channels_list" + ] = out_channels_list + else: + config["Architecture"]["Models"][key]["Head"][ + "out_channels" + ] = char_num + # just one final tensor needs to exported for inference + config["Architecture"]["Models"][key]["return_all_feats"] = False + elif config["Architecture"]["Head"]["name"] == "MultiHead": # multi head + out_channels_list = {} + char_num = len(getattr(post_process_class, "character")) + if config["PostProcess"]["name"] == "SARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "NRTRLabelDecode": + char_num = char_num - 3 + out_channels_list["CTCLabelDecode"] = char_num + out_channels_list["SARLabelDecode"] = char_num + 2 + out_channels_list["NRTRLabelDecode"] = char_num + 3 + config["Architecture"]["Head"]["out_channels_list"] = out_channels_list + else: # base rec model + config["Architecture"]["Head"]["out_channels"] = char_num + + # for sr algorithm + if config["Architecture"]["model_type"] == "sr": + config["Architecture"]["Transform"]["infer_mode"] = True + model = build_model(config["Architecture"]) + load_model(config, model, model_type=config["Architecture"]["model_type"]) + model.eval() + + save_path = config["Global"]["save_inference_dir"] + + arch_config = config["Architecture"] + + if ( + arch_config["algorithm"] in ["SVTR", "CPPD"] + and arch_config["Head"]["name"] != "MultiHead" + ): + input_shape = config["Eval"]["dataset"]["transforms"][-2]["SVTRRecResizeImg"][ + "image_shape" + ] + elif arch_config["algorithm"].lower() == "ABINet".lower(): + rec_rs = [ + c + for c in config["Eval"]["dataset"]["transforms"] + if "ABINetRecResizeImg" in c + ] + input_shape = rec_rs[0]["ABINetRecResizeImg"]["image_shape"] if rec_rs else None + else: + input_shape = None + + if arch_config["algorithm"] in [ + "Distillation", + ]: # distillation model + archs = list(arch_config["Models"].values()) + for idx, name in enumerate(model.model_name_list): + sub_model_save_path = os.path.join(save_path, name, "inference") + export_single_model( + model.model_list[idx], archs[idx], sub_model_save_path, logger + ) + else: + save_path = os.path.join(save_path, "inference") + export_single_model( + model, arch_config, save_path, logger, input_shape=input_shape + ) + + +if __name__ == "__main__": + main() diff --git a/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml b/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml new file mode 100644 index 0000000..b1af14c --- /dev/null +++ b/docling_ibm_models/slanet_1m/kubernetes/train-job.yaml @@ -0,0 +1,88 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: train-job-${CI_PIPELINE_ID} + namespace: $NAMESPACE +spec: + template: + spec: + containers: + - name: train-container + image: python:3.11 + command: [ "bash", "-c" ] + args: + - | + # Install MinIO client + curl https://dl.min.io/client/mc/release/linux-amd64/mc --create-dirs -o ~/minio-binaries/mc + chmod +x $HOME/minio-binaries/mc + export PATH=$PATH:$HOME/minio-binaries/ + + # Set alias for MinIO server + mc alias set minio $ENDPOINT_URL $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY + + # Clone the repository + git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}/${CI_PROJECT_PATH}.git /repo + cd /repo + + # Checkout the branch + git checkout $CI_COMMIT_REF_NAME + echo "Checking out branch $CI_COMMIT_REF_NAME" + + # Set up environment + echo "BUCKET=$BUCKET" >> .env + echo "ENDPOINT_URL=$ENDPOINT_URL" >> .env + echo 
"REGION=$REGION" >> .env + echo "MODELS_BUCKET=$MODELS_BUCKET" >> .env + echo "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" >> .env + echo "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" >> .env + export $(cat .env | xargs) + + # Install dependencies + python3.11 -m venv .venv + source .venv/bin/activate + pip install --requirement requirements.txt --no-cache-dir + pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/ + apt update && apt install --yes ffmpeg libsm6 libxext6 + + # Run training + dvc repro --pull + + # Save the experiment metadata + dvc params diff main --md > experiment_report.md + dvc metrics diff main --md >> experiment_report.md + + # Push the experiment metadata to MinIO + dvc push + + mc cp experiment_report.md minio/$MODELS_BUCKET/experiments/experiment_report_${CI_MERGE_REQUEST_IID}.md + mc cp dvc.lock minio/$MODELS_BUCKET/experiments/dvc_lock_${CI_MERGE_REQUEST_IID}.lock + volumeMounts: + - name: repo-volume + mountPath: /repo + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: access_key_id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: secret_access_key + - name: BUCKET + value: $BUCKET + - name: ENDPOINT_URL + value: $ENDPOINT_URL + - name: REGION + value: $REGION + - name: MODELS_BUCKET + value: $MODELS_BUCKET + resources: + limits: + nvidia.com/gpu-rtx-4090-24gb: 1 + restartPolicy: Never + volumes: + - name: repo-volume + emptyDir: { } + backoffLimit: 2 diff --git a/docling_ibm_models/slanet_1m/losses/__init__.py b/docling_ibm_models/slanet_1m/losses/__init__.py new file mode 100644 index 0000000..0b170d6 --- /dev/null +++ b/docling_ibm_models/slanet_1m/losses/__init__.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import paddle +import paddle.nn as nn + + +# table loss +from .table_att_loss import TableAttentionLoss, SLALoss + + +def build_loss(config): + support_dict = [ + "TableAttentionLoss", + "SLALoss", + + ] + config = copy.deepcopy(config) + module_name = config.pop("name") + assert module_name in support_dict, Exception( + "loss only support {}".format(support_dict) + ) + module_class = eval(module_name)(**config) + return module_class diff --git a/docling_ibm_models/slanet_1m/losses/table_att_loss.py b/docling_ibm_models/slanet_1m/losses/table_att_loss.py new file mode 100644 index 0000000..5f0e780 --- /dev/null +++ b/docling_ibm_models/slanet_1m/losses/table_att_loss.py @@ -0,0 +1,100 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +from paddle import nn +from paddle.nn import functional as F + + +class TableAttentionLoss(nn.Layer): + def __init__(self, structure_weight, loc_weight, **kwargs): + super(TableAttentionLoss, self).__init__() + self.loss_func = nn.CrossEntropyLoss(weight=None, reduction="none") + self.structure_weight = structure_weight + self.loc_weight = loc_weight + + def forward(self, predicts, batch): + structure_probs = predicts["structure_probs"] + structure_targets = batch[1].astype("int64") + structure_targets = structure_targets[:, 1:] + structure_probs = paddle.reshape( + structure_probs, [-1, structure_probs.shape[-1]] + ) + structure_targets = paddle.reshape(structure_targets, [-1]) + structure_loss = self.loss_func(structure_probs, structure_targets) + + structure_loss = paddle.mean(structure_loss) * self.structure_weight + + loc_preds = predicts["loc_preds"] + loc_targets = batch[2].astype("float32") + loc_targets_mask = batch[3].astype("float32") + loc_targets = loc_targets[:, 1:, :] + loc_targets_mask = loc_targets_mask[:, 1:, :] + loc_loss = ( + F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight + ) + + total_loss = structure_loss + loc_loss + return { + "loss": total_loss, + "structure_loss": structure_loss, + "loc_loss": loc_loss, + } + + +class SLALoss(nn.Layer): + def __init__(self, structure_weight, loc_weight, loc_loss="mse", **kwargs): + super(SLALoss, self).__init__() + self.loss_func = nn.CrossEntropyLoss(weight=None, reduction="mean") + self.structure_weight = structure_weight + self.loc_weight = loc_weight + self.loc_loss = loc_loss + self.eps = 1e-12 + + def forward(self, predicts, batch): + structure_probs = predicts["structure_probs"] + structure_targets = batch[1].astype("int64") + max_len = batch[-2].max() + structure_targets = structure_targets[:, 1 : max_len + 2] + + structure_loss = self.loss_func(structure_probs, structure_targets) + + structure_loss = paddle.mean(structure_loss) * self.structure_weight + + loc_preds = predicts["loc_preds"] + loc_targets = batch[2].astype("float32") + loc_targets_mask = batch[3].astype("float32") + loc_targets = loc_targets[:, 1 : max_len + 2] + loc_targets_mask = loc_targets_mask[:, 1 : max_len + 2] + + loc_loss = ( + F.smooth_l1_loss( + loc_preds * loc_targets_mask, + loc_targets * loc_targets_mask, + reduction="sum", + ) + * self.loc_weight + ) + + loc_loss = loc_loss / (loc_targets_mask.sum() + self.eps) + total_loss = structure_loss + loc_loss + return { + "loss": total_loss, + "structure_loss": structure_loss, + "loc_loss": loc_loss, + } diff --git a/docling_ibm_models/slanet_1m/metrics/__init__.py b/docling_ibm_models/slanet_1m/metrics/__init__.py new file mode 100644 index 0000000..83ddcd7 --- /dev/null +++ b/docling_ibm_models/slanet_1m/metrics/__init__.py @@ -0,0 +1,54 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import copy + +__all__ = ["build_metric"] + +from .det_metric import DetMetric, DetFCEMetric +from .rec_metric import RecMetric, CNTMetric, CANMetric +from .table_metric import TableMetric + + + +def build_metric(config): + support_dict = [ + "DetMetric", + "DetFCEMetric", + "RecMetric", + "ClsMetric", + "E2EMetric", + "DistillationMetric", + "TableMetric", + "KIEMetric", + "VQASerTokenMetric", + "VQAReTokenMetric", + "SRMetric", + "CTMetric", + "CNTMetric", + "CANMetric", + ] + + config = copy.deepcopy(config) + module_name = config.pop("name") + assert module_name in support_dict, Exception( + "metric only support {}".format(support_dict) + ) + module_class = eval(module_name)(**config) + return module_class diff --git a/docling_ibm_models/slanet_1m/metrics/det_metric.py b/docling_ibm_models/slanet_1m/metrics/det_metric.py new file mode 100644 index 0000000..be95ec3 --- /dev/null +++ b/docling_ibm_models/slanet_1m/metrics/det_metric.py @@ -0,0 +1,153 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +__all__ = ["DetMetric", "DetFCEMetric"] + +from .eval_det_iou import DetectionIoUEvaluator + + +class DetMetric(object): + def __init__(self, main_indicator="hmean", **kwargs): + self.evaluator = DetectionIoUEvaluator() + self.main_indicator = main_indicator + self.reset() + + def __call__(self, preds, batch, **kwargs): + """ + batch: a list produced by dataloaders. + image: np.ndarray of shape (N, C, H, W). + ratio_list: np.ndarray of shape(N,2) + polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. + ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. + preds: a list of dict produced by post process + points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. 
+ """ + gt_polyons_batch = batch[2] + ignore_tags_batch = batch[3] + for pred, gt_polyons, ignore_tags in zip( + preds, gt_polyons_batch, ignore_tags_batch + ): + # prepare gt + gt_info_list = [ + {"points": gt_polyon, "text": "", "ignore": ignore_tag} + for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags) + ] + # prepare det + det_info_list = [ + {"points": det_polyon, "text": ""} for det_polyon in pred["points"] + ] + result = self.evaluator.evaluate_image(gt_info_list, det_info_list) + self.results.append(result) + + def get_metric(self): + """ + return metrics { + 'precision': 0, + 'recall': 0, + 'hmean': 0 + } + """ + + metrics = self.evaluator.combine_results(self.results) + self.reset() + return metrics + + def reset(self): + self.results = [] # clear results + + +class DetFCEMetric(object): + def __init__(self, main_indicator="hmean", **kwargs): + self.evaluator = DetectionIoUEvaluator() + self.main_indicator = main_indicator + self.reset() + + def __call__(self, preds, batch, **kwargs): + """ + batch: a list produced by dataloaders. + image: np.ndarray of shape (N, C, H, W). + ratio_list: np.ndarray of shape(N,2) + polygons: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. + ignore_tags: np.ndarray of shape (N, K), indicates whether a region is ignorable or not. + preds: a list of dict produced by post process + points: np.ndarray of shape (N, K, 4, 2), the polygons of objective regions. + """ + gt_polyons_batch = batch[2] + ignore_tags_batch = batch[3] + + for pred, gt_polyons, ignore_tags in zip( + preds, gt_polyons_batch, ignore_tags_batch + ): + # prepare gt + gt_info_list = [ + {"points": gt_polyon, "text": "", "ignore": ignore_tag} + for gt_polyon, ignore_tag in zip(gt_polyons, ignore_tags) + ] + # prepare det + det_info_list = [ + {"points": det_polyon, "text": "", "score": score} + for det_polyon, score in zip(pred["points"], pred["scores"]) + ] + + for score_thr in self.results.keys(): + det_info_list_thr = [ + det_info + for det_info in det_info_list + if det_info["score"] >= score_thr + ] + result = self.evaluator.evaluate_image(gt_info_list, det_info_list_thr) + self.results[score_thr].append(result) + + def get_metric(self): + """ + return metrics {'heman':0, + 'thr 0.3':'precision: 0 recall: 0 hmean: 0', + 'thr 0.4':'precision: 0 recall: 0 hmean: 0', + 'thr 0.5':'precision: 0 recall: 0 hmean: 0', + 'thr 0.6':'precision: 0 recall: 0 hmean: 0', + 'thr 0.7':'precision: 0 recall: 0 hmean: 0', + 'thr 0.8':'precision: 0 recall: 0 hmean: 0', + 'thr 0.9':'precision: 0 recall: 0 hmean: 0', + } + """ + metrics = {} + hmean = 0 + for score_thr in self.results.keys(): + metric = self.evaluator.combine_results(self.results[score_thr]) + # for key, value in metric.items(): + # metrics['{}_{}'.format(key, score_thr)] = value + metric_str = "precision:{:.5f} recall:{:.5f} hmean:{:.5f}".format( + metric["precision"], metric["recall"], metric["hmean"] + ) + metrics["thr {}".format(score_thr)] = metric_str + hmean = max(hmean, metric["hmean"]) + metrics["hmean"] = hmean + + self.reset() + return metrics + + def reset(self): + self.results = { + 0.3: [], + 0.4: [], + 0.5: [], + 0.6: [], + 0.7: [], + 0.8: [], + 0.9: [], + } # clear results diff --git a/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py b/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py new file mode 100644 index 0000000..4ecce53 --- /dev/null +++ b/docling_ibm_models/slanet_1m/metrics/eval_det_iou.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from collections import 
namedtuple +import numpy as np +from shapely.geometry import Polygon + +""" +reference from: +https://github.com/MhLiao/DB/blob/3c32b808d4412680310d3d28eeb6a2d5bf1566c5/concern/icdar2015_eval/detection/iou.py#L8 +""" + + +class DetectionIoUEvaluator(object): + def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5): + self.iou_constraint = iou_constraint + self.area_precision_constraint = area_precision_constraint + + def evaluate_image(self, gt, pred): + def get_union(pD, pG): + return Polygon(pD).union(Polygon(pG)).area + + def get_intersection_over_union(pD, pG): + return get_intersection(pD, pG) / get_union(pD, pG) + + def get_intersection(pD, pG): + return Polygon(pD).intersection(Polygon(pG)).area + + def compute_ap(confList, matchList, numGtCare): + correct = 0 + AP = 0 + if len(confList) > 0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct) / (n + 1) + + if numGtCare > 0: + AP /= numGtCare + + return AP + + perSampleMetrics = {} + + matchedSum = 0 + + Rectangle = namedtuple("Rectangle", "xmin ymin xmax ymax") + + numGlobalCareGt = 0 + numGlobalCareDet = 0 + + arrGlobalConfidences = [] + arrGlobalMatches = [] + + recall = 0 + precision = 0 + hmean = 0 + + detMatched = 0 + + iouMat = np.empty([1, 1]) + + gtPols = [] + detPols = [] + + gtPolPoints = [] + detPolPoints = [] + + # Array of Ground Truth Polygons' keys marked as don't Care + gtDontCarePolsNum = [] + # Array of Detected Polygons' matched with a don't Care GT + detDontCarePolsNum = [] + + pairs = [] + detMatchedNums = [] + + arrSampleConfidences = [] + arrSampleMatch = [] + + evaluationLog = "" + + for n in range(len(gt)): + points = gt[n]["points"] + dontCare = gt[n]["ignore"] + if not Polygon(points).is_valid: + continue + + gtPol = points + gtPols.append(gtPol) + gtPolPoints.append(points) + if dontCare: + gtDontCarePolsNum.append(len(gtPols) - 1) + + evaluationLog += ( + "GT polygons: " + + str(len(gtPols)) + + ( + " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" + if len(gtDontCarePolsNum) > 0 + else "\n" + ) + ) + + for n in range(len(pred)): + points = pred[n]["points"] + if not Polygon(points).is_valid: + continue + + detPol = points + detPols.append(detPol) + detPolPoints.append(points) + if len(gtDontCarePolsNum) > 0: + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol, detPol) + pdDimensions = Polygon(detPol).area + precision = ( + 0 if pdDimensions == 0 else intersected_area / pdDimensions + ) + if precision > self.area_precision_constraint: + detDontCarePolsNum.append(len(detPols) - 1) + break + + evaluationLog += ( + "DET polygons: " + + str(len(detPols)) + + ( + " (" + str(len(detDontCarePolsNum)) + " don't care)\n" + if len(detDontCarePolsNum) > 0 + else "\n" + ) + ) + + if len(gtPols) > 0 and len(detPols) > 0: + # Calculate IoU and precision matrixs + outputShape = [len(gtPols), len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols), np.int8) + detRectMat = np.zeros(len(detPols), np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if ( + gtRectMat[gtNum] == 
0 + and detRectMat[detNum] == 0 + and gtNum not in gtDontCarePolsNum + and detNum not in detDontCarePolsNum + ): + if iouMat[gtNum, detNum] > self.iou_constraint: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + detMatched += 1 + pairs.append({"gt": gtNum, "det": detNum}) + detMatchedNums.append(detNum) + evaluationLog += ( + "Match GT #" + + str(gtNum) + + " with Det #" + + str(detNum) + + "\n" + ) + + numGtCare = len(gtPols) - len(gtDontCarePolsNum) + numDetCare = len(detPols) - len(detDontCarePolsNum) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare > 0 else float(1) + else: + recall = float(detMatched) / numGtCare + precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare + + hmean = ( + 0 + if (precision + recall) == 0 + else 2.0 * precision * recall / (precision + recall) + ) + + matchedSum += detMatched + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + + perSampleMetrics = { + "gtCare": numGtCare, + "detCare": numDetCare, + "detMatched": detMatched, + } + return perSampleMetrics + + def combine_results(self, results): + numGlobalCareGt = 0 + numGlobalCareDet = 0 + matchedSum = 0 + for result in results: + numGlobalCareGt += result["gtCare"] + numGlobalCareDet += result["detCare"] + matchedSum += result["detMatched"] + + methodRecall = ( + 0 if numGlobalCareGt == 0 else float(matchedSum) / numGlobalCareGt + ) + methodPrecision = ( + 0 if numGlobalCareDet == 0 else float(matchedSum) / numGlobalCareDet + ) + methodHmean = ( + 0 + if methodRecall + methodPrecision == 0 + else 2 * methodRecall * methodPrecision / (methodRecall + methodPrecision) + ) + methodMetrics = { + "precision": methodPrecision, + "recall": methodRecall, + "hmean": methodHmean, + } + + return methodMetrics + + +if __name__ == "__main__": + evaluator = DetectionIoUEvaluator() + gts = [ + [ + { + "points": [(0, 0), (1, 0), (1, 1), (0, 1)], + "text": 1234, + "ignore": False, + }, + { + "points": [(2, 2), (3, 2), (3, 3), (2, 3)], + "text": 5678, + "ignore": False, + }, + ] + ] + preds = [ + [ + { + "points": [(0.1, 0.1), (1, 0), (1, 1), (0, 1)], + "text": 123, + "ignore": False, + } + ] + ] + results = [] + for gt, pred in zip(gts, preds): + results.append(evaluator.evaluate_image(gt, pred)) + metrics = evaluator.combine_results(results) + print(metrics) diff --git a/docling_ibm_models/slanet_1m/metrics/rec_metric.py b/docling_ibm_models/slanet_1m/metrics/rec_metric.py new file mode 100644 index 0000000..e41dd36 --- /dev/null +++ b/docling_ibm_models/slanet_1m/metrics/rec_metric.py @@ -0,0 +1,179 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
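+# Worked example of the scoring below: for pred "helo" vs target "hello" the
+# Levenshtein distance is 1 and the longer string has length 5, so
+# normalized_distance is 1/5 = 0.2; this pair contributes 0.8 to
+# norm_edit_dis and 0 to acc (acc counts exact matches only).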
+ +from rapidfuzz.distance import Levenshtein +from difflib import SequenceMatcher + +import numpy as np +import string + + +class RecMetric(object): + def __init__( + self, main_indicator="acc", is_filter=False, ignore_space=True, **kwargs + ): + self.main_indicator = main_indicator + self.is_filter = is_filter + self.ignore_space = ignore_space + self.eps = 1e-5 + self.reset() + + def _normalize_text(self, text): + text = "".join( + filter(lambda x: x in (string.digits + string.ascii_letters), text) + ) + return text.lower() + + def __call__(self, pred_label, *args, **kwargs): + preds, labels = pred_label + correct_num = 0 + all_num = 0 + norm_edit_dis = 0.0 + for (pred, pred_conf), (target, _) in zip(preds, labels): + if self.ignore_space: + pred = pred.replace(" ", "") + target = target.replace(" ", "") + if self.is_filter: + pred = self._normalize_text(pred) + target = self._normalize_text(target) + norm_edit_dis += Levenshtein.normalized_distance(pred, target) + if pred == target: + correct_num += 1 + all_num += 1 + self.correct_num += correct_num + self.all_num += all_num + self.norm_edit_dis += norm_edit_dis + return { + "acc": correct_num / (all_num + self.eps), + "norm_edit_dis": 1 - norm_edit_dis / (all_num + self.eps), + } + + def get_metric(self): + """ + return metrics { + 'acc': 0, + 'norm_edit_dis': 0, + } + """ + acc = 1.0 * self.correct_num / (self.all_num + self.eps) + norm_edit_dis = 1 - self.norm_edit_dis / (self.all_num + self.eps) + self.reset() + return {"acc": acc, "norm_edit_dis": norm_edit_dis} + + def reset(self): + self.correct_num = 0 + self.all_num = 0 + self.norm_edit_dis = 0 + + +class CNTMetric(object): + def __init__(self, main_indicator="acc", **kwargs): + self.main_indicator = main_indicator + self.eps = 1e-5 + self.reset() + + def __call__(self, pred_label, *args, **kwargs): + preds, labels = pred_label + correct_num = 0 + all_num = 0 + for pred, target in zip(preds, labels): + if pred == target: + correct_num += 1 + all_num += 1 + self.correct_num += correct_num + self.all_num += all_num + return { + "acc": correct_num / (all_num + self.eps), + } + + def get_metric(self): + """ + return metrics { + 'acc': 0, + } + """ + acc = 1.0 * self.correct_num / (self.all_num + self.eps) + self.reset() + return {"acc": acc} + + def reset(self): + self.correct_num = 0 + self.all_num = 0 + + +class CANMetric(object): + def __init__(self, main_indicator="exp_rate", **kwargs): + self.main_indicator = main_indicator + self.word_right = [] + self.exp_right = [] + self.word_total_length = 0 + self.exp_total_num = 0 + self.word_rate = 0 + self.exp_rate = 0 + self.reset() + self.epoch_reset() + + def __call__(self, preds, batch, **kwargs): + for k, v in kwargs.items(): + epoch_reset = v + if epoch_reset: + self.epoch_reset() + word_probs = preds + word_label, word_label_mask = batch + line_right = 0 + if word_probs is not None: + word_pred = word_probs.argmax(2) + word_pred = word_pred.cpu().detach().numpy() + word_scores = [ + SequenceMatcher( + None, s1[: int(np.sum(s3))], s2[: int(np.sum(s3))], autojunk=False + ).ratio() + * (len(s1[: int(np.sum(s3))]) + len(s2[: int(np.sum(s3))])) + / len(s1[: int(np.sum(s3))]) + / 2 + for s1, s2, s3 in zip(word_label, word_pred, word_label_mask) + ] + batch_size = len(word_scores) + for i in range(batch_size): + if word_scores[i] == 1: + line_right += 1 + self.word_rate = np.mean(word_scores) # float + self.exp_rate = line_right / batch_size # float + exp_length, word_length = word_label.shape[:2] + 
        self.word_right.append(self.word_rate * word_length)
+        self.exp_right.append(self.exp_rate * exp_length)
+        self.word_total_length = self.word_total_length + word_length
+        self.exp_total_num = self.exp_total_num + exp_length
+
+    def get_metric(self):
+        """
+        return {
+            'word_rate': 0,
+            "exp_rate": 0,
+        }
+        """
+        cur_word_rate = sum(self.word_right) / self.word_total_length
+        cur_exp_rate = sum(self.exp_right) / self.exp_total_num
+        self.reset()
+        return {"word_rate": cur_word_rate, "exp_rate": cur_exp_rate}
+
+    def reset(self):
+        self.word_rate = 0
+        self.exp_rate = 0
+
+    def epoch_reset(self):
+        self.word_right = []
+        self.exp_right = []
+        self.word_total_length = 0
+        self.exp_total_num = 0
diff --git a/docling_ibm_models/slanet_1m/metrics/table_metric.py b/docling_ibm_models/slanet_1m/metrics/table_metric.py
new file mode 100644
index 0000000..6df2bb1
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/metrics/table_metric.py
@@ -0,0 +1,161 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from metrics.det_metric import DetMetric
+
+
+class TableStructureMetric(object):
+    def __init__(self, main_indicator="acc", eps=1e-6, del_thead_tbody=False, **kwargs):
+        self.main_indicator = main_indicator
+        self.eps = eps
+        self.del_thead_tbody = del_thead_tbody
+        self.reset()
+
+    def __call__(self, pred_label, batch=None, *args, **kwargs):
+        preds, labels = pred_label
+        pred_structure_batch_list = preds["structure_batch_list"]
+        gt_structure_batch_list = labels["structure_batch_list"]
+        correct_num = 0
+        all_num = 0
+        for (pred, pred_conf), target in zip(
+            pred_structure_batch_list, gt_structure_batch_list
+        ):
+            pred_str = "".join(pred)
+            target_str = "".join(target)
+            if self.del_thead_tbody:
+                pred_str = (
+                    pred_str.replace("<thead>", "")
+                    .replace("</thead>", "")
+                    .replace("<tbody>", "")
+                    .replace("</tbody>", "")
+                )
+                target_str = (
+                    target_str.replace("<thead>", "")
+                    .replace("</thead>", "")
+                    .replace("<tbody>", "")
+                    .replace("</tbody>", "")
+                )
+            if pred_str == target_str:
+                correct_num += 1
+            all_num += 1
+        self.correct_num += correct_num
+        self.all_num += all_num
+
+    def get_metric(self):
+        """
+        return metrics {
+            'acc': 0,
+        }
+        """
+        acc = 1.0 * self.correct_num / (self.all_num + self.eps)
+        self.reset()
+        return {"acc": acc}
+
+    def reset(self):
+        self.correct_num = 0
+        self.all_num = 0
+        self.len_acc_num = 0
+        self.token_nums = 0
+        self.anys_dict = dict()
+
+
+class TableMetric(object):
+    def __init__(
+        self,
+        main_indicator="acc",
+        compute_bbox_metric=False,
+        box_format="xyxy",
+        del_thead_tbody=False,
+        **kwargs,
+    ):
+        """
+        @param main_indicator: the indicator used to select the best model
+        @param compute_bbox_metric: whether to additionally run the bbox detection metric
+        @param box_format: box encoding, one of 'xyxy', 'xywh', 'xyxyxyxy'
+        @param kwargs:
+        """
+        self.structure_metric = TableStructureMetric(del_thead_tbody=del_thead_tbody)
+        self.bbox_metric = DetMetric() if compute_bbox_metric else None
+        self.main_indicator = main_indicator
+        self.box_format = box_format
+        self.reset()
+
+    def __call__(self, pred_label, batch=None, *args, **kwargs):
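+        # Structure accuracy is always computed; the bbox detection metric
+        # below runs only when compute_bbox_metric=True in the Metric config.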
self.structure_metric(pred_label) + if self.bbox_metric is not None: + self.bbox_metric(*self.prepare_bbox_metric_input(pred_label)) + + def prepare_bbox_metric_input(self, pred_label): + pred_bbox_batch_list = [] + gt_ignore_tags_batch_list = [] + gt_bbox_batch_list = [] + preds, labels = pred_label + + batch_num = len(preds["bbox_batch_list"]) + for batch_idx in range(batch_num): + # pred + pred_bbox_list = [ + self.format_box(pred_box) + for pred_box in preds["bbox_batch_list"][batch_idx] + ] + pred_bbox_batch_list.append({"points": pred_bbox_list}) + + # gt + gt_bbox_list = [] + gt_ignore_tags_list = [] + for gt_box in labels["bbox_batch_list"][batch_idx]: + gt_bbox_list.append(self.format_box(gt_box)) + gt_ignore_tags_list.append(0) + gt_bbox_batch_list.append(gt_bbox_list) + gt_ignore_tags_batch_list.append(gt_ignore_tags_list) + + return [ + pred_bbox_batch_list, + [0, 0, gt_bbox_batch_list, gt_ignore_tags_batch_list], + ] + + def get_metric(self): + structure_metric = self.structure_metric.get_metric() + if self.bbox_metric is None: + return structure_metric + bbox_metric = self.bbox_metric.get_metric() + if self.main_indicator == self.bbox_metric.main_indicator: + output = bbox_metric + for sub_key in structure_metric: + output["structure_metric_{}".format(sub_key)] = structure_metric[ + sub_key + ] + else: + output = structure_metric + for sub_key in bbox_metric: + output["bbox_metric_{}".format(sub_key)] = bbox_metric[sub_key] + return output + + def reset(self): + self.structure_metric.reset() + if self.bbox_metric is not None: + self.bbox_metric.reset() + + def format_box(self, box): + if self.box_format == "xyxy": + x1, y1, x2, y2 = box + box = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] + elif self.box_format == "xywh": + x, y, w, h = box + x1, y1, x2, y2 = x - w // 2, y - h // 2, x + w // 2, y + h // 2 + box = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] + elif self.box_format == "xyxyxyxy": + x1, y1, x2, y2, x3, y3, x4, y4 = box + box = [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] + return box diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py b/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py new file mode 100644 index 0000000..50260f9 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/architectures/__init__.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
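+# A build sketch with values taken from configs/SLANet_1M.yml: the
+# "Architecture" section has no "name" key, so build_model below returns a
+# BaseModel that wires PPLCNet -> CSPPAN -> SLAHead:
+#
+#   arch = {"model_type": "table", "algorithm": "SLANet",
+#           "Backbone": {"name": "PPLCNet", "scale": 1.0},
+#           "Neck": {"name": "CSPPAN", "out_channels": 96},
+#           "Head": {"name": "SLAHead", "hidden_size": 256,
+#                    "max_text_length": 500, "loc_reg_num": 4}}
+#   model = build_model(arch)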
+ +import copy +import importlib + +from paddle.jit import to_static +from paddle.static import InputSpec + +from .base_model import BaseModel +from .distillation_model import DistillationModel + +__all__ = ["build_model", "apply_to_static"] + + +def build_model(config): + config = copy.deepcopy(config) + if not "name" in config: + arch = BaseModel(config) + else: + name = config.pop("name") + mod = importlib.import_module(__name__) + arch = getattr(mod, name)(config) + return arch + + +def apply_to_static(model, config, logger): + if config["Global"].get("to_static", False) is not True: + return model + assert ( + "d2s_train_image_shape" in config["Global"] + ), "d2s_train_image_shape must be assigned for static training mode..." + supported_list = ["DB", "SVTR_LCNet", "TableMaster", "LayoutXLM", "SLANet", "SVTR"] + if config["Architecture"]["algorithm"] in ["Distillation"]: + algo = list(config["Architecture"]["Models"].values())[0]["algorithm"] + else: + algo = config["Architecture"]["algorithm"] + assert ( + algo in supported_list + ), f"algorithms that supports static training must in in {supported_list} but got {algo}" + + specs = [ + InputSpec([None] + config["Global"]["d2s_train_image_shape"], dtype="float32") + ] + + if algo == "SVTR_LCNet": + specs.append( + [ + InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"), + InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"), + InputSpec([None], dtype="int64"), + InputSpec([None], dtype="float64"), + ] + ) + elif algo == "TableMaster": + specs.append( + [ + InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"), + InputSpec( + [None, config["Global"]["max_text_length"], 4], dtype="float32" + ), + InputSpec( + [None, config["Global"]["max_text_length"], 1], dtype="float32" + ), + InputSpec([None, 6], dtype="float32"), + ] + ) + elif algo == "LayoutXLM": + specs = [ + [ + InputSpec(shape=[None, 512], dtype="int64"), # input_ids + InputSpec(shape=[None, 512, 4], dtype="int64"), # bbox + InputSpec(shape=[None, 512], dtype="int64"), # attention_mask + InputSpec(shape=[None, 512], dtype="int64"), # token_type_ids + InputSpec(shape=[None, 3, 224, 224], dtype="float32"), # image + InputSpec(shape=[None, 512], dtype="int64"), # label + ] + ] + elif algo == "SLANet": + specs.append( + [ + InputSpec( + [None, config["Global"]["max_text_length"] + 2], dtype="int64" + ), + InputSpec( + [None, config["Global"]["max_text_length"] + 2, 4], dtype="float32" + ), + InputSpec( + [None, config["Global"]["max_text_length"] + 2, 1], dtype="float32" + ), + InputSpec([None], dtype="int64"), + InputSpec([None, 6], dtype="float64"), + ] + ) + elif algo == "SVTR": + specs.append( + [ + InputSpec([None, config["Global"]["max_text_length"]], dtype="int64"), + InputSpec([None], dtype="int64"), + ] + ) + model = to_static(model, input_spec=specs) + logger.info("Successfully to apply @to_static with specs: {}".format(specs)) + return model diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py b/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py new file mode 100644 index 0000000..c1b6116 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/architectures/base_model.py @@ -0,0 +1,109 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+from modeling.backbones import build_backbone
+from modeling.necks import build_neck
+from modeling.heads import build_head
+
+__all__ = ["BaseModel"]
+
+
+class BaseModel(nn.Layer):
+    def __init__(self, config):
+        """
+        the base model for OCR tasks.
+        args:
+            config (dict): the hyper parameters for building the module.
+        """
+        super(BaseModel, self).__init__()
+        in_channels = config.get("in_channels", 3)
+        model_type = config["model_type"]
+        # build transform,
+        # for rec, transform can be TPS or None
+        # for det and cls, transform should be None,
+        # if you build the model differently, you can use a transform in det and cls
+        # build backbone, backbone is needed for det, rec and cls
+        self.use_transform = False
+        if "Backbone" not in config or config["Backbone"] is None:
+            self.use_backbone = False
+        else:
+            self.use_backbone = True
+            config["Backbone"]["in_channels"] = in_channels
+            self.backbone = build_backbone(config["Backbone"], model_type)
+            in_channels = self.backbone.out_channels
+
+        # build neck
+        # for rec, neck can be cnn, rnn or reshape (None)
+        # for det, neck can be FPN, BIFPN and so on.
+        # for cls, neck should be None
+        if "Neck" not in config or config["Neck"] is None:
+            self.use_neck = False
+        else:
+            self.use_neck = True
+            config["Neck"]["in_channels"] = in_channels
+            self.neck = build_neck(config["Neck"])
+            in_channels = self.neck.out_channels
+
+        # build head, head is needed for det, rec and cls
+        if "Head" not in config or config["Head"] is None:
+            self.use_head = False
+        else:
+            self.use_head = True
+            config["Head"]["in_channels"] = in_channels
+            self.head = build_head(config["Head"])
+
+        self.return_all_feats = config.get("return_all_feats", False)
+
+    def forward(self, x, data=None):
+        y = dict()
+        if self.use_transform:
+            x = self.transform(x)
+        if self.use_backbone:
+            x = self.backbone(x)
+        if isinstance(x, dict):
+            y.update(x)
+        else:
+            y["backbone_out"] = x
+        final_name = "backbone_out"
+        if self.use_neck:
+            x = self.neck(x)
+            if isinstance(x, dict):
+                y.update(x)
+            else:
+                y["neck_out"] = x
+            final_name = "neck_out"
+        if self.use_head:
+            x = self.head(x, targets=data)
+            # for multi head, save ctc neck out for udml
+            if isinstance(x, dict) and "ctc_neck" in x.keys():
+                y["neck_out"] = x["ctc_neck"]
+                y["head_out"] = x
+            elif isinstance(x, dict):
+                y.update(x)
+            else:
+                y["head_out"] = x
+            final_name = "head_out"
+        if self.return_all_feats:
+            if self.training:
+                return y
+            elif isinstance(x, dict):
+                return x
+            else:
+                return {final_name: x}
+        else:
+            return x
diff --git a/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py b/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py
new file mode 100644
index 0000000..98912d1
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/architectures/distillation_model.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import nn +from modeling.backbones import build_backbone +from modeling.necks import build_neck +from modeling.heads import build_head +from .base_model import BaseModel +from paddleocr.ppocr.utils.save_load import load_pretrained_params + +__all__ = ["DistillationModel"] + + +class DistillationModel(nn.Layer): + def __init__(self, config): + """ + the module for OCR distillation. + args: + config (dict): the super parameters for module. + """ + super().__init__() + self.model_list = [] + self.model_name_list = [] + for key in config["Models"]: + model_config = config["Models"][key] + freeze_params = False + pretrained = None + if "freeze_params" in model_config: + freeze_params = model_config.pop("freeze_params") + if "pretrained" in model_config: + pretrained = model_config.pop("pretrained") + model = BaseModel(model_config) + if pretrained is not None: + load_pretrained_params(model, pretrained) + if freeze_params: + for param in model.parameters(): + param.trainable = False + self.model_list.append(self.add_sublayer(key, model)) + self.model_name_list.append(key) + + def forward(self, x, data=None): + result_dict = dict() + for idx, model_name in enumerate(self.model_name_list): + result_dict[model_name] = self.model_list[idx](x, data) + return result_dict diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py b/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py new file mode 100644 index 0000000..e91813e --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/backbones/__init__.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
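# --- Editor's note (hypothetical sketch, not part of the patch) -------------
# Shape of the config consumed by DistillationModel above: each entry under
# "Models" is a complete BaseModel config; the optional "freeze_params" and
# "pretrained" keys are popped before BaseModel sees them. The model names
# and sub-configs below are invented for illustration only.
distillation_config = {
    "Models": {
        "Teacher": {
            "model_type": "table",
            "freeze_params": True,  # teacher parameters stay fixed
            "pretrained": "./teacher",  # loaded via load_pretrained_params
            "Backbone": {"name": "PPLCNet", "scale": 1.0},
            "Neck": {"name": "CSPPAN", "out_channels": 96},
            "Head": {"name": "SLAHead", "hidden_size": 256},
        },
        "Student": {
            "model_type": "table",
            "Backbone": {"name": "PPLCNet", "scale": 1.0},
            "Neck": {"name": "CSPPAN", "out_channels": 96},
            "Head": {"name": "SLAHead", "hidden_size": 256},
        },
    }
}
# model = DistillationModel(distillation_config)
# model(x) -> {"Teacher": teacher_out, "Student": student_out}
# -----------------------------------------------------------------------------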
+
+__all__ = ["build_backbone"]
+
+
+def build_backbone(config, model_type):
+    if model_type == "table":
+        from .det_pp_lcnet import PPLCNet
+        from .rec_lcnetv3 import PPLCNetV3
+        from .det_pp_lcnet_v2 import PPLCNetV2_base
+
+        support_dict = [
+            "PPLCNet",
+            "PPLCNetV3",
+            "PPLCNetV2_base",
+        ]
+    else:
+        raise NotImplementedError
+
+    module_name = config.pop("name")
+    assert module_name in support_dict, Exception(
+        "when model type is {}, backbone only supports {}".format(
+            model_type, support_dict
+        )
+    )
+    module_class = eval(module_name)(**config)
+    return module_class
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py
new file mode 100644
index 0000000..98db44b
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_mobilenet_v3.py
@@ -0,0 +1,285 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+__all__ = ["MobileNetV3"]
+
+
+def make_divisible(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class MobileNetV3(nn.Layer):
+    def __init__(
+        self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs
+    ):
+        """
+        the MobileNetV3 backbone network for the detection module.
+        Args:
+            params(dict): the hyper parameters for building the network
+        """
+        super(MobileNetV3, self).__init__()
+
+        self.disable_se = disable_se
+
+        if model_name == "large":
+            cfg = [
+                # k, exp, c, se, nl, s,
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hardswish", 2],
+                [3, 200, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 480, 112, True, "hardswish", 1],
+                [3, 672, 112, True, "hardswish", 1],
+                [5, 672, 160, True, "hardswish", 2],
+                [5, 960, 160, True, "hardswish", 1],
+                [5, 960, 160, True, "hardswish", 1],
+            ]
+            cls_ch_squeeze = 960
+        elif model_name == "small":
+            cfg = [
+                # k, exp, c, se, nl, s,
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hardswish", 2],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 120, 48, True, "hardswish", 1],
+                [5, 144, 48, True, "hardswish", 1],
+                [5, 288, 96, True, "hardswish", 2],
+                [5, 576, 96, True, "hardswish", 1],
+                [5, 576, 96, True, "hardswish", 1],
+            ]
+            cls_ch_squeeze = 576
+        else:
+            raise NotImplementedError(
+                "mode[" + model_name + "_model] is not implemented!"
+ ) + + supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] + assert ( + scale in supported_scale + ), "supported scale are {} but input scale is {}".format(supported_scale, scale) + inplanes = 16 + # conv1 + self.conv = ConvBNLayer( + in_channels=in_channels, + out_channels=make_divisible(inplanes * scale), + kernel_size=3, + stride=2, + padding=1, + groups=1, + if_act=True, + act="hardswish", + ) + + self.stages = [] + self.out_channels = [] + block_list = [] + i = 0 + inplanes = make_divisible(inplanes * scale) + for k, exp, c, se, nl, s in cfg: + se = se and not self.disable_se + start_idx = 2 if model_name == "large" else 0 + if s == 2 and i > start_idx: + self.out_channels.append(inplanes) + self.stages.append(nn.Sequential(*block_list)) + block_list = [] + block_list.append( + ResidualUnit( + in_channels=inplanes, + mid_channels=make_divisible(scale * exp), + out_channels=make_divisible(scale * c), + kernel_size=k, + stride=s, + use_se=se, + act=nl, + ) + ) + inplanes = make_divisible(scale * c) + i += 1 + block_list.append( + ConvBNLayer( + in_channels=inplanes, + out_channels=make_divisible(scale * cls_ch_squeeze), + kernel_size=1, + stride=1, + padding=0, + groups=1, + if_act=True, + act="hardswish", + ) + ) + self.stages.append(nn.Sequential(*block_list)) + self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) + for i, stage in enumerate(self.stages): + self.add_sublayer(sublayer=stage, name="stage{}".format(i)) + + def forward(self, x): + x = self.conv(x) + out_list = [] + for stage in self.stages: + x = stage(x) + out_list.append(x) + return out_list + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride, + padding, + groups=1, + if_act=True, + act=None, + ): + super(ConvBNLayer, self).__init__() + self.if_act = if_act + self.act = act + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + bias_attr=False, + ) + + self.bn = nn.BatchNorm(num_channels=out_channels, act=None) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.if_act: + if self.act == "relu": + x = F.relu(x) + elif self.act == "hardswish": + x = F.hardswish(x) + else: + print( + "The activation function({}) is selected incorrectly.".format( + self.act + ) + ) + exit() + return x + + +class ResidualUnit(nn.Layer): + def __init__( + self, + in_channels, + mid_channels, + out_channels, + kernel_size, + stride, + use_se, + act=None, + ): + super(ResidualUnit, self).__init__() + self.if_shortcut = stride == 1 and in_channels == out_channels + self.if_se = use_se + + self.expand_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + if_act=True, + act=act, + ) + self.bottleneck_conv = ConvBNLayer( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=int((kernel_size - 1) // 2), + groups=mid_channels, + if_act=True, + act=act, + ) + if self.if_se: + self.mid_se = SEModule(mid_channels) + self.linear_conv = ConvBNLayer( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + if_act=False, + act=None, + ) + + def forward(self, inputs): + x = self.expand_conv(inputs) + x = self.bottleneck_conv(x) + if self.if_se: + x = self.mid_se(x) + x = self.linear_conv(x) + if self.if_shortcut: + x = paddle.add(inputs, x) + return x + + +class SEModule(nn.Layer): + def 
__init__(self, in_channels, reduction=4): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.conv1 = nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels // reduction, + kernel_size=1, + stride=1, + padding=0, + ) + self.conv2 = nn.Conv2D( + in_channels=in_channels // reduction, + out_channels=in_channels, + kernel_size=1, + stride=1, + padding=0, + ) + + def forward(self, inputs): + outputs = self.avg_pool(inputs) + outputs = self.conv1(outputs) + outputs = F.relu(outputs) + outputs = self.conv2(outputs) + outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) + return inputs * outputs diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py new file mode 100644 index 0000000..bf557a4 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet.py @@ -0,0 +1,274 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import os +import paddle +import paddle.nn as nn +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear +from paddle.regularizer import L2Decay +from paddle.nn.initializer import KaimingNormal +from paddle.utils.download import get_path_from_url + +MODEL_URLS = { + "PPLCNet_x0.25": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams", + "PPLCNet_x0.35": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams", + "PPLCNet_x0.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams", + "PPLCNet_x0.75": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams", + "PPLCNet_x1.0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams", + "PPLCNet_x1.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams", + "PPLCNet_x2.0": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams", + "PPLCNet_x2.5": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams", +} + +MODEL_STAGES_PATTERN = { + "PPLCNet": ["blocks2", "blocks3", "blocks4", "blocks5", "blocks6"] +} + +__all__ = list(MODEL_URLS.keys()) + +# Each element(list) represents a depthwise block, which is composed of k, in_c, out_c, s, use_se. 
+# k: kernel_size +# in_c: input channel number in depthwise block +# out_c: output channel number in depthwise block +# s: stride in depthwise block +# use_se: whether to use SE block + +NET_CONFIG = { + "blocks2": + # k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [ + [3, 128, 256, 2, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + ], + "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True]], +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(nn.Layer): + def __init__(self, num_channels, filter_size, num_filters, stride, num_groups=1): + super().__init__() + + self.conv = Conv2D( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=num_groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False, + ) + + self.bn = BatchNorm( + num_filters, + param_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0)), + ) + self.hardswish = nn.Hardswish() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.hardswish(x) + return x + + +class DepthwiseSeparable(nn.Layer): + def __init__(self, num_channels, num_filters, stride, dw_size=3, use_se=False): + super().__init__() + self.use_se = use_se + self.dw_conv = ConvBNLayer( + num_channels=num_channels, + num_filters=num_channels, + filter_size=dw_size, + stride=stride, + num_groups=num_channels, + ) + if use_se: + self.se = SEModule(num_channels) + self.pw_conv = ConvBNLayer( + num_channels=num_channels, filter_size=1, num_filters=num_filters, stride=1 + ) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class SEModule(nn.Layer): + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + ) + self.relu = nn.ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + ) + self.hardsigmoid = nn.Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class PPLCNet(nn.Layer): + def __init__(self, in_channels=3, scale=1.0, pretrained=False, use_ssld=False): + super().__init__() + self.out_channels = [ + int(NET_CONFIG["blocks3"][-1][2] * scale), + int(NET_CONFIG["blocks4"][-1][2] * scale), + int(NET_CONFIG["blocks5"][-1][2] * scale), + int(NET_CONFIG["blocks6"][-1][2] * scale), + ] + self.scale = scale + + self.conv1 = ConvBNLayer( + num_channels=in_channels, + filter_size=3, + num_filters=make_divisible(16 * scale), + stride=2, + ) + + self.blocks2 = nn.Sequential( + *[ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + ) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks2"]) + 
] + ) + + self.blocks3 = nn.Sequential( + *[ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + ) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks3"]) + ] + ) + + self.blocks4 = nn.Sequential( + *[ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + ) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks4"]) + ] + ) + + self.blocks5 = nn.Sequential( + *[ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + ) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks5"]) + ] + ) + + self.blocks6 = nn.Sequential( + *[ + DepthwiseSeparable( + num_channels=make_divisible(in_c * scale), + num_filters=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + ) + for i, (k, in_c, out_c, s, se) in enumerate(NET_CONFIG["blocks6"]) + ] + ) + + if pretrained: + self._load_pretrained( + MODEL_URLS["PPLCNet_x{}".format(scale)], use_ssld=use_ssld + ) + + def forward(self, x): + outs = [] + x = self.conv1(x) + x = self.blocks2(x) + x = self.blocks3(x) + outs.append(x) + x = self.blocks4(x) + outs.append(x) + x = self.blocks5(x) + outs.append(x) + x = self.blocks6(x) + outs.append(x) + return outs + + def _load_pretrained(self, pretrained_url, use_ssld=False): + if use_ssld: + pretrained_url = pretrained_url.replace("_pretrained", "_ssld_pretrained") + print(pretrained_url) + local_weight_path = get_path_from_url( + pretrained_url, os.path.expanduser("~/.paddleclas/weights") + ) + param_state_dict = paddle.load(local_weight_path) + self.set_dict(param_state_dict) + return diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py new file mode 100644 index 0000000..5b5a568 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/backbones/det_pp_lcnet_v2.py @@ -0,0 +1,358 @@ +# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import, division, print_function +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Dropout, Linear +from paddle.regularizer import L2Decay +from paddle.nn.initializer import KaimingNormal +from paddle.utils.download import get_path_from_url + +MODEL_URLS = { + "PPLCNetV2_small": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_small_ssld_pretrained.pdparams", + "PPLCNetV2_base": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_base_ssld_pretrained.pdparams", + "PPLCNetV2_large": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNetV2_large_ssld_pretrained.pdparams", +} + +__all__ = list(MODEL_URLS.keys()) + +NET_CONFIG = { + # in_channels, kernel_size, split_pw, use_rep, use_se, use_shortcut + "stage1": [64, 3, False, False, False, False], + "stage2": [128, 3, False, False, False, False], + "stage3": [256, 5, True, True, True, False], + "stage4": [512, 5, False, True, False, True], +} + + +def make_divisible(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNLayer(nn.Layer): + def __init__( + self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True + ): + super().__init__() + self.use_act = use_act + self.conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(initializer=KaimingNormal()), + bias_attr=False, + ) + + self.bn = BatchNorm2D( + out_channels, + weight_attr=ParamAttr(regularizer=L2Decay(0.0)), + bias_attr=ParamAttr(regularizer=L2Decay(0.0)), + ) + if self.use_act: + self.act = nn.ReLU() + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + if self.use_act: + x = self.act(x) + return x + + +class SEModule(nn.Layer): + def __init__(self, channel, reduction=4): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + ) + self.relu = nn.ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + ) + self.hardsigmoid = nn.Sigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class RepDepthwiseSeparable(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + stride, + dw_size=3, + split_pw=False, + use_rep=False, + use_se=False, + use_shortcut=False, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.is_repped = False + + self.dw_size = dw_size + self.split_pw = split_pw + self.use_rep = use_rep + self.use_se = use_se + self.use_shortcut = ( + True + if use_shortcut and stride == 1 and in_channels == out_channels + else False + ) + + if self.use_rep: + self.dw_conv_list = nn.LayerList() + for kernel_size in range(self.dw_size, 0, -2): + if kernel_size == 1 and stride != 1: + continue + dw_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + 
kernel_size=kernel_size, + stride=stride, + groups=in_channels, + use_act=False, + ) + self.dw_conv_list.append(dw_conv) + self.dw_conv = nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=dw_size, + stride=stride, + padding=(dw_size - 1) // 2, + groups=in_channels, + ) + else: + self.dw_conv = ConvBNLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=dw_size, + stride=stride, + groups=in_channels, + ) + + self.act = nn.ReLU() + + if use_se: + self.se = SEModule(in_channels) + + if self.split_pw: + pw_ratio = 0.5 + self.pw_conv_1 = ConvBNLayer( + in_channels=in_channels, + kernel_size=1, + out_channels=int(out_channels * pw_ratio), + stride=1, + ) + self.pw_conv_2 = ConvBNLayer( + in_channels=int(out_channels * pw_ratio), + kernel_size=1, + out_channels=out_channels, + stride=1, + ) + else: + self.pw_conv = ConvBNLayer( + in_channels=in_channels, + kernel_size=1, + out_channels=out_channels, + stride=1, + ) + + def forward(self, x): + if self.use_rep: + input_x = x + if self.is_repped: + x = self.act(self.dw_conv(x)) + else: + y = self.dw_conv_list[0](x) + for dw_conv in self.dw_conv_list[1:]: + y += dw_conv(x) + x = self.act(y) + else: + x = self.dw_conv(x) + + if self.use_se: + x = self.se(x) + if self.split_pw: + x = self.pw_conv_1(x) + x = self.pw_conv_2(x) + else: + x = self.pw_conv(x) + if self.use_shortcut: + x = x + input_x + return x + + def re_parameterize(self): + if self.use_rep: + self.is_repped = True + kernel, bias = self._get_equivalent_kernel_bias() + self.dw_conv.weight.set_value(kernel) + self.dw_conv.bias.set_value(bias) + + def _get_equivalent_kernel_bias(self): + kernel_sum = 0 + bias_sum = 0 + for dw_conv in self.dw_conv_list: + kernel, bias = self._fuse_bn_tensor(dw_conv) + kernel = self._pad_tensor(kernel, to_size=self.dw_size) + kernel_sum += kernel + bias_sum += bias + return kernel_sum, bias_sum + + def _fuse_bn_tensor(self, branch): + kernel = branch.conv.weight + running_mean = branch.bn._mean + running_var = branch.bn._variance + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn._epsilon + std = (running_var + eps).sqrt() + t = (gamma / std).reshape((-1, 1, 1, 1)) + return kernel * t, beta - running_mean * gamma / std + + def _pad_tensor(self, tensor, to_size): + from_size = tensor.shape[-1] + if from_size == to_size: + return tensor + pad = (to_size - from_size) // 2 + return F.pad(tensor, [pad, pad, pad, pad]) + + +class PPLCNetV2(nn.Layer): + def __init__(self, scale, depths, out_indx=[1, 2, 3, 4], **kwargs): + super().__init__(**kwargs) + self.scale = scale + self.out_channels = [ + # int(NET_CONFIG["blocks3"][-1][2] * scale), + int(NET_CONFIG["stage1"][0] * scale * 2), + int(NET_CONFIG["stage2"][0] * scale * 2), + int(NET_CONFIG["stage3"][0] * scale * 2), + int(NET_CONFIG["stage4"][0] * scale * 2), + ] + self.stem = nn.Sequential( + *[ + ConvBNLayer( + in_channels=3, + kernel_size=3, + out_channels=make_divisible(32 * scale), + stride=2, + ), + RepDepthwiseSeparable( + in_channels=make_divisible(32 * scale), + out_channels=make_divisible(64 * scale), + stride=1, + dw_size=3, + ), + ] + ) + self.out_indx = out_indx + # stages + self.stages = nn.LayerList() + for depth_idx, k in enumerate(NET_CONFIG): + ( + in_channels, + kernel_size, + split_pw, + use_rep, + use_se, + use_shortcut, + ) = NET_CONFIG[k] + self.stages.append( + nn.Sequential( + *[ + RepDepthwiseSeparable( + in_channels=make_divisible( + (in_channels if i == 0 else in_channels * 2) * scale + ), + 
+                            out_channels=make_divisible(in_channels * 2 * scale),
+                            stride=2 if i == 0 else 1,
+                            dw_size=kernel_size,
+                            split_pw=split_pw,
+                            use_rep=use_rep,
+                            use_se=use_se,
+                            use_shortcut=use_shortcut,
+                        )
+                        for i in range(depths[depth_idx])
+                    ]
+                )
+            )
+
+        # SSLD-pretrained ImageNet weights are always loaded for this backbone
+        self._load_pretrained(MODEL_URLS["PPLCNetV2_base"], use_ssld=True)
+
+    def forward(self, x):
+        x = self.stem(x)
+        i = 1
+        outs = []
+        for stage in self.stages:
+            x = stage(x)
+            if i in self.out_indx:
+                outs.append(x)
+            i += 1
+        return outs
+
+    def _load_pretrained(self, pretrained_url, use_ssld=False):
+        print(pretrained_url)
+        local_weight_path = get_path_from_url(
+            pretrained_url, os.path.expanduser("~/.paddleclas/weights")
+        )
+        param_state_dict = paddle.load(local_weight_path)
+        self.set_dict(param_state_dict)
+        print("load ssld pretrained params success!")
+        return
+
+
+def PPLCNetV2_base(in_channels=3, **kwargs):
+    """
+    PPLCNetV2_base
+    Args:
+        in_channels: int=3. Accepted for interface compatibility; the stem
+                     is fixed to 3 input channels.
+        **kwargs: extra arguments forwarded to `PPLCNetV2`, e.g. `out_indx`.
+    Returns:
+        model: nn.Layer. A `PPLCNetV2_base` model with SSLD-pretrained weights.
+    """
+    model = PPLCNetV2(scale=1.0, depths=[2, 2, 6, 2], **kwargs)
+    return model
diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py
new file mode 100644
index 0000000..b54670c
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_lcnetv3.py
@@ -0,0 +1,554 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
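# --- Editor's note (illustrative sketch, not part of the patch) -------------
# The multi-branch depthwise convolutions in RepDepthwiseSeparable above can
# be folded into a single conv via `re_parameterize()`. In eval mode (BN uses
# running statistics) the fused block should match the multi-branch output up
# to floating-point tolerance:
import paddle
from modeling.backbones.det_pp_lcnet_v2 import RepDepthwiseSeparable

block = RepDepthwiseSeparable(
    in_channels=32, out_channels=32, stride=1, dw_size=5,
    use_rep=True, use_shortcut=True,
)
block.eval()
x = paddle.randn([1, 32, 16, 16])
y_branches = block(x)    # sums the 5x5 / 3x3 / 1x1 ConvBN branches
block.re_parameterize()  # folds BN and pads kernels into one 5x5 conv
y_fused = block(x)
print(float((y_branches - y_fused).abs().max()))  # expected ~1e-6 or smaller
# -----------------------------------------------------------------------------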
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr +from paddle.nn.initializer import Constant, KaimingNormal +from paddle.nn import ( + AdaptiveAvgPool2D, + BatchNorm2D, + Conv2D, + Dropout, + Hardsigmoid, + Hardswish, + Identity, + Linear, + ReLU, +) +from paddle.regularizer import L2Decay + +NET_CONFIG_det = { + "blocks2": + # k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], + "blocks5": [ + [3, 128, 256, 2, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + ], + "blocks6": [ + [5, 256, 512, 2, True], + [5, 512, 512, 1, True], + [5, 512, 512, 1, False], + [5, 512, 512, 1, False], + ], +} + +NET_CONFIG_rec = { + "blocks2": + # k, in_c, out_c, s, use_se + [[3, 16, 32, 1, False]], + "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]], + "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]], + "blocks5": [ + [3, 128, 256, (1, 2), False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + [5, 256, 256, 1, False], + ], + "blocks6": [ + [5, 256, 512, (2, 1), True], + [5, 512, 512, 1, True], + [5, 512, 512, (2, 1), False], + [5, 512, 512, 1, False], + ], +} + + +def make_divisible(v, divisor=16, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class LearnableAffineBlock(nn.Layer): + def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1): + super().__init__() + self.scale = self.create_parameter( + shape=[ + 1, + ], + default_initializer=Constant(value=scale_value), + attr=ParamAttr(learning_rate=lr_mult * lab_lr), + ) + self.add_parameter("scale", self.scale) + self.bias = self.create_parameter( + shape=[ + 1, + ], + default_initializer=Constant(value=bias_value), + attr=ParamAttr(learning_rate=lr_mult * lab_lr), + ) + self.add_parameter("bias", self.bias) + + def forward(self, x): + return self.scale * x + self.bias + + +class ConvBNLayer(nn.Layer): + def __init__( + self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0 + ): + super().__init__() + self.conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(initializer=KaimingNormal(), learning_rate=lr_mult), + bias_attr=False, + ) + + self.bn = BatchNorm2D( + out_channels, + weight_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult), + bias_attr=ParamAttr(regularizer=L2Decay(0.0), learning_rate=lr_mult), + ) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class Act(nn.Layer): + def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1): + super().__init__() + if act == "hswish": + self.act = Hardswish() + else: + assert act == "relu" + self.act = ReLU() + self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) + + def forward(self, x): + return self.lab(self.act(x)) + + +class LearnableRepLayer(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + groups=1, + num_conv_branches=1, + lr_mult=1.0, + lab_lr=0.1, + ): + super().__init__() 
+ self.is_repped = False + self.groups = groups + self.stride = stride + self.kernel_size = kernel_size + self.in_channels = in_channels + self.out_channels = out_channels + self.num_conv_branches = num_conv_branches + self.padding = (kernel_size - 1) // 2 + + self.identity = ( + BatchNorm2D( + num_features=in_channels, + weight_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=ParamAttr(learning_rate=lr_mult), + ) + if out_channels == in_channels and stride == 1 + else None + ) + + self.conv_kxk = nn.LayerList( + [ + ConvBNLayer( + in_channels, + out_channels, + kernel_size, + stride, + groups=groups, + lr_mult=lr_mult, + ) + for _ in range(self.num_conv_branches) + ] + ) + + self.conv_1x1 = ( + ConvBNLayer( + in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult + ) + if kernel_size > 1 + else None + ) + + self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) + self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr) + + def forward(self, x): + # for export + if self.is_repped: + out = self.lab(self.reparam_conv(x)) + if self.stride != 2: + out = self.act(out) + return out + + out = 0 + if self.identity is not None: + out += self.identity(x) + + if self.conv_1x1 is not None: + out += self.conv_1x1(x) + + for conv in self.conv_kxk: + out += conv(x) + + out = self.lab(out) + if self.stride != 2: + out = self.act(out) + return out + + def rep(self): + if self.is_repped: + return + kernel, bias = self._get_kernel_bias() + self.reparam_conv = Conv2D( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=self.kernel_size, + stride=self.stride, + padding=self.padding, + groups=self.groups, + ) + self.reparam_conv.weight.set_value(kernel) + self.reparam_conv.bias.set_value(bias) + self.is_repped = True + + def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad): + if not isinstance(kernel1x1, paddle.Tensor): + return 0 + else: + return nn.functional.pad(kernel1x1, [pad, pad, pad, pad]) + + def _get_kernel_bias(self): + kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1) + kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk( + kernel_conv_1x1, self.kernel_size // 2 + ) + + kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity) + + kernel_conv_kxk = 0 + bias_conv_kxk = 0 + for conv in self.conv_kxk: + kernel, bias = self._fuse_bn_tensor(conv) + kernel_conv_kxk += kernel + bias_conv_kxk += bias + + kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity + bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity + return kernel_reparam, bias_reparam + + def _fuse_bn_tensor(self, branch): + if not branch: + return 0, 0 + elif isinstance(branch, ConvBNLayer): + kernel = branch.conv.weight + running_mean = branch.bn._mean + running_var = branch.bn._variance + gamma = branch.bn.weight + beta = branch.bn.bias + eps = branch.bn._epsilon + else: + assert isinstance(branch, BatchNorm2D) + if not hasattr(self, "id_tensor"): + input_dim = self.in_channels // self.groups + kernel_value = paddle.zeros( + (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + dtype=branch.weight.dtype, + ) + for i in range(self.in_channels): + kernel_value[ + i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + ] = 1 + self.id_tensor = kernel_value + kernel = self.id_tensor + running_mean = branch._mean + running_var = branch._variance + gamma = branch.weight + beta = branch.bias + eps = branch._epsilon + std = (running_var + eps).sqrt() + t = (gamma / std).reshape((-1, 1, 1, 1)) + return kernel * t, beta - running_mean * gamma 
/ std + + +class SELayer(nn.Layer): + def __init__(self, channel, reduction=4, lr_mult=1.0): + super().__init__() + self.avg_pool = AdaptiveAvgPool2D(1) + self.conv1 = Conv2D( + in_channels=channel, + out_channels=channel // reduction, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=ParamAttr(learning_rate=lr_mult), + ) + self.relu = ReLU() + self.conv2 = Conv2D( + in_channels=channel // reduction, + out_channels=channel, + kernel_size=1, + stride=1, + padding=0, + weight_attr=ParamAttr(learning_rate=lr_mult), + bias_attr=ParamAttr(learning_rate=lr_mult), + ) + self.hardsigmoid = Hardsigmoid() + + def forward(self, x): + identity = x + x = self.avg_pool(x) + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.hardsigmoid(x) + x = paddle.multiply(x=identity, y=x) + return x + + +class LCNetV3Block(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + stride, + dw_size, + use_se=False, + conv_kxk_num=4, + lr_mult=1.0, + lab_lr=0.1, + ): + super().__init__() + self.use_se = use_se + self.dw_conv = LearnableRepLayer( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=dw_size, + stride=stride, + groups=in_channels, + num_conv_branches=conv_kxk_num, + lr_mult=lr_mult, + lab_lr=lab_lr, + ) + if use_se: + self.se = SELayer(in_channels, lr_mult=lr_mult) + self.pw_conv = LearnableRepLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + num_conv_branches=conv_kxk_num, + lr_mult=lr_mult, + lab_lr=lab_lr, + ) + + def forward(self, x): + x = self.dw_conv(x) + if self.use_se: + x = self.se(x) + x = self.pw_conv(x) + return x + + +class PPLCNetV3(nn.Layer): + def __init__( + self, + scale=1.0, + conv_kxk_num=4, + lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + lab_lr=0.1, + det=False, + **kwargs, + ): + super().__init__() + self.scale = scale + self.lr_mult_list = lr_mult_list + self.det = det + + self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec + + assert isinstance( + self.lr_mult_list, (list, tuple) + ), "lr_mult_list should be in (list, tuple) but got {}".format( + type(self.lr_mult_list) + ) + assert ( + len(self.lr_mult_list) == 6 + ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list)) + + self.conv1 = ConvBNLayer( + in_channels=3, + out_channels=make_divisible(16 * scale), + kernel_size=3, + stride=2, + lr_mult=self.lr_mult_list[0], + ) + + self.blocks2 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[1], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"]) + ] + ) + + self.blocks3 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[2], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"]) + ] + ) + + self.blocks4 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[3], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"]) + ] + ) + + self.blocks5 = nn.Sequential( + *[ + LCNetV3Block( + 
in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[4], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"]) + ] + ) + + self.blocks6 = nn.Sequential( + *[ + LCNetV3Block( + in_channels=make_divisible(in_c * scale), + out_channels=make_divisible(out_c * scale), + dw_size=k, + stride=s, + use_se=se, + conv_kxk_num=conv_kxk_num, + lr_mult=self.lr_mult_list[5], + lab_lr=lab_lr, + ) + for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"]) + ] + ) + self.out_channels = make_divisible(512 * scale) + + if self.det: + mv_c = [16, 24, 56, 480] + self.out_channels = [ + make_divisible(self.net_config["blocks3"][-1][2] * scale), + make_divisible(self.net_config["blocks4"][-1][2] * scale), + make_divisible(self.net_config["blocks5"][-1][2] * scale), + make_divisible(self.net_config["blocks6"][-1][2] * scale), + ] + + self.layer_list = nn.LayerList( + [ + nn.Conv2D(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0), + nn.Conv2D(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0), + nn.Conv2D(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0), + nn.Conv2D(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0), + ] + ) + self.out_channels = [ + int(mv_c[0] * scale), + int(mv_c[1] * scale), + int(mv_c[2] * scale), + int(mv_c[3] * scale), + ] + + def forward(self, x): + out_list = [] + x = self.conv1(x) + + x = self.blocks2(x) + x = self.blocks3(x) + out_list.append(x) + x = self.blocks4(x) + out_list.append(x) + x = self.blocks5(x) + out_list.append(x) + x = self.blocks6(x) + out_list.append(x) + + if self.det: + out_list[0] = self.layer_list[0](out_list[0]) + out_list[1] = self.layer_list[1](out_list[1]) + out_list[2] = self.layer_list[2](out_list[2]) + out_list[3] = self.layer_list[3](out_list[3]) + return out_list + + if self.training: + x = F.adaptive_avg_pool2d(x, [1, 40]) + else: + x = F.avg_pool2d(x, [3, 2]) + return x diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py new file mode 100644 index 0000000..d259f1d --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_resnet_fpn.py @@ -0,0 +1,317 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
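# --- Editor's note (worked example, not part of the patch) ------------------
# `make_divisible` above (divisor=16 in rec_lcnetv3.py) rounds a scaled
# channel count to the nearest multiple of the divisor, then bumps it up one
# step whenever rounding would lose more than 10% of the original value:
def make_divisible(v, divisor=16, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

assert make_divisible(16 * 0.5) == 16  # 8 is clamped up to min_value
assert make_divisible(64 * 0.5) == 32  # 32 is already a multiple of 16
assert make_divisible(40 * 0.5) == 32  # 20 -> 16, below 0.9 * 20, bump to 32
# -----------------------------------------------------------------------------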
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import nn, ParamAttr +from paddle.nn import functional as F +import paddle +import numpy as np + +__all__ = ["ResNetFPN"] + + +class ResNetFPN(nn.Layer): + def __init__(self, in_channels=1, layers=50, **kwargs): + super(ResNetFPN, self).__init__() + supported_layers = { + 18: {"depth": [2, 2, 2, 2], "block_class": BasicBlock}, + 34: {"depth": [3, 4, 6, 3], "block_class": BasicBlock}, + 50: {"depth": [3, 4, 6, 3], "block_class": BottleneckBlock}, + 101: {"depth": [3, 4, 23, 3], "block_class": BottleneckBlock}, + 152: {"depth": [3, 8, 36, 3], "block_class": BottleneckBlock}, + } + stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)] + num_filters = [64, 128, 256, 512] + self.depth = supported_layers[layers]["depth"] + self.F = [] + self.conv = ConvBNLayer( + in_channels=in_channels, + out_channels=64, + kernel_size=7, + stride=2, + act="relu", + name="conv1", + ) + self.block_list = [] + in_ch = 64 + if layers >= 50: + for block in range(len(self.depth)): + for i in range(self.depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + block_list = self.add_sublayer( + "bottleneckBlock_{}_{}".format(block, i), + BottleneckBlock( + in_channels=in_ch, + out_channels=num_filters[block], + stride=stride_list[block] if i == 0 else 1, + name=conv_name, + ), + ) + in_ch = num_filters[block] * 4 + self.block_list.append(block_list) + self.F.append(block_list) + else: + for block in range(len(self.depth)): + for i in range(self.depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + if i == 0 and block != 0: + stride = (2, 1) + else: + stride = (1, 1) + basic_block = self.add_sublayer( + conv_name, + BasicBlock( + in_channels=in_ch, + out_channels=num_filters[block], + stride=stride_list[block] if i == 0 else 1, + is_first=block == i == 0, + name=conv_name, + ), + ) + in_ch = basic_block.out_channels + self.block_list.append(basic_block) + out_ch_list = [in_ch // 4, in_ch // 2, in_ch] + self.base_block = [] + self.conv_trans = [] + self.bn_block = [] + for i in [-2, -3]: + in_channels = out_ch_list[i + 1] + out_ch_list[i] + + self.base_block.append( + self.add_sublayer( + "F_{}_base_block_0".format(i), + nn.Conv2D( + in_channels=in_channels, + out_channels=out_ch_list[i], + kernel_size=1, + weight_attr=ParamAttr(trainable=True), + bias_attr=ParamAttr(trainable=True), + ), + ) + ) + self.base_block.append( + self.add_sublayer( + "F_{}_base_block_1".format(i), + nn.Conv2D( + in_channels=out_ch_list[i], + out_channels=out_ch_list[i], + kernel_size=3, + padding=1, + weight_attr=ParamAttr(trainable=True), + bias_attr=ParamAttr(trainable=True), + ), + ) + ) + self.base_block.append( + self.add_sublayer( + "F_{}_base_block_2".format(i), + nn.BatchNorm( + num_channels=out_ch_list[i], + act="relu", + param_attr=ParamAttr(trainable=True), + bias_attr=ParamAttr(trainable=True), + ), + ) + ) + self.base_block.append( + self.add_sublayer( + "F_{}_base_block_3".format(i), + nn.Conv2D( + in_channels=out_ch_list[i], + out_channels=512, + kernel_size=1, + bias_attr=ParamAttr(trainable=True), + weight_attr=ParamAttr(trainable=True), + ), + ) + ) + self.out_channels = 512 + + def __call__(self, x): + x = self.conv(x) + fpn_list = [] + F = [] + for i in range(len(self.depth)): + fpn_list.append(np.sum(self.depth[: i 
+ 1])) + + for i, block in enumerate(self.block_list): + x = block(x) + for number in fpn_list: + if i + 1 == number: + F.append(x) + base = F[-1] + + j = 0 + for i, block in enumerate(self.base_block): + if i % 3 == 0 and i < 6: + j = j + 1 + b, c, w, h = F[-j - 1].shape + if [w, h] == list(base.shape[2:]): + base = base + else: + base = self.conv_trans[j - 1](base) + base = self.bn_block[j - 1](base) + base = paddle.concat([base, F[-j - 1]], axis=1) + base = block(base) + return base + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + groups=1, + act=None, + name=None, + ): + super(ConvBNLayer, self).__init__() + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=2 if stride == (1, 1) else kernel_size, + dilation=2 if stride == (1, 1) else 1, + stride=stride, + padding=(kernel_size - 1) // 2, + groups=groups, + weight_attr=ParamAttr(name=name + ".conv2d.output.1.w_0"), + bias_attr=False, + ) + + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + self.bn = nn.BatchNorm( + num_channels=out_channels, + act=act, + param_attr=ParamAttr(name=name + ".output.1.w_0"), + bias_attr=ParamAttr(name=name + ".output.1.b_0"), + moving_mean_name=bn_name + "_mean", + moving_variance_name=bn_name + "_variance", + ) + + def __call__(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class ShortCut(nn.Layer): + def __init__(self, in_channels, out_channels, stride, name, is_first=False): + super(ShortCut, self).__init__() + self.use_conv = True + + if in_channels != out_channels or stride != 1 or is_first == True: + if stride == (1, 1): + self.conv = ConvBNLayer(in_channels, out_channels, 1, 1, name=name) + else: # stride==(2,2) + self.conv = ConvBNLayer(in_channels, out_channels, 1, stride, name=name) + else: + self.use_conv = False + + def forward(self, x): + if self.use_conv: + x = self.conv(x) + return x + + +class BottleneckBlock(nn.Layer): + def __init__(self, in_channels, out_channels, stride, name): + super(BottleneckBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + act="relu", + name=name + "_branch2a", + ) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act="relu", + name=name + "_branch2b", + ) + + self.conv2 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels * 4, + kernel_size=1, + act=None, + name=name + "_branch2c", + ) + + self.short = ShortCut( + in_channels=in_channels, + out_channels=out_channels * 4, + stride=stride, + is_first=False, + name=name + "_branch1", + ) + self.out_channels = out_channels * 4 + + def forward(self, x): + y = self.conv0(x) + y = self.conv1(y) + y = self.conv2(y) + y = y + self.short(x) + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, in_channels, out_channels, stride, name, is_first): + super(BasicBlock, self).__init__() + self.conv0 = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + act="relu", + stride=stride, + name=name + "_branch2a", + ) + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + act=None, + name=name + "_branch2b", + ) + self.short = ShortCut( + in_channels=in_channels, + out_channels=out_channels, + stride=stride, + is_first=is_first, + name=name + "_branch1", + ) + self.out_channels = out_channels + + def 
forward(self, x): + y = self.conv0(x) + y = self.conv1(y) + y = y + self.short(x) + return F.relu(y) diff --git a/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py b/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py new file mode 100644 index 0000000..427c87b --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/backbones/rec_svtrnet.py @@ -0,0 +1,642 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle import ParamAttr +from paddle.nn.initializer import KaimingNormal +import numpy as np +import paddle +import paddle.nn as nn +from paddle.nn.initializer import TruncatedNormal, Constant, Normal + +trunc_normal_ = TruncatedNormal(std=0.02) +normal_ = Normal +zeros_ = Constant(value=0.0) +ones_ = Constant(value=1.0) + + +def drop_path(x, drop_prob=0.0, training=False): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... + """ + if drop_prob == 0.0 or not training: + return x + keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype) + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) + random_tensor = paddle.floor(random_tensor) # binarize + output = x.divide(keep_prob) * random_tensor + return output + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=0, + bias_attr=False, + groups=1, + act=nn.GELU, + ): + super().__init__() + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=groups, + weight_attr=paddle.ParamAttr(initializer=nn.initializer.KaimingUniform()), + bias_attr=bias_attr, + ) + self.norm = nn.BatchNorm2D(out_channels) + self.act = act() + + def forward(self, inputs): + out = self.conv(inputs) + out = self.norm(out) + out = self.act(out) + return out + + +class DropPath(nn.Layer): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" + + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + +class Identity(nn.Layer): + def __init__(self): + super(Identity, self).__init__() + + def forward(self, input): + return input + + +class Mlp(nn.Layer): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def 
forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class ConvMixer(nn.Layer): + def __init__( + self, + dim, + num_heads=8, + HW=[8, 25], + local_k=[3, 3], + ): + super().__init__() + self.HW = HW + self.dim = dim + self.local_mixer = nn.Conv2D( + dim, + dim, + local_k, + 1, + [local_k[0] // 2, local_k[1] // 2], + groups=num_heads, + weight_attr=ParamAttr(initializer=KaimingNormal()), + ) + + def forward(self, x): + h = self.HW[0] + w = self.HW[1] + x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w]) + x = self.local_mixer(x) + x = x.flatten(2).transpose([0, 2, 1]) + return x + + +class Attention(nn.Layer): + def __init__( + self, + dim, + num_heads=8, + mixer="Global", + HW=None, + local_k=[7, 11], + qkv_bias=False, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.num_heads = num_heads + self.dim = dim + self.head_dim = dim // num_heads + self.scale = qk_scale or self.head_dim**-0.5 + + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + self.HW = HW + if HW is not None: + H = HW[0] + W = HW[1] + self.N = H * W + self.C = dim + if mixer == "Local" and HW is not None: + hk = local_k[0] + wk = local_k[1] + mask = paddle.ones([H * W, H + hk - 1, W + wk - 1], dtype="float32") + for h in range(0, H): + for w in range(0, W): + mask[h * W + w, h : h + hk, w : w + wk] = 0.0 + mask_paddle = mask[:, hk // 2 : H + hk // 2, wk // 2 : W + wk // 2].flatten( + 1 + ) + mask_inf = paddle.full([H * W, H * W], "-inf", dtype="float32") + mask = paddle.where(mask_paddle < 1, mask_paddle, mask_inf) + self.mask = mask.unsqueeze([0, 1]) + self.mixer = mixer + + def forward(self, x): + qkv = ( + self.qkv(x) + .reshape((0, -1, 3, self.num_heads, self.head_dim)) + .transpose((2, 0, 3, 1, 4)) + ) + q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] + + attn = q.matmul(k.transpose((0, 1, 3, 2))) + if self.mixer == "Local": + attn += self.mask + attn = nn.functional.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, -1, self.dim)) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Layer): + def __init__( + self, + dim, + num_heads, + mixer="Global", + local_mixer=[7, 11], + HW=None, + mlp_ratio=4.0, + qkv_bias=False, + qk_scale=None, + drop=0.0, + attn_drop=0.0, + drop_path=0.0, + act_layer=nn.GELU, + norm_layer="nn.LayerNorm", + epsilon=1e-6, + prenorm=True, + ): + super().__init__() + if isinstance(norm_layer, str): + self.norm1 = eval(norm_layer)(dim, epsilon=epsilon) + else: + self.norm1 = norm_layer(dim) + if mixer == "Global" or mixer == "Local": + self.mixer = Attention( + dim, + num_heads=num_heads, + mixer=mixer, + HW=HW, + local_k=local_mixer, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop, + ) + elif mixer == "Conv": + self.mixer = ConvMixer(dim, num_heads=num_heads, HW=HW, local_k=local_mixer) + else: + raise TypeError("The mixer must be one of [Global, Local, Conv]") + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() + if isinstance(norm_layer, str): + self.norm2 = eval(norm_layer)(dim, epsilon=epsilon) + else: + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp_ratio = mlp_ratio + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + self.prenorm = 
prenorm + + def forward(self, x): + if self.prenorm: + x = self.norm1(x + self.drop_path(self.mixer(x))) + x = self.norm2(x + self.drop_path(self.mlp(x))) + else: + x = x + self.drop_path(self.mixer(self.norm1(x))) + x = x + self.drop_path(self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Layer): + """Image to Patch Embedding""" + + def __init__( + self, + img_size=[32, 100], + in_channels=3, + embed_dim=768, + sub_num=2, + patch_size=[4, 4], + mode="pope", + ): + super().__init__() + num_patches = (img_size[1] // (2**sub_num)) * (img_size[0] // (2**sub_num)) + self.img_size = img_size + self.num_patches = num_patches + self.embed_dim = embed_dim + self.norm = None + if mode == "pope": + if sub_num == 2: + self.proj = nn.Sequential( + ConvBNLayer( + in_channels=in_channels, + out_channels=embed_dim // 2, + kernel_size=3, + stride=2, + padding=1, + act=nn.GELU, + bias_attr=None, + ), + ConvBNLayer( + in_channels=embed_dim // 2, + out_channels=embed_dim, + kernel_size=3, + stride=2, + padding=1, + act=nn.GELU, + bias_attr=None, + ), + ) + if sub_num == 3: + self.proj = nn.Sequential( + ConvBNLayer( + in_channels=in_channels, + out_channels=embed_dim // 4, + kernel_size=3, + stride=2, + padding=1, + act=nn.GELU, + bias_attr=None, + ), + ConvBNLayer( + in_channels=embed_dim // 4, + out_channels=embed_dim // 2, + kernel_size=3, + stride=2, + padding=1, + act=nn.GELU, + bias_attr=None, + ), + ConvBNLayer( + in_channels=embed_dim // 2, + out_channels=embed_dim, + kernel_size=3, + stride=2, + padding=1, + act=nn.GELU, + bias_attr=None, + ), + ) + elif mode == "linear": + self.proj = nn.Conv2D( + 1, embed_dim, kernel_size=patch_size, stride=patch_size + ) + self.num_patches = ( + img_size[0] // patch_size[0] * img_size[1] // patch_size[1] + ) + + def forward(self, x): + B, C, H, W = x.shape + assert ( + H == self.img_size[0] and W == self.img_size[1] + ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
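A quick sanity check of the grid arithmetic used above (standalone Python, illustrative only): with the SVTR defaults img_size=[32, 100] and sub_num=2, each stride-2 ConvBNLayer halves both spatial axes, so the token grid and patch count come out as:

img_size, sub_num = [32, 100], 2
hw = [img_size[0] // (2 ** sub_num), img_size[1] // (2 ** sub_num)]
num_patches = hw[0] * hw[1]
print(hw, num_patches)  # [8, 25] 200 -- the same [8, 25] grid that ConvMixer's HW default assumes
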
+ x = self.proj(x).flatten(2).transpose((0, 2, 1)) + return x + + +class SubSample(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + types="Pool", + stride=[2, 1], + sub_norm="nn.LayerNorm", + act=None, + ): + super().__init__() + self.types = types + if types == "Pool": + self.avgpool = nn.AvgPool2D( + kernel_size=[3, 5], stride=stride, padding=[1, 2] + ) + self.maxpool = nn.MaxPool2D( + kernel_size=[3, 5], stride=stride, padding=[1, 2] + ) + self.proj = nn.Linear(in_channels, out_channels) + else: + self.conv = nn.Conv2D( + in_channels, + out_channels, + kernel_size=3, + stride=stride, + padding=1, + weight_attr=ParamAttr(initializer=KaimingNormal()), + ) + self.norm = eval(sub_norm)(out_channels) + if act is not None: + self.act = act() + else: + self.act = None + + def forward(self, x): + if self.types == "Pool": + x1 = self.avgpool(x) + x2 = self.maxpool(x) + x = (x1 + x2) * 0.5 + out = self.proj(x.flatten(2).transpose((0, 2, 1))) + else: + x = self.conv(x) + out = x.flatten(2).transpose((0, 2, 1)) + out = self.norm(out) + if self.act is not None: + out = self.act(out) + + return out + + +class SVTRNet(nn.Layer): + def __init__( + self, + img_size=[32, 100], + in_channels=3, + embed_dim=[64, 128, 256], + depth=[3, 6, 3], + num_heads=[2, 4, 8], + mixer=["Local"] * 6 + ["Global"] * 6, # Local atten, Global atten, Conv + local_mixer=[[7, 11], [7, 11], [7, 11]], + patch_merging="Conv", # Conv, Pool, None + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + last_drop=0.1, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_layer="nn.LayerNorm", + sub_norm="nn.LayerNorm", + epsilon=1e-6, + out_channels=192, + out_char_num=25, + block_unit="Block", + act="nn.GELU", + last_stage=True, + sub_num=2, + prenorm=True, + use_lenhead=False, + **kwargs, + ): + super().__init__() + self.img_size = img_size + self.embed_dim = embed_dim + self.out_channels = out_channels + self.prenorm = prenorm + patch_merging = ( + None + if patch_merging != "Conv" and patch_merging != "Pool" + else patch_merging + ) + self.patch_embed = PatchEmbed( + img_size=img_size, + in_channels=in_channels, + embed_dim=embed_dim[0], + sub_num=sub_num, + ) + num_patches = self.patch_embed.num_patches + self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)] + self.pos_embed = self.create_parameter( + shape=[1, num_patches, embed_dim[0]], default_initializer=zeros_ + ) + self.add_parameter("pos_embed", self.pos_embed) + self.pos_drop = nn.Dropout(p=drop_rate) + Block_unit = eval(block_unit) + + dpr = np.linspace(0, drop_path_rate, sum(depth)) + self.blocks1 = nn.LayerList( + [ + Block_unit( + dim=embed_dim[0], + num_heads=num_heads[0], + mixer=mixer[0 : depth[0]][i], + HW=self.HW, + local_mixer=local_mixer[0], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=eval(act), + attn_drop=attn_drop_rate, + drop_path=dpr[0 : depth[0]][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[0]) + ] + ) + if patch_merging is not None: + self.sub_sample1 = SubSample( + embed_dim[0], + embed_dim[1], + sub_norm=sub_norm, + stride=[2, 1], + types=patch_merging, + ) + HW = [self.HW[0] // 2, self.HW[1]] + else: + HW = self.HW + self.patch_merging = patch_merging + self.blocks2 = nn.LayerList( + [ + Block_unit( + dim=embed_dim[1], + num_heads=num_heads[1], + mixer=mixer[depth[0] : depth[0] + depth[1]][i], + HW=HW, + local_mixer=local_mixer[1], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + 
drop=drop_rate, + act_layer=eval(act), + attn_drop=attn_drop_rate, + drop_path=dpr[depth[0] : depth[0] + depth[1]][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[1]) + ] + ) + if patch_merging is not None: + self.sub_sample2 = SubSample( + embed_dim[1], + embed_dim[2], + sub_norm=sub_norm, + stride=[2, 1], + types=patch_merging, + ) + HW = [self.HW[0] // 4, self.HW[1]] + else: + HW = self.HW + self.blocks3 = nn.LayerList( + [ + Block_unit( + dim=embed_dim[2], + num_heads=num_heads[2], + mixer=mixer[depth[0] + depth[1] :][i], + HW=HW, + local_mixer=local_mixer[2], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=eval(act), + attn_drop=attn_drop_rate, + drop_path=dpr[depth[0] + depth[1] :][i], + norm_layer=norm_layer, + epsilon=epsilon, + prenorm=prenorm, + ) + for i in range(depth[2]) + ] + ) + self.last_stage = last_stage + if last_stage: + self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num]) + self.last_conv = nn.Conv2D( + in_channels=embed_dim[2], + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + bias_attr=False, + ) + self.hardswish = nn.Hardswish() + self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer") + if not prenorm: + self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon) + self.use_lenhead = use_lenhead + if use_lenhead: + self.len_conv = nn.Linear(embed_dim[2], self.out_channels) + self.hardswish_len = nn.Hardswish() + self.dropout_len = nn.Dropout(p=last_drop, mode="downscale_in_infer") + + trunc_normal_(self.pos_embed) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward_features(self, x): + x = self.patch_embed(x) + x = x + self.pos_embed + x = self.pos_drop(x) + for blk in self.blocks1: + x = blk(x) + if self.patch_merging is not None: + x = self.sub_sample1( + x.transpose([0, 2, 1]).reshape( + [0, self.embed_dim[0], self.HW[0], self.HW[1]] + ) + ) + for blk in self.blocks2: + x = blk(x) + if self.patch_merging is not None: + x = self.sub_sample2( + x.transpose([0, 2, 1]).reshape( + [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]] + ) + ) + for blk in self.blocks3: + x = blk(x) + if not self.prenorm: + x = self.norm(x) + return x + + def forward(self, x): + x = self.forward_features(x) + if self.use_lenhead: + len_x = self.len_conv(x.mean(1)) + len_x = self.dropout_len(self.hardswish_len(len_x)) + if self.last_stage: + if self.patch_merging is not None: + h = self.HW[0] // 4 + else: + h = self.HW[0] + x = self.avg_pool( + x.transpose([0, 2, 1]).reshape([0, self.embed_dim[2], h, self.HW[1]]) + ) + x = self.last_conv(x) + x = self.hardswish(x) + x = self.dropout(x) + if self.use_lenhead: + return x, len_x + return x diff --git a/docling_ibm_models/slanet_1m/modeling/heads/__init__.py b/docling_ibm_models/slanet_1m/modeling/heads/__init__.py new file mode 100644 index 0000000..829728f --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/heads/__init__.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ["build_head"]
+
+
+def build_head(config):
+    # table head (the only head type this package currently builds)
+    from .table_att_head import SLAHead
+
+    support_dict = [
+        "SLAHead",
+    ]
+
+    module_name = config.pop("name")
+    assert module_name in support_dict, Exception(
+        "head only supports {}".format(support_dict)
+    )
+    module_class = eval(module_name)(**config)
+    return module_class
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py
new file mode 100644
index 0000000..2c952ce
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_att_head.py
@@ -0,0 +1,215 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
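For context, build_head consumes the Head node of the training config: "name" selects the class and the remaining keys are passed through as constructor kwargs. A minimal sketch of that flow (the key values below are illustrative, not copied from configs/SLANet_1M.yml):

from modeling.heads import build_head

head_config = {
    "name": "SLAHead",            # popped by build_head to pick the class
    "in_channels": [96],          # channel dims of the neck outputs; the head uses the last entry
    "hidden_size": 256,
    "out_channels": 30,           # structure-token vocabulary size
    "max_text_length": 500,
}
head = build_head(head_config)    # equivalent to SLAHead(**head_config) after the pop
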
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import numpy as np + + +class AttentionHead(nn.Layer): + def __init__(self, in_channels, out_channels, hidden_size, **kwargs): + super(AttentionHead, self).__init__() + self.input_size = in_channels + self.hidden_size = hidden_size + self.num_classes = out_channels + + self.attention_cell = AttentionGRUCell( + in_channels, hidden_size, out_channels, use_gru=False + ) + self.generator = nn.Linear(hidden_size, out_channels) + + def _char_to_onehot(self, input_char, onehot_dim): + input_ont_hot = F.one_hot(input_char, onehot_dim) + return input_ont_hot + + def forward(self, inputs, targets=None, batch_max_length=25): + batch_size = inputs.shape[0] + num_steps = batch_max_length + + hidden = paddle.zeros((batch_size, self.hidden_size)) + output_hiddens = [] + + if targets is not None: + for i in range(num_steps): + char_onehots = self._char_to_onehot( + targets[:, i], onehot_dim=self.num_classes + ) + (outputs, hidden), alpha = self.attention_cell( + hidden, inputs, char_onehots + ) + output_hiddens.append(paddle.unsqueeze(outputs, axis=1)) + output = paddle.concat(output_hiddens, axis=1) + probs = self.generator(output) + else: + targets = paddle.zeros(shape=[batch_size], dtype="int32") + probs = None + char_onehots = None + outputs = None + alpha = None + + for i in range(num_steps): + char_onehots = self._char_to_onehot( + targets, onehot_dim=self.num_classes + ) + (outputs, hidden), alpha = self.attention_cell( + hidden, inputs, char_onehots + ) + probs_step = self.generator(outputs) + if probs is None: + probs = paddle.unsqueeze(probs_step, axis=1) + else: + probs = paddle.concat( + [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1 + ) + next_input = probs_step.argmax(axis=1) + targets = next_input + if not self.training: + probs = paddle.nn.functional.softmax(probs, axis=2) + return probs + + +class AttentionGRUCell(nn.Layer): + def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): + super(AttentionGRUCell, self).__init__() + self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) + self.h2h = nn.Linear(hidden_size, hidden_size) + self.score = nn.Linear(hidden_size, 1, bias_attr=False) + + self.rnn = nn.GRUCell( + input_size=input_size + num_embeddings, hidden_size=hidden_size + ) + + self.hidden_size = hidden_size + + def forward(self, prev_hidden, batch_H, char_onehots): + batch_H_proj = self.i2h(batch_H) + prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden), axis=1) + + res = paddle.add(batch_H_proj, prev_hidden_proj) + res = paddle.tanh(res) + e = self.score(res) + + alpha = F.softmax(e, axis=1) + alpha = paddle.transpose(alpha, [0, 2, 1]) + context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) + concat_context = paddle.concat([context, char_onehots], 1) + + cur_hidden = self.rnn(concat_context, prev_hidden) + + return cur_hidden, alpha + + +class AttentionLSTM(nn.Layer): + def __init__(self, in_channels, out_channels, hidden_size, **kwargs): + super(AttentionLSTM, self).__init__() + self.input_size = in_channels + self.hidden_size = hidden_size + self.num_classes = out_channels + + self.attention_cell = AttentionLSTMCell( + in_channels, hidden_size, out_channels, use_gru=False + ) + self.generator = nn.Linear(hidden_size, out_channels) + + def _char_to_onehot(self, input_char, onehot_dim): + input_ont_hot = F.one_hot(input_char, onehot_dim) + return 
input_ont_hot + + def forward(self, inputs, targets=None, batch_max_length=25): + batch_size = inputs.shape[0] + num_steps = batch_max_length + + hidden = ( + paddle.zeros((batch_size, self.hidden_size)), + paddle.zeros((batch_size, self.hidden_size)), + ) + output_hiddens = [] + + if targets is not None: + for i in range(num_steps): + # one-hot vectors for a i-th char + char_onehots = self._char_to_onehot( + targets[:, i], onehot_dim=self.num_classes + ) + hidden, alpha = self.attention_cell(hidden, inputs, char_onehots) + + hidden = (hidden[1][0], hidden[1][1]) + output_hiddens.append(paddle.unsqueeze(hidden[0], axis=1)) + output = paddle.concat(output_hiddens, axis=1) + probs = self.generator(output) + + else: + targets = paddle.zeros(shape=[batch_size], dtype="int32") + probs = None + char_onehots = None + alpha = None + + for i in range(num_steps): + char_onehots = self._char_to_onehot( + targets, onehot_dim=self.num_classes + ) + hidden, alpha = self.attention_cell(hidden, inputs, char_onehots) + probs_step = self.generator(hidden[0]) + hidden = (hidden[1][0], hidden[1][1]) + if probs is None: + probs = paddle.unsqueeze(probs_step, axis=1) + else: + probs = paddle.concat( + [probs, paddle.unsqueeze(probs_step, axis=1)], axis=1 + ) + + next_input = probs_step.argmax(axis=1) + + targets = next_input + if not self.training: + probs = paddle.nn.functional.softmax(probs, axis=2) + return probs + + +class AttentionLSTMCell(nn.Layer): + def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False): + super(AttentionLSTMCell, self).__init__() + self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False) + self.h2h = nn.Linear(hidden_size, hidden_size) + self.score = nn.Linear(hidden_size, 1, bias_attr=False) + if not use_gru: + self.rnn = nn.LSTMCell( + input_size=input_size + num_embeddings, hidden_size=hidden_size + ) + else: + self.rnn = nn.GRUCell( + input_size=input_size + num_embeddings, hidden_size=hidden_size + ) + + self.hidden_size = hidden_size + + def forward(self, prev_hidden, batch_H, char_onehots): + batch_H_proj = self.i2h(batch_H) + prev_hidden_proj = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1) + res = paddle.add(batch_H_proj, prev_hidden_proj) + res = paddle.tanh(res) + e = self.score(res) + + alpha = F.softmax(e, axis=1) + alpha = paddle.transpose(alpha, [0, 2, 1]) + context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1) + concat_context = paddle.concat([context, char_onehots], 1) + cur_hidden = self.rnn(concat_context, prev_hidden) + + return cur_hidden, alpha diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py new file mode 100644 index 0000000..5e19a9a --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_ctc_head.py @@ -0,0 +1,92 @@ +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
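The GRU and LSTM attention cells above share the same additive (Bahdanau-style) scoring: encoder features and the previous hidden state are projected into a common space, combined through tanh, and reduced to one logit per time step. A self-contained sketch of a single scoring step, with illustrative shapes:

import paddle
import paddle.nn.functional as F

B, T, C, H = 2, 16, 64, 96              # batch, time steps, feature dim, hidden dim (illustrative)
batch_H = paddle.randn([B, T, C])       # encoder feature sequence
prev_hidden = paddle.zeros([B, H])

i2h = paddle.nn.Linear(C, H, bias_attr=False)
h2h = paddle.nn.Linear(H, H)
score = paddle.nn.Linear(H, 1, bias_attr=False)

e = score(paddle.tanh(i2h(batch_H) + h2h(prev_hidden).unsqueeze(1)))     # [B, T, 1]
alpha = F.softmax(e, axis=1)                                             # attention weights over T
context = paddle.matmul(alpha.transpose([0, 2, 1]), batch_H).squeeze(1)  # [B, C]
# the cell then concatenates `context` with the previous symbol's one-hot and steps the RNN
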
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +from paddle import ParamAttr, nn +from paddle.nn import functional as F + + +def get_para_bias_attr(l2_decay, k): + regularizer = paddle.regularizer.L2Decay(l2_decay) + stdv = 1.0 / math.sqrt(k * 1.0) + initializer = nn.initializer.Uniform(-stdv, stdv) + weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer) + bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer) + return [weight_attr, bias_attr] + + +class CTCHead(nn.Layer): + def __init__( + self, + in_channels, + out_channels, + fc_decay=0.0004, + mid_channels=None, + return_feats=False, + **kwargs, + ): + super(CTCHead, self).__init__() + if mid_channels is None: + weight_attr, bias_attr = get_para_bias_attr( + l2_decay=fc_decay, k=in_channels + ) + self.fc = nn.Linear( + in_channels, out_channels, weight_attr=weight_attr, bias_attr=bias_attr + ) + else: + weight_attr1, bias_attr1 = get_para_bias_attr( + l2_decay=fc_decay, k=in_channels + ) + self.fc1 = nn.Linear( + in_channels, + mid_channels, + weight_attr=weight_attr1, + bias_attr=bias_attr1, + ) + + weight_attr2, bias_attr2 = get_para_bias_attr( + l2_decay=fc_decay, k=mid_channels + ) + self.fc2 = nn.Linear( + mid_channels, + out_channels, + weight_attr=weight_attr2, + bias_attr=bias_attr2, + ) + self.out_channels = out_channels + self.mid_channels = mid_channels + self.return_feats = return_feats + + def forward(self, x, targets=None): + if self.mid_channels is None: + predicts = self.fc(x) + else: + x = self.fc1(x) + predicts = self.fc2(x) + + if self.return_feats: + result = (x, predicts) + else: + result = predicts + if not self.training: + predicts = F.softmax(predicts, axis=2) + result = predicts + + return result diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py new file mode 100644 index 0000000..a62ae40 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_multi_head.py @@ -0,0 +1,152 @@ +# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
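At inference time CTCHead (above) emits per-frame class probabilities; collapsing them into a label sequence is the job of the CTC label decoder in the postprocess stage, not of this file. A minimal greedy-decoding sketch, assuming the usual convention of the blank symbol at index 0:

import numpy as np

def ctc_greedy_decode(probs, blank=0):
    # probs: [T, num_classes] per-frame probabilities from CTCHead
    ids = probs.argmax(axis=1)
    decoded, prev = [], blank
    for i in ids:
        if i != blank and i != prev:   # drop blanks, collapse repeats
            decoded.append(int(i))
        prev = i
    return decoded

probs = np.random.rand(40, 97)  # illustrative: 40 frames, 97 classes
print(ctc_greedy_decode(probs))
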
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +from paddle import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F + +from modeling.necks.rnn import ( + Im2Seq, + EncoderWithRNN, + EncoderWithFC, + SequenceEncoder, + EncoderWithSVTR, + trunc_normal_, + zeros_, +) +from .rec_ctc_head import CTCHead +from .rec_nrtr_head import Transformer + + +class FCTranspose(nn.Layer): + def __init__(self, in_channels, out_channels, only_transpose=False): + super().__init__() + self.only_transpose = only_transpose + if not self.only_transpose: + self.fc = nn.Linear(in_channels, out_channels, bias_attr=False) + + def forward(self, x): + if self.only_transpose: + return x.transpose([0, 2, 1]) + else: + return self.fc(x.transpose([0, 2, 1])) + + +class AddPos(nn.Layer): + def __init__(self, dim, w): + super().__init__() + self.dec_pos_embed = self.create_parameter( + shape=[1, w, dim], default_initializer=zeros_ + ) + self.add_parameter("dec_pos_embed", self.dec_pos_embed) + trunc_normal_(self.dec_pos_embed) + + def forward(self, x): + x = x + self.dec_pos_embed[:, : x.shape[1], :] + return x + + +class MultiHead(nn.Layer): + def __init__(self, in_channels, out_channels_list, **kwargs): + super().__init__() + self.head_list = kwargs.pop("head_list") + self.use_pool = kwargs.get("use_pool", False) + self.use_pos = kwargs.get("use_pos", False) + self.in_channels = in_channels + if self.use_pool: + self.pool = nn.AvgPool2D(kernel_size=[3, 2], stride=[3, 2], padding=0) + self.gtc_head = "sar" + assert len(self.head_list) >= 2 + for idx, head_name in enumerate(self.head_list): + name = list(head_name)[0] + if name == "SARHead": + # sar head + sar_args = self.head_list[idx][name] + self.sar_head = eval(name)( + in_channels=in_channels, + out_channels=out_channels_list["SARLabelDecode"], + **sar_args, + ) + elif name == "NRTRHead": + gtc_args = self.head_list[idx][name] + max_text_length = gtc_args.get("max_text_length", 25) + nrtr_dim = gtc_args.get("nrtr_dim", 256) + num_decoder_layers = gtc_args.get("num_decoder_layers", 4) + if self.use_pos: + self.before_gtc = nn.Sequential( + nn.Flatten(2), + FCTranspose(in_channels, nrtr_dim), + AddPos(nrtr_dim, 80), + ) + else: + self.before_gtc = nn.Sequential( + nn.Flatten(2), FCTranspose(in_channels, nrtr_dim) + ) + + self.gtc_head = Transformer( + d_model=nrtr_dim, + nhead=nrtr_dim // 32, + num_encoder_layers=-1, + beam_size=-1, + num_decoder_layers=num_decoder_layers, + max_len=max_text_length, + dim_feedforward=nrtr_dim * 4, + out_channels=out_channels_list["NRTRLabelDecode"], + ) + elif name == "CTCHead": + # ctc neck + self.encoder_reshape = Im2Seq(in_channels) + neck_args = self.head_list[idx][name]["Neck"] + encoder_type = neck_args.pop("name") + self.ctc_encoder = SequenceEncoder( + in_channels=in_channels, encoder_type=encoder_type, **neck_args + ) + # ctc head + head_args = self.head_list[idx][name]["Head"] + self.ctc_head = eval(name)( + in_channels=self.ctc_encoder.out_channels, + out_channels=out_channels_list["CTCLabelDecode"], + **head_args, + ) + else: + raise NotImplementedError( + "{} is not supported in MultiHead yet".format(name) + ) + + def forward(self, x, targets=None): + if self.use_pool: + x = self.pool( + x.reshape([0, 3, -1, self.in_channels]).transpose([0, 3, 1, 2]) + ) + ctc_encoder = self.ctc_encoder(x) + ctc_out = self.ctc_head(ctc_encoder, targets) + head_out = dict() + head_out["ctc"] = ctc_out + head_out["ctc_neck"] = 
ctc_encoder
+        # eval mode
+        if not self.training:
+            return ctc_out
+        if self.gtc_head == "sar":
+            sar_out = self.sar_head(x, targets[1:])
+            head_out["sar"] = sar_out
+        else:
+            gtc_out = self.gtc_head(self.before_gtc(x), targets[1:])
+            head_out["gtc"] = gtc_out
+        return head_out
diff --git a/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py b/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py
new file mode 100644
index 0000000..b13a849
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/modeling/heads/rec_nrtr_head.py
@@ -0,0 +1,704 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Dropout, LayerNorm
+import numpy as np
+from modeling.backbones.rec_svtrnet import Mlp, zeros_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+
+class Transformer(nn.Layer):
+    """A transformer model whose architecture is based on the paper "Attention Is
+    All You Need". Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
+    Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017.
+    Attention is all you need. In Advances in Neural Information Processing
+    Systems, pages 6000-6010.
+
+    Args:
+        d_model: the number of expected features in the encoder/decoder inputs (default=512).
+        nhead: the number of heads in the multi-head attention models (default=8).
+        num_encoder_layers: the number of sub-encoder-layers in the encoder; a value <= 0 disables the encoder (default=6).
+        beam_size: beam width for beam-search decoding; a value <= 0 selects greedy decoding (default=0).
+        num_decoder_layers: the number of sub-decoder-layers in the decoder (default=6).
+        max_len: the maximum length of a decoded sequence (default=25).
+        dim_feedforward: the dimension of the feedforward network model (default=1024).
+        attention_dropout_rate: dropout rate applied to attention weights (default=0.0).
+        residual_dropout_rate: dropout rate applied to residual branches and positional encodings (default=0.1).
+        out_channels: size of the recognition vocabulary; one extra class is added internally (default=0).
+        scale_embedding: whether to scale token embeddings by sqrt(d_model) (default=True).
+ """ + + def __init__( + self, + d_model=512, + nhead=8, + num_encoder_layers=6, + beam_size=0, + num_decoder_layers=6, + max_len=25, + dim_feedforward=1024, + attention_dropout_rate=0.0, + residual_dropout_rate=0.1, + in_channels=0, + out_channels=0, + scale_embedding=True, + ): + super(Transformer, self).__init__() + self.out_channels = out_channels + 1 + self.max_len = max_len + self.embedding = Embeddings( + d_model=d_model, + vocab=self.out_channels, + padding_idx=0, + scale_embedding=scale_embedding, + ) + self.positional_encoding = PositionalEncoding( + dropout=residual_dropout_rate, dim=d_model + ) + + if num_encoder_layers > 0: + self.encoder = nn.LayerList( + [ + TransformerBlock( + d_model, + nhead, + dim_feedforward, + attention_dropout_rate, + residual_dropout_rate, + with_self_attn=True, + with_cross_attn=False, + ) + for i in range(num_encoder_layers) + ] + ) + else: + self.encoder = None + + self.decoder = nn.LayerList( + [ + TransformerBlock( + d_model, + nhead, + dim_feedforward, + attention_dropout_rate, + residual_dropout_rate, + with_self_attn=True, + with_cross_attn=True, + ) + for i in range(num_decoder_layers) + ] + ) + + self.beam_size = beam_size + self.d_model = d_model + self.nhead = nhead + self.tgt_word_prj = nn.Linear(d_model, self.out_channels, bias_attr=False) + w0 = np.random.normal( + 0.0, d_model**-0.5, (d_model, self.out_channels) + ).astype(np.float32) + self.tgt_word_prj.weight.set_value(w0) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + xavier_normal_(m.weight) + if m.bias is not None: + zeros_(m.bias) + + def forward_train(self, src, tgt): + tgt = tgt[:, :-1] + + tgt = self.embedding(tgt) + tgt = self.positional_encoding(tgt) + tgt_mask = self.generate_square_subsequent_mask(tgt.shape[1]) + + if self.encoder is not None: + src = self.positional_encoding(src) + for encoder_layer in self.encoder: + src = encoder_layer(src) + memory = src # B N C + else: + memory = src # B N C + for decoder_layer in self.decoder: + tgt = decoder_layer(tgt, memory, self_mask=tgt_mask) + output = tgt + logit = self.tgt_word_prj(output) + return logit + + def forward(self, src, targets=None): + """Take in and process masked source/target sequences. + Args: + src: the sequence to the encoder (required). + tgt: the sequence to the decoder (required). + Shape: + - src: :math:`(B, sN, C)`. + - tgt: :math:`(B, tN, C)`. 
+ Examples: + >>> output = transformer_model(src, tgt) + """ + + if self.training: + max_len = targets[1].max() + tgt = targets[0][:, : 2 + max_len] + return self.forward_train(src, tgt) + else: + if self.beam_size > 0: + return self.forward_beam(src) + else: + return self.forward_test(src) + + def forward_test(self, src): + bs = src.shape[0] + if self.encoder is not None: + src = self.positional_encoding(src) + for encoder_layer in self.encoder: + src = encoder_layer(src) + memory = src # B N C + else: + memory = src + dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64) + dec_prob = paddle.full((bs, 1), 1.0, dtype=paddle.float32) + for len_dec_seq in range(1, paddle.to_tensor(self.max_len)): + dec_seq_embed = self.embedding(dec_seq) + dec_seq_embed = self.positional_encoding(dec_seq_embed) + tgt_mask = self.generate_square_subsequent_mask(dec_seq_embed.shape[1]) + tgt = dec_seq_embed + for decoder_layer in self.decoder: + tgt = decoder_layer(tgt, memory, self_mask=tgt_mask) + dec_output = tgt + dec_output = dec_output[:, -1, :] + word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=-1) + preds_idx = paddle.argmax(word_prob, axis=-1) + if paddle.equal_all( + preds_idx, paddle.full(preds_idx.shape, 3, dtype="int64") + ): + break + preds_prob = paddle.max(word_prob, axis=-1) + dec_seq = paddle.concat( + [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1 + ) + dec_prob = paddle.concat( + [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1 + ) + return [dec_seq, dec_prob] + + def forward_beam(self, images): + """Translation work in one batch""" + + def get_inst_idx_to_tensor_position_map(inst_idx_list): + """Indicate the position of an instance in a tensor.""" + return { + inst_idx: tensor_position + for tensor_position, inst_idx in enumerate(inst_idx_list) + } + + def collect_active_part( + beamed_tensor, curr_active_inst_idx, n_prev_active_inst, n_bm + ): + """Collect tensor parts associated to active instances.""" + + beamed_tensor_shape = beamed_tensor.shape + n_curr_active_inst = len(curr_active_inst_idx) + new_shape = ( + n_curr_active_inst * n_bm, + beamed_tensor_shape[1], + beamed_tensor_shape[2], + ) + + beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1]) + beamed_tensor = beamed_tensor.index_select(curr_active_inst_idx, axis=0) + beamed_tensor = beamed_tensor.reshape(new_shape) + + return beamed_tensor + + def collate_active_info( + src_enc, inst_idx_to_position_map, active_inst_idx_list + ): + # Sentences which are still active are collected, + # so the decoder will not run on completed sentences. 
+ + n_prev_active_inst = len(inst_idx_to_position_map) + active_inst_idx = [ + inst_idx_to_position_map[k] for k in active_inst_idx_list + ] + active_inst_idx = paddle.to_tensor(active_inst_idx, dtype="int64") + active_src_enc = collect_active_part( + src_enc.transpose([1, 0, 2]), active_inst_idx, n_prev_active_inst, n_bm + ).transpose([1, 0, 2]) + active_inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( + active_inst_idx_list + ) + return active_src_enc, active_inst_idx_to_position_map + + def beam_decode_step( + inst_dec_beams, len_dec_seq, enc_output, inst_idx_to_position_map, n_bm + ): + """Decode and update beam status, and then return active beam idx""" + + def prepare_beam_dec_seq(inst_dec_beams, len_dec_seq): + dec_partial_seq = [ + b.get_current_state() for b in inst_dec_beams if not b.done + ] + dec_partial_seq = paddle.stack(dec_partial_seq) + dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq]) + return dec_partial_seq + + def predict_word(dec_seq, enc_output, n_active_inst, n_bm): + dec_seq = self.embedding(dec_seq) + dec_seq = self.positional_encoding(dec_seq) + tgt_mask = self.generate_square_subsequent_mask(dec_seq.shape[1]) + tgt = dec_seq + for decoder_layer in self.decoder: + tgt = decoder_layer(tgt, enc_output, self_mask=tgt_mask) + dec_output = tgt + dec_output = dec_output[:, -1, :] # Pick the last step: (bh * bm) * d_h + word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) + word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1]) + return word_prob + + def collect_active_inst_idx_list( + inst_beams, word_prob, inst_idx_to_position_map + ): + active_inst_idx_list = [] + for inst_idx, inst_position in inst_idx_to_position_map.items(): + is_inst_complete = inst_beams[inst_idx].advance( + word_prob[inst_position] + ) + if not is_inst_complete: + active_inst_idx_list += [inst_idx] + + return active_inst_idx_list + + n_active_inst = len(inst_idx_to_position_map) + dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq) + word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm) + # Update the beam with predicted word prob information and collect incomplete instances + active_inst_idx_list = collect_active_inst_idx_list( + inst_dec_beams, word_prob, inst_idx_to_position_map + ) + return active_inst_idx_list + + def collect_hypothesis_and_scores(inst_dec_beams, n_best): + all_hyp, all_scores = [], [] + for inst_idx in range(len(inst_dec_beams)): + scores, tail_idxs = inst_dec_beams[inst_idx].sort_scores() + all_scores += [scores[:n_best]] + hyps = [ + inst_dec_beams[inst_idx].get_hypothesis(i) + for i in tail_idxs[:n_best] + ] + all_hyp += [hyps] + return all_hyp, all_scores + + with paddle.no_grad(): + # -- Encode + if self.encoder is not None: + src = self.positional_encoding(images) + src_enc = self.encoder(src) + else: + src_enc = images + + n_bm = self.beam_size + src_shape = src_enc.shape + inst_dec_beams = [Beam(n_bm) for _ in range(1)] + active_inst_idx_list = list(range(1)) + # Repeat data for beam search + src_enc = paddle.tile(src_enc, [1, n_bm, 1]) + inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( + active_inst_idx_list + ) + # Decode + for len_dec_seq in range(1, paddle.to_tensor(self.max_len)): + src_enc_copy = src_enc.clone() + active_inst_idx_list = beam_decode_step( + inst_dec_beams, + len_dec_seq, + src_enc_copy, + inst_idx_to_position_map, + n_bm, + ) + if not active_inst_idx_list: + break # all instances have finished their path to + src_enc, inst_idx_to_position_map = 
collate_active_info( + src_enc_copy, inst_idx_to_position_map, active_inst_idx_list + ) + batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams, 1) + result_hyp = [] + hyp_scores = [] + for bs_hyp, score in zip(batch_hyp, batch_scores): + l = len(bs_hyp[0]) + bs_hyp_pad = bs_hyp[0] + [3] * (25 - l) + result_hyp.append(bs_hyp_pad) + score = float(score) / l + hyp_score = [score for _ in range(25)] + hyp_scores.append(hyp_score) + return [ + paddle.to_tensor(np.array(result_hyp), dtype=paddle.int64), + paddle.to_tensor(hyp_scores), + ] + + def generate_square_subsequent_mask(self, sz): + """Generate a square mask for the sequence. The masked positions are filled with float('-inf'). + Unmasked positions are filled with float(0.0). + """ + mask = paddle.zeros([sz, sz], dtype="float32") + mask_inf = paddle.triu( + paddle.full(shape=[sz, sz], dtype="float32", fill_value="-inf"), diagonal=1 + ) + mask = mask + mask_inf + return mask.unsqueeze([0, 1]) + + +class MultiheadAttention(nn.Layer): + """Allows the model to jointly attend to information + from different representation subspaces. + See reference: Attention Is All You Need + + .. math:: + \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O + \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V) + + Args: + embed_dim: total dimension of the model + num_heads: parallel attention layers, or heads + + """ + + def __init__(self, embed_dim, num_heads, dropout=0.0, self_attn=False): + super(MultiheadAttention, self).__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + # self.dropout = dropout + self.head_dim = embed_dim // num_heads + assert ( + self.head_dim * num_heads == self.embed_dim + ), "embed_dim must be divisible by num_heads" + self.scale = self.head_dim**-0.5 + self.self_attn = self_attn + if self_attn: + self.qkv = nn.Linear(embed_dim, embed_dim * 3) + else: + self.q = nn.Linear(embed_dim, embed_dim) + self.kv = nn.Linear(embed_dim, embed_dim * 2) + self.attn_drop = nn.Dropout(dropout) + self.out_proj = nn.Linear(embed_dim, embed_dim) + + def forward(self, query, key=None, attn_mask=None): + qN = query.shape[1] + + if self.self_attn: + qkv = ( + self.qkv(query) + .reshape((0, qN, 3, self.num_heads, self.head_dim)) + .transpose((2, 0, 3, 1, 4)) + ) + q, k, v = qkv[0], qkv[1], qkv[2] + else: + kN = key.shape[1] + q = ( + self.q(query) + .reshape([0, qN, self.num_heads, self.head_dim]) + .transpose([0, 2, 1, 3]) + ) + kv = ( + self.kv(key) + .reshape((0, kN, 2, self.num_heads, self.head_dim)) + .transpose((2, 0, 3, 1, 4)) + ) + k, v = kv[0], kv[1] + + attn = (q.matmul(k.transpose((0, 1, 3, 2)))) * self.scale + + if attn_mask is not None: + attn += attn_mask + + attn = F.softmax(attn, axis=-1) + attn = self.attn_drop(attn) + + x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, qN, self.embed_dim)) + x = self.out_proj(x) + + return x + + +class TransformerBlock(nn.Layer): + def __init__( + self, + d_model, + nhead, + dim_feedforward=2048, + attention_dropout_rate=0.0, + residual_dropout_rate=0.1, + with_self_attn=True, + with_cross_attn=False, + epsilon=1e-5, + ): + super(TransformerBlock, self).__init__() + self.with_self_attn = with_self_attn + if with_self_attn: + self.self_attn = MultiheadAttention( + d_model, nhead, dropout=attention_dropout_rate, self_attn=with_self_attn + ) + self.norm1 = LayerNorm(d_model, epsilon=epsilon) + self.dropout1 = Dropout(residual_dropout_rate) + self.with_cross_attn = with_cross_attn + if with_cross_attn: + self.cross_attn = ( + 
MultiheadAttention( # for self_attn of encoder or cross_attn of decoder + d_model, nhead, dropout=attention_dropout_rate + ) + ) + self.norm2 = LayerNorm(d_model, epsilon=epsilon) + self.dropout2 = Dropout(residual_dropout_rate) + + self.mlp = Mlp( + in_features=d_model, + hidden_features=dim_feedforward, + act_layer=nn.ReLU, + drop=residual_dropout_rate, + ) + + self.norm3 = LayerNorm(d_model, epsilon=epsilon) + + self.dropout3 = Dropout(residual_dropout_rate) + + def forward(self, tgt, memory=None, self_mask=None, cross_mask=None): + if self.with_self_attn: + tgt1 = self.self_attn(tgt, attn_mask=self_mask) + tgt = self.norm1(tgt + self.dropout1(tgt1)) + + if self.with_cross_attn: + tgt2 = self.cross_attn(tgt, key=memory, attn_mask=cross_mask) + tgt = self.norm2(tgt + self.dropout2(tgt2)) + tgt = self.norm3(tgt + self.dropout3(self.mlp(tgt))) + return tgt + + +class PositionalEncoding(nn.Layer): + """Inject some information about the relative or absolute position of the tokens + in the sequence. The positional encodings have the same dimension as + the embeddings, so that the two can be summed. Here, we use sine and cosine + functions of different frequencies. + .. math:: + \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) + \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) + \text{where pos is the word position and i is the embed idx) + Args: + d_model: the embed dim (required). + dropout: the dropout value (default=0.1). + max_len: the max. length of the incoming sequence (default=5000). + Examples: + >>> pos_encoder = PositionalEncoding(d_model) + """ + + def __init__(self, dropout, dim, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = paddle.zeros([max_len, dim]) + position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1) + div_term = paddle.exp( + paddle.arange(0, dim, 2).astype("float32") * (-math.log(10000.0) / dim) + ) + pe[:, 0::2] = paddle.sin(position * div_term) + pe[:, 1::2] = paddle.cos(position * div_term) + pe = paddle.unsqueeze(pe, 0) + pe = paddle.transpose(pe, [1, 0, 2]) + self.register_buffer("pe", pe) + + def forward(self, x): + """Inputs of forward function + Args: + x: the sequence fed to the positional encoder model (required). + Shape: + x: [sequence length, batch size, embed dim] + output: [sequence length, batch size, embed dim] + Examples: + >>> output = pos_encoder(x) + """ + x = x.transpose([1, 0, 2]) + x = x + self.pe[: x.shape[0], :] + return self.dropout(x).transpose([1, 0, 2]) + + +class PositionalEncoding_2d(nn.Layer): + """Inject some information about the relative or absolute position of the tokens + in the sequence. The positional encodings have the same dimension as + the embeddings, so that the two can be summed. Here, we use sine and cosine + functions of different frequencies. + .. math:: + \text{PosEncoder}(pos, 2i) = sin(pos/10000^(2i/d_model)) + \text{PosEncoder}(pos, 2i+1) = cos(pos/10000^(2i/d_model)) + \text{where pos is the word position and i is the embed idx) + Args: + d_model: the embed dim (required). + dropout: the dropout value (default=0.1). + max_len: the max. length of the incoming sequence (default=5000). 
Examples:
+        >>> pos_encoder = PositionalEncoding_2d(d_model)
+    """
+
+    def __init__(self, dropout, dim, max_len=5000):
+        super(PositionalEncoding_2d, self).__init__()
+        self.dropout = nn.Dropout(p=dropout)
+
+        pe = paddle.zeros([max_len, dim])
+        position = paddle.arange(0, max_len, dtype=paddle.float32).unsqueeze(1)
+        div_term = paddle.exp(
+            paddle.arange(0, dim, 2).astype("float32") * (-math.log(10000.0) / dim)
+        )
+        pe[:, 0::2] = paddle.sin(position * div_term)
+        pe[:, 1::2] = paddle.cos(position * div_term)
+        pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2])
+        self.register_buffer("pe", pe)
+
+        self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1))
+        self.linear1 = nn.Linear(dim, dim)
+        self.avg_pool_2 = nn.AdaptiveAvgPool2D((1, 1))
+        self.linear2 = nn.Linear(dim, dim)
+        # initialise both gating layers' weights to ones with Paddle's in-place initializer
+        ones_init = nn.initializer.Constant(value=1.0)
+        ones_init(self.linear1.weight)
+        ones_init(self.linear2.weight)
+
+    def forward(self, x):
+        """Inputs of forward function
+        Args:
+            x: the 2-D feature map fed to the positional encoder model (required).
+        Shape:
+            x: [batch size, embed dim, height, width]
+            output: [height * width, batch size, embed dim]
+        Examples:
+            >>> output = pos_encoder(x)
+        """
+        w_pe = self.pe[: x.shape[-1], :]
+        w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0)
+        w_pe = w_pe * w1
+        w_pe = paddle.transpose(w_pe, [1, 2, 0])
+        w_pe = paddle.unsqueeze(w_pe, 2)
+
+        h_pe = self.pe[: x.shape[-2], :]
+        w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0)
+        h_pe = h_pe * w2
+        h_pe = paddle.transpose(h_pe, [1, 2, 0])
+        h_pe = paddle.unsqueeze(h_pe, 3)
+
+        x = x + w_pe + h_pe
+        x = paddle.transpose(
+            paddle.reshape(x, [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]),
+            [2, 0, 1],
+        )
+
+        return self.dropout(x)
+
+
+class Embeddings(nn.Layer):
+    def __init__(self, d_model, vocab, padding_idx=None, scale_embedding=True):
+        super(Embeddings, self).__init__()
+        self.embedding = nn.Embedding(vocab, d_model, padding_idx=padding_idx)
+        w0 = np.random.normal(0.0, d_model**-0.5, (vocab, d_model)).astype(np.float32)
+        self.embedding.weight.set_value(w0)
+        self.d_model = d_model
+        self.scale_embedding = scale_embedding
+
+    def forward(self, x):
+        if self.scale_embedding:
+            x = self.embedding(x)
+            return x * math.sqrt(self.d_model)
+        return self.embedding(x)
+
+
+class Beam:
+    """Beam search"""
+
+    def __init__(self, size, device=False):
+        self.size = size
+        self._done = False
+        # The score for each translation on the beam.
+        self.scores = paddle.zeros((size,), dtype=paddle.float32)
+        self.all_scores = []
+        # The backpointers at each time-step.
+        self.prev_ks = []
+        # The outputs at each time-step.
+        self.next_ys = [paddle.full((size,), 0, dtype=paddle.int64)]
+        self.next_ys[0][0] = 2
+
+    def get_current_state(self):
+        "Get the outputs for the current timestep."
+        return self.get_tentative_hypothesis()
+
+    def get_current_origin(self):
+        "Get the backpointers for the current timestep."
+        return self.prev_ks[-1]
+
+    @property
+    def done(self):
+        return self._done
+
+    def advance(self, word_prob):
+        "Update beam status and check if finished or not."
+        num_words = word_prob.shape[1]
+
+        # Sum the previous scores.
+ if len(self.prev_ks) > 0: + beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) + else: + beam_lk = word_prob[0] + + flat_beam_lk = beam_lk.reshape([-1]) + best_scores, best_scores_id = flat_beam_lk.topk( + self.size, 0, True, True + ) # 1st sort + self.all_scores.append(self.scores) + self.scores = best_scores + # bestScoresId is flattened as a (beam x word) array, + # so we need to calculate which word and beam each score came from + prev_k = best_scores_id // num_words + self.prev_ks.append(prev_k) + self.next_ys.append(best_scores_id - prev_k * num_words) + # End condition is when top-of-beam is EOS. + if self.next_ys[-1][0] == 3: + self._done = True + self.all_scores.append(self.scores) + + return self._done + + def sort_scores(self): + "Sort the scores." + return self.scores, paddle.to_tensor( + [i for i in range(int(self.scores.shape[0]))], dtype="int32" + ) + + def get_the_best_score_and_idx(self): + "Get the score of the best in the beam." + scores, ids = self.sort_scores() + return scores[1], ids[1] + + def get_tentative_hypothesis(self): + "Get the decoded sequence for the current timestep." + if len(self.next_ys) == 1: + dec_seq = self.next_ys[0].unsqueeze(1) + else: + _, keys = self.sort_scores() + hyps = [self.get_hypothesis(k) for k in keys] + hyps = [[2] + h for h in hyps] + dec_seq = paddle.to_tensor(hyps, dtype="int64") + return dec_seq + + def get_hypothesis(self, k): + """Walk back to construct the full hypothesis.""" + hyp = [] + for j in range(len(self.prev_ks) - 1, -1, -1): + hyp.append(self.next_ys[j + 1][k]) + k = self.prev_ks[j][k] + return list(map(lambda x: x.item(), hyp[::-1])) diff --git a/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py b/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py new file mode 100644 index 0000000..4202283 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/heads/table_att_head.py @@ -0,0 +1,413 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
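Beam (above) keeps one token list (next_ys) and one backpointer list (prev_ks) per decoding step; get_hypothesis rebuilds a sequence by walking the backpointers from the last step to the first. A framework-free trace of that walk, with invented toy values:

prev_ks = [[0, 0], [1, 0]]              # step t -> which beam each candidate extended
next_ys = [[2, 0], [5, 7], [9, 4]]      # step t -> token chosen per beam (step 0 holds SOS=2)

def get_hypothesis(k):
    hyp = []
    for j in range(len(prev_ks) - 1, -1, -1):
        hyp.append(next_ys[j + 1][k])
        k = prev_ks[j][k]
    return hyp[::-1]

print(get_hypothesis(0))  # [7, 9]: beam 0's last token traces back through beam 1
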
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import paddle +import paddle.nn as nn +from paddle import ParamAttr +import paddle.nn.functional as F +import numpy as np + +from .rec_att_head import AttentionGRUCell +from modeling.backbones.rec_svtrnet import DropPath, Identity, Mlp + + +def get_para_bias_attr(l2_decay, k): + if l2_decay > 0: + regularizer = paddle.regularizer.L2Decay(l2_decay) + stdv = 1.0 / math.sqrt(k * 1.0) + initializer = nn.initializer.Uniform(-stdv, stdv) + else: + regularizer = None + initializer = None + weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer) + bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer) + return [weight_attr, bias_attr] + + +class TableAttentionHead(nn.Layer): + def __init__( + self, + in_channels, + hidden_size, + in_max_len=488, + max_text_length=800, + out_channels=30, + loc_reg_num=4, + **kwargs, + ): + super(TableAttentionHead, self).__init__() + self.input_size = in_channels[-1] + self.hidden_size = hidden_size + self.out_channels = out_channels + self.max_text_length = max_text_length + + self.structure_attention_cell = AttentionGRUCell( + self.input_size, hidden_size, self.out_channels, use_gru=False + ) + self.structure_generator = nn.Linear(hidden_size, self.out_channels) + self.in_max_len = in_max_len + + if self.in_max_len == 640: + self.loc_fea_trans = nn.Linear(400, self.max_text_length + 1) + elif self.in_max_len == 800: + self.loc_fea_trans = nn.Linear(625, self.max_text_length + 1) + else: + self.loc_fea_trans = nn.Linear(256, self.max_text_length + 1) + self.loc_generator = nn.Linear(self.input_size + hidden_size, loc_reg_num) + + def _char_to_onehot(self, input_char, onehot_dim): + input_ont_hot = F.one_hot(input_char, onehot_dim) + return input_ont_hot + + def forward(self, inputs, targets=None): + # if and else branch are both needed when you want to assign a variable + # if you modify the var in just one branch, then the modification will not work. 
+        fea = inputs[-1]
+        last_shape = int(np.prod(fea.shape[2:]))  # flatten the spatial dims
+        fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
+        fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
+        batch_size = fea.shape[0]
+
+        hidden = paddle.zeros((batch_size, self.hidden_size))
+        output_hiddens = paddle.zeros(
+            (batch_size, self.max_text_length + 1, self.hidden_size)
+        )
+        if self.training and targets is not None:
+            structure = targets[0]
+            for i in range(self.max_text_length + 1):
+                elem_onehots = self._char_to_onehot(
+                    structure[:, i], onehot_dim=self.out_channels
+                )
+                (outputs, hidden), alpha = self.structure_attention_cell(
+                    hidden, fea, elem_onehots
+                )
+                output_hiddens[:, i, :] = outputs
+            structure_probs = self.structure_generator(output_hiddens)
+            loc_fea = fea.transpose([0, 2, 1])
+            loc_fea = self.loc_fea_trans(loc_fea)
+            loc_fea = loc_fea.transpose([0, 2, 1])
+            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
+            loc_preds = self.loc_generator(loc_concat)
+            loc_preds = F.sigmoid(loc_preds)
+        else:
+            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
+            structure_probs = None
+            loc_preds = None
+            elem_onehots = None
+            outputs = None
+            alpha = None
+            max_text_length = paddle.to_tensor(self.max_text_length)
+            for i in range(max_text_length + 1):
+                elem_onehots = self._char_to_onehot(
+                    temp_elem, onehot_dim=self.out_channels
+                )
+                (outputs, hidden), alpha = self.structure_attention_cell(
+                    hidden, fea, elem_onehots
+                )
+                output_hiddens[:, i, :] = outputs
+                structure_probs_step = self.structure_generator(outputs)
+                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
+
+            structure_probs = self.structure_generator(output_hiddens)
+            structure_probs = F.softmax(structure_probs)
+            loc_fea = fea.transpose([0, 2, 1])
+            loc_fea = self.loc_fea_trans(loc_fea)
+            loc_fea = loc_fea.transpose([0, 2, 1])
+            loc_concat = paddle.concat([output_hiddens, loc_fea], axis=2)
+            loc_preds = self.loc_generator(loc_concat)
+            loc_preds = F.sigmoid(loc_preds)
+        return {"structure_probs": structure_probs, "loc_preds": loc_preds}
+
+
+class HWAttention(nn.Layer):
+    def __init__(
+        self,
+        head_dim=32,
+        qk_scale=None,
+        attn_drop=0.0,
+    ):
+        super().__init__()
+        self.head_dim = head_dim
+        self.scale = qk_scale or self.head_dim**-0.5
+        self.attn_drop = nn.Dropout(attn_drop)
+
+    def forward(self, x):
+        B, N, C = x.shape
+        C = C // 3
+        qkv = x.reshape([B, N, 3, C // self.head_dim, self.head_dim]).transpose(
+            [2, 0, 3, 1, 4]
+        )
+        q, k, v = qkv.unbind(0)
+        attn = q @ k.transpose([0, 1, 3, 2]) * self.scale
+        attn = F.softmax(attn, -1)
+        attn = self.attn_drop(attn)
+        x = attn @ v
+        # x is [B, num_heads, N, head_dim]; the permutation must cover all four axes
+        x = x.transpose([0, 2, 1, 3]).reshape([B, N, C])
+        return x
+
+
+def img2windows(img, H_sp, W_sp):
+    """
+    img: B H W C
+    """
+    B, H, W, C = img.shape
+    img_reshape = img.reshape([B, H // H_sp, H_sp, W // W_sp, W_sp, C])
+    img_perm = img_reshape.transpose([0, 1, 3, 2, 4, 5]).reshape([-1, H_sp * W_sp, C])
+    return img_perm
+
+
+def windows2img(img_splits_hw, H_sp, W_sp, H, W):
+    """
+    img_splits_hw: B' H W C
+    """
+    B = int(img_splits_hw.shape[0] / (H * W / H_sp / W_sp))
+
+    img = img_splits_hw.reshape([B, H // H_sp, W // W_sp, H_sp, W_sp, -1])
+    img = img.transpose([0, 1, 3, 2, 4, 5]).flatten(1, 4)
+    return img
+
+
+class Block(nn.Layer):
+    def __init__(
+        self,
+        dim,
+        num_heads,
+        split_h=4,
+        split_w=4,
+        h_num_heads=None,
+        w_num_heads=None,
+        mlp_ratio=4.0,
+        qkv_bias=False,
+        qk_scale=None,
+        drop=0.0,
+        attn_drop=0.0,
+        drop_path=0.0,
+        act_layer=nn.GELU,
+        norm_layer=nn.LayerNorm,
eps=1e-6, + ): + super().__init__() + self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias) + self.proj = nn.Linear(dim, dim) + self.split_h = split_h + self.split_w = split_w + mlp_hidden_dim = int(dim * mlp_ratio) + self.norm1 = norm_layer(dim, epsilon=eps) + self.h_num_heads = h_num_heads if h_num_heads is not None else num_heads // 2 + self.w_num_heads = w_num_heads if w_num_heads is not None else num_heads // 2 + self.head_dim = dim // num_heads + self.mixer = HWAttention( + head_dim=dim // num_heads, + qk_scale=qk_scale, + attn_drop=attn_drop, + ) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() + self.norm2 = norm_layer(dim, epsilon=eps) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop, + ) + + def forward(self, x): + B, C, H, W = x.shape + x = x.flatten(2).transpose([0, 2, 1]) + + qkv = self.qkv(x).reshape([B, H, W, 3 * C]) + + x1 = qkv[:, :, :, : 3 * self.h_num_heads * self.head_dim] # b, h, w, 3ch + x2 = qkv[:, :, :, 3 * self.h_num_heads * self.head_dim :] # b, h, w, 3cw + + x1 = self.mixer(img2windows(x1, self.split_h, W)) # b*splith, W, 3ch + x2 = self.mixer(img2windows(x2, H, self.split_w)) # b*splitw, h, 3ch + x1 = windows2img(x1, self.split_h, W, H, W) + x2 = windows2img(x2, H, self.split_w, H, W) + + attened_x = paddle.concat([x1, x2], 2) + attened_x = self.proj(attened_x) + + x = self.norm1(x + self.drop_path(attened_x)) + x = self.norm2(x + self.drop_path(self.mlp(x))) + x = x.transpose([0, 2, 1]).reshape([-1, C, H, W]) + return x + + +class SLAHead(nn.Layer): + def __init__( + self, + in_channels, + hidden_size, + out_channels=30, + max_text_length=500, + loc_reg_num=4, + fc_decay=0.0, + use_attn=False, + **kwargs, + ): + """ + @param in_channels: input shape + @param hidden_size: hidden_size for RNN and Embedding + @param out_channels: num_classes to rec + @param max_text_length: max text pred + """ + super().__init__() + in_channels = in_channels[-1] + self.hidden_size = hidden_size + self.max_text_length = max_text_length + self.emb = self._char_to_onehot + self.num_embeddings = out_channels + self.loc_reg_num = loc_reg_num + self.eos = self.num_embeddings - 1 + + # structure + self.structure_attention_cell = AttentionGRUCell( + in_channels, hidden_size, self.num_embeddings + ) + weight_attr, bias_attr = get_para_bias_attr(l2_decay=fc_decay, k=hidden_size) + weight_attr1_1, bias_attr1_1 = get_para_bias_attr( + l2_decay=fc_decay, k=hidden_size + ) + weight_attr1_2, bias_attr1_2 = get_para_bias_attr( + l2_decay=fc_decay, k=hidden_size + ) + self.structure_generator = nn.Sequential( + nn.Linear( + self.hidden_size, + self.hidden_size, + weight_attr=weight_attr1_2, + bias_attr=bias_attr1_2, + ), + nn.Linear( + hidden_size, out_channels, weight_attr=weight_attr, bias_attr=bias_attr + ), + ) + dpr = np.linspace(0, 0.1, 2) + + self.use_attn = use_attn + if use_attn: + layer_list = [ + Block( + in_channels, + num_heads=2, + mlp_ratio=4.0, + qkv_bias=True, + drop_path=dpr[i], + ) + for i in range(2) + ] + self.cross_atten = nn.Sequential(*layer_list) + # loc + weight_attr1, bias_attr1 = get_para_bias_attr( + l2_decay=fc_decay, k=self.hidden_size + ) + weight_attr2, bias_attr2 = get_para_bias_attr( + l2_decay=fc_decay, k=self.hidden_size + ) + self.loc_generator = nn.Sequential( + nn.Linear( + self.hidden_size, + self.hidden_size, + weight_attr=weight_attr1, + bias_attr=bias_attr1, + ), + nn.Linear( + self.hidden_size, + loc_reg_num, + weight_attr=weight_attr2, + bias_attr=bias_attr2, + ), + 
nn.Sigmoid(), + ) + + def forward(self, inputs, targets=None): + fea = inputs[-1] + batch_size = fea.shape[0] + if self.use_attn: + fea = fea + self.cross_atten(fea) + # reshape + fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], -1]) + fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels) + + hidden = paddle.zeros((batch_size, self.hidden_size)) + structure_preds = paddle.zeros( + (batch_size, self.max_text_length + 1, self.num_embeddings) + ) + loc_preds = paddle.zeros( + (batch_size, self.max_text_length + 1, self.loc_reg_num) + ) + structure_preds.stop_gradient = True + loc_preds.stop_gradient = True + + if self.training and targets is not None: + structure = targets[0] + max_len = targets[-2].max() + for i in range(max_len + 1): + hidden, structure_step, loc_step = self._decode( + structure[:, i], fea, hidden + ) + structure_preds[:, i, :] = structure_step + loc_preds[:, i, :] = loc_step + structure_preds = structure_preds[:, : max_len + 1] + loc_preds = loc_preds[:, : max_len + 1] + else: + structure_ids = paddle.zeros( + (batch_size, self.max_text_length + 1), dtype=paddle.int64 + ) + pre_chars = paddle.zeros(shape=[batch_size], dtype="int32") + max_text_length = paddle.to_tensor(self.max_text_length) + # for export + loc_step, structure_step = None, None + for i in range(max_text_length + 1): + hidden, structure_step, loc_step = self._decode(pre_chars, fea, hidden) + pre_chars = structure_step.argmax(axis=1, dtype="int32") + structure_preds[:, i, :] = structure_step + loc_preds[:, i, :] = loc_step + + structure_ids[:, i] = pre_chars + if (structure_ids == self.eos).any(-1).all(): + break + if not self.training: + structure_preds = F.softmax(structure_preds[:, : i + 1]) + loc_preds = loc_preds[:, : i + 1] + return {"structure_probs": structure_preds, "loc_preds": loc_preds} + + def _decode(self, pre_chars, features, hidden): + """ + Predict table label and coordinates for each step + @param pre_chars: Table label in previous step + @param features: + @param hidden: hidden status in previous step + @return: + """ + emb_feature = self.emb(pre_chars) + # output shape is b * self.hidden_size + (output, hidden), alpha = self.structure_attention_cell( + hidden, features, emb_feature + ) + + # structure + structure_step = self.structure_generator(output) + # loc + loc_step = self.loc_generator(output) + return hidden, structure_step, loc_step + + def _char_to_onehot(self, input_char): + input_ont_hot = F.one_hot(input_char, self.num_embeddings) + return input_ont_hot diff --git a/docling_ibm_models/slanet_1m/modeling/necks/__init__.py b/docling_ibm_models/slanet_1m/modeling/necks/__init__.py new file mode 100644 index 0000000..ef501f0 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/necks/__init__.py @@ -0,0 +1,31 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
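SLAHead (above) decodes autoregressively: each step feeds the previous structure token through the shared AttentionGRUCell, and the two generator branches emit structure logits plus a sigmoid-normalised box; at inference the loop exits early once every sequence in the batch has produced the EOS id. A usage sketch with a random feature map (sizes chosen small purely for illustration):

import paddle
from modeling.heads.table_att_head import SLAHead

head = SLAHead(in_channels=[64], hidden_size=64, out_channels=30, max_text_length=20)
head.eval()
feats = paddle.randn([1, 64, 12, 12])   # stand-in for the last neck output
with paddle.no_grad():
    out = head([feats])                 # the head reads inputs[-1]
print(out["structure_probs"].shape)     # [1, steps, 30] -- softmax over structure tokens
print(out["loc_preds"].shape)           # [1, steps, 4]  -- one normalised box per token
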
+ +__all__ = ["build_neck"] + + +def build_neck(config): + from .csp_pan import CSPPAN + + support_dict = [ + "CSPPAN", + ] + + module_name = config.pop("name") + assert module_name in support_dict, Exception( + "neck only support {}".format(support_dict) + ) + + module_class = eval(module_name)(**config) + return module_class diff --git a/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py b/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py new file mode 100644 index 0000000..5e8464d --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/necks/csp_pan.py @@ -0,0 +1,337 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The code is based on: +# https://github.com/PaddlePaddle/PaddleDetection/blob/release%2F2.3/ppdet/modeling/necks/csp_pan.py + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle import ParamAttr + +__all__ = ["CSPPAN"] + + +class ConvBNLayer(nn.Layer): + def __init__( + self, + in_channel=96, + out_channel=96, + kernel_size=3, + stride=1, + groups=1, + act="leaky_relu", + ): + super(ConvBNLayer, self).__init__() + initializer = nn.initializer.KaimingUniform() + self.act = act + assert self.act in ["leaky_relu", "hard_swish"] + self.conv = nn.Conv2D( + in_channels=in_channel, + out_channels=out_channel, + kernel_size=kernel_size, + groups=groups, + padding=(kernel_size - 1) // 2, + stride=stride, + weight_attr=ParamAttr(initializer=initializer), + bias_attr=False, + ) + self.bn = nn.BatchNorm2D(out_channel) + + def forward(self, x): + x = self.bn(self.conv(x)) + if self.act == "leaky_relu": + x = F.leaky_relu(x) + elif self.act == "hard_swish": + x = F.hardswish(x) + return x + + +class DPModule(nn.Layer): + """ + Depth-wise and point-wise module. + Args: + in_channel (int): The input channels of this Module. + out_channel (int): The output channels of this Module. + kernel_size (int): The conv2d kernel size of this Module. + stride (int): The conv2d's stride of this Module. + act (str): The activation function of this Module, + Now support `leaky_relu` and `hard_swish`. 
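+        Note: the depthwise conv (groups=out_channel) filters each channel
+        spatially, and the following 1x1 point-wise conv mixes information
+        across channels; both are followed by BN and the chosen activation.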
+    """
+
+    def __init__(
+        self, in_channel=96, out_channel=96, kernel_size=3, stride=1, act="leaky_relu"
+    ):
+        super(DPModule, self).__init__()
+        initializer = nn.initializer.KaimingUniform()
+        self.act = act
+        self.dwconv = nn.Conv2D(
+            in_channels=in_channel,
+            out_channels=out_channel,
+            kernel_size=kernel_size,
+            groups=out_channel,
+            padding=(kernel_size - 1) // 2,
+            stride=stride,
+            weight_attr=ParamAttr(initializer=initializer),
+            bias_attr=False,
+        )
+        self.bn1 = nn.BatchNorm2D(out_channel)
+        self.pwconv = nn.Conv2D(
+            in_channels=out_channel,
+            out_channels=out_channel,
+            kernel_size=1,
+            groups=1,
+            padding=0,
+            weight_attr=ParamAttr(initializer=initializer),
+            bias_attr=False,
+        )
+        self.bn2 = nn.BatchNorm2D(out_channel)
+
+    def act_func(self, x):
+        if self.act == "leaky_relu":
+            x = F.leaky_relu(x)
+        elif self.act == "hard_swish":
+            x = F.hardswish(x)
+        return x
+
+    def forward(self, x):
+        x = self.act_func(self.bn1(self.dwconv(x)))
+        x = self.act_func(self.bn2(self.pwconv(x)))
+        return x
+
+
+class DarknetBottleneck(nn.Layer):
+    """The basic bottleneck block used in Darknet.
+    Each block consists of two ConvModules, and the input is added to the
+    final output. Each ConvModule is composed of Conv, BN, and act.
+    The first conv layer has a 1x1 kernel and the second one a
+    kernel_size x kernel_size kernel.
+    Args:
+        in_channels (int): The input channels of this Module.
+        out_channels (int): The output channels of this Module.
+        kernel_size (int): The conv2d kernel size of the second conv.
+            Default: 3
+        expansion (float): Ratio used to size the hidden layer,
+            hidden_channels = int(out_channels * expansion). Default: 0.5
+        add_identity (bool): Whether to add the identity to the output.
+            Default: True
+        use_depthwise (bool): Whether to use a depthwise separable convolution.
+            Default: False
+    """
+
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size=3,
+        expansion=0.5,
+        add_identity=True,
+        use_depthwise=False,
+        act="leaky_relu",
+    ):
+        super(DarknetBottleneck, self).__init__()
+        hidden_channels = int(out_channels * expansion)
+        conv_func = DPModule if use_depthwise else ConvBNLayer
+        self.conv1 = ConvBNLayer(
+            in_channel=in_channels, out_channel=hidden_channels, kernel_size=1, act=act
+        )
+        self.conv2 = conv_func(
+            in_channel=hidden_channels,
+            out_channel=out_channels,
+            kernel_size=kernel_size,
+            stride=1,
+            act=act,
+        )
+        self.add_identity = add_identity and in_channels == out_channels
+
+    def forward(self, x):
+        identity = x
+        out = self.conv1(x)
+        out = self.conv2(out)
+
+        if self.add_identity:
+            return out + identity
+        else:
+            return out
+
+
+class CSPLayer(nn.Layer):
+    """Cross Stage Partial Layer.
+    Args:
+        in_channels (int): The input channels of the CSP layer.
+        out_channels (int): The output channels of the CSP layer.
+        expand_ratio (float): Ratio to adjust the number of channels of the
+            hidden layer. Default: 0.5
+        num_blocks (int): Number of blocks. Default: 1
+        add_identity (bool): Whether to add identity in blocks.
+            Default: True
+        use_depthwise (bool): Whether to use depthwise separable convolutions
+            in blocks.
Default: False + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + expand_ratio=0.5, + num_blocks=1, + add_identity=True, + use_depthwise=False, + act="leaky_relu", + ): + super().__init__() + mid_channels = int(out_channels * expand_ratio) + self.main_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act) + self.short_conv = ConvBNLayer(in_channels, mid_channels, 1, act=act) + self.final_conv = ConvBNLayer(2 * mid_channels, out_channels, 1, act=act) + + self.blocks = nn.Sequential( + *[ + DarknetBottleneck( + mid_channels, + mid_channels, + kernel_size, + 1.0, + add_identity, + use_depthwise, + act=act, + ) + for _ in range(num_blocks) + ] + ) + + def forward(self, x): + x_short = self.short_conv(x) + + x_main = self.main_conv(x) + x_main = self.blocks(x_main) + + x_final = paddle.concat((x_main, x_short), axis=1) + return self.final_conv(x_final) + + +class Channel_T(nn.Layer): + def __init__(self, in_channels=[116, 232, 464], out_channels=96, act="leaky_relu"): + super(Channel_T, self).__init__() + self.convs = nn.LayerList() + for i in range(len(in_channels)): + self.convs.append(ConvBNLayer(in_channels[i], out_channels, 1, act=act)) + + def forward(self, x): + outs = [self.convs[i](x[i]) for i in range(len(x))] + return outs + + +class CSPPAN(nn.Layer): + """Path Aggregation Network with CSP module. + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale) + kernel_size (int): The conv2d kernel size of this Module. + num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 1 + use_depthwise (bool): Whether to depthwise separable convolution in + blocks. Default: True + """ + + def __init__( + self, + in_channels, + out_channels, + kernel_size=5, + num_csp_blocks=1, + use_depthwise=True, + act="hard_swish", + ): + super(CSPPAN, self).__init__() + self.in_channels = in_channels + self.out_channels = [out_channels] * len(in_channels) + conv_func = DPModule if use_depthwise else ConvBNLayer + + self.conv_t = Channel_T(in_channels, out_channels, act=act) + + # build top-down blocks + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + self.top_down_blocks = nn.LayerList() + for idx in range(len(in_channels) - 1, 0, -1): + self.top_down_blocks.append( + CSPLayer( + out_channels * 2, + out_channels, + kernel_size=kernel_size, + num_blocks=num_csp_blocks, + add_identity=False, + use_depthwise=use_depthwise, + act=act, + ) + ) + + # build bottom-up blocks + self.downsamples = nn.LayerList() + self.bottom_up_blocks = nn.LayerList() + for idx in range(len(in_channels) - 1): + self.downsamples.append( + conv_func( + out_channels, + out_channels, + kernel_size=kernel_size, + stride=2, + act=act, + ) + ) + self.bottom_up_blocks.append( + CSPLayer( + out_channels * 2, + out_channels, + kernel_size=kernel_size, + num_blocks=num_csp_blocks, + add_identity=False, + use_depthwise=use_depthwise, + act=act, + ) + ) + + def forward(self, inputs): + """ + Args: + inputs (tuple[Tensor]): input features. + Returns: + tuple[Tensor]: CSPPAN features. 
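+            Each returned scale keeps the spatial size of its input feature
+            and has `out_channels` channels.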
+ """ + assert len(inputs) == len(self.in_channels) + inputs = self.conv_t(inputs) + + # top-down path + inner_outs = [inputs[-1]] + for idx in range(len(self.in_channels) - 1, 0, -1): + feat_heigh = inner_outs[0] + feat_low = inputs[idx - 1] + upsample_feat = F.upsample( + feat_heigh, size=feat_low.shape[2:4], mode="nearest" + ) + + inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx]( + paddle.concat([upsample_feat, feat_low], 1) + ) + inner_outs.insert(0, inner_out) + + # bottom-up path + outs = [inner_outs[0]] + for idx in range(len(self.in_channels) - 1): + feat_low = outs[-1] + feat_height = inner_outs[idx + 1] + downsample_feat = self.downsamples[idx](feat_low) + out = self.bottom_up_blocks[idx]( + paddle.concat([downsample_feat, feat_height], 1) + ) + outs.append(out) + + return tuple(outs) diff --git a/docling_ibm_models/slanet_1m/modeling/necks/rnn.py b/docling_ibm_models/slanet_1m/modeling/necks/rnn.py new file mode 100644 index 0000000..7f50319 --- /dev/null +++ b/docling_ibm_models/slanet_1m/modeling/necks/rnn.py @@ -0,0 +1,284 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +from paddle import nn + +from modeling.heads.rec_ctc_head import get_para_bias_attr +from modeling.backbones.rec_svtrnet import ( + Block, + ConvBNLayer, + trunc_normal_, + zeros_, + ones_, +) + + +class Im2Seq(nn.Layer): + def __init__(self, in_channels, **kwargs): + super().__init__() + self.out_channels = in_channels + + def forward(self, x): + B, C, H, W = x.shape + assert H == 1 + x = x.squeeze(axis=2) + x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels) + return x + + +class EncoderWithRNN(nn.Layer): + def __init__(self, in_channels, hidden_size): + super(EncoderWithRNN, self).__init__() + self.out_channels = hidden_size * 2 + self.lstm = nn.LSTM( + in_channels, hidden_size, direction="bidirectional", num_layers=2 + ) + + def forward(self, x): + x, _ = self.lstm(x) + return x + + +class BidirectionalLSTM(nn.Layer): + def __init__( + self, + input_size, + hidden_size, + output_size=None, + num_layers=1, + dropout=0, + direction=False, + time_major=False, + with_linear=False, + ): + super(BidirectionalLSTM, self).__init__() + self.with_linear = with_linear + self.rnn = nn.LSTM( + input_size, + hidden_size, + num_layers=num_layers, + dropout=dropout, + direction=direction, + time_major=time_major, + ) + + # text recognition the specified structure LSTM with linear + if self.with_linear: + self.linear = nn.Linear(hidden_size * 2, output_size) + + def forward(self, input_feature): + recurrent, _ = self.rnn( + input_feature + ) # batch_size x T x input_size -> batch_size x T x (2*hidden_size) + if self.with_linear: + output = self.linear(recurrent) # batch_size x T x output_size + return output + return recurrent + + +class EncoderWithCascadeRNN(nn.Layer): + def __init__( + self, in_channels, 
hidden_size, out_channels, num_layers=2, with_linear=False + ): + super(EncoderWithCascadeRNN, self).__init__() + self.out_channels = out_channels[-1] + self.encoder = nn.LayerList( + [ + BidirectionalLSTM( + in_channels if i == 0 else out_channels[i - 1], + hidden_size, + output_size=out_channels[i], + num_layers=1, + direction="bidirectional", + with_linear=with_linear, + ) + for i in range(num_layers) + ] + ) + + def forward(self, x): + for i, l in enumerate(self.encoder): + x = l(x) + return x + + +class EncoderWithFC(nn.Layer): + def __init__(self, in_channels, hidden_size): + super(EncoderWithFC, self).__init__() + self.out_channels = hidden_size + weight_attr, bias_attr = get_para_bias_attr(l2_decay=0.00001, k=in_channels) + self.fc = nn.Linear( + in_channels, + hidden_size, + weight_attr=weight_attr, + bias_attr=bias_attr, + name="reduce_encoder_fea", + ) + + def forward(self, x): + x = self.fc(x) + return x + + +class EncoderWithSVTR(nn.Layer): + def __init__( + self, + in_channels, + dims=64, # XS + depth=2, + hidden_dims=120, + use_guide=False, + num_heads=8, + qkv_bias=True, + mlp_ratio=2.0, + drop_rate=0.1, + attn_drop_rate=0.1, + drop_path=0.0, + kernel_size=[3, 3], + qk_scale=None, + ): + super(EncoderWithSVTR, self).__init__() + self.depth = depth + self.use_guide = use_guide + self.conv1 = ConvBNLayer( + in_channels, + in_channels // 8, + kernel_size=kernel_size, + padding=[kernel_size[0] // 2, kernel_size[1] // 2], + act=nn.Swish, + ) + self.conv2 = ConvBNLayer( + in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish + ) + + self.svtr_block = nn.LayerList( + [ + Block( + dim=hidden_dims, + num_heads=num_heads, + mixer="Global", + HW=None, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + act_layer=nn.Swish, + attn_drop=attn_drop_rate, + drop_path=drop_path, + norm_layer="nn.LayerNorm", + epsilon=1e-05, + prenorm=False, + ) + for i in range(depth) + ] + ) + self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6) + self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act=nn.Swish) + # last conv-nxn, the input is concat of input tensor and conv3 output tensor + self.conv4 = ConvBNLayer( + 2 * in_channels, + in_channels // 8, + kernel_size=kernel_size, + padding=[kernel_size[0] // 2, kernel_size[1] // 2], + act=nn.Swish, + ) + + self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act=nn.Swish) + self.out_channels = dims + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight) + if isinstance(m, nn.Linear) and m.bias is not None: + zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + zeros_(m.bias) + ones_(m.weight) + + def forward(self, x): + # for use guide + if self.use_guide: + z = x.clone() + z.stop_gradient = True + else: + z = x + # for short cut + h = z + # reduce dim + z = self.conv1(z) + z = self.conv2(z) + # SVTR global block + B, C, H, W = z.shape + z = z.flatten(2).transpose([0, 2, 1]) + for blk in self.svtr_block: + z = blk(z) + z = self.norm(z) + # last stage + z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2]) + z = self.conv3(z) + z = paddle.concat((h, z), axis=1) + z = self.conv1x1(self.conv4(z)) + return z + + +class SequenceEncoder(nn.Layer): + def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs): + super(SequenceEncoder, self).__init__() + self.encoder_reshape = Im2Seq(in_channels) + self.out_channels = self.encoder_reshape.out_channels + self.encoder_type = encoder_type + if encoder_type == "reshape": + 
self.only_reshape = True + else: + support_encoder_dict = { + "reshape": Im2Seq, + "fc": EncoderWithFC, + "rnn": EncoderWithRNN, + "svtr": EncoderWithSVTR, + "cascadernn": EncoderWithCascadeRNN, + } + assert encoder_type in support_encoder_dict, "{} must in {}".format( + encoder_type, support_encoder_dict.keys() + ) + if encoder_type == "svtr": + self.encoder = support_encoder_dict[encoder_type]( + self.encoder_reshape.out_channels, **kwargs + ) + elif encoder_type == "cascadernn": + self.encoder = support_encoder_dict[encoder_type]( + self.encoder_reshape.out_channels, hidden_size, **kwargs + ) + else: + self.encoder = support_encoder_dict[encoder_type]( + self.encoder_reshape.out_channels, hidden_size + ) + self.out_channels = self.encoder.out_channels + self.only_reshape = False + + def forward(self, x): + if self.encoder_type != "svtr": + x = self.encoder_reshape(x) + if not self.only_reshape: + x = self.encoder(x) + return x + else: + x = self.encoder(x) + x = self.encoder_reshape(x) + return x diff --git a/docling_ibm_models/slanet_1m/optimizer/__init__.py b/docling_ibm_models/slanet_1m/optimizer/__init__.py new file mode 100644 index 0000000..a191a4b --- /dev/null +++ b/docling_ibm_models/slanet_1m/optimizer/__init__.py @@ -0,0 +1,66 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import copy +import paddle + +__all__ = ["build_optimizer"] + + +def build_lr_scheduler(lr_config, epochs, step_each_epoch): + from . import learning_rate + + lr_config.update({"epochs": epochs, "step_each_epoch": step_each_epoch}) + lr_name = lr_config.pop("name", "Const") + lr = getattr(learning_rate, lr_name)(**lr_config)() + return lr + + +def build_optimizer(config, epochs, step_each_epoch, model): + from . 
import regularizer, optimizer
+
+    config = copy.deepcopy(config)
+    # step1 build lr
+    lr = build_lr_scheduler(config.pop("lr"), epochs, step_each_epoch)
+
+    # step2 build regularization
+    if "regularizer" in config and config["regularizer"] is not None:
+        reg_config = config.pop("regularizer")
+        reg_name = reg_config.pop("name")
+        if not hasattr(regularizer, reg_name):
+            reg_name += "Decay"
+        reg = getattr(regularizer, reg_name)(**reg_config)()
+    elif "weight_decay" in config:
+        reg = config.pop("weight_decay")
+    else:
+        reg = None
+
+    # step3 build optimizer
+    optim_name = config.pop("name")
+    if "clip_norm" in config:
+        clip_norm = config.pop("clip_norm")
+        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
+    elif "clip_norm_global" in config:
+        clip_norm = config.pop("clip_norm_global")
+        grad_clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=clip_norm)
+    else:
+        grad_clip = None
+    optim = getattr(optimizer, optim_name)(
+        learning_rate=lr, weight_decay=reg, grad_clip=grad_clip, **config
+    )
+    return optim(model), lr
diff --git a/docling_ibm_models/slanet_1m/optimizer/learning_rate.py b/docling_ibm_models/slanet_1m/optimizer/learning_rate.py
new file mode 100644
index 0000000..687a145
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/learning_rate.py
@@ -0,0 +1,454 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from paddle.optimizer import lr
+from .lr_scheduler import CyclicalCosineDecay, OneCycleDecay, TwoStepCosineDecay
+
+
+class Linear(object):
+    """
+    Linear learning rate decay
+    Args:
+        learning_rate (float): The initial learning rate. It is a python float number.
+        epochs(int): Total training epochs; multiplied by step_each_epoch to get the number of decay steps.
+        end_lr(float, optional): The minimum final learning rate. Default: 0.0.
+        power(float, optional): Power of polynomial. Default: 1.0.
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
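+        Example (sketch): with learning_rate=0.001, epochs=10,
+        step_each_epoch=100, end_lr=0.0 and power=1.0, the rate decays
+        linearly from 1e-3 to 0.0 over the resulting 1000 steps.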
+ """ + + def __init__( + self, + learning_rate, + epochs, + step_each_epoch, + end_lr=0.0, + power=1.0, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(Linear, self).__init__() + self.learning_rate = learning_rate + self.epochs = epochs * step_each_epoch + self.end_lr = end_lr + self.power = power + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = lr.PolynomialDecay( + learning_rate=self.learning_rate, + decay_steps=self.epochs, + end_lr=self.end_lr, + power=self.power, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate + + +class Cosine(object): + """ + Cosine learning rate decay + lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__( + self, + learning_rate, + step_each_epoch, + epochs, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(Cosine, self).__init__() + self.learning_rate = learning_rate + self.T_max = step_each_epoch * epochs + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = lr.CosineAnnealingDecay( + learning_rate=self.learning_rate, + T_max=self.T_max, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate + + +class Step(object): + """ + Piecewise learning rate decay + Args: + step_each_epoch(int): steps each epoch + learning_rate (float): The initial learning rate. It is a python float number. + step_size (int): the interval to update. + gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` . + It should be less than 1.0. Default: 0.1. + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__( + self, + learning_rate, + step_size, + step_each_epoch, + gamma, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(Step, self).__init__() + self.step_size = step_each_epoch * step_size + self.learning_rate = learning_rate + self.gamma = gamma + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = lr.StepDecay( + learning_rate=self.learning_rate, + step_size=self.step_size, + gamma=self.gamma, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate + + +class Piecewise(object): + """ + Piecewise learning rate decay + Args: + boundaries(list): A list of steps numbers. The type of element in the list is python int. + values(list): A list of learning rate values that will be picked during different epoch boundaries. + The type of element in the list is python float. 
+ last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__( + self, + step_each_epoch, + decay_epochs, + values, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(Piecewise, self).__init__() + self.boundaries = [step_each_epoch * e for e in decay_epochs] + self.values = values + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = lr.PiecewiseDecay( + boundaries=self.boundaries, values=self.values, last_epoch=self.last_epoch + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.values[0], + last_epoch=self.last_epoch, + ) + return learning_rate + + +class CyclicalCosine(object): + """ + Cyclical cosine learning rate decay + Args: + learning_rate(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + cycle(int): period of the cosine learning rate + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__( + self, + learning_rate, + step_each_epoch, + epochs, + cycle, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(CyclicalCosine, self).__init__() + self.learning_rate = learning_rate + self.T_max = step_each_epoch * epochs + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + self.cycle = round(cycle * step_each_epoch) + + def __call__(self): + learning_rate = CyclicalCosineDecay( + learning_rate=self.learning_rate, + T_max=self.T_max, + cycle=self.cycle, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate + + +class OneCycle(object): + """ + One Cycle learning rate decay + Args: + max_lr(float): Upper learning rate boundaries + epochs(int): total training epochs + step_each_epoch(int): steps each epoch + anneal_strategy(str): {‘cos’, ‘linear’} Specifies the annealing strategy: “cos” for cosine annealing, “linear” for linear annealing. + Default: ‘cos’ + three_phase(bool): If True, use a third phase of the schedule to annihilate the learning rate according to ‘final_div_factor’ + instead of modifying the second phase (the first two phases will be symmetrical about the step indicated by ‘pct_start’). + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. 
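+        Example (sketch, relying on OneCycleDecay's default div_factor=25.0
+        and final_div_factor=1e4): with max_lr=0.01 the rate ramps up from
+        max_lr / 25 to 0.01 and then anneals down to max_lr / (25 * 1e4).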
+    """
+
+    def __init__(
+        self,
+        max_lr,
+        epochs,
+        step_each_epoch,
+        anneal_strategy="cos",
+        three_phase=False,
+        warmup_epoch=0,
+        last_epoch=-1,
+        **kwargs,
+    ):
+        super(OneCycle, self).__init__()
+        self.max_lr = max_lr
+        self.epochs = epochs
+        self.steps_per_epoch = step_each_epoch
+        self.anneal_strategy = anneal_strategy
+        self.three_phase = three_phase
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = OneCycleDecay(
+            max_lr=self.max_lr,
+            epochs=self.epochs,
+            steps_per_epoch=self.steps_per_epoch,
+            anneal_strategy=self.anneal_strategy,
+            three_phase=self.three_phase,
+            last_epoch=self.last_epoch,
+        )
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.max_lr,
+                last_epoch=self.last_epoch,
+            )
+        return learning_rate
+
+
+class Const(object):
+    """
+    Constant learning rate (optionally with linear warmup)
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(
+        self, learning_rate, step_each_epoch, warmup_epoch=0, last_epoch=-1, **kwargs
+    ):
+        super(Const, self).__init__()
+        self.learning_rate = learning_rate
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = self.learning_rate
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch,
+            )
+        return learning_rate
+
+
+class DecayLearningRate(object):
+    """
+    Polynomial learning rate decay
+    new_lr = (lr - end_lr) * (1 - epoch/decay_steps)**power + end_lr
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        factor(float): Power of polynomial, should be greater than 0.0 to get learning rate decay. Default: 0.9
+        end_lr(float): The minimum final learning rate. Default: 0.0.
+    """
+
+    def __init__(
+        self, learning_rate, step_each_epoch, epochs, factor=0.9, end_lr=0, **kwargs
+    ):
+        super(DecayLearningRate, self).__init__()
+        self.learning_rate = learning_rate
+        self.epochs = epochs + 1
+        self.factor = factor
+        # honor the end_lr argument (it was previously hard-coded to 0)
+        self.end_lr = end_lr
+        self.decay_steps = step_each_epoch * epochs
+
+    def __call__(self):
+        learning_rate = lr.PolynomialDecay(
+            learning_rate=self.learning_rate,
+            decay_steps=self.decay_steps,
+            power=self.factor,
+            end_lr=self.end_lr,
+        )
+        return learning_rate
+
+
+class MultiStepDecay(object):
+    """
+    Multi-step learning rate decay
+    Args:
+        step_each_epoch(int): steps each epoch
+        learning_rate (float): The initial learning rate. It is a python float number.
+        milestones (list): List of epoch indices at which the learning rate is decayed.
+        gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
+            It should be less than 1.0. Default: 0.1.
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
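+        Example (sketch): milestones=[10, 20] with gamma=0.1 keeps the initial
+        rate for the first 10 epochs, scales it by 0.1 at epoch 10 and by 0.1
+        again at epoch 20 (milestones are converted to global steps internally).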
+ """ + + def __init__( + self, + learning_rate, + milestones, + step_each_epoch, + gamma, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(MultiStepDecay, self).__init__() + self.milestones = [step_each_epoch * e for e in milestones] + self.learning_rate = learning_rate + self.gamma = gamma + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = lr.MultiStepDecay( + learning_rate=self.learning_rate, + milestones=self.milestones, + gamma=self.gamma, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate + + +class TwoStepCosine(object): + """ + Cosine learning rate decay + lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1) + Args: + lr(float): initial learning rate + step_each_epoch(int): steps each epoch + epochs(int): total training epochs + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. + """ + + def __init__( + self, + learning_rate, + step_each_epoch, + epochs, + warmup_epoch=0, + last_epoch=-1, + **kwargs, + ): + super(TwoStepCosine, self).__init__() + self.learning_rate = learning_rate + self.T_max1 = step_each_epoch * 200 + self.T_max2 = step_each_epoch * epochs + self.last_epoch = last_epoch + self.warmup_epoch = round(warmup_epoch * step_each_epoch) + + def __call__(self): + learning_rate = TwoStepCosineDecay( + learning_rate=self.learning_rate, + T_max1=self.T_max1, + T_max2=self.T_max2, + last_epoch=self.last_epoch, + ) + if self.warmup_epoch > 0: + learning_rate = lr.LinearWarmup( + learning_rate=learning_rate, + warmup_steps=self.warmup_epoch, + start_lr=0.0, + end_lr=self.learning_rate, + last_epoch=self.last_epoch, + ) + return learning_rate diff --git a/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py b/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py new file mode 100644 index 0000000..4034e14 --- /dev/null +++ b/docling_ibm_models/slanet_1m/optimizer/lr_scheduler.py @@ -0,0 +1,240 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from paddle.optimizer.lr import LRScheduler + + +class CyclicalCosineDecay(LRScheduler): + def __init__( + self, learning_rate, T_max, cycle=1, last_epoch=-1, eta_min=0.0, verbose=False + ): + """ + Cyclical cosine learning rate decay + A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf + Args: + learning rate(float): learning rate + T_max(int): maximum epoch num + cycle(int): period of the cosine decay + last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate. 
+ eta_min(float): minimum learning rate during training + verbose(bool): whether to print learning rate for each epoch + """ + super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch, verbose) + self.cycle = cycle + self.eta_min = eta_min + + def get_lr(self): + if self.last_epoch == 0: + return self.base_lr + reletive_epoch = self.last_epoch % self.cycle + lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * ( + 1 + math.cos(math.pi * reletive_epoch / self.cycle) + ) + return lr + + +class OneCycleDecay(LRScheduler): + """ + One Cycle learning rate decay + A learning rate which can be referred in https://arxiv.org/abs/1708.07120 + Code refered in https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR + """ + + def __init__( + self, + max_lr, + epochs=None, + steps_per_epoch=None, + pct_start=0.3, + anneal_strategy="cos", + div_factor=25.0, + final_div_factor=1e4, + three_phase=False, + last_epoch=-1, + verbose=False, + ): + # Validate total_steps + if epochs <= 0 or not isinstance(epochs, int): + raise ValueError( + "Expected positive integer epochs, but got {}".format(epochs) + ) + if steps_per_epoch <= 0 or not isinstance(steps_per_epoch, int): + raise ValueError( + "Expected positive integer steps_per_epoch, but got {}".format( + steps_per_epoch + ) + ) + self.total_steps = epochs * steps_per_epoch + + self.max_lr = max_lr + self.initial_lr = self.max_lr / div_factor + self.min_lr = self.initial_lr / final_div_factor + + if three_phase: + self._schedule_phases = [ + { + "end_step": float(pct_start * self.total_steps) - 1, + "start_lr": self.initial_lr, + "end_lr": self.max_lr, + }, + { + "end_step": float(2 * pct_start * self.total_steps) - 2, + "start_lr": self.max_lr, + "end_lr": self.initial_lr, + }, + { + "end_step": self.total_steps - 1, + "start_lr": self.initial_lr, + "end_lr": self.min_lr, + }, + ] + else: + self._schedule_phases = [ + { + "end_step": float(pct_start * self.total_steps) - 1, + "start_lr": self.initial_lr, + "end_lr": self.max_lr, + }, + { + "end_step": self.total_steps - 1, + "start_lr": self.max_lr, + "end_lr": self.min_lr, + }, + ] + + # Validate pct_start + if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float): + raise ValueError( + "Expected float between 0 and 1 pct_start, but got {}".format(pct_start) + ) + + # Validate anneal_strategy + if anneal_strategy not in ["cos", "linear"]: + raise ValueError( + "anneal_strategy must by one of 'cos' or 'linear', instead got {}".format( + anneal_strategy + ) + ) + elif anneal_strategy == "cos": + self.anneal_func = self._annealing_cos + elif anneal_strategy == "linear": + self.anneal_func = self._annealing_linear + + super(OneCycleDecay, self).__init__(max_lr, last_epoch, verbose) + + def _annealing_cos(self, start, end, pct): + "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." + cos_out = math.cos(math.pi * pct) + 1 + return end + (start - end) / 2.0 * cos_out + + def _annealing_linear(self, start, end, pct): + "Linearly anneal from `start` to `end` as pct goes from 0.0 to 1.0." + return (end - start) * pct + start + + def get_lr(self): + computed_lr = 0.0 + step_num = self.last_epoch + + if step_num > self.total_steps: + raise ValueError( + "Tried to step {} times. 
The specified number of total steps is {}".format(
+                    step_num + 1, self.total_steps
+                )
+            )
+        start_step = 0
+        for i, phase in enumerate(self._schedule_phases):
+            end_step = phase["end_step"]
+            if step_num <= end_step or i == len(self._schedule_phases) - 1:
+                pct = (step_num - start_step) / (end_step - start_step)
+                computed_lr = self.anneal_func(phase["start_lr"], phase["end_lr"], pct)
+                break
+            start_step = phase["end_step"]
+
+        return computed_lr
+
+
+class TwoStepCosineDecay(LRScheduler):
+    def __init__(
+        self, learning_rate, T_max1, T_max2, eta_min=0, last_epoch=-1, verbose=False
+    ):
+        if not isinstance(T_max1, int):
+            raise TypeError(
+                "The type of 'T_max1' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max1)
+            )
+        if not isinstance(T_max2, int):
+            raise TypeError(
+                "The type of 'T_max2' in 'TwoStepCosineDecay' must be 'int', but received %s."
+                % type(T_max2)
+            )
+        if not isinstance(eta_min, (float, int)):
+            raise TypeError(
+                "The type of 'eta_min' in 'TwoStepCosineDecay' must be 'float, int', but received %s."
+                % type(eta_min)
+            )
+        assert T_max1 > 0 and isinstance(
+            T_max1, int
+        ), " 'T_max1' must be a positive integer."
+        assert T_max2 > 0 and isinstance(
+            T_max2, int
+        ), " 'T_max2' must be a positive integer."
+        self.T_max1 = T_max1
+        self.T_max2 = T_max2
+        self.eta_min = float(eta_min)
+        super(TwoStepCosineDecay, self).__init__(learning_rate, last_epoch, verbose)
+
+    def get_lr(self):
+        if self.last_epoch <= self.T_max1:
+            if self.last_epoch == 0:
+                return self.base_lr
+            elif (self.last_epoch - 1 - self.T_max1) % (2 * self.T_max1) == 0:
+                return (
+                    self.last_lr
+                    + (self.base_lr - self.eta_min)
+                    * (1 - math.cos(math.pi / self.T_max1))
+                    / 2
+                )
+
+            return (1 + math.cos(math.pi * self.last_epoch / self.T_max1)) / (
+                1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max1)
+            ) * (self.last_lr - self.eta_min) + self.eta_min
+        else:
+            if (self.last_epoch - 1 - self.T_max2) % (2 * self.T_max2) == 0:
+                return (
+                    self.last_lr
+                    + (self.base_lr - self.eta_min)
+                    * (1 - math.cos(math.pi / self.T_max2))
+                    / 2
+                )
+
+            return (1 + math.cos(math.pi * self.last_epoch / self.T_max2)) / (
+                1 + math.cos(math.pi * (self.last_epoch - 1) / self.T_max2)
+            ) * (self.last_lr - self.eta_min) + self.eta_min
+
+    def _get_closed_form_lr(self):
+        if self.last_epoch <= self.T_max1:
+            return (
+                self.eta_min
+                + (self.base_lr - self.eta_min)
+                * (1 + math.cos(math.pi * self.last_epoch / self.T_max1))
+                / 2
+            )
+        else:
+            return (
+                self.eta_min
+                + (self.base_lr - self.eta_min)
+                * (1 + math.cos(math.pi * self.last_epoch / self.T_max2))
+                / 2
+            )
diff --git a/docling_ibm_models/slanet_1m/optimizer/optimizer.py b/docling_ibm_models/slanet_1m/optimizer/optimizer.py
new file mode 100644
index 0000000..d7f78a5
--- /dev/null
+++ b/docling_ibm_models/slanet_1m/optimizer/optimizer.py
@@ -0,0 +1,292 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
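+
+# Usage sketch (illustrative values): the classes below are looked up by name
+# in build_optimizer (optimizer/__init__.py); calling an instance with the
+# model returns the underlying paddle optimizer bound to its trainable
+# parameters, e.g.
+#   opt = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)(model)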
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from paddle import optimizer as optim + + +class Momentum(object): + """ + Simple Momentum optimizer with velocity state. + Args: + learning_rate (float|Variable) - The learning rate used to update parameters. + Can be a float value or a Variable with one float value as data element. + momentum (float) - Momentum factor. + regularization (WeightDecayRegularizer, optional) - The strategy of regularization. + """ + + def __init__( + self, learning_rate, momentum, weight_decay=None, grad_clip=None, **args + ): + super(Momentum, self).__init__() + self.learning_rate = learning_rate + self.momentum = momentum + self.weight_decay = weight_decay + self.grad_clip = grad_clip + + def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] + opt = optim.Momentum( + learning_rate=self.learning_rate, + momentum=self.momentum, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + parameters=train_params, + ) + return opt + + +class Adam(object): + def __init__( + self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + parameter_list=None, + weight_decay=None, + grad_clip=None, + name=None, + lazy_mode=False, + **kwargs, + ): + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.parameter_list = parameter_list + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.grad_clip = grad_clip + self.name = name + self.lazy_mode = lazy_mode + self.group_lr = kwargs.get("group_lr", False) + self.training_step = kwargs.get("training_step", None) + + def __call__(self, model): + if self.group_lr: + if self.training_step == "LF_2": + import paddle + + if isinstance(model, paddle.DataParallel): # multi gpu + mlm = model._layers.head.MLM_VRM.MLM.parameters() + pre_mlm_pp = ( + model._layers.head.MLM_VRM.Prediction.pp_share.parameters() + ) + pre_mlm_w = ( + model._layers.head.MLM_VRM.Prediction.w_share.parameters() + ) + else: # single gpu + mlm = model.head.MLM_VRM.MLM.parameters() + pre_mlm_pp = model.head.MLM_VRM.Prediction.pp_share.parameters() + pre_mlm_w = model.head.MLM_VRM.Prediction.w_share.parameters() + + total = [] + for param in mlm: + total.append(id(param)) + for param in pre_mlm_pp: + total.append(id(param)) + for param in pre_mlm_w: + total.append(id(param)) + + group_base_params = [ + param for param in model.parameters() if id(param) in total + ] + group_small_params = [ + param for param in model.parameters() if id(param) not in total + ] + train_params = [ + {"params": group_base_params}, + { + "params": group_small_params, + "learning_rate": self.learning_rate.values[0] * 0.1, + }, + ] + + else: + print("group lr currently only support VisionLAN in LF_2 training step") + train_params = [ + param for param in model.parameters() if param.trainable is True + ] + else: + train_params = [ + param for param in model.parameters() if param.trainable is True + ] + + opt = optim.Adam( + learning_rate=self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + name=self.name, + lazy_mode=self.lazy_mode, + parameters=train_params, + ) + return opt + + +class RMSProp(object): + """ + Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method. 
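+    It keeps a moving average of the squared gradients and divides the
+    gradient by the root of this average.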
+ Args: + learning_rate (float|Variable) - The learning rate used to update parameters. + Can be a float value or a Variable with one float value as data element. + momentum (float) - Momentum factor. + rho (float) - rho value in equation. + epsilon (float) - avoid division by zero, default is 1e-6. + regularization (WeightDecayRegularizer, optional) - The strategy of regularization. + """ + + def __init__( + self, + learning_rate, + momentum=0.0, + rho=0.95, + epsilon=1e-6, + weight_decay=None, + grad_clip=None, + **args, + ): + super(RMSProp, self).__init__() + self.learning_rate = learning_rate + self.momentum = momentum + self.rho = rho + self.epsilon = epsilon + self.weight_decay = weight_decay + self.grad_clip = grad_clip + + def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] + opt = optim.RMSProp( + learning_rate=self.learning_rate, + momentum=self.momentum, + rho=self.rho, + epsilon=self.epsilon, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + parameters=train_params, + ) + return opt + + +class Adadelta(object): + def __init__( + self, + learning_rate=0.001, + epsilon=1e-08, + rho=0.95, + parameter_list=None, + weight_decay=None, + grad_clip=None, + name=None, + **kwargs, + ): + self.learning_rate = learning_rate + self.epsilon = epsilon + self.rho = rho + self.parameter_list = parameter_list + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.grad_clip = grad_clip + self.name = name + + def __call__(self, model): + train_params = [ + param for param in model.parameters() if param.trainable is True + ] + opt = optim.Adadelta( + learning_rate=self.learning_rate, + epsilon=self.epsilon, + rho=self.rho, + weight_decay=self.weight_decay, + grad_clip=self.grad_clip, + name=self.name, + parameters=train_params, + ) + return opt + + +class AdamW(object): + def __init__( + self, + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + weight_decay=0.01, + multi_precision=False, + grad_clip=None, + no_weight_decay_name=None, + one_dim_param_no_weight_decay=False, + name=None, + lazy_mode=False, + **args, + ): + super().__init__() + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.grad_clip = grad_clip + self.weight_decay = 0.01 if weight_decay is None else weight_decay + self.grad_clip = grad_clip + self.name = name + self.lazy_mode = lazy_mode + self.multi_precision = multi_precision + self.no_weight_decay_name_list = ( + no_weight_decay_name.split() if no_weight_decay_name else [] + ) + self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay + + def __call__(self, model): + parameters = [param for param in model.parameters() if param.trainable is True] + + self.no_weight_decay_param_name_list = [ + p.name + for n, p in model.named_parameters() + if any(nd in n for nd in self.no_weight_decay_name_list) + ] + + if self.one_dim_param_no_weight_decay: + self.no_weight_decay_param_name_list += [ + p.name for n, p in model.named_parameters() if len(p.shape) == 1 + ] + + opt = optim.AdamW( + learning_rate=self.learning_rate, + beta1=self.beta1, + beta2=self.beta2, + epsilon=self.epsilon, + parameters=parameters, + weight_decay=self.weight_decay, + multi_precision=self.multi_precision, + grad_clip=self.grad_clip, + name=self.name, + lazy_mode=self.lazy_mode, + apply_decay_param_fun=self._apply_decay_param_fun, + ) + return opt + + def _apply_decay_param_fun(self, name): + return name not in 
self.no_weight_decay_param_name_list diff --git a/docling_ibm_models/slanet_1m/optimizer/regularizer.py b/docling_ibm_models/slanet_1m/optimizer/regularizer.py new file mode 100644 index 0000000..740ad1c --- /dev/null +++ b/docling_ibm_models/slanet_1m/optimizer/regularizer.py @@ -0,0 +1,51 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import paddle + + +class L1Decay(object): + """ + L1 Weight Decay Regularization, which encourages the weights to be sparse. + Args: + factor(float): regularization coeff. Default:0.0. + """ + + def __init__(self, factor=0.0): + super(L1Decay, self).__init__() + self.coeff = factor + + def __call__(self): + reg = paddle.regularizer.L1Decay(self.coeff) + return reg + + +class L2Decay(object): + """ + L2 Weight Decay Regularization, which helps to prevent the model over-fitting. + Args: + factor(float): regularization coeff. Default:0.0. + """ + + def __init__(self, factor=0.0): + super(L2Decay, self).__init__() + self.coeff = float(factor) + + def __call__(self): + return self.coeff diff --git a/docling_ibm_models/slanet_1m/program.py b/docling_ibm_models/slanet_1m/program.py new file mode 100644 index 0000000..daa6fa6 --- /dev/null +++ b/docling_ibm_models/slanet_1m/program.py @@ -0,0 +1,826 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
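+
+# Usage sketch (hypothetical invocation): the train/eval entry points build
+# their runtime configuration with the helpers below, e.g.
+#   config = load_config("configs/SLANet_1M.yml")
+#   config = merge_config(config, ArgsParser().parse_args().opt)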
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import platform +import yaml +import time +import datetime +import paddle +import paddle.distributed as dist +from tqdm import tqdm +import cv2 +import numpy as np +from argparse import ArgumentParser, RawDescriptionHelpFormatter + +from paddleocr.ppocr.utils.stats import TrainingStats +from paddleocr.ppocr.utils.save_load import save_model +from paddleocr.ppocr.utils.utility import print_dict, AverageMeter +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.ppocr.utils.loggers import WandbLogger, Loggers +from paddleocr.ppocr.utils import profiler +from paddleocr.ppocr.data import build_dataloader + + +class ArgsParser(ArgumentParser): + def __init__(self): + super(ArgsParser, self).__init__(formatter_class=RawDescriptionHelpFormatter) + self.add_argument("-c", "--config", help="configuration file to use") + self.add_argument("-o", "--opt", nargs="+", help="set configuration options") + self.add_argument( + "-p", + "--profiler_options", + type=str, + default=None, + help="The option of profiler, which should be in format " + '"key1=value1;key2=value2;key3=value3".', + ) + + def parse_args(self, argv=None): + args = super(ArgsParser, self).parse_args(argv) + assert args.config is not None, "Please specify --config=configure_file_path." + args.opt = self._parse_opt(args.opt) + return args + + def _parse_opt(self, opts): + config = {} + if not opts: + return config + for s in opts: + s = s.strip() + k, v = s.split("=") + config[k] = yaml.load(v, Loader=yaml.Loader) + return config + + +def load_config(file_path): + """ + Load config from yml/yaml file. + Args: + file_path (str): Path of the config file to be loaded. + Returns: global config + """ + _, ext = os.path.splitext(file_path) + assert ext in [".yml", ".yaml"], "only support yaml files for now" + config = yaml.load(open(file_path, "rb"), Loader=yaml.Loader) + return config + + +def merge_config(config, opts): + """ + Merge config into global config. + Args: + config (dict): Config to be merged. + Returns: global config + """ + for key, value in opts.items(): + if "." not in key: + if isinstance(value, dict) and key in config: + config[key].update(value) + else: + config[key] = value + else: + sub_keys = key.split(".") + assert sub_keys[0] in config, ( + "the sub_keys can only be one of global_config: {}, but get: " + "{}, please check your running command".format( + config.keys(), sub_keys[0] + ) + ) + cur = config[sub_keys[0]] + for idx, sub_key in enumerate(sub_keys[1:]): + if idx == len(sub_keys) - 2: + cur[sub_key] = value + else: + cur = cur[sub_key] + return config + + +def check_device(use_gpu, use_xpu=False, use_npu=False, use_mlu=False): + """ + Log error and exit when set use_gpu=true in paddlepaddle + cpu version. + """ + err = ( + "Config {} cannot be set as true while your paddle " + "is not compiled with {} ! \nPlease try: \n" + "\t1. Install paddlepaddle to run model on {} \n" + "\t2. 
Set {} as false in config file to run " + "model on CPU" + ) + + try: + if use_gpu and use_xpu: + print("use_xpu and use_gpu can not both be true.") + if use_gpu and not paddle.is_compiled_with_cuda(): + print(err.format("use_gpu", "cuda", "gpu", "use_gpu")) + sys.exit(1) + if use_xpu and not paddle.device.is_compiled_with_xpu(): + print(err.format("use_xpu", "xpu", "xpu", "use_xpu")) + sys.exit(1) + if use_npu: + if ( + int(paddle.version.major) != 0 + and int(paddle.version.major) <= 2 + and int(paddle.version.minor) <= 4 + ): + if not paddle.device.is_compiled_with_npu(): + print(err.format("use_npu", "npu", "npu", "use_npu")) + sys.exit(1) + # is_compiled_with_npu() has been updated after paddle-2.4 + else: + if not paddle.device.is_compiled_with_custom_device("npu"): + print(err.format("use_npu", "npu", "npu", "use_npu")) + sys.exit(1) + if use_mlu and not paddle.device.is_compiled_with_mlu(): + print(err.format("use_mlu", "mlu", "mlu", "use_mlu")) + sys.exit(1) + except Exception as e: + pass + + +def to_float32(preds): + if isinstance(preds, dict): + for k in preds: + if isinstance(preds[k], dict) or isinstance(preds[k], list): + preds[k] = to_float32(preds[k]) + elif isinstance(preds[k], paddle.Tensor): + preds[k] = preds[k].astype(paddle.float32) + elif isinstance(preds, list): + for k in range(len(preds)): + if isinstance(preds[k], dict): + preds[k] = to_float32(preds[k]) + elif isinstance(preds[k], list): + preds[k] = to_float32(preds[k]) + elif isinstance(preds[k], paddle.Tensor): + preds[k] = preds[k].astype(paddle.float32) + elif isinstance(preds, paddle.Tensor): + preds = preds.astype(paddle.float32) + return preds + + +def train( + config, + train_dataloader, + valid_dataloader, + device, + model, + loss_class, + optimizer, + lr_scheduler, + post_process_class, + eval_class, + pre_best_model_dict, + logger, + step_pre_epoch, + log_writer=None, + scaler=None, + amp_level="O2", + amp_custom_black_list=[], + amp_custom_white_list=[], + amp_dtype="float16", +): + cal_metric_during_train = config["Global"].get("cal_metric_during_train", False) + calc_epoch_interval = config["Global"].get("calc_epoch_interval", 1) + log_smooth_window = config["Global"]["log_smooth_window"] + epoch_num = config["Global"]["epoch_num"] + print_batch_step = config["Global"]["print_batch_step"] + eval_batch_step = config["Global"]["eval_batch_step"] + eval_batch_epoch = config["Global"].get("eval_batch_epoch", None) + profiler_options = config["profiler_options"] + + global_step = 0 + if "global_step" in pre_best_model_dict: + global_step = pre_best_model_dict["global_step"] + start_eval_step = 0 + if isinstance(eval_batch_step, list) and len(eval_batch_step) >= 2: + start_eval_step = eval_batch_step[0] if not eval_batch_epoch else 0 + eval_batch_step = ( + eval_batch_step[1] + if not eval_batch_epoch + else step_pre_epoch * eval_batch_epoch + ) + if len(valid_dataloader) == 0: + logger.info( + "No Images in eval dataset, evaluation during training " + "will be disabled" + ) + start_eval_step = 1e111 + logger.info( + "During the training process, after the {}th iteration, " + "an evaluation is run every {} iterations".format( + start_eval_step, eval_batch_step + ) + ) + save_epoch_step = config["Global"]["save_epoch_step"] + save_model_dir = config["Global"]["save_model_dir"] + if not os.path.exists(save_model_dir): + os.makedirs(save_model_dir) + main_indicator = eval_class.main_indicator + best_model_dict = {main_indicator: 0} + best_model_dict.update(pre_best_model_dict) + train_stats = 
TrainingStats(log_smooth_window, ["lr"]) + model_average = False + model.train() + + use_srn = config["Architecture"]["algorithm"] == "SRN" + extra_input_models = [ + "SRN", + "NRTR", + "SAR", + "SEED", + "SVTR", + "SVTR_LCNet", + "SPIN", + "VisionLAN", + "RobustScanner", + "RFL", + "DRRG", + "SATRN", + "SVTR_HGNet", + "ParseQ", + "CPPD", + ] + extra_input = False + if config["Architecture"]["algorithm"] == "Distillation": + for key in config["Architecture"]["Models"]: + extra_input = ( + extra_input + or config["Architecture"]["Models"][key]["algorithm"] + in extra_input_models + ) + else: + extra_input = config["Architecture"]["algorithm"] in extra_input_models + try: + model_type = config["Architecture"]["model_type"] + except: + model_type = None + + algorithm = config["Architecture"]["algorithm"] + + start_epoch = ( + best_model_dict["start_epoch"] if "start_epoch" in best_model_dict else 1 + ) + + total_samples = 0 + train_reader_cost = 0.0 + train_batch_cost = 0.0 + reader_start = time.time() + eta_meter = AverageMeter() + + max_iter = ( + len(train_dataloader) - 1 + if platform.system() == "Windows" + else len(train_dataloader) + ) + + for epoch in range(start_epoch, epoch_num + 1): + if train_dataloader.dataset.need_reset: + train_dataloader = build_dataloader( + config, "Train", device, logger, seed=epoch + ) + max_iter = ( + len(train_dataloader) - 1 + if platform.system() == "Windows" + else len(train_dataloader) + ) + + for idx, batch in enumerate(train_dataloader): + profiler.add_profiler_step(profiler_options) + train_reader_cost += time.time() - reader_start + if idx >= max_iter: + break + lr = optimizer.get_lr() + images = batch[0] + if use_srn: + model_average = True + # use amp + if scaler: + with paddle.amp.auto_cast( + level=amp_level, + custom_black_list=amp_custom_black_list, + custom_white_list=amp_custom_white_list, + dtype=amp_dtype, + ): + if model_type == "table" or extra_input: + preds = model(images, data=batch[1:]) + elif model_type in ["kie"]: + preds = model(batch) + elif algorithm in ["CAN"]: + preds = model(batch[:3]) + else: + preds = model(images) + preds = to_float32(preds) + loss = loss_class(preds, batch) + avg_loss = loss["loss"] + scaled_avg_loss = scaler.scale(avg_loss) + scaled_avg_loss.backward() + scaler.minimize(optimizer, scaled_avg_loss) + else: + if model_type == "table" or extra_input: + preds = model(images, data=batch[1:]) + elif model_type in ["kie", "sr"]: + preds = model(batch) + elif algorithm in ["CAN"]: + preds = model(batch[:3]) + else: + preds = model(images) + loss = loss_class(preds, batch) + avg_loss = loss["loss"] + avg_loss.backward() + optimizer.step() + + optimizer.clear_grad() + + if ( + cal_metric_during_train and epoch % calc_epoch_interval == 0 + ): # only rec and cls need + batch = [item.numpy() for item in batch] + if model_type in ["kie", "sr"]: + eval_class(preds, batch) + elif model_type in ["table"]: + post_result = post_process_class(preds, batch) + eval_class(post_result, batch) + elif algorithm in ["CAN"]: + model_type = "can" + eval_class(preds[0], batch[2:], epoch_reset=(idx == 0)) + else: + if config["Loss"]["name"] in [ + "MultiLoss", + "MultiLoss_v2", + ]: # for multi head loss + post_result = post_process_class( + preds["ctc"], batch[1] + ) # for CTC head out + elif config["Loss"]["name"] in ["VLLoss"]: + post_result = post_process_class(preds, batch[1], batch[-1]) + else: + post_result = post_process_class(preds, batch[1]) + eval_class(post_result, batch) + metric = eval_class.get_metric() + 
train_stats.update(metric) + + train_batch_time = time.time() - reader_start + train_batch_cost += train_batch_time + eta_meter.update(train_batch_time) + global_step += 1 + total_samples += len(images) + + if not isinstance(lr_scheduler, float): + lr_scheduler.step() + + # logger and visualdl + stats = { + k: float(v) if v.shape == [] else v.numpy().mean() + for k, v in loss.items() + } + stats["lr"] = lr + train_stats.update(stats) + + if log_writer is not None and dist.get_rank() == 0: + log_writer.log_metrics( + metrics=train_stats.get(), prefix="TRAIN", step=global_step + ) + + if dist.get_rank() == 0 and ( + (global_step > 0 and global_step % print_batch_step == 0) + or (idx >= len(train_dataloader) - 1) + ): + logs = train_stats.log() + + eta_sec = ( + (epoch_num + 1 - epoch) * len(train_dataloader) - idx - 1 + ) * eta_meter.avg + eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec))) + max_mem_reserved_str = "" + max_mem_allocated_str = "" + if paddle.device.is_compiled_with_cuda(): + max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB," + max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB" + strs = ( + "epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: " + "{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, " + "ips: {:.5f} samples/s, eta: {}, {} {}".format( + epoch, + epoch_num, + global_step, + logs, + train_reader_cost / print_batch_step, + train_batch_cost / print_batch_step, + total_samples / print_batch_step, + total_samples / train_batch_cost, + eta_sec_format, + max_mem_reserved_str, + max_mem_allocated_str, + ) + ) + logger.info(strs) + + total_samples = 0 + train_reader_cost = 0.0 + train_batch_cost = 0.0 + # eval + if ( + global_step > start_eval_step + and (global_step - start_eval_step) % eval_batch_step == 0 + and dist.get_rank() == 0 + ): + if model_average: + Model_Average = paddle.incubate.optimizer.ModelAverage( + 0.15, + parameters=model.parameters(), + min_average_window=10000, + max_average_window=15625, + ) + Model_Average.apply() + cur_metric = eval( + model, + valid_dataloader, + post_process_class, + eval_class, + model_type, + extra_input=extra_input, + scaler=scaler, + amp_level=amp_level, + amp_custom_black_list=amp_custom_black_list, + amp_custom_white_list=amp_custom_white_list, + amp_dtype=amp_dtype, + ) + cur_metric_str = "cur metric, {}".format( + ", ".join(["{}: {}".format(k, v) for k, v in cur_metric.items()]) + ) + logger.info(cur_metric_str) + + # logger metric + if log_writer is not None: + log_writer.log_metrics( + metrics=cur_metric, prefix="EVAL", step=global_step + ) + + if cur_metric[main_indicator] >= best_model_dict[main_indicator]: + best_model_dict.update(cur_metric) + best_model_dict["best_epoch"] = epoch + save_model( + model, + optimizer, + save_model_dir, + logger, + config, + is_best=True, + prefix="best_accuracy", + best_model_dict=best_model_dict, + epoch=epoch, + global_step=global_step, + ) + best_str = "best metric, {}".format( + ", ".join( + ["{}: {}".format(k, v) for k, v in best_model_dict.items()] + ) + ) + logger.info(best_str) + # logger best metric + if log_writer is not None: + log_writer.log_metrics( + metrics={ + "best_{}".format(main_indicator): best_model_dict[ + main_indicator + ] + }, + prefix="EVAL", + step=global_step, + ) + + log_writer.log_model( + is_best=True, prefix="best_accuracy", metadata=best_model_dict + ) + + reader_start = time.time() + if dist.get_rank() == 0: + save_model( + 
model, + optimizer, + save_model_dir, + logger, + config, + is_best=False, + prefix="latest", + best_model_dict=best_model_dict, + epoch=epoch, + global_step=global_step, + ) + + if log_writer is not None: + log_writer.log_model(is_best=False, prefix="latest") + + if dist.get_rank() == 0 and epoch > 0 and epoch % save_epoch_step == 0: + save_model( + model, + optimizer, + save_model_dir, + logger, + config, + is_best=False, + prefix="iter_epoch_{}".format(epoch), + best_model_dict=best_model_dict, + epoch=epoch, + global_step=global_step, + ) + if log_writer is not None: + log_writer.log_model( + is_best=False, prefix="iter_epoch_{}".format(epoch) + ) + + best_str = "best metric, {}".format( + ", ".join(["{}: {}".format(k, v) for k, v in best_model_dict.items()]) + ) + logger.info(best_str) + if dist.get_rank() == 0 and log_writer is not None: + log_writer.close() + return + + +def eval( + model, + valid_dataloader, + post_process_class, + eval_class, + model_type=None, + extra_input=False, + scaler=None, + amp_level="O2", + amp_custom_black_list=[], + amp_custom_white_list=[], + amp_dtype="float16", +): + model.eval() + with paddle.no_grad(): + total_frame = 0.0 + total_time = 0.0 + pbar = tqdm( + total=len(valid_dataloader), desc="eval model:", position=0, leave=True + ) + max_iter = ( + len(valid_dataloader) - 1 + if platform.system() == "Windows" + else len(valid_dataloader) + ) + sum_images = 0 + for idx, batch in enumerate(valid_dataloader): + if idx >= max_iter: + break + images = batch[0] + start = time.time() + + # use amp + if scaler: + with paddle.amp.auto_cast( + level=amp_level, + custom_black_list=amp_custom_black_list, + dtype=amp_dtype, + ): + if model_type == "table" or extra_input: + preds = model(images, data=batch[1:]) + elif model_type in ["kie"]: + preds = model(batch) + elif model_type in ["can"]: + preds = model(batch[:3]) + elif model_type in ["sr"]: + preds = model(batch) + sr_img = preds["sr_img"] + lr_img = preds["lr_img"] + else: + preds = model(images) + preds = to_float32(preds) + else: + if model_type == "table" or extra_input: + preds = model(images, data=batch[1:]) + elif model_type in ["kie"]: + preds = model(batch) + elif model_type in ["can"]: + preds = model(batch[:3]) + elif model_type in ["sr"]: + preds = model(batch) + sr_img = preds["sr_img"] + lr_img = preds["lr_img"] + else: + preds = model(images) + + batch_numpy = [] + for item in batch: + if isinstance(item, paddle.Tensor): + batch_numpy.append(item.numpy()) + else: + batch_numpy.append(item) + # Obtain usable results from post-processing methods + total_time += time.time() - start + # Evaluate the results of the current batch + if model_type in ["table", "kie"]: + if post_process_class is None: + eval_class(preds, batch_numpy) + else: + post_result = post_process_class(preds, batch_numpy) + eval_class(post_result, batch_numpy) + elif model_type in ["sr"]: + eval_class(preds, batch_numpy) + elif model_type in ["can"]: + eval_class(preds[0], batch_numpy[2:], epoch_reset=(idx == 0)) + else: + post_result = post_process_class(preds, batch_numpy[1]) + eval_class(post_result, batch_numpy) + + pbar.update(1) + total_frame += len(images) + sum_images += 1 + # Get final metric,eg. 
acc or hmean + metric = eval_class.get_metric() + + pbar.close() + model.train() + metric["fps"] = total_frame / total_time + return metric + + +def update_center(char_center, post_result, preds): + result, label = post_result + feats, logits = preds + logits = paddle.argmax(logits, axis=-1) + feats = feats.numpy() + logits = logits.numpy() + + for idx_sample in range(len(label)): + if result[idx_sample][0] == label[idx_sample][0]: + feat = feats[idx_sample] + logit = logits[idx_sample] + for idx_time in range(len(logit)): + index = logit[idx_time] + if index in char_center.keys(): + char_center[index][0] = ( + char_center[index][0] * char_center[index][1] + feat[idx_time] + ) / (char_center[index][1] + 1) + char_center[index][1] += 1 + else: + char_center[index] = [feat[idx_time], 1] + return char_center + + +def get_center(model, eval_dataloader, post_process_class): + pbar = tqdm(total=len(eval_dataloader), desc="get center:") + max_iter = ( + len(eval_dataloader) - 1 + if platform.system() == "Windows" + else len(eval_dataloader) + ) + char_center = dict() + for idx, batch in enumerate(eval_dataloader): + if idx >= max_iter: + break + images = batch[0] + start = time.time() + preds = model(images) + + batch = [item.numpy() for item in batch] + # Obtain usable results from post-processing methods + post_result = post_process_class(preds, batch[1]) + + # update char_center + char_center = update_center(char_center, post_result, preds) + pbar.update(1) + + pbar.close() + for key in char_center.keys(): + char_center[key] = char_center[key][0] + return char_center + + +def preprocess(is_train=False): + FLAGS = ArgsParser().parse_args() + profiler_options = FLAGS.profiler_options + config = load_config(FLAGS.config) + config = merge_config(config, FLAGS.opt) + profile_dic = {"profiler_options": FLAGS.profiler_options} + config = merge_config(config, profile_dic) + + if is_train: + # save_config + save_model_dir = config["Global"]["save_model_dir"] + os.makedirs(save_model_dir, exist_ok=True) + with open(os.path.join(save_model_dir, "config.yml"), "w") as f: + yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False) + log_file = "{}/train.log".format(save_model_dir) + else: + log_file = None + logger = get_logger(log_file=log_file) + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config["Global"].get("use_gpu", False) + use_xpu = config["Global"].get("use_xpu", False) + use_npu = config["Global"].get("use_npu", False) + use_mlu = config["Global"].get("use_mlu", False) + + alg = config["Architecture"]["algorithm"] + assert alg in [ + "EAST", + "DB", + "SAST", + "Rosetta", + "CRNN", + "STARNet", + "RARE", + "SRN", + "CLS", + "PGNet", + "Distillation", + "NRTR", + "TableAttn", + "SAR", + "PSE", + "SEED", + "SDMGR", + "LayoutXLM", + "LayoutLM", + "LayoutLMv2", + "PREN", + "FCE", + "SVTR", + "SVTR_LCNet", + "ViTSTR", + "ABINet", + "DB++", + "TableMaster", + "SPIN", + "VisionLAN", + "Gestalt", + "SLANet", + "RobustScanner", + "CT", + "RFL", + "DRRG", + "CAN", + "Telescope", + "SATRN", + "SVTR_HGNet", + "ParseQ", + "CPPD", + ] + + if use_xpu: + device = "xpu:{0}".format(os.getenv("FLAGS_selected_xpus", 0)) + elif use_npu: + device = "npu:{0}".format(os.getenv("FLAGS_selected_npus", 0)) + elif use_mlu: + device = "mlu:{0}".format(os.getenv("FLAGS_selected_mlus", 0)) + else: + device = "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu" + check_device(use_gpu, use_xpu, use_npu, use_mlu) + + device = paddle.set_device(device) + + 
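The branch above assembles Paddle's "<backend>:<index>" device string and hands it to paddle.set_device. A minimal standalone sketch of the same selection rule, handy for checking a machine outside the training entrypoint (the environment-variable names are the ones read above; driving use_gpu from paddle.is_compiled_with_cuda() is an assumption for the demo):

    import os
    import paddle
    import paddle.distributed as dist

    def select_device(use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False):
        # Mirrors the branch in preprocess(): the first enabled accelerator wins,
        # otherwise fall back to the per-rank GPU id, or plain CPU.
        if use_xpu:
            return "xpu:{}".format(os.getenv("FLAGS_selected_xpus", 0))
        if use_npu:
            return "npu:{}".format(os.getenv("FLAGS_selected_npus", 0))
        if use_mlu:
            return "mlu:{}".format(os.getenv("FLAGS_selected_mlus", 0))
        return "gpu:{}".format(dist.ParallelEnv().dev_id) if use_gpu else "cpu"

    place = paddle.set_device(select_device(use_gpu=paddle.is_compiled_with_cuda()))
    print(place)  # e.g. Place(gpu:0) on a CUDA build, Place(cpu) otherwise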
config["Global"]["distributed"] = dist.get_world_size() != 1 + + loggers = [] + + if "use_visualdl" in config["Global"] and config["Global"]["use_visualdl"]: + logger.warning( + "You are using VisualDL, the VisualDL is deprecated and " + "removed in ppocr!" + ) + log_writer = None + if ( + "use_wandb" in config["Global"] and config["Global"]["use_wandb"] + ) or "wandb" in config: + save_dir = config["Global"]["save_model_dir"] + wandb_writer_path = "{}/wandb".format(save_dir) + if "wandb" in config: + wandb_params = config["wandb"] + else: + wandb_params = dict() + wandb_params.update({"save_dir": save_dir}) + log_writer = WandbLogger(**wandb_params, config=config) + loggers.append(log_writer) + else: + log_writer = None + print_dict(config, logger) + + if loggers: + log_writer = Loggers(loggers) + else: + log_writer = None + + logger.info("train with paddle {} and device {}".format(paddle.__version__, device)) + return config, device, logger, log_writer diff --git a/docling_ibm_models/slanet_1m/requirements.txt b/docling_ibm_models/slanet_1m/requirements.txt new file mode 100644 index 0000000..a11cb3f --- /dev/null +++ b/docling_ibm_models/slanet_1m/requirements.txt @@ -0,0 +1,21 @@ +paddleocr==2.8.0 +dvc[s3]==3.58.0 +openpyxl +premailer +shapely==2.0.6 +scikit-image==0.24.0 +imgaug==0.4.0 +pyclipper==1.3.0.post6 +lmdb==1.5.1 +tqdm==4.66.5 +numpy==1.26.4 +RapidFuzz==3.10.0 +Cython==3.0.11 +pillow==11.0.0 +PyYAML==6.0.1 +requests==2.32.3 +albumentations==1.4.10 +# to be compatible with albumentations +albucore==0.0.13 +#TEDS +apted==1.0.3 \ No newline at end of file diff --git a/docling_ibm_models/slanet_1m/src/eval.py b/docling_ibm_models/slanet_1m/src/eval.py new file mode 100644 index 0000000..4dcf353 --- /dev/null +++ b/docling_ibm_models/slanet_1m/src/eval.py @@ -0,0 +1,174 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import json + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.insert(0, __dir__) +sys.path.insert(0, os.path.abspath(os.path.join(__dir__, ".."))) + +import paddle +from paddleocr.ppocr.data import build_dataloader, set_signal_handlers +from modeling.architectures import build_model +from paddleocr.ppocr.postprocess import build_post_process +from metrics import build_metric +from paddleocr.ppocr.utils.save_load import load_model +import program as program + + +def main(): + global_config = config["Global"] + # build dataloader + set_signal_handlers() + valid_dataloader = build_dataloader(config, "Eval", device, logger) + + # build post process + post_process_class = build_post_process(config["PostProcess"], global_config) + + # build model + # for rec algorithm + if hasattr(post_process_class, "character"): + char_num = len(getattr(post_process_class, "character")) + if config["Architecture"]["algorithm"] in [ + "Distillation", + ]: # distillation model + for key in config["Architecture"]["Models"]: + if ( + config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead" + ): # for multi head + out_channels_list = {} + if config["PostProcess"]["name"] == "DistillationSARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode": + char_num = char_num - 3 + out_channels_list["CTCLabelDecode"] = char_num + out_channels_list["SARLabelDecode"] = char_num + 2 + out_channels_list["NRTRLabelDecode"] = char_num + 3 + config["Architecture"]["Models"][key]["Head"][ + "out_channels_list" + ] = out_channels_list + else: + config["Architecture"]["Models"][key]["Head"][ + "out_channels" + ] = char_num + elif config["Architecture"]["Head"]["name"] == "MultiHead": # for multi head + out_channels_list = {} + if config["PostProcess"]["name"] == "SARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "NRTRLabelDecode": + char_num = char_num - 3 + out_channels_list["CTCLabelDecode"] = char_num + out_channels_list["SARLabelDecode"] = char_num + 2 + out_channels_list["NRTRLabelDecode"] = char_num + 3 + config["Architecture"]["Head"]["out_channels_list"] = out_channels_list + else: # base rec model + config["Architecture"]["Head"]["out_channels"] = char_num + + model = build_model(config["Architecture"]) + extra_input_models = [ + "SRN", + "NRTR", + "SAR", + "SEED", + "SVTR", + "SVTR_LCNet", + "VisionLAN", + "RobustScanner", + "SVTR_HGNet", + ] + extra_input = False + if config["Architecture"]["algorithm"] == "Distillation": + for key in config["Architecture"]["Models"]: + extra_input = ( + extra_input + or config["Architecture"]["Models"][key]["algorithm"] + in extra_input_models + ) + else: + extra_input = config["Architecture"]["algorithm"] in extra_input_models + if "model_type" in config["Architecture"].keys(): + if config["Architecture"]["algorithm"] == "CAN": + model_type = "can" + else: + model_type = config["Architecture"]["model_type"] + else: + model_type = None + + # build metric + eval_class = build_metric(config["Metric"]) + # amp + use_amp = config["Global"].get("use_amp", False) + amp_level = config["Global"].get("amp_level", "O2") + amp_custom_black_list = config["Global"].get("amp_custom_black_list", []) + if use_amp: + AMP_RELATED_FLAGS_SETTING = { + "FLAGS_cudnn_batchnorm_spatial_persistent": 1, + "FLAGS_max_inplace_grad_add": 8, + } + 
paddle.set_flags(AMP_RELATED_FLAGS_SETTING) + scale_loss = config["Global"].get("scale_loss", 1.0) + use_dynamic_loss_scaling = config["Global"].get( + "use_dynamic_loss_scaling", False + ) + scaler = paddle.amp.GradScaler( + init_loss_scaling=scale_loss, + use_dynamic_loss_scaling=use_dynamic_loss_scaling, + ) + if amp_level == "O2": + model = paddle.amp.decorate( + models=model, level=amp_level, master_weight=True + ) + else: + scaler = None + + best_model_dict = load_model( + config, model, model_type=config["Architecture"]["model_type"] + ) + if len(best_model_dict): + logger.info("metric in ckpt ***************") + for k, v in best_model_dict.items(): + logger.info("{}:{}".format(k, v)) + + # start eval + metric = program.eval( + model, + valid_dataloader, + post_process_class, + eval_class, + model_type, + extra_input, + scaler, + amp_level, + amp_custom_black_list, + ) + os.makedirs("evaluation", exist_ok=True) + + # Save metrics to evaluation/metrics.json + with open("evaluation/metrics.json", "w") as f: + json.dump(metric, f, indent=4) + + logger.info("metric eval ***************") + for k, v in metric.items(): + logger.info("{}:{}".format(k, v)) + + +if __name__ == "__main__": + config, device, logger, vdl_writer = program.preprocess() + main() diff --git a/docling_ibm_models/slanet_1m/src/eval_table.py b/docling_ibm_models/slanet_1m/src/eval_table.py new file mode 100644 index 0000000..58ebaa4 --- /dev/null +++ b/docling_ibm_models/slanet_1m/src/eval_table.py @@ -0,0 +1,112 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
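The ground-truth file consumed by load_txt below is one tab-separated record per line: the image file name, a tab, then the full-page HTML of the table. A minimal sketch of producing such a file (file names and HTML content are illustrative):

    # Write a gt file in the format eval_table.py's load_txt() expects.
    records = {
        "table_001.png": "<html><body><table><tr><td>a</td><td>b</td></tr></table></body></html>",
        "table_002.png": "<html><body><table><tr><td>1</td></tr></table></body></html>",
    }

    with open("gt.txt", "w", encoding="utf-8") as f:
        for img_name, gt_html in records.items():
            f.write("{}\t{}\n".format(img_name, gt_html))

    # Then, e.g.:
    #   python src/eval_table.py --gt_path=gt.txt --image_dir=<images> --output=<dir>
    # (--image_dir and --output come from paddleocr's init_args; --gt_path is the
    # flag added in parse_args below.)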
+ +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "../.."))) + +import cv2 +import pickle +import paddle +from tqdm import tqdm +from paddleocr.ppstructure.table.table_metric import TEDS +from paddleocr.ppstructure.table.predict_table import TableSystem +from paddleocr.ppstructure.utility import init_args +from paddleocr.ppocr.utils.logging import get_logger + +logger = get_logger() + + +def parse_args(): + parser = init_args() + parser.add_argument("--gt_path", type=str) + return parser.parse_args() + + +def load_txt(txt_path): + pred_html_dict = {} + if not os.path.exists(txt_path): + return pred_html_dict + with open(txt_path, encoding="utf-8") as f: + lines = f.readlines() + for line in lines: + line = line.strip().split("\t") + img_name, pred_html = line + pred_html_dict[img_name] = pred_html + return pred_html_dict + + +def load_result(path): + data = {} + if os.path.exists(path): + data = pickle.load(open(path, "rb")) + return data + + +def save_result(path, data): + old_data = load_result(path) + old_data.update(data) + with open(path, "wb") as f: + pickle.dump(old_data, f) + + +def main(gt_path, img_root, args): + os.makedirs(args.output, exist_ok=True) + # init TableSystem + text_sys = TableSystem(args) + # load gt and preds html result + gt_html_dict = load_txt(gt_path) + + ocr_result = load_result(os.path.join(args.output, "ocr.pickle")) + structure_result = load_result(os.path.join(args.output, "structure.pickle")) + + pred_htmls = [] + gt_htmls = [] + for img_name, gt_html in tqdm(gt_html_dict.items()): + img = cv2.imread(os.path.join(img_root, img_name)) + # run ocr and save result + if img_name not in ocr_result: + dt_boxes, rec_res, _, _ = text_sys._ocr(img) + ocr_result[img_name] = [dt_boxes, rec_res] + save_result(os.path.join(args.output, "ocr.pickle"), ocr_result) + # run structure and save result + if img_name not in structure_result: + structure_res, _ = text_sys._structure(img) + structure_result[img_name] = structure_res + save_result(os.path.join(args.output, "structure.pickle"), structure_result) + dt_boxes, rec_res = ocr_result[img_name] + structure_res = structure_result[img_name] + # match ocr and structure + pred_html = text_sys.match(structure_res, dt_boxes, rec_res) + + pred_htmls.append(pred_html) + gt_htmls.append(gt_html) + + # print(pred_htmls) + # print(gt_htmls) + # compute teds + teds = TEDS(n_jobs=16, structure_only=True) + teds2 = TEDS(n_jobs=16) + scores = teds.batch_evaluate_html(gt_htmls, pred_htmls) + scores2 = teds2.batch_evaluate_html(gt_htmls, pred_htmls) + logger.info("s-teds: {}".format(sum(scores) / len(scores))) + logger.info("teds: {}".format(sum(scores2) / len(scores2))) + + +if __name__ == "__main__": + args = parse_args() + main(args.gt_path, args.image_dir, args) diff --git a/docling_ibm_models/slanet_1m/src/train.py b/docling_ibm_models/slanet_1m/src/train.py new file mode 100644 index 0000000..c2abe41 --- /dev/null +++ b/docling_ibm_models/slanet_1m/src/train.py @@ -0,0 +1,256 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.insert(0, os.path.abspath(os.path.join(__dir__, ".."))) + +import yaml +import paddle +import paddle.distributed as dist + +from paddleocr.ppocr.data import build_dataloader, set_signal_handlers +from modeling.architectures import build_model +from losses import build_loss +from optimizer import build_optimizer +from paddleocr.ppocr.postprocess import build_post_process +from metrics import build_metric +from paddleocr.ppocr.utils.save_load import load_model +from paddleocr.ppocr.utils.utility import set_seed +from modeling.architectures import apply_to_static +import program as program + +dist.get_world_size() + + +def main(config, device, logger, vdl_writer, seed): + # init dist environment + if config["Global"]["distributed"]: + dist.init_parallel_env() + + global_config = config["Global"] + + # build dataloader + set_signal_handlers() + train_dataloader = build_dataloader(config, "Train", device, logger, seed) + if len(train_dataloader) == 0: + logger.error( + "No Images in train dataset, please ensure\n" + + "\t1. The images num in the train label_file_list should be larger than or equal with batch size.\n" + + "\t2. The annotation file and path in the configuration file are provided normally." 
+ ) + return + + if config["Eval"]: + valid_dataloader = build_dataloader(config, "Eval", device, logger, seed) + else: + valid_dataloader = None + step_pre_epoch = len(train_dataloader) + + # build post process + post_process_class = build_post_process(config["PostProcess"], global_config) + + # build model + # for rec algorithm + if hasattr(post_process_class, "character"): + char_num = len(getattr(post_process_class, "character")) + if config["Architecture"]["algorithm"] in [ + "Distillation", + ]: # distillation model + for key in config["Architecture"]["Models"]: + if ( + config["Architecture"]["Models"][key]["Head"]["name"] == "MultiHead" + ): # for multi head + if config["PostProcess"]["name"] == "DistillationSARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "DistillationNRTRLabelDecode": + char_num = char_num - 3 + out_channels_list = {} + out_channels_list["CTCLabelDecode"] = char_num + # update SARLoss params + if ( + list(config["Loss"]["loss_config_list"][-1].keys())[0] + == "DistillationSARLoss" + ): + config["Loss"]["loss_config_list"][-1]["DistillationSARLoss"][ + "ignore_index" + ] = (char_num + 1) + out_channels_list["SARLabelDecode"] = char_num + 2 + elif any( + "DistillationNRTRLoss" in d + for d in config["Loss"]["loss_config_list"] + ): + out_channels_list["NRTRLabelDecode"] = char_num + 3 + + config["Architecture"]["Models"][key]["Head"][ + "out_channels_list" + ] = out_channels_list + else: + config["Architecture"]["Models"][key]["Head"][ + "out_channels" + ] = char_num + elif config["Architecture"]["Head"]["name"] == "MultiHead": # for multi head + if config["PostProcess"]["name"] == "SARLabelDecode": + char_num = char_num - 2 + if config["PostProcess"]["name"] == "NRTRLabelDecode": + char_num = char_num - 3 + out_channels_list = {} + out_channels_list["CTCLabelDecode"] = char_num + # update SARLoss params + if list(config["Loss"]["loss_config_list"][1].keys())[0] == "SARLoss": + if config["Loss"]["loss_config_list"][1]["SARLoss"] is None: + config["Loss"]["loss_config_list"][1]["SARLoss"] = { + "ignore_index": char_num + 1 + } + else: + config["Loss"]["loss_config_list"][1]["SARLoss"]["ignore_index"] = ( + char_num + 1 + ) + out_channels_list["SARLabelDecode"] = char_num + 2 + elif list(config["Loss"]["loss_config_list"][1].keys())[0] == "NRTRLoss": + out_channels_list["NRTRLabelDecode"] = char_num + 3 + config["Architecture"]["Head"]["out_channels_list"] = out_channels_list + else: # base rec model + config["Architecture"]["Head"]["out_channels"] = char_num + + if config["PostProcess"]["name"] == "SARLabelDecode": # for SAR model + config["Loss"]["ignore_index"] = char_num - 1 + + model = build_model(config["Architecture"]) + + use_sync_bn = config["Global"].get("use_sync_bn", False) + if use_sync_bn: + model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model) + logger.info("convert_sync_batchnorm") + + model = apply_to_static(model, config, logger) + + # build loss + loss_class = build_loss(config["Loss"]) + + # build optim + optimizer, lr_scheduler = build_optimizer( + config["Optimizer"], + epochs=config["Global"]["epoch_num"], + step_each_epoch=len(train_dataloader), + model=model, + ) + + # build metric + eval_class = build_metric(config["Metric"]) + + logger.info("train dataloader has {} iters".format(len(train_dataloader))) + if valid_dataloader is not None: + logger.info("valid dataloader has {} iters".format(len(valid_dataloader))) + + use_amp = config["Global"].get("use_amp", False) + amp_level = 
config["Global"].get("amp_level", "O2") + amp_dtype = config["Global"].get("amp_dtype", "float16") + amp_custom_black_list = config["Global"].get("amp_custom_black_list", []) + amp_custom_white_list = config["Global"].get("amp_custom_white_list", []) + if use_amp: + AMP_RELATED_FLAGS_SETTING = { + "FLAGS_max_inplace_grad_add": 8, + } + if paddle.is_compiled_with_cuda(): + AMP_RELATED_FLAGS_SETTING.update( + { + "FLAGS_cudnn_batchnorm_spatial_persistent": 1, + "FLAGS_gemm_use_half_precision_compute_type": 0, + } + ) + paddle.set_flags(AMP_RELATED_FLAGS_SETTING) + scale_loss = config["Global"].get("scale_loss", 1.0) + use_dynamic_loss_scaling = config["Global"].get( + "use_dynamic_loss_scaling", False + ) + scaler = paddle.amp.GradScaler( + init_loss_scaling=scale_loss, + use_dynamic_loss_scaling=use_dynamic_loss_scaling, + ) + if amp_level == "O2": + model, optimizer = paddle.amp.decorate( + models=model, + optimizers=optimizer, + level=amp_level, + master_weight=True, + dtype=amp_dtype, + ) + else: + scaler = None + + # load pretrain model + pre_best_model_dict = load_model( + config, model, optimizer, config["Architecture"]["model_type"] + ) + + if config["Global"]["distributed"]: + model = paddle.DataParallel(model) + # start train + program.train( + config, + train_dataloader, + valid_dataloader, + device, + model, + loss_class, + optimizer, + lr_scheduler, + post_process_class, + eval_class, + pre_best_model_dict, + logger, + step_pre_epoch, + vdl_writer, + scaler, + amp_level, + amp_custom_black_list, + amp_custom_white_list, + amp_dtype, + ) + + +def test_reader(config, device, logger): + loader = build_dataloader(config, "Train", device, logger) + import time + + starttime = time.time() + count = 0 + try: + for data in loader(): + count += 1 + if count % 1 == 0: + batch_time = time.time() - starttime + starttime = time.time() + logger.info( + "reader: {}, {}, {}".format(count, len(data[0]), batch_time) + ) + except Exception as e: + logger.info(e) + logger.info("finish reader: {}, Success!".format(count)) + + +if __name__ == "__main__": + config, device, logger, vdl_writer = program.preprocess(is_train=True) + seed = config["Global"]["seed"] if "seed" in config["Global"] else 1024 + set_seed(seed) + main(config, device, logger, vdl_writer, seed) + # test_reader(config, device, logger) From ff6d781e3e6a7000ba2696df066bb7dda61f4229 Mon Sep 17 00:00:00 2001 From: dimitri009 Date: Mon, 16 Jun 2025 15:12:58 +0200 Subject: [PATCH 2/3] Add predict_table for inference --- docling_ibm_models/slanet_1m/.gitignore | 1 - docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF | Bin 0 -> 175956 bytes .../slanet_1m/Fonts/ARIALNB.TTF | Bin 0 -> 180740 bytes .../slanet_1m/Fonts/ARIALNBI.TTF | Bin 0 -> 180084 bytes .../slanet_1m/Fonts/ARIALNI.TTF | Bin 0 -> 181124 bytes docling_ibm_models/slanet_1m/Fonts/arial.ttf | Bin 0 -> 1045960 bytes .../slanet_1m/Fonts/arialbd.ttf | Bin 0 -> 990208 bytes .../slanet_1m/Fonts/arialbi.ttf | Bin 0 -> 730804 bytes docling_ibm_models/slanet_1m/Fonts/ariali.ttf | Bin 0 -> 727152 bytes docling_ibm_models/slanet_1m/Fonts/ariblk.ttf | Bin 0 -> 167592 bytes .../slanet_1m/dict_table/en_dict.txt | 95 + .../slanet_1m/dict_table/ppocr_keys_v1.txt | 6623 +++++++++++++++++ .../slanet_1m/dict_table/table_dict.txt | 277 + .../dict_table/table_structure_dict.txt | 28 + .../en_PP-OCRv3_det_infer/inference.pdiparams | Bin 0 -> 2377917 bytes .../inference.pdiparams.info | Bin 0 -> 26392 bytes .../en_PP-OCRv3_det_infer/inference.pdmodel | Bin 0 -> 1590133 bytes 
.../en_PP-OCRv3_rec_infer/inference.pdiparams | Bin 0 -> 8916816 bytes .../inference.pdiparams.info | Bin 0 -> 21964 bytes .../en_PP-OCRv3_rec_infer/inference.pdmodel | Bin 0 -> 1020915 bytes .../inference.pdiparams | Bin 0 -> 7580402 bytes .../inference.pdiparams.info | Bin 0 -> 35261 bytes .../inference.pdmodel | Bin 0 -> 2574256 bytes .../slanet_1m/model_final/inference.pdiparams | Bin 0 -> 7580402 bytes .../model_final/inference.pdiparams.info | Bin 0 -> 35261 bytes .../slanet_1m/model_final/inference.pdmodel | Bin 0 -> 379778 bytes .../slanet_1m/model_final/inference.yml | 72 + docling_ibm_models/slanet_1m/predict_table.py | 254 + 28 files changed, 7349 insertions(+), 1 deletion(-) create mode 100644 docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF create mode 100644 docling_ibm_models/slanet_1m/Fonts/ARIALNB.TTF create mode 100644 docling_ibm_models/slanet_1m/Fonts/ARIALNBI.TTF create mode 100644 docling_ibm_models/slanet_1m/Fonts/ARIALNI.TTF create mode 100644 docling_ibm_models/slanet_1m/Fonts/arial.ttf create mode 100644 docling_ibm_models/slanet_1m/Fonts/arialbd.ttf create mode 100644 docling_ibm_models/slanet_1m/Fonts/arialbi.ttf create mode 100644 docling_ibm_models/slanet_1m/Fonts/ariali.ttf create mode 100644 docling_ibm_models/slanet_1m/Fonts/ariblk.ttf create mode 100644 docling_ibm_models/slanet_1m/dict_table/en_dict.txt create mode 100644 docling_ibm_models/slanet_1m/dict_table/ppocr_keys_v1.txt create mode 100644 docling_ibm_models/slanet_1m/dict_table/table_dict.txt create mode 100644 docling_ibm_models/slanet_1m/dict_table/table_structure_dict.txt create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdiparams.info create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_det_infer/inference.pdmodel create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdiparams.info create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_PP-OCRv3_rec_infer/inference.pdmodel create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams.info create mode 100644 docling_ibm_models/slanet_1m/inference_table/en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel create mode 100644 docling_ibm_models/slanet_1m/model_final/inference.pdiparams create mode 100644 docling_ibm_models/slanet_1m/model_final/inference.pdiparams.info create mode 100644 docling_ibm_models/slanet_1m/model_final/inference.pdmodel create mode 100644 docling_ibm_models/slanet_1m/model_final/inference.yml create mode 100644 docling_ibm_models/slanet_1m/predict_table.py diff --git a/docling_ibm_models/slanet_1m/.gitignore b/docling_ibm_models/slanet_1m/.gitignore index c12799e..85db5a4 100644 --- a/docling_ibm_models/slanet_1m/.gitignore +++ b/docling_ibm_models/slanet_1m/.gitignore @@ -31,5 +31,4 @@ inference_results/ output/ data/ /data -/output evaluation/ diff --git a/docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF b/docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF new file mode 100644 index 0000000000000000000000000000000000000000..94907a3dfc67bedb2ba9f245afe628ae98c07f1f GIT binary patch literal 175956 
[base85 binary payload for docling_ibm_models/slanet_1m/Fonts/ARIALN.TTF omitted; the diffstat above records the file as Bin 0 -> 175956 bytes]