From d837a02e7cf9071512201fb2bae5b67b070a2d02 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Tue, 1 Jul 2025 18:35:12 -0700 Subject: [PATCH] remove unused examples Signed-off-by: Sun, Xuehao --- .pre-commit-config.yaml | 3 - examples/.config/model_params_onnxrt.json | 219 +-------- examples/.config/model_params_onnxrt_win.json | 7 - examples/README.md | 126 ----- ..._Started_Notebook_of_INC_for_Pytorch.ipynb | 329 ------------- examples/notebook/pytorch/benchmark.py | 109 ----- examples/notebook/pytorch/requirements.txt | 11 - .../unet/quantization/ptq_static/README.md | 37 -- .../unet/quantization/ptq_static/main.py | 102 ---- .../quantization/ptq_static/prepare_model.py | 73 --- .../quantization/ptq_static/requirements.txt | 4 - .../quantization/ptq_static/run_benchmark.sh | 40 -- .../unet/quantization/ptq_static/run_quant.sh | 39 -- .../quantization/ptq_dynamic/README.md | 54 --- .../quantization/ptq_dynamic/main.py | 337 -------------- .../quantization/ptq_dynamic/prepare_data.sh | 6 - .../quantization/ptq_dynamic/prepare_model.py | 21 - .../quantization/ptq_dynamic/requirements.txt | 12 - .../quantization/ptq_dynamic/run_benchmark.sh | 46 -- .../ptq_dynamic/run_fine_tuning.sh | 47 -- .../quantization/ptq_dynamic/run_quant.sh | 42 -- .../quantization/ptq_static/README.md | 55 --- .../quantization/ptq_static/main.py | 338 -------------- .../quantization/ptq_static/prepare_data.sh | 6 - .../quantization/ptq_static/prepare_model.py | 21 - .../quantization/ptq_static/requirements.txt | 12 - .../quantization/ptq_static/run_benchmark.sh | 46 -- .../ptq_static/run_fine_tuning.sh | 47 -- .../quantization/ptq_static/run_quant.sh | 46 -- .../quantization/ptq_dynamic/README.md | 69 --- .../quantization/ptq_dynamic/main.py | 431 ----------------- .../quantization/ptq_dynamic/prepare_data.sh | 34 -- .../quantization/ptq_dynamic/prepare_model.py | 97 ---- .../quantization/ptq_dynamic/requirements.txt | 12 - .../quantization/ptq_dynamic/run_benchmark.sh | 105 ----- .../quantization/ptq_dynamic/run_quant.sh | 130 ------ .../quantization/ptq_static/README.md | 72 --- .../quantization/ptq_static/main.py | 435 ------------------ .../quantization/ptq_static/prepare_data.sh | 34 -- .../quantization/ptq_static/prepare_model.py | 96 ---- .../quantization/ptq_static/requirements.txt | 12 - .../quantization/ptq_static/run_benchmark.sh | 105 ----- .../quantization/ptq_static/run_quant.sh | 133 ------ 43 files changed, 1 insertion(+), 3999 deletions(-) delete mode 100644 examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb delete mode 100644 examples/notebook/pytorch/benchmark.py delete mode 100644 examples/notebook/pytorch/requirements.txt delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py delete mode 100644 
examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_quant.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b94000b727b..c5ef44e8e66 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -107,8 +107,6 @@ repos: files: (.*\.py)$ exclude: | (?x)^( - neural_compressor/conf/config.py| - neural_compressor/conf/pythonic_config.py| examples/.+| neural_compressor/torch/algorithms/fp8_quant/.+| test/3x/torch/.+ @@ 
-124,7 +122,6 @@ repos: exclude: | (?x)^( examples/.+| - docs/source-app| neural_compressor/torch/algorithms/fp8_quant/.+| test/3x/torch/.+ )$ diff --git a/examples/.config/model_params_onnxrt.json b/examples/.config/model_params_onnxrt.json index e5059e4d9bb..3a23604fb7d 100644 --- a/examples/.config/model_params_onnxrt.json +++ b/examples/.config/model_params_onnxrt.json @@ -483,13 +483,6 @@ "main_script": "main.py", "batch_size": 1 }, - "unet": { - "model_src_dir": "image_recognition/unet/quantization/ptq_static", - "dataset_location": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", - "input_model": "/tf_dataset2/models/onnx/unet/unet-export.onnx", - "main_script": "main.py", - "batch_size": 1 - }, "BiDAF_dynamic": { "model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad/dev-v1.1.json", @@ -497,118 +490,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_bert-base-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-base-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_roberta-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlm-roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlm-roberta-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_camembert-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_camembert-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx", - "main_script": "main.py", - 
"batch_size": 8 - }, - "hf_MiniLM-L12-H384-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L12-H384-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_distilbert-base-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_distilbert-base-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_albert-base-v2_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_albert-base-v2": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L6-H384-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L6-H384-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_spanbert_dynamic": { "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", @@ -637,76 +518,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_bert-base-cased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_dynamic/bert-base-cased-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-base-cased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": 
"/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_static/bert-base-cased-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_electra-small-discriminator_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_electra-small-discriminator": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-mini_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-mini": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlnet-base-cased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlnet-base-cased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bart-large_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bart-large": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_distilbert-base-uncased-distilled_dynamic": { "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", @@ -917,20 +728,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_deberta_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_deberta/deberta-v3-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 1 - }, - "hf_deberta": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": 
"/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_deberta/deberta-v3-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 1 - }, "table_transformer_structure_recognition": { "model_src_dir": "object_detection/table_transformer/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/PubTables-1M", @@ -944,20 +741,6 @@ "input_model": "/tf_dataset2/models/onnx/table-transformer/pubtables1m_detection_detr_r18.onnx", "main_script": "patch", "batch_size": 1 - }, - "hf_codebert": { - "model_src_dir": "nlp/huggingface_model/code_detection/quantization/ptq_static", - "dataset_location": "/tf_dataset2/datasets/devign_dataset/valid.jsonl", - "input_model": "/tf_dataset2/models/onnx/hf_codebert/codebert-model.onnx", - "main_script": "main.py", - "batch_size": 1 - }, - "hf_codebert_dynamic": { - "model_src_dir": "nlp/huggingface_model/code_detection/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset2/datasets/devign_dataset/valid.jsonl", - "input_model": "/tf_dataset2/models/onnx/hf_codebert/codebert-model.onnx", - "main_script": "main.py", - "batch_size": 1 - } + } } } diff --git a/examples/.config/model_params_onnxrt_win.json b/examples/.config/model_params_onnxrt_win.json index 8c461f9064b..67921c88031 100644 --- a/examples/.config/model_params_onnxrt_win.json +++ b/examples/.config/model_params_onnxrt_win.json @@ -29,13 +29,6 @@ "input_model": "models/onnx/DUC/ResNet101-DUC-12.onnx", "main_script": "main.py", "batch_size": 1 - }, - "hf_roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "glue_data/MRPC", - "input_model": "models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 } } } \ No newline at end of file diff --git a/examples/README.md b/examples/README.md index d7b2c7ea88a..719873f6b4a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,8 +7,6 @@ Intel® Neural Compressor validated examples with multiple compression technique * [Quick Get Started Notebook of Intel® Neural Compressor for Tensorflow](/examples/notebook/tensorflow/resnet/resnet_quantization.ipynb) -* [Quick Get Started Notebook of Intel® Neural Compressor for Pytorch](/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb) - # Helloworld Examples * [torch_llm](/examples/helloworld/torch_llm): apply the weight-only quantization to LLMs. * [torch_non_llm](/examples/helloworld/torch_non_llm): apply the static quantization to non-LLMs. 
@@ -1109,18 +1107,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization qlinearops - - CodeBert - Natural Language Processing - Post-Training Static Quantization - qlinearops - - - CodeBert - Natural Language Processing - Post-Training Dynamic Quantization - integerops - BERT base MRPC Natural Language Processing @@ -1175,118 +1161,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Dynamic Quantization integerops - - BERT base uncased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Roberta base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - XLM Roberta base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Camembert base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - MiniLM L12 H384 uncased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - DistilBERT base uncased SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Albert base v2 SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - MiniLM L6 H384 uncased SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BERT base cased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Electra small discriminator MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BERT mini MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Xlnet base cased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BART large MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - DeBERTa v3 base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - Spanbert SQuAD (HuggingFace) Natural Language Processing diff --git a/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb b/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb deleted file mode 100644 index f504aea0b10..00000000000 --- a/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb +++ /dev/null @@ -1,329 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Quick Get Started Notebook of Intel® Neural Compressor for Pytorch\n", - "\n", - "\n", - "This notebook is designed to provide an easy-to-follow guide for getting started with the [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) library for [pytorch](https://github.com/pytorch/pytorch) framework.\n", - "\n", - "In the following sections, we are going to use a DistilBert model fine-tuned on MRPC as an example to show how to apply 
post-training quantization on [transformers](https://github.com/huggingface/transformers) models using the INC library.\n", - "\n", - "\n", - "The main objectives of this notebook are:\n", - "\n", - "1. Prerequisite: Prepare the necessary environment, model, and dataset.\n", - "2. Quantization with INC: Walk through the step-by-step process of applying post-training quantization.\n", - "3. Benchmark with INC: Evaluate the performance of the FP32 and INT8 models.\n", - "\n", - "\n", - "## 1. Prerequisite\n", - "\n", - "### 1.1 Environment\n", - "\n", - "If you have Jupyter Notebook, you may directly run this notebook. We will use pip to install or upgrade [neural-compressor](https://github.com/intel/neural-compressor), [pytorch](https://github.com/pytorch/pytorch), and other required packages.\n", - "\n", - "Otherwise, you can set up a new environment. First, install [Anaconda](https://www.anaconda.com/distribution/). Then open an Anaconda prompt window and run the following commands:\n", - "\n", - "```shell\n", - "conda create -n inc_notebook python==3.8\n", - "conda activate inc_notebook\n", - "pip install jupyter\n", - "jupyter notebook\n", - "```\n", - "The last command will launch Jupyter Notebook, and we can open this notebook in the browser to continue.\n", - "\n", - "Then, let's install the necessary packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install neural-compressor from source\n", - "import sys\n", - "!git clone https://github.com/intel/neural-compressor.git\n", - "%cd ./neural-compressor\n", - "!{sys.executable} -m pip install -r requirements.txt\n", - "!{sys.executable} setup.py install\n", - "%cd ..\n", - "\n", - "# or install the stable version from pypi\n", - "!{sys.executable} -m pip install neural-compressor\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install other packages used in this notebook.\n", - "!{sys.executable} -m pip install -r requirements.txt\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.2 Load Dataset\n", - "\n", - "The General Language Understanding Evaluation (GLUE) benchmark is a group of nine classification tasks on sentences or pairs of sentences, which are:\n", - "\n", - "- [CoLA](https://nyu-mll.github.io/CoLA/) (Corpus of Linguistic Acceptability) Determine if a sentence is grammatically correct or not.\n", - "- [MNLI](https://arxiv.org/abs/1704.05426) (Multi-Genre Natural Language Inference) Determine if a sentence entails, contradicts, or is unrelated to a given hypothesis. This dataset has two versions: one with the validation and test sets coming from the same distribution, and another, called mismatched, where the validation and test sets use out-of-domain data.\n", - "- [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398) (Microsoft Research Paraphrase Corpus) Determine if two sentences are paraphrases of one another or not.\n", - "- [QNLI](https://rajpurkar.github.io/SQuAD-explorer/) (Question-answering Natural Language Inference) Determine if the answer to a question is in the second sentence or not.
This dataset is built from the SQuAD dataset.\n", - "- [QQP](https://data.quora.com/First-Quora-Dataset-Release-Question-Pairs) (Quora Question Pairs2) Determine if two questions are semantically equivalent or not.\n", - "- [RTE](https://aclweb.org/aclwiki/Recognizing_Textual_Entailment) (Recognizing Textual Entailment) Determine if a sentence entails a given hypothesis or not.\n", - "- [SST-2](https://nlp.stanford.edu/sentiment/index.html) (Stanford Sentiment Treebank) Determine if the sentence has a positive or negative sentiment.\n", - "- [STS-B](http://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark) (Semantic Textual Similarity Benchmark) Determine the similarity of two sentences with a score from 1 to 5.\n", - "- [WNLI](https://cs.nyu.edu/faculty/davise/papers/WinogradSchemas/WS.html) (Winograd Natural Language Inference) Determine if a sentence with an anonymous pronoun and a sentence with this pronoun replaced are entailed or not. This dataset is built from the Winograd Schema Challenge dataset.\n", - "\n", - "Here, we use MRPC task. We download and load the required dataset from hub." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import datasets\n", - "import numpy as np\n", - "import transformers\n", - "from datasets import load_dataset, load_metric\n", - "from transformers import (\n", - " AutoConfig,\n", - " AutoModelForSequenceClassification,\n", - " AutoTokenizer,\n", - " EvalPrediction,\n", - " Trainer,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "task_name = 'mrpc'\n", - "raw_datasets = load_dataset(\"glue\", task_name)\n", - "label_list = raw_datasets[\"train\"].features[\"label\"].names\n", - "num_labels = len(label_list)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.3 Prepare Model\n", - "Download the pretrained model [textattack/distilbert-base-uncased-MRPC](https://huggingface.co/textattack/distilbert-base-uncased-MRPC) to a pytorch model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = 'textattack/distilbert-base-uncased-MRPC'\n", - "\n", - "config = AutoConfig.from_pretrained(\n", - " model_name,\n", - " num_labels=num_labels,\n", - " finetuning_task=task_name,\n", - " use_auth_token=None,\n", - ")\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\n", - " model_name,\n", - " use_auth_token=None,\n", - ")\n", - "\n", - "model = AutoModelForSequenceClassification.from_pretrained(\n", - " model_name,\n", - " from_tf=False,\n", - " config=config,\n", - " use_auth_token=None,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.4 Dataset Preprocessing\n", - "We need to preprocess the raw dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sentence1_key, sentence2_key = (\"sentence1\", \"sentence2\")\n", - "padding = \"max_length\"\n", - "label_to_id = None\n", - "max_seq_length = 128\n", - "\n", - "def preprocess_function(examples):\n", - " args = (\n", - " (examples[sentence1_key], examples[sentence2_key])\n", - " )\n", - " result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True)\n", - " return result\n", - "\n", - "raw_datasets = raw_datasets.map(preprocess_function, batched=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Quantization with Intel® Neural Compressor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2.1 Define metric, evaluation function, and dataloader\n", - "\n", - "In this part, we define a GLUE metric and use it to build an evaluation function for INC.\n", - "\n", - "Refer to the doc [metric.md](https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#build-custom-metric-with-python-api) for how to build your own metric.\n", - "Refer to the docs [dataset.md](https://github.com/intel/neural-compressor/blob/master/docs/source/dataset.md#user-specific-dataset) and [dataloader.md](https://github.com/intel/neural-compressor/blob/master/docs/source/dataloader.md#build-custom-dataloader-with-python-apiapi) for how to build your own dataset and dataloader." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_dataset = raw_datasets[\"validation\"]\n", - "metric = load_metric(\"glue\", task_name)\n", - "data_collator = None\n", - "\n", - "def compute_metrics(p: EvalPrediction):\n", - " preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions\n", - " preds = np.argmax(preds, axis=1)\n", - " result = metric.compute(predictions=preds, references=p.label_ids)\n", - " if len(result) > 1:\n", - " result[\"combined_score\"] = np.mean(list(result.values())).item()\n", - " return result\n", - "\n", - "# Initialize our Trainer\n", - "trainer = Trainer(\n", - " model=model,\n", - " train_dataset=None,\n", - " eval_dataset=eval_dataset,\n", - " compute_metrics=compute_metrics,\n", - " tokenizer=tokenizer,\n", - " data_collator=data_collator,\n", - ")\n", - "\n", - "eval_dataloader = trainer.get_eval_dataloader()\n", - "\n", - "# for transformers 4.31.0: accelerate dataloader\n", - "# please use the code below to avoid errors\n", - "if eval_dataloader.batch_size is None:\n", - " def _build_inc_dataloader(dataloader):\n", - " class INCDataLoader:\n", - " __iter__ = dataloader.__iter__\n", - " def __init__(self) -> None:\n", - " self.dataloader = dataloader\n", - " self.batch_size = dataloader.total_batch_size\n", - " return INCDataLoader()\n", - " eval_dataloader = _build_inc_dataloader(eval_dataloader)\n", - "batch_size = eval_dataloader.batch_size\n", - "\n", - "def take_eval_steps(model, trainer, save_metrics=False):\n", - " trainer.model = model\n", - " metrics = trainer.evaluate()\n", - " bert_task_acc_keys = ['eval_f1', 'eval_accuracy', 'eval_matthews_correlation',\n", - " 'eval_pearson', 'eval_mcc', 'eval_spearmanr']\n", - " for key in bert_task_acc_keys:\n", - " if key in metrics.keys():\n", - " throughput = metrics.get(\"eval_samples_per_second\")\n", - " print('Batch size = %d' % batch_size)\n", - " print(\"Final Eval {} Accuracy: {}\".format(key, metrics[key]))\n", - " print(\"Latency: %.3f ms\" % (1000 / throughput))\n", - " print(\"Throughput: {} samples/sec\".format(throughput))\n", - " return metrics[key]\n", - " assert False, \"No metric returned, please check the inference metric!\"\n", - "\n", - "def eval_func(model):\n", - " return take_eval_steps(model, trainer)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2.2 Run Quantization\n", - "\n", - "Now we can finally start to quantize the model.\n", - "\n", - "To start, we need to set the configuration for post-training quantization using the `PostTrainingQuantConfig` class. Once the configuration is set, we can proceed to the next step by calling the `quantization.fit()` function.
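Beyond the basic setup, the tuning loop can also be bounded by an explicit accuracy target in addition to a trial budget. A minimal sketch, assuming the 2.x `TuningCriterion`/`AccuracyCriterion` API (keyword names may vary slightly between releases):

```python
from neural_compressor.config import (
    AccuracyCriterion,
    PostTrainingQuantConfig,
    TuningCriterion,
)

# Try at most 600 configurations, and only accept a quantized model whose
# accuracy drops by no more than 1% relative to the FP32 baseline.
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(criterion="relative", tolerable_loss=0.01)
conf = PostTrainingQuantConfig(
    approach="static",
    tuning_criterion=tuning_criterion,
    accuracy_criterion=accuracy_criterion,
)
```

Either way, the assembled `conf` is then handed to `quantization.fit()`.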
This function performs the quantization process on the model and will return the best quantized model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neural_compressor.quantization import fit\n", - "from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion\n", - "tuning_criterion = TuningCriterion(max_trials=600)\n", - "conf = PostTrainingQuantConfig(approach=\"static\", tuning_criterion=tuning_criterion)\n", - "q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)\n", - "q_model.save(\"./saved_results\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Benchmark with Intel® Neural Compressor\n", - "\n", - "INC provides a benchmark feature to measure the model performance with the objective settings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fp32 benchmark\n", - "!{sys.executable} benchmark.py 2>&1|tee fp32_benchmark.log\n", - "\n", - "# int8 benchmark\n", - "!{sys.executable} benchmark.py --input_model saved_results 2>&1|tee int8_benchmark.log\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.9.12" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/notebook/pytorch/benchmark.py b/examples/notebook/pytorch/benchmark.py deleted file mode 100644 index 71b6a0f04f1..00000000000 --- a/examples/notebook/pytorch/benchmark.py +++ /dev/null @@ -1,109 +0,0 @@ -import datasets -import numpy as np -import transformers -import logging -import argparse -from datasets import load_dataset, load_metric -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer, - EvalPrediction, - Trainer, -) - -from neural_compressor.config import BenchmarkConfig -from neural_compressor import benchmark - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -task_name = 'mrpc' -raw_datasets = load_dataset("glue", task_name) -label_list = raw_datasets["train"].features["label"].names -num_labels = len(label_list) - -sentence1_key, sentence2_key = ("sentence1", "sentence2") -padding = "max_length" -label_to_id = None -max_seq_length = 128 -model_name = 'textattack/distilbert-base-uncased-MRPC' - -config = AutoConfig.from_pretrained( - model_name, - num_labels=num_labels, - finetuning_task=task_name, - use_auth_token=None, -) - -tokenizer = AutoTokenizer.from_pretrained( - model_name, - use_auth_token=None, -) - -model = AutoModelForSequenceClassification.from_pretrained( - model_name, - from_tf=False, - config=config, - use_auth_token=None, -) - -def preprocess_function(examples): - args = ( - (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) - return result - -raw_datasets = raw_datasets.map(preprocess_function, batched=True) -eval_dataset = raw_datasets["validation"] -metric = load_metric("glue", task_name) -data_collator = None - -def compute_metrics(p: EvalPrediction): - preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions - preds = np.argmax(preds, axis=1) - result = metric.compute(predictions=preds, references=p.label_ids) - if 
len(result) > 1: - result["combined_score"] = np.mean(list(result.values())).item() - return result - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(__doc__) - parser.add_argument("--input_model", type=str, required=False, default=None) - args = parser.parse_args() - - # Initialize our Trainer - trainer = Trainer( - model=model, - train_dataset=None, - eval_dataset=eval_dataset, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, - ) - - eval_dataloader = trainer.get_eval_dataloader() - if eval_dataloader.batch_size is None: - def _build_inc_dataloader(dataloader): - class INCDataLoader: - __iter__ = dataloader.__iter__ - def __init__(self) -> None: - self.dataloader = dataloader - self.batch_size = dataloader.total_batch_size - return INCDataLoader() - eval_dataloader = _build_inc_dataloader(eval_dataloader) - batch_size = eval_dataloader.batch_size - - if args.input_model: - from neural_compressor.utils.pytorch import load - model = load(args.input_model, model, dataloader=eval_dataloader) - - b_conf = BenchmarkConfig(warmup=5, - iteration=100, - cores_per_instance=4, - num_of_instance=1) - benchmark.fit(model, b_conf, b_dataloader=eval_dataloader) diff --git a/examples/notebook/pytorch/requirements.txt b/examples/notebook/pytorch/requirements.txt deleted file mode 100644 index aa1af71d2b3..00000000000 --- a/examples/notebook/pytorch/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -torch>=1.9.0 -transformers>=4.16.0 -accelerate -sympy -numpy -sentencepiece!=0.1.92 -protobuf<=3.20.3 -datasets>=1.1.3 -scipy -scikit-learn -Keras-Preprocessing diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md deleted file mode 100644 index d7f119e043c..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# Step-by-Step - -This is an experimental example to quantize unet model. We use dummy data to do quantization and evaluation, so the accuracy is not guaranteed. - -# Prerequisite - -## 1. Environment - -```shell -pip install neural-compressor -pip install -r requirements.txt -``` - -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Model - -```bash -python prepare_model.py --input_model='CompVis/stable-diffusion-v1-4' --output_model='unet-export.onnx' -``` - -# Run - -## 1. Quantization - -```bash -bash run_quant.sh --input_model=path/to/model \ # model path as *.onnx - --output_model=path/to/save -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --batch_size=batch_size \ - --mode=performance -``` diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py deleted file mode 100644 index a949faeef48..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - - -import logging -import argparse - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -class Dataloader: - def __init__(self, batch_size): - self.batch_size = batch_size - shape = [[batch_size, 4, 64, 64], [batch_size], [batch_size, 77, 768]] - dtype = ['float32', 'float32', 'float32'] - self.dataset = [] - for idx in range(0, len(shape)): - tensor = np.random.uniform(size=shape[idx]) - tensor = tensor.astype(dtype[idx]) - self.dataset.append(tensor) - - def __iter__(self): - yield self.dataset, 0 - -if __name__ == "__main__": - logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained model on onnx file" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--quant_format', - type=str, - default='default', - choices=['default', 'QDQ', 'QOperator'], - help="quantization format" - ) - parser.add_argument( - "--batch_size", - default=1, - type=int, - ) - args = parser.parse_args() - - dataloader = Dataloader(args.batch_size) - - if args.benchmark and args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) - fit(args.model_path, conf, b_dataloader=dataloader) - if args.tune: - from neural_compressor import quantization, PostTrainingQuantConfig - config = PostTrainingQuantConfig(quant_format=args.quant_format, recipes={'graph_optimization_level':'ENABLE_EXTENDED'}) - q_model = quantization.fit(args.model_path, config, calib_dataloader=dataloader) - - q_model.save(args.output_model) diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py deleted file mode 100644 index 18e79ff6cd3..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py +++ /dev/null @@ -1,73 +0,0 @@ -import argparse -import os -import shutil -import subprocess - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default='CompVis/stable-diffusion-v1-4') - parser.add_argument("--output_model", type=str, required=True) - return parser.parse_args() - -def move_and_rename_model(source_folder, destination_folder): - if not 
os.path.exists(source_folder): - raise RuntimeError("{} path does not exist".format(source_folder)) - for file_name in os.listdir(source_folder): - source_file = os.path.join(source_folder, file_name) - destination_file = os.path.join(destination_folder, file_name) - - if os.path.isdir(source_file): - continue - - shutil.move(source_file, destination_file) - - if file_name == "model.onnx": - new_file_name = "unet-export.onnx" - new_file_path = os.path.join(destination_folder, new_file_name) - os.rename(destination_file, new_file_path) - -def prepare_model(input_model, output_model): - # Clone diffusers and run its conversion script to export the Stable Diffusion UNet to ONNX. - print("\nexport model...") - - export_file = "prepare_unet" - subprocess.run( - [ - "git", - "clone", - "https://github.com/huggingface/diffusers.git", - ], - stdout=subprocess.PIPE, - text=True, - ) - subprocess.run( - ["pip", "install", "--upgrade", "diffusers[torch]", "transformers"], - stdout=subprocess.PIPE, - text=True, - ) - subprocess.run( - [ - "python", - "diffusers/scripts/convert_stable_diffusion_checkpoint_to_onnx.py", - "--model_path", - input_model, - "--output_path", - export_file, - ], - stdout=subprocess.PIPE, - text=True, - ) - - move_and_rename_model(os.path.join(export_file, "unet"), os.path.dirname(output_model)) - try: - shutil.rmtree(export_file, ignore_errors=True) - except OSError as e: - raise e - - assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" - - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt deleted file mode 100644 index b0c7e4bab62..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -protobuf==3.20.3 diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh deleted file mode 100644 index 0f5384d8e63..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_path ${input_model} \ - --mode=${mode} \ - --batch_size ${batch_size-1} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh deleted file mode 100644 index 8e6133f8bb0..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --quant_format ${quant_format-default} \ - --tune -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md deleted file mode 100644 index ea1c76aa43e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md +++ /dev/null @@ -1,54 +0,0 @@ -Step-by-Step -============ - -This example quantizes the [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) model fine-tuned on the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - - -## 2. Prepare Dataset -Run the `prepare_data.sh` script to download the dataset into the `dataset` folder and pre-process it: - -```shell -bash prepare_data.sh -``` -## 3. Prepare Model - -Fine-tune the model on the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. -``` -bash run_fine_tuning.sh --train_dataset_location=./dataset/train.jsonl --dataset_location=./dataset/valid.jsonl --fine_tune -``` - -Export the model to ONNX format. -```bash -# By default, the input model path is `checkpoint-best-acc/`. -python prepare_model.py --input_model=./checkpoint-best-acc --output_model=./codebert-exported-onnx -``` - -# Run - -## 1. Quantization - -Dynamic quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data -``` - -## 2.
Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py deleted file mode 100644 index b9c023458a0..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py +++ /dev/null @@ -1,337 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import argparse -import logging -from typing import List - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) - - -def load_dataset_from_local(file_path, model_name_or_path): - """Load the raw data from local.""" - import json - - import torch - - def read_data(file_path): - texts, labels = [], [] - with open(file_path, "r") as f: - for i, line in enumerate(f): - js = json.loads(line.strip()) - code = " ".join(js["func"].split()) - texts.append(code) - labels.append(js["target"]) - return texts, labels - - texts, labels = read_data(file_path) - - # tokenize the raw data - from transformers import AutoTokenizer - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - encodings = tokenizer( - texts, return_tensors="pt", truncation=True, padding="max_length" - ) - - class CodeDataset(torch.utils.data.Dataset): - def __init__(self, encodings, labels): - self.encodings = encodings - self.labels = labels - - def __getitem__(self, idx): - item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} - item["labels"] = torch.tensor(self.labels[idx]) - return item - - def __len__(self): - return len(self.labels) - - dataset = CodeDataset(encodings, labels) - return dataset - - -# evaluation func for fine-tuning -def evaluate(model, val_loader): - import torch - - print("*** eval model .. 
") - all_labels = [] - all_preds = [] - for idx, batch in enumerate(val_loader): - model.eval() - with torch.no_grad(): - labels = batch.pop("labels") - inputs = batch - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(f"{idx} batch evaluation accuracy: {cur_acc}") - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print("Overall evaluation accuracy: ", cur_acc) - return cur_acc - - -def fine_tune(args): - import os - import numpy as np - import torch - from torch.utils.data import DataLoader - from transformers import AdamW, AutoModelForSequenceClassification - - train_dataset = load_dataset_from_local( - args.train_data_path, args.model_name_or_path - ) - val_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path) - val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True) - train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) - - optim = AdamW(model.parameters(), lr=5e-5) - - results = {"eval_acc": 0} - global_step = -1 - for epoch in range(3): - all_labels = [] - all_preds = [] - for idx, batch in enumerate(train_loader): - global_step += 1 - optim.zero_grad() - labels = batch.pop("labels") - inputs = batch - model.train() - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(" Current acc:%s", round(cur_acc, 4)) - loss.backward() - print(f" Loss: {loss.item()}") - optim.step() - - if global_step % 100 == 0: - best_acc = results["eval_acc"] - cur_acc = evaluate(model, val_loader) - if cur_acc > best_acc: - results["eval_acc"] = cur_acc - best_acc = results["eval_acc"] - print(" Best acc:%s", round(best_acc, 4)) - checkpoint_prefix = "checkpoint-best-acc" - output_dir = os.path.join("{}".format(checkpoint_prefix)) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - model_to_save = model.module if hasattr(model, "module") else model - model.config.to_json_file("{}/config.json".format(checkpoint_prefix)) - output_dir = os.path.join(output_dir, "{}".format("pytorch_model.bin")) - torch.save(model_to_save.state_dict(), output_dir) - print("Saving model checkpoint to %s", output_dir) - - -class ONNXRTDataset: - def __init__(self, model_path, dataset): - self.inputs = [inp.name for inp in onnx.load(model_path).graph.input] - self.dataset = dataset - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = self.dataset[index] - labels = batch["labels"].detach().cpu().numpy() - batch.pop("labels") - inputs = [batch["input_ids"].numpy(), batch["attention_mask"].numpy()] - return inputs, labels - - -def get_dataloader(ort_model_path, dataset): - """Create INC ORT dataloader.""" - dataloader = ONNXRTDataset(ort_model_path, dataset) - return dataloader - - -def main(): - # parse args - parser = argparse.ArgumentParser() - parser.add_argument( - 
"--train_data_path", - default=None, - type=str, - help="An optional input training data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--data_path", - default=None, - type=str, - help="An optional input evaluation data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--model_name_or_path", - default=None, - type=str, - help="The model checkpoint for weights initialization.", - ) - parser.add_argument( - "--model_path", default=None, type=str, help="The onnx model path." - ) - parser.add_argument("--benchmark", action="store_true", default=False) - parser.add_argument( - "--fine_tune", - action="store_true", - default=False, - help="whether fine tune the model", - ) - parser.add_argument( - "--tune", action="store_true", default=False, help="whether quantize the model" - ) - parser.add_argument( - "--output_model", type=str, default=None, help="output model path" - ) - parser.add_argument( - "--mode", type=str, help="benchmark mode of performance or accuracy" - ) - parser.add_argument("--batch_size", default=1, type=int, help="batch size") - parser.add_argument( - "--quant_format", - type=str, - default="QOperator", - choices=["QOperator", "QDQ"], - help="quantization format", - ) - args = parser.parse_args() - - # fine tune - if args.fine_tune: - fine_tune(args) - - - def eval_func(model): - session = ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - all_labels = [] - all_preds = [] - import tqdm - - for idx, (inputs, labels) in tqdm.tqdm(enumerate(dataloader)): - if not isinstance(labels, list): - labels: List[np.array] = [labels] # List[shape: bs] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions: List[np.array] = session.run( - None, ort_inputs - ) # List[# shape, (bs, 2)] - predictions = [np.argmax(p, axis=1) for p in predictions] - - all_labels += labels - all_preds += predictions - np.mean( - np.concatenate(all_labels, 0) == (np.concatenate(all_preds, 0)) - ) # [:,0]>0.5)) - label_flatten = np.concatenate(all_labels, 0) - preds_flatten = np.concatenate(all_preds, 0) - correct_count = np.sum(label_flatten == preds_flatten) - acc = correct_count / len(label_flatten) - return acc - - # tune - if args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - - from neural_compressor.data import DataLoader as INC_DataLoader - - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - - model_type = "bert" - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=12, - hidden_size=768, - optimization_options=opt_options, - ) - model = model_optimizer.model - - # check the optimized model is valid - import onnxruntime as ort - - try: - ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - except Exception as e: - logger.warning("Optimized model is invalid: {}. 
".format(e)) - logger.warning( - "Model optimizer will be skipped. " - "Try to upgrade onnxruntime to avoid this error" - ) - model = onnx.load(args.model_path) - - config = PostTrainingQuantConfig( - approach="dynamic", - quant_level=1, - ) - q_model = quantization.fit( - model, - config, - eval_func=eval_func, - calib_dataloader=dataloader, - ) - q_model.save(args.output_model) - - # benchmark - if args.benchmark: - import onnx - import onnxruntime as ort - from neural_compressor.data import DataLoader as INC_DataLoader - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - model = onnx.load(args.model_path) - if args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - conf = BenchmarkConfig(iteration=100) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == "accuracy": - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - main() diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh deleted file mode 100644 index 81ce2ae3e91..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -git clone https://github.com/microsoft/CodeXGLUE/ -cp -r ./CodeXGLUE/Code-Code/Defect-detection/dataset dataset -cd dataset -pip install gdown -gdown https://drive.google.com/uc?id=1x6hoF7G-tSYxg8AFybggypLZgMGDNHfF -python preprocess.py \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py deleted file mode 100644 index 9d941a0e5e4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py +++ /dev/null @@ -1,21 +0,0 @@ -import argparse -import os -from optimum.exporters.onnx import main_export - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default="checkpoint-best-acc") - parser.add_argument("--output_model", type=str, required=False, default="codebert-exported-onnx") - return parser.parse_args() - -def prepare_model(input_model, output_model): - print("\nexport model...") - print(f"Try to export model from {input_model} to {output_model}") - main_export(input_model, output=output_model, task="text-classification") - - assert os.path.exists(output_model), f"{output_model} doesn't exist!" 
- - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt deleted file mode 100644 index 9988cdf0329..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh deleted file mode 100644 index 1f514a25368..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --mode ${mode} \ - --batch_size ${batch_size} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh deleted file mode 100644 index d959b607a14..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_fine_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --train_dataset_location=*) - train_dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_fine_tuning -function run_fine_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --train_data_path ${train_dataset_location} \ - --data_path ${dataset_location} \ - --fine_tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh deleted file mode 100644 index 5f75411f60a..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var 
|cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --output_model ${output_model} \ - --tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md deleted file mode 100644 index 21000ca3aea..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md +++ /dev/null @@ -1,55 +0,0 @@ -Step-by-Step -============ - -This example quantizes the [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) fine-tuned on the the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - - -## 2. Prepare Dataset -Run `prepare_data.sh` script to download dataset from website to `dataset` folder and pre-process it: - -```shell -bash prepare_data.sh -``` -## 3. Prepare Model - -Fine-tuning the model on [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. -``` -bash run_fine_tuning.sh --train_dataset_location=./dataset/train.jsonl --dataset_location=./dataset/valid.jsonl --fine_tune -``` - -Export model to ONNX format. -```bash -# By default, the input model path is `checkpoint-best-acc/`. -python prepare_model.py --input_model=./checkpoint-best-acc --output_model=./codebert-exported-onnx -``` - -# Run - -## 1. Quantization - -Static quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data \ - --quant_format="QOperator" -``` - -## 2. 
Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py deleted file mode 100644 index 6b4db97faca..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py +++ /dev/null @@ -1,338 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import argparse -import logging -from typing import List - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) - - -def load_dataset_from_local(file_path, model_name_or_path): - """Load the raw data from local.""" - import json - - import torch - - def read_data(file_path): - texts, labels = [], [] - with open(file_path, "r") as f: - for i, line in enumerate(f): - js = json.loads(line.strip()) - code = " ".join(js["func"].split()) - texts.append(code) - labels.append(js["target"]) - return texts, labels - - texts, labels = read_data(file_path) - - # tokenize the raw data - from transformers import AutoTokenizer - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - encodings = tokenizer( - texts, return_tensors="pt", truncation=True, padding="max_length" - ) - - class CodeDataset(torch.utils.data.Dataset): - def __init__(self, encodings, labels): - self.encodings = encodings - self.labels = labels - - def __getitem__(self, idx): - item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} - item["labels"] = torch.tensor(self.labels[idx]) - return item - - def __len__(self): - return len(self.labels) - - dataset = CodeDataset(encodings, labels) - return dataset - - -# evaluation func for fine-tuning -def evaluate(model, val_loader): - import torch - - print("*** eval model .. 
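`load_dataset_from_local` above does three things: parse the Defect-detection JSONL, normalize whitespace in each function body, and tokenize to fixed-length tensors. The same pipeline in a few lines (the path and the `TensorDataset` packaging are illustrative; the original wraps the encodings in a custom `CodeDataset`):

```python
import json
import torch
from transformers import AutoTokenizer

texts, labels = [], []
with open("dataset/valid.jsonl") as f:              # hypothetical path
    for line in f:
        js = json.loads(line.strip())
        texts.append(" ".join(js["func"].split()))  # collapse whitespace
        labels.append(js["target"])

tok = AutoTokenizer.from_pretrained("microsoft/codebert-base")
enc = tok(texts, return_tensors="pt", truncation=True, padding="max_length")
dataset = torch.utils.data.TensorDataset(
    enc["input_ids"], enc["attention_mask"], torch.tensor(labels)
)
```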
") - all_labels = [] - all_preds = [] - for idx, batch in enumerate(val_loader): - model.eval() - with torch.no_grad(): - labels = batch.pop("labels") - inputs = batch - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(f"{idx} batch evaluation accuracy: {cur_acc}") - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print("Overall evaluation accuracy: ", cur_acc) - return cur_acc - - -def fine_tune(args): - import os - import numpy as np - import torch - from torch.utils.data import DataLoader - from transformers import AdamW, AutoModelForSequenceClassification - - train_dataset = load_dataset_from_local( - args.train_data_path, args.model_name_or_path - ) - val_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path) - val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True) - train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) - - optim = AdamW(model.parameters(), lr=5e-5) - - results = {"eval_acc": 0} - global_step = -1 - for epoch in range(3): - all_labels = [] - all_preds = [] - for idx, batch in enumerate(train_loader): - global_step += 1 - optim.zero_grad() - labels = batch.pop("labels") - inputs = batch - model.train() - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(" Current acc:%s", round(cur_acc, 4)) - loss.backward() - print(f" Loss: {loss.item()}") - optim.step() - - if global_step % 100 == 0: - best_acc = results["eval_acc"] - cur_acc = evaluate(model, val_loader) - if cur_acc > best_acc: - results["eval_acc"] = cur_acc - best_acc = results["eval_acc"] - print(" Best acc:%s", round(best_acc, 4)) - checkpoint_prefix = "checkpoint-best-acc" - output_dir = os.path.join("{}".format(checkpoint_prefix)) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - model_to_save = model.module if hasattr(model, "module") else model - model.config.to_json_file("{}/config.json".format(checkpoint_prefix)) - output_dir = os.path.join(output_dir, "{}".format("pytorch_model.bin")) - torch.save(model_to_save.state_dict(), output_dir) - print("Saving model checkpoint to %s", output_dir) - - -class ONNXRTDataset: - def __init__(self, model_path, dataset): - self.inputs = [inp.name for inp in onnx.load(model_path).graph.input] - self.dataset = dataset - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = self.dataset[index] - labels = batch["labels"].detach().cpu().numpy() - batch.pop("labels") - inputs = [batch["input_ids"].numpy(), batch["attention_mask"].numpy()] - return inputs, labels - - -def get_dataloader(ort_model_path, dataset): - """Create INC ORT dataloader.""" - dataloader = ONNXRTDataset(ort_model_path, dataset) - return dataloader - - -def main(): - # parse args - parser = argparse.ArgumentParser() - parser.add_argument( - 
"--train_data_path", - default=None, - type=str, - help="An optional input training data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--data_path", - default=None, - type=str, - help="An optional input evaluation data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--model_name_or_path", - default=None, - type=str, - help="The model checkpoint for weights initialization.", - ) - parser.add_argument( - "--model_path", default=None, type=str, help="The onnx model path." - ) - parser.add_argument("--benchmark", action="store_true", default=False) - parser.add_argument( - "--fine_tune", - action="store_true", - default=False, - help="whether fine tune the model", - ) - parser.add_argument( - "--tune", action="store_true", default=False, help="whether quantize the model" - ) - parser.add_argument( - "--output_model", type=str, default=None, help="output model path" - ) - parser.add_argument( - "--mode", type=str, help="benchmark mode of performance or accuracy" - ) - parser.add_argument("--batch_size", default=1, type=int, help="batch size") - parser.add_argument( - "--quant_format", - type=str, - default="QOperator", - choices=["QOperator", "QDQ"], - help="quantization format", - ) - args = parser.parse_args() - - # fine tune - if args.fine_tune: - fine_tune(args) - - - def eval_func(model): - session = ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - all_labels = [] - all_preds = [] - import tqdm - - for idx, (inputs, labels) in tqdm.tqdm(enumerate(dataloader)): - if not isinstance(labels, list): - labels: List[np.array] = [labels] # List[shape: bs] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions: List[np.array] = session.run( - None, ort_inputs - ) # List[# shape, (bs, 2)] - predictions = [np.argmax(p, axis=1) for p in predictions] - - all_labels += labels - all_preds += predictions - np.mean( - np.concatenate(all_labels, 0) == (np.concatenate(all_preds, 0)) - ) # [:,0]>0.5)) - label_flatten = np.concatenate(all_labels, 0) - preds_flatten = np.concatenate(all_preds, 0) - correct_count = np.sum(label_flatten == preds_flatten) - acc = correct_count / len(label_flatten) - return acc - - # tune - if args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - - from neural_compressor.data import DataLoader as INC_DataLoader - - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - - model_type = "bert" - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=12, - hidden_size=768, - optimization_options=opt_options, - ) - model = model_optimizer.model - - # check the optimized model is valid - import onnxruntime as ort - - try: - ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - except Exception as e: - logger.warning("Optimized model is invalid: {}. 
".format(e)) - logger.warning( - "Model optimizer will be skipped. " - "Try to upgrade onnxruntime to avoid this error" - ) - model = onnx.load(args.model_path) - - config = PostTrainingQuantConfig( - approach="static", - quant_level=1, - quant_format=args.quant_format, - ) - q_model = quantization.fit( - model, - config, - eval_func=eval_func, - calib_dataloader=dataloader, - ) - q_model.save(args.output_model) - - # benchmark - if args.benchmark: - import onnx - import onnxruntime as ort - from neural_compressor.data import DataLoader as INC_DataLoader - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - model = onnx.load(args.model_path) - if args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - conf = BenchmarkConfig(iteration=100) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == "accuracy": - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - main() diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh deleted file mode 100644 index 81ce2ae3e91..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -git clone https://github.com/microsoft/CodeXGLUE/ -cp -r ./CodeXGLUE/Code-Code/Defect-detection/dataset dataset -cd dataset -pip install gdown -gdown https://drive.google.com/uc?id=1x6hoF7G-tSYxg8AFybggypLZgMGDNHfF -python preprocess.py \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py deleted file mode 100644 index 9d941a0e5e4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py +++ /dev/null @@ -1,21 +0,0 @@ -import argparse -import os -from optimum.exporters.onnx import main_export - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default="checkpoint-best-acc") - parser.add_argument("--output_model", type=str, required=False, default="codebert-exported-onnx") - return parser.parse_args() - -def prepare_model(input_model, output_model): - print("\nexport model...") - print(f"Try to export model from {input_model} to {output_model}") - main_export(input_model, output=output_model, task="text-classification") - - assert os.path.exists(output_model), f"{output_model} doesn't exist!" 
- - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt deleted file mode 100644 index 6ebc9f078a4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh deleted file mode 100644 index 1f514a25368..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --mode ${mode} \ - --batch_size ${batch_size} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh deleted file mode 100644 index d959b607a14..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_fine_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --train_dataset_location=*) - train_dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_fine_tuning -function run_fine_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --train_data_path ${train_dataset_location} \ - --data_path ${dataset_location} \ - --fine_tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh deleted file mode 100644 index c234a7c509e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - 
output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --output_model ${output_model} \ - --quant_format ${quant_format} \ - --tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md deleted file mode 100644 index 7833386e16c..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md +++ /dev/null @@ -1,69 +0,0 @@ -Step-by-Step -============ - -This example load a language translation model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Model - -Supported model identifier from [huggingface.co](https://huggingface.co/): - -| Model Identifier | -|:-----------------------------------------------:| -| Intel/bert-base-uncased-mrpc | -| Intel/roberta-base-mrpc | -| Intel/xlm-roberta-base-mrpc | -| Intel/camembert-base-mrpc | -| distilbert-base-uncased-finetuned-sst-2-english | -| Alireza1044/albert-base-v2-sst2 | -| Intel/MiniLM-L12-H384-uncased-mrpc | -| philschmid/MiniLM-L6-H384-uncased-sst2 | -| bert-base-cased-finetuned-mrpc | -| Intel/electra-small-discriminator-mrpc | -| M-FAC/bert-mini-finetuned-mrpc | -| Intel/xlnet-base-cased-mrpc | -| Intel/bart-large-mrpc | -| Intel/deberta-v3-base-mrpc | - -```bash -python prepare_model.py --input_model=Intel/bert-base-uncased-mrpc --output_model=bert-base-uncased-mrpc.onnx -``` - -## 3. Prepare Dataset -Download the GLUE data with `prepare_data.sh` script. - -```shell -export GLUE_DIR=/path/to/glue_data -export TASK_NAME=MRPC # or SST - -bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME -``` - -# Run - -## 1. Quantization - -Dynamic quantization: - -```bash -bash run_quant.sh --input_model=path/to/model \ # model path as *.onnx - --output_model=path/to/model_tune \ # model path as *.onnx - --dataset_location=path/to/glue/data -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py deleted file mode 100644 index 18151ff9884..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py +++ /dev/null @@ -1,431 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - -import logging -import argparse -import onnx -import onnxruntime as ort -import transformers -import os -import torch -import numpy as np -from dataclasses import dataclass -from typing import List, Optional, Union -from neural_compressor.data import DataLoader - - -class ONNXRTBertDataset: - """Dataset used for model Bert. - Args: data_dir (str): The input data dir. - model_name_or_path (str): Path to pre-trained student model or shortcut name, - selected in the list: - max_seq_length (int, default=128): The maximum length after tokenization. - Sequences longer than this will be truncated, - sequences shorter will be padded. - do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. - task (str, default=mrpc): The name of the task to fine-tune. - Choices include mrpc, qqp, qnli, rte, - sts-b, cola, mnli, wnli. - model_type (str, default='bert'): model type, support 'distilbert', 'bert', - 'mobilebert', 'roberta'. - dynamic_length (bool, default=False): Whether to use fixed sequence length. - evaluate (bool, default=True): Whether do evaluation or training. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. 
- """ - def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ - evaluate=True, transform=None, filter=None): - self.inputs = [inp.name for inp in onnx.load(model).graph.input] - task = task.lower() - model_type = model_type.lower() - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' - assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ - model type' - self.dynamic_length = dynamic_length - self.model_type = model_type - self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) - return batch[:len(self.inputs)], batch[-1] - -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): - from torch.utils.data import TensorDataset - - processor = transformers.glue_processors[task]() - output_mode = transformers.glue_output_modes[task] - # Load data features from cache or dataset file - if not os.path.exists("./dataset_cached"): - os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, model_name_or_path.split('/'))).pop(), - str(max_seq_length), - str(task))) - if os.path.exists(cached_features_file): - logger.info("Load features from cached file {}.".format(cached_features_file)) - features = torch.load(cached_features_file) - else: - logger.info("Create features from dataset file at {}.".format(data_dir)) - label_list = processor.get_labels() - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, - ) - logger.info("Save features into cached file {}.".format(cached_features_file)) - torch.save(features, cached_features_file) - # Convert to Tensors and build dataset - all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) - all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) - all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) - all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) - if output_mode == "classification": - all_labels = torch.tensor([f.label for f in features], dtype=torch.long) - elif output_mode == "regression": - all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) - return dataset - -def convert_examples_to_features( - examples, - tokenizer, - max_length=128, - task=None, - label_list=None, - output_mode="classification", - pad_token=0, - pad_token_segment_id=0, - mask_padding_with_zero=True, -): - processor = transformers.glue_processors[task]() - if label_list is None: - label_list = processor.get_labels() - logger.info("Use label 
list {} for task {}.".format(label_list, task)) - label_map = {label: i for i, label in enumerate(label_list)} - features = [] - for (ex_index, example) in enumerate(examples): - inputs = tokenizer.encode_plus( - example.text_a, - example.text_b, - add_special_tokens=True, - max_length=max_length, - return_token_type_ids=True, - truncation=True, - ) - input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) - - # Zero-pad up to the sequence length. - seq_length = len(input_ids) - padding_length = max_length - len(input_ids) - - input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) - token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( - len(attention_mask), max_length - ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( - len(token_type_ids), max_length - ) - if output_mode == "classification": - label = label_map[example.label] - elif output_mode == "regression": - label = float(example.label) - else: - raise KeyError(output_mode) - - feats = InputFeatures( - input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - label=label, - seq_length=seq_length, - ) - features.append(feats) - return features - -@dataclass(frozen=True) -class InputFeatures: - """ - A single set of features of data. - Property names are the same names as the corresponding inputs to a model. - Args: - input_ids: Indices of input sequence tokens in the vocabulary. - attention_mask: Mask to avoid performing attention on padding token indices. - Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, - ``0`` for MASKED (padded) tokens. - token_type_ids: (Optional) Segment token indices to indicate first and second - portions of the inputs. Only some models use them. - label: (Optional) Label corresponding to the input. Int for classification problems, - float for regression problems. - seq_length: (Optional) The length of input sequence before padding. - """ - - input_ids: List[int] - attention_mask: Optional[List[int]] = None - token_type_ids: Optional[List[int]] = None - label: Optional[Union[int, float]] = None - seq_length: Optional[List[int]] = None - -class ONNXRTGLUE: - """Computes GLUE score. - - Args: - task (str, default=mrpc): The name of the task. - Choices include mrpc, qqp, qnli, rte, - sts-b, cola, mnli, wnli. 
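The padding block in `convert_examples_to_features` right-pads `input_ids`, `attention_mask`, and `token_type_ids` out to `max_length` while remembering the pre-padding length. Factored out (a sketch; the original keeps this inline and asserts the final lengths):

```python
def pad_features(input_ids, attention_mask, token_type_ids,
                 max_length=128, pad_token=0, pad_token_segment_id=0):
    """Right-pad all three sequences and return the true length."""
    seq_length = len(input_ids)
    pad = max_length - seq_length
    return (
        input_ids + [pad_token] * pad,
        attention_mask + [0] * pad,          # 0 marks padding positions
        token_type_ids + [pad_token_segment_id] * pad,
        seq_length,
    )
```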
- - """ - def __init__(self, task='mrpc'): - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' - self.pred_list = None - self.label_list = None - self.task = task - self.return_key = { - "cola": "mcc", - "mrpc": "f1", - "sts-b": "corr", - "qqp": "acc", - "mnli": "mnli/acc", - "qnli": "acc", - "rte": "acc", - "wnli": "acc", - "sst-2": "acc" - } - - def update(self, preds, labels): - """add preds and labels to storage""" - if isinstance(preds, list) and len(preds) == 1: - preds = preds[0] - if isinstance(labels, list) and len(labels) == 1: - labels = labels[0] - if self.pred_list is None: - self.pred_list = preds - self.label_list = labels - else: - self.pred_list = np.append(self.pred_list, preds, axis=0) - self.label_list = np.append(self.label_list, labels, axis=0) - - def reset(self): - """clear preds and labels storage""" - self.pred_list = None - self.label_list = None - - def result(self): - """calculate metric""" - output_mode = transformers.glue_output_modes[self.task] - - if output_mode == "classification": - processed_preds = np.argmax(self.pred_list, axis=1) - elif output_mode == "regression": - processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) - return result[self.return_key[self.task]] - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -if __name__ == "__main__": - logger.info('Evaluating ONNXRuntime full precision accuracy and performance:') - parser = argparse.ArgumentParser( - description='BERT fine-tune examples for classification/regression tasks.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained resnet50 model on onnx file" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--config', - type=str, - help="config yaml path" - ) - parser.add_argument( - '--output_model', - type=str, - default=None, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--data_path', - type=str, - help="input data path" - ) - parser.add_argument( - '--batch_size', - default=8, - type=int, - ) - parser.add_argument( - '--model_name_or_path', - type=str, - help="pretrained model name or path" - ) - parser.add_argument( - '--task', - type=str, - choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], - help="GLUE task name" - ) - parser.add_argument( - '--num_heads', - default=12, - type=int, - ) - parser.add_argument( - '--hidden_size', - default=768, - type=int, - ) - parser.add_argument( - '--device', - type=str, - default='cpu', - choices=['cpu', 'npu'], - ) - - args = parser.parse_args() - backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default' - - dataset = ONNXRTBertDataset(args.model_path, - data_dir=args.data_path, - model_name_or_path=args.model_name_or_path, - task=args.task) - dataloader = DataLoader(framework='onnxruntime', dataset=dataset, batch_size=args.batch_size) - metric = ONNXRTGLUE(args.task) - - def eval_func(model, *args): - metric.reset() - provider = 'DmlExecutionProvider' if backend == 
'onnxrt_dml_ep' else 'CPUExecutionProvider' - session = ort.InferenceSession(model.SerializeToString(), providers=[provider]) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for idx, (inputs, labels) in enumerate(dataloader): - if not isinstance(labels, list): - labels = [labels] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions = session.run(None, ort_inputs) - metric.update(predictions[0], labels) - return metric.result() - - if args.benchmark: - model = onnx.load(args.model_path) - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1, - device=args.device, - backend=backend) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == 'accuracy': - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - - if args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - model_type = 'bart' if args.model_name_or_path == 'Intel/bart-large-mrpc' else 'bert' - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=args.num_heads, - hidden_size=args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. 
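The device plumbing in this script maps `--device npu` to the `onnxrt_dml_ep` backend and the DirectML execution provider, and everything else to the CPU provider. That mapping as a small helper (a sketch; provider availability depends on the installed onnxruntime build):

```python
import onnxruntime as ort

def session_for_device(model_bytes, device="cpu"):
    backend = "onnxrt_dml_ep" if device == "npu" else "default"
    provider = ("DmlExecutionProvider" if backend == "onnxrt_dml_ep"
                else "CPUExecutionProvider")
    return ort.InferenceSession(model_bytes, providers=[provider]), backend
```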
" \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(args.model_path) - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - specific_quant_config = {} - if args.model_name_or_path == 'Alireza1044/albert-base-v2-sst2': - specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False} - config = PostTrainingQuantConfig(approach='dynamic', - device=args.device, - backend=backend, - **specific_quant_config) - q_model = quantization.fit(model, - config, - eval_func=eval_func) - q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh deleted file mode 100644 index 8e434a5c521..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - download_data - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --data_dir=*) - data_dir=$(echo $var |cut -f2 -d=) - ;; - --task_name=*) - task_name=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function download_data { - wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py - python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} -} - -main "$@" - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py deleted file mode 100644 index be05479d9e3..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py +++ /dev/null @@ -1,97 +0,0 @@ -import argparse -import os - -import torch -from transformers import AutoConfig, AutoModelForSequenceClassification - -def export_onnx_model(args, model): - with torch.no_grad(): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - if args.input_model in [ - 'Intel/roberta-base-mrpc', - 'Intel/xlm-roberta-base-mrpc', - 'Intel/camembert-base-mrpc', - 'distilbert-base-uncased-finetuned-sst-2-english', - 'Intel/xlnet-base-cased-mrpc', - 'Intel/deberta-v3-base-mrpc', - ]: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask'], - output_names=['logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names}) - else: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask'], - 
inputs['token_type_ids']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask', - 'token_type_ids'], - output_names=['logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names, - 'token_type_ids' : symbolic_names}) - assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" - print("ONNX Model exported to {0}".format(args.output_model)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export huggingface onnx model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--input_model', - type=str, - default='Intel/bert-base-uncased-mrpc', - const='Intel/bert-base-uncased-mrpc', - nargs='?', - choices=['Intel/bert-base-uncased-mrpc', - 'Intel/roberta-base-mrpc', - 'Intel/xlm-roberta-base-mrpc', - 'Intel/camembert-base-mrpc', - 'distilbert-base-uncased-finetuned-sst-2-english', - 'Alireza1044/albert-base-v2-sst2', - 'philschmid/MiniLM-L6-H384-uncased-sst2', - 'Intel/MiniLM-L12-H384-uncased-mrpc', - 'bert-base-cased-finetuned-mrpc', - 'Intel/electra-small-discriminator-mrpc', - 'M-FAC/bert-mini-finetuned-mrpc', - 'Intel/xlnet-base-cased-mrpc', - 'Intel/bart-large-mrpc', - 'Intel/deberta-v3-base-mrpc' - ], - help='pretrained model name or path') - parser.add_argument("--output_model", type=str, required=True) - parser.add_argument( - '--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') - args = parser.parse_args() - - model = AutoModelForSequenceClassification.from_pretrained( - args.input_model, - config=AutoConfig.from_pretrained(args.input_model)) - - if args.input_model == 'Intel/bart-large-mrpc': - import shutil - from optimum.exporters.onnx import main_export - - main_export(args.input_model, output="bart-large-mrpc", task="text-classification") - shutil.move("bart-large-mrpc/model.onnx", args.output_model) - else: - export_onnx_model(args, model) diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt deleted file mode 100644 index 9988cdf0329..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh deleted file mode 100644 index a45b843f555..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - 
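Stripped of the model-specific branching, the export above amounts to: fixed int64 dummy inputs, opset 14, constant folding, and batch/sequence axes marked dynamic. A runnable miniature with a stand-in module (the real script exports the selected Hugging Face checkpoint instead; for `Intel/bart-large-mrpc` it defers to optimum's `main_export`, as shown):

```python
import torch

class TinyClassifier(torch.nn.Module):
    """Stand-in with the same (input_ids, attention_mask) -> logits
    contract as the Hugging Face models above."""

    def __init__(self, vocab=30522, hidden=16, classes=2):
        super().__init__()
        self.emb = torch.nn.Embedding(vocab, hidden)
        self.head = torch.nn.Linear(hidden, classes)

    def forward(self, input_ids, attention_mask):
        mask = attention_mask.unsqueeze(-1).to(torch.float32)
        pooled = (self.emb(input_ids) * mask).sum(1) / mask.sum(1).clamp(min=1.0)
        return self.head(pooled)

symbolic = {0: "batch_size", 1: "max_seq_len"}  # variable-length axes
dummy = torch.ones(1, 128, dtype=torch.int64)
torch.onnx.export(
    TinyClassifier(), (dummy, dummy), "tiny-classifier.onnx",
    opset_version=14,
    do_constant_folding=True,
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={"input_ids": symbolic, "attention_mask": symbolic},
)
```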
batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - if [[ "${input_model}" =~ "bert-base-uncased" ]]; then - model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --mode=${mode} \ - --batch_size=${batch_size} \ - --benchmark - -} - -main "$@" - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh deleted file mode 100644 index 20a6b8b5794..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - if [[ "${input_model}" =~ "bert-base-uncased" ]]; then - model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - 
model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - num_heads=16 - hidden_size=4096 - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --num_heads ${num_heads} \ - --hidden_size ${hidden_size} \ - --tune -} - -main "$@" - - - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md deleted file mode 100644 index a4215f6876e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# Step-by-Step - -This example load a language translation model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). - -# Prerequisite - -## 1. Environment - -```shell -pip install neural-compressor -pip install -r requirements.txt -``` - -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. 
Prepare Model - -Supported model identifier from [huggingface.co](https://huggingface.co/): - -| Model Identifier | -| :---------------------------------------------: | -| Intel/bert-base-uncased-mrpc | -| Intel/roberta-base-mrpc | -| Intel/xlm-roberta-base-mrpc | -| Intel/camembert-base-mrpc | -| distilbert-base-uncased-finetuned-sst-2-english | -| Alireza1044/albert-base-v2-sst2 | -| Intel/MiniLM-L12-H384-uncased-mrpc | -| philschmid/MiniLM-L6-H384-uncased-sst2 | -| bert-base-cased-finetuned-mrpc | -| Intel/electra-small-discriminator-mrpc | -| M-FAC/bert-mini-finetuned-mrpc | -| Intel/xlnet-base-cased-mrpc | -| Intel/bart-large-mrpc | -| Intel/deberta-v3-base-mrpc | - -```bash -python prepare_model.py --input_model=Intel/bert-base-uncased-mrpc --output_model=bert-base-uncased-mrpc.onnx -``` - -## 3. Prepare Dataset - -Download the GLUE data with `prepare_data.sh` script. - -```shell -export GLUE_DIR=/path/to/glue_data -export TASK_NAME=MRPC # or SST - -bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME -``` - -# Run - -## 1. Quantization - -Static quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data \ - --quant_format="QOperator" -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py deleted file mode 100644 index bb5bd628f7c..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py +++ /dev/null @@ -1,435 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - -import logging -import argparse -import onnx -import onnxruntime as ort -import transformers -import os -import torch -import numpy as np -from dataclasses import dataclass -from typing import List, Optional, Union -from neural_compressor.data import DataLoader - - -class ONNXRTBertDataset: - """Dataset used for model Bert. - Args: data_dir (str): The input data dir. - model_name_or_path (str): Path to pre-trained student model or shortcut name, - selected in the list: - max_seq_length (int, default=128): The maximum length after tokenization. - Sequences longer than this will be truncated, - sequences shorter will be padded. - do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. 
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py
deleted file mode 100644
index bb5bd628f7c..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py
+++ /dev/null
@@ -1,435 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint:disable=redefined-outer-name,logging-format-interpolation
-
-import logging
-import argparse
-import onnx
-import onnxruntime as ort
-import transformers
-import os
-import torch
-import numpy as np
-from dataclasses import dataclass
-from typing import List, Optional, Union
-from neural_compressor.data import DataLoader
-
-
-class ONNXRTBertDataset:
-    """Dataset used for model Bert.
-
-    Args:
-        data_dir (str): The input data dir.
-        model_name_or_path (str): Path to pre-trained student model or shortcut name,
-            selected in the list:
-        max_seq_length (int, default=128): The maximum length after tokenization.
-            Sequences longer than this will be truncated,
-            sequences shorter will be padded.
-        do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing.
-        task (str, default=mrpc): The name of the task to fine-tune.
-            Choices include mrpc, qqp, qnli, rte,
-            sts-b, cola, mnli, wnli.
-        model_type (str, default='bert'): Model type, one of 'distilbert', 'bert',
-            'mobilebert', 'roberta'.
-        dynamic_length (bool, default=False): Whether to use dynamic sequence length.
-        evaluate (bool, default=True): Whether to do evaluation or training.
-        transform (transform object, default=None): Transform to process input data.
-        filter (Filter objects, default=None): Filter out examples according
-            to specific conditions.
-    """
-    def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\
-                 do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\
-                 evaluate=True, transform=None, filter=None):
-        self.inputs = [inp.name for inp in onnx.load(model).graph.input]
-        task = task.lower()
-        model_type = model_type.lower()
-        assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'], 'Unsupported task type'
-        assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \
-            model type'
-        self.dynamic_length = dynamic_length
-        self.model_type = model_type
-        self.max_seq_length = max_seq_length
-        tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path,
-                                                               do_lower_case=do_lower_case)
-        self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \
-            max_seq_length, task, model_type, tokenizer, evaluate)
-
-    def __len__(self):
-        return len(self.dataset)
-
-    def __getitem__(self, index):
-        batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index])
-        return batch[:len(self.inputs)], batch[-1]
-
-def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \
-    model_type, tokenizer, evaluate):
-    from torch.utils.data import TensorDataset
-
-    processor = transformers.glue_processors[task]()
-    output_mode = transformers.glue_output_modes[task]
-    # Load data features from cache or dataset file
-    if not os.path.exists("./dataset_cached"):
-        os.makedirs("./dataset_cached")
-    cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format(
-        'dev' if evaluate else 'train',
-        list(filter(None, model_name_or_path.split('/'))).pop(),
-        str(max_seq_length),
-        str(task)))
-    if os.path.exists(cached_features_file):
-        logger.info("Load features from cached file {}.".format(cached_features_file))
-        features = torch.load(cached_features_file)
-    else:
-        logger.info("Create features from dataset file at {}.".format(data_dir))
-        label_list = processor.get_labels()
-        examples = processor.get_dev_examples(data_dir) if evaluate else \
-            processor.get_train_examples(data_dir)
-        features = convert_examples_to_features(examples,
-                                                tokenizer,
-                                                task=task,
-                                                label_list=label_list,
-                                                max_length=max_seq_length,
-                                                output_mode=output_mode,
-        )
-        logger.info("Save features into cached file {}.".format(cached_features_file))
-        torch.save(features, cached_features_file)
-    # Convert to Tensors and build dataset
-    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
-    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
-    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
-    all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)
-    if output_mode == "classification":
-        all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
-    elif output_mode == "regression":
-        all_labels = torch.tensor([f.label for f in features], dtype=torch.float)
-    dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \
-        all_seq_lengths, all_labels)
-    return dataset
-
-def convert_examples_to_features(
-    examples,
-    tokenizer,
-    max_length=128,
-    task=None,
-    label_list=None,
-    output_mode="classification",
-    pad_token=0,
-    pad_token_segment_id=0,
-    mask_padding_with_zero=True,
-):
-    processor = transformers.glue_processors[task]()
-    if label_list is None:
-        label_list = processor.get_labels()
-        logger.info("Use label list {} for task {}.".format(label_list, task))
-    label_map = {label: i for i, label in enumerate(label_list)}
-    features = []
-    for (ex_index, example) in enumerate(examples):
-        inputs = tokenizer.encode_plus(
-            example.text_a,
-            example.text_b,
-            add_special_tokens=True,
-            max_length=max_length,
-            return_token_type_ids=True,
-            truncation=True,
-        )
-        input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
-        # The mask has 1 for real tokens and 0 for padding tokens. Only real
-        # tokens are attended to.
-        attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)
-
-        # Zero-pad up to the sequence length.
-        seq_length = len(input_ids)
-        padding_length = max_length - len(input_ids)
-
-        input_ids = input_ids + ([pad_token] * padding_length)
-        attention_mask = attention_mask + \
-            ([0 if mask_padding_with_zero else 1] * padding_length)
-        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)
-
-        assert len(input_ids) == max_length, \
-            "Error with input_ids length {} vs {}".format(
-            len(input_ids), max_length)
-        assert len(attention_mask) == max_length, \
-            "Error with attention_mask length {} vs {}".format(
-            len(attention_mask), max_length
-        )
-        assert len(token_type_ids) == max_length, \
-            "Error with token_type_ids length {} vs {}".format(
-            len(token_type_ids), max_length
-        )
-        if output_mode == "classification":
-            label = label_map[example.label]
-        elif output_mode == "regression":
-            label = float(example.label)
-        else:
-            raise KeyError(output_mode)
-
-        feats = InputFeatures(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            label=label,
-            seq_length=seq_length,
-        )
-        features.append(feats)
-    return features
-
-@dataclass(frozen=True)
-class InputFeatures:
-    """
-    A single set of features of data.
-    Property names are the same names as the corresponding inputs to a model.
-    Args:
-        input_ids: Indices of input sequence tokens in the vocabulary.
-        attention_mask: Mask to avoid performing attention on padding token indices.
-            Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED,
-            ``0`` for MASKED (padded) tokens.
-        token_type_ids: (Optional) Segment token indices to indicate first and second
-            portions of the inputs. Only some models use them.
-        label: (Optional) Label corresponding to the input. Int for classification problems,
-            float for regression problems.
-        seq_length: (Optional) The length of input sequence before padding.
-    """
-
-    input_ids: List[int]
-    attention_mask: Optional[List[int]] = None
-    token_type_ids: Optional[List[int]] = None
-    label: Optional[Union[int, float]] = None
-    seq_length: Optional[List[int]] = None
-
-class ONNXRTGLUE:
-    """Computes GLUE score.
-
-    Args:
-        task (str, default=mrpc): The name of the task.
-            Choices include mrpc, qqp, qnli, rte,
-            sts-b, cola, mnli, wnli.
-
-    """
-    def __init__(self, task='mrpc'):
-        assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'], 'Unsupported task type'
-        self.pred_list = None
-        self.label_list = None
-        self.task = task
-        self.return_key = {
-            "cola": "mcc",
-            "mrpc": "f1",
-            "sts-b": "corr",
-            "qqp": "acc",
-            "mnli": "mnli/acc",
-            "qnli": "acc",
-            "rte": "acc",
-            "wnli": "acc",
-            "sst-2": "acc"
-        }
-
-    def update(self, preds, labels):
-        """add preds and labels to storage"""
-        if isinstance(preds, list) and len(preds) == 1:
-            preds = preds[0]
-        if isinstance(labels, list) and len(labels) == 1:
-            labels = labels[0]
-        if self.pred_list is None:
-            self.pred_list = preds
-            self.label_list = labels
-        else:
-            self.pred_list = np.append(self.pred_list, preds, axis=0)
-            self.label_list = np.append(self.label_list, labels, axis=0)
-
-    def reset(self):
-        """clear preds and labels storage"""
-        self.pred_list = None
-        self.label_list = None
-
-    def result(self):
-        """calculate metric"""
-        output_mode = transformers.glue_output_modes[self.task]
-
-        if output_mode == "classification":
-            processed_preds = np.argmax(self.pred_list, axis=1)
-        elif output_mode == "regression":
-            processed_preds = np.squeeze(self.pred_list)
-        result = transformers.glue_compute_metrics(\
-            self.task, processed_preds, self.label_list)
-        return result[self.return_key[self.task]]
-
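ONNXRTGLUE above implements Neural Compressor's update/reset/result metric protocol. A toy walk-through of the accumulate-then-score flow, using synthetic logits and labels rather than real model output:

import numpy as np

# Placeholder data only: four pretend batches of eight examples each.
metric = ONNXRTGLUE(task='sst-2')            # sst-2 reports plain accuracy
for _ in range(4):
    logits = np.random.randn(8, 2)           # shape (batch, num_classes)
    labels = np.random.randint(0, 2, size=8)
    metric.update(logits, labels)
print(metric.result())                       # score over all 32 examples
metric.reset()                               # ready for the next evaluation
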
-logger = logging.getLogger(__name__)
-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-                    datefmt = '%m/%d/%Y %H:%M:%S',
-                    level = logging.WARN)
-
-if __name__ == "__main__":
-    logger.info('Evaluating ONNXRuntime full precision accuracy and performance:')
-    parser = argparse.ArgumentParser(
-        description='BERT fine-tune examples for classification/regression tasks.',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--model_path',
-        type=str,
-        help="Pre-trained model ONNX file"
-    )
-    parser.add_argument(
-        '--benchmark',
-        action='store_true', \
-        default=False
-    )
-    parser.add_argument(
-        '--tune',
-        action='store_true', \
-        default=False,
-        help="whether quantize the model"
-    )
-    parser.add_argument(
-        '--config',
-        type=str,
-        help="config yaml path"
-    )
-    parser.add_argument(
-        '--output_model',
-        type=str,
-        default=None,
-        help="output model path"
-    )
-    parser.add_argument(
-        '--mode',
-        type=str,
-        help="benchmark mode of performance or accuracy"
-    )
-    parser.add_argument(
-        '--data_path',
-        type=str,
-        help="input data path"
-    )
-    parser.add_argument(
-        '--batch_size',
-        default=8,
-        type=int,
-    )
-    parser.add_argument(
-        '--model_name_or_path',
-        type=str,
-        help="pretrained model name or path"
-    )
-    parser.add_argument(
-        '--task',
-        type=str,
-        choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'],
-        help="GLUE task name"
-    )
-    parser.add_argument(
-        '--num_heads',
-        default=12,
-        type=int,
-    )
-    parser.add_argument(
-        '--hidden_size',
-        default=768,
-        type=int,
-    )
-    parser.add_argument(
-        '--quant_format',
-        type=str,
-        default='QOperator',
-        choices=['QOperator', 'QDQ'],
-        help="quantization format"
-    )
-
-    args = parser.parse_args()
-
-    dataset = ONNXRTBertDataset(args.model_path,
-                                data_dir=args.data_path,
-                                model_name_or_path=args.model_name_or_path,
-                                task=args.task)
-    dataloader = DataLoader(framework='onnxruntime', dataset=dataset, batch_size=args.batch_size)
-    metric = ONNXRTGLUE(args.task)
-
-    def eval_func(model, *args):
-        metric.reset()
-        session = ort.InferenceSession(model.SerializeToString(),
-                                       providers=ort.get_available_providers())
-        ort_inputs = {}
-        len_inputs = len(session.get_inputs())
-        inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)]
-        for idx, (inputs, labels) in enumerate(dataloader):
-            if not isinstance(labels, list):
-                labels = [labels]
-            inputs = inputs[:len_inputs]
-            for i in range(len_inputs):
-                ort_inputs.update({inputs_names[i]: inputs[i]})
-            predictions = session.run(None, ort_inputs)
-            metric.update(predictions[0], labels)
-        return metric.result()
-
-    if args.benchmark:
-        model = onnx.load(args.model_path)
-        if args.mode == 'performance':
-            from neural_compressor.benchmark import fit
-            from neural_compressor.config import BenchmarkConfig
-            conf = BenchmarkConfig(iteration=100,
-                                   cores_per_instance=28,
-                                   num_of_instance=1)
-            fit(model, conf, b_dataloader=dataloader)
-        elif args.mode == 'accuracy':
-            acc_result = eval_func(model)
-            print("Batch size = %d" % args.batch_size)
-            print("Accuracy: %.5f" % acc_result)
-
-    if args.tune:
-        # optimize model
-        from onnxruntime.transformers import optimizer
-        from onnxruntime.transformers.fusion_options import FusionOptions
-        model_type = 'bart' if args.model_name_or_path == 'Intel/bart-large-mrpc' else 'bert'
-        opt_options = FusionOptions(model_type)
-        opt_options.enable_embed_layer_norm = False
-
-        model_optimizer = optimizer.optimize_model(
-            args.model_path,
-            model_type,
-            num_heads=args.num_heads,
-            hidden_size=args.hidden_size,
-            optimization_options=opt_options)
-        model = model_optimizer.model
-
-        # check the optimized model is valid
-        try:
-            ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
-        except Exception as e:
-            logger.warning("Optimized model is invalid: {}. ".format(e))
-            logger.warning("Model optimizer will be skipped. " \
-                "Try to upgrade onnxruntime to avoid this error")
-            model = onnx.load(args.model_path)
-
-        from neural_compressor import quantization, PostTrainingQuantConfig
-        from neural_compressor.utils.constant import FP32
-        specific_quant_config = {}
-        if args.model_name_or_path == 'Intel/bart-large-mrpc':
-            fp32_op_names = ['/model/(en|de)coder/layers.*/fc(1|2)/MatMul']
-            specific_quant_config['op_name_dict'] = {op_name: FP32 for op_name in fp32_op_names}
-        elif args.model_name_or_path == 'Alireza1044/albert-base-v2-sst2':
-            specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False}
-        elif args.model_name_or_path == 'Intel/deberta-v3-base-mrpc':
-            specific_quant_config['op_type_dict'] = {'^((?!(MatMul|Gather)).)*$': FP32}
-            specific_quant_config['quant_level'] = 1
-        config = PostTrainingQuantConfig(approach='static',
-                                         quant_format=args.quant_format,
-                                         **specific_quant_config)
-        q_model = quantization.fit(model,
-                                   config,
-                                   eval_func=eval_func,
-                                   calib_dataloader=dataloader)
-        q_model.save(args.output_model)
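The model-specific branches above illustrate the general escape hatch for layers that lose too much accuracy under INT8: pin them to FP32 through op_name_dict or op_type_dict. A minimal standalone sketch, with an op-name regex that is illustrative rather than taken from any of these models:

from neural_compressor import PostTrainingQuantConfig
from neural_compressor.utils.constant import FP32

# Illustrative only: ops whose names match the regex stay FP32 while the
# rest of the graph is statically quantized, mirroring the bart branch above.
config = PostTrainingQuantConfig(
    approach="static",
    quant_format="QDQ",
    op_name_dict={"/encoder/layer.*/output/dense/MatMul": FP32},
)
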
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh
deleted file mode 100644
index 8e434a5c521..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-    init_params "$@"
-    download_data
-
-}
-
-# init params
-function init_params {
-
-    for var in "$@"
-    do
-        case $var in
-            --data_dir=*)
-                data_dir=$(echo $var |cut -f2 -d=)
-            ;;
-            --task_name=*)
-                task_name=$(echo $var |cut -f2 -d=)
-            ;;
-        esac
-    done
-
-}
-
-# download data
-function download_data {
-    wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py
-    python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name}
-}
-
-main "$@"
-
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
deleted file mode 100644
index a8272021d5a..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import argparse
-import os
-
-import torch
-from transformers import AutoConfig, AutoModelForSequenceClassification
-
-def export_onnx_model(args, model):
-    with torch.no_grad():
-        symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
-        if args.input_model in ['Intel/roberta-base-mrpc',
-                                'Intel/xlm-roberta-base-mrpc',
-                                'Intel/camembert-base-mrpc',
-                                'distilbert-base-uncased-finetuned-sst-2-english',
-                                'Intel/xlnet-base-cased-mrpc',
-                                'Intel/deberta-v3-base-mrpc']:
-            inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)}
-            torch.onnx.export(model,                        # model being run
-                              (inputs['input_ids'],         # model input (or a tuple for multiple inputs)
-                               inputs['attention_mask']),
-                              args.output_model,            # where to save the model (can be a file or file-like object)
-                              opset_version=14,             # the ONNX version to export the model
-                              do_constant_folding=True,     # whether to execute constant folding
-                              input_names=['input_ids',     # the model's input names
-                                           'attention_mask'],
-                              output_names=['logits'],
-                              dynamic_axes={'input_ids': symbolic_names,    # variable length axes
-                                            'attention_mask': symbolic_names})
-        else:
-            inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)}
-            torch.onnx.export(model,                        # model being run
-                              (inputs['input_ids'],         # model input (or a tuple for multiple inputs)
-                               inputs['attention_mask'],
-                               inputs['token_type_ids']),
-                              args.output_model,            # where to save the model (can be a file or file-like object)
-                              opset_version=14,             # the ONNX version to export the model
-                              do_constant_folding=True,     # whether to execute constant folding
-                              input_names=['input_ids',     # the model's input names
-                                           'attention_mask',
-                                           'token_type_ids'],
-                              output_names=['logits'],
-                              dynamic_axes={'input_ids': symbolic_names,    # variable length axes
-                                            'attention_mask': symbolic_names,
-                                            'token_type_ids': symbolic_names})
-
-    assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!"
-    print("ONNX Model exported to {0}".format(args.output_model))
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description='Export huggingface onnx model',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--input_model',
-        type=str,
-        default='Intel/bert-base-uncased-mrpc',
-        const='Intel/bert-base-uncased-mrpc',
-        nargs='?',
-        choices=['Intel/bert-base-uncased-mrpc',
-                 'Intel/roberta-base-mrpc',
-                 'Intel/xlm-roberta-base-mrpc',
-                 'Intel/camembert-base-mrpc',
-                 'distilbert-base-uncased-finetuned-sst-2-english',
-                 'Alireza1044/albert-base-v2-sst2',
-                 'philschmid/MiniLM-L6-H384-uncased-sst2',
-                 'Intel/MiniLM-L12-H384-uncased-mrpc',
-                 'bert-base-cased-finetuned-mrpc',
-                 'Intel/electra-small-discriminator-mrpc',
-                 'M-FAC/bert-mini-finetuned-mrpc',
-                 'Intel/xlnet-base-cased-mrpc',
-                 'Intel/bart-large-mrpc',
-                 'Intel/deberta-v3-base-mrpc'
-                 ],
-        help='pretrained model name or path')
-    parser.add_argument("--output_model", type=str, required=True)
-    parser.add_argument(
-        '--max_len',
-        type=int,
-        default=128,
-        help='Maximum length of the sentence pairs')
-    args = parser.parse_args()
-
-    model = AutoModelForSequenceClassification.from_pretrained(
-        args.input_model,
-        config=AutoConfig.from_pretrained(args.input_model))
-
-    if args.input_model == 'Intel/bart-large-mrpc':
-        import shutil
-        from optimum.exporters.onnx import main_export
-
-        main_export(args.input_model, output="bart-large-mrpc", task="text-classification")
-        shutil.move("bart-large-mrpc/model.onnx", args.output_model)
-    else:
-        export_onnx_model(args, model)
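The export in prepare_model.py hinges on dynamic_axes, which keeps the batch and sequence dimensions symbolic so one ONNX file serves any batch size and padded length. A self-contained toy export showing the mechanics; TinyClassifier is a made-up stand-in, not one of the supported models:

import torch

# Minimal placeholder module; a real run exports an
# AutoModelForSequenceClassification exactly as prepare_model.py does.
class TinyClassifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = torch.nn.Embedding(100, 16)
        self.head = torch.nn.Linear(16, 2)

    def forward(self, input_ids):
        return self.head(self.embed(input_ids).mean(dim=1))

symbolic_names = {0: "batch_size", 1: "max_seq_len"}
torch.onnx.export(TinyClassifier(),
                  torch.ones(1, 128, dtype=torch.int64),   # dummy input_ids
                  "tiny.onnx",
                  opset_version=14,
                  input_names=["input_ids"],
                  output_names=["logits"],
                  dynamic_axes={"input_ids": symbolic_names})  # both dims stay variable
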
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
deleted file mode 100644
index 9988cdf0329..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-torch
-transformers
-accelerate
-onnx
-onnxruntime
-coloredlogs
-sympy
-onnxruntime-extensions; python_version < '3.11'
-numpy==1.23.5
-sentencepiece
-protobuf<=3.20.3
-optimum
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh
deleted file mode 100644
index a45b843f555..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
-    init_params "$@"
-    run_benchmark
-
-}
-
-# init params
-function init_params {
-    for var in "$@"
-    do
-        case $var in
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --mode=*)
-                mode=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --batch_size=*)
-                batch_size=$(echo $var |cut -f2 -d=)
-            ;;
-        esac
-    done
-
-}
-
-# run_benchmark
-function run_benchmark {
-
-    if [[ "${input_model}" =~ "bert-base-uncased" ]]; then
-        model_name_or_path="Intel/bert-base-uncased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "roberta-base" ]]; then
-        model_name_or_path="Intel/roberta-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then
-        model_name_or_path="Intel/xlm-roberta-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "camembert-base" ]]; then
-        model_name_or_path="Intel/camembert-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "distilbert-base" ]]; then
-        model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "albert-base" ]]; then
-        model_name_or_path="Alireza1044/albert-base-v2-sst2"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "MiniLM-L6" ]]; then
-        model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "MiniLM-L12" ]]; then
-        model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bert-base-cased" ]]; then
-        model_name_or_path="bert-base-cased-finetuned-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then
-        model_name_or_path="Intel/xlnet-base-cased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bert-mini" ]]; then
-        model_name_or_path="M-FAC/bert-mini-finetuned-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then
-        model_name_or_path="Intel/electra-small-discriminator-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bart" ]]; then
-        model_name_or_path="Intel/bart-large-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "deberta" ]]; then
-        model_name_or_path="microsoft/deberta-v3-base"
-        TASK_NAME='mrpc'
-    fi
-
-    python main.py \
-           --model_name_or_path ${model_name_or_path} \
-           --model_path ${input_model} \
-           --data_path ${dataset_location} \
-           --task ${TASK_NAME} \
-           --mode=${mode} \
-           --batch_size=${batch_size} \
-           --benchmark
-
-}
-
-main "$@"
-
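run_benchmark.sh lands in the --benchmark branch of main.py, whose performance path reduces to Neural Compressor's benchmark fit. A sketch with a synthetic dataloader; the model path is a placeholder and the instance settings are the values main.py hard-codes:

import numpy as np
import onnx
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.data import DataLoader

class DummySet:
    """Synthetic batches; real runs feed the GLUE dev set instead."""
    def __len__(self):
        return 32
    def __getitem__(self, idx):
        ids = np.ones((128,), dtype=np.int64)
        return (ids, ids, ids), 0

model = onnx.load("bert-base-uncased-mrpc.onnx")   # placeholder model path
dataloader = DataLoader(framework="onnxruntime", dataset=DummySet(), batch_size=8)
conf = BenchmarkConfig(iteration=100,              # timed iterations
                       cores_per_instance=28,      # values hard-coded in main.py
                       num_of_instance=1)
fit(model, conf, b_dataloader=dataloader)          # logs latency and throughput
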
model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - num_heads=16 - hidden_size=4096 - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --num_heads ${num_heads} \ - --hidden_size ${hidden_size} \ - --tune -} - -main "$@" - - -
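For reference, the --quant_format choice parsed above only changes how the quantized graph is serialized: QOperator emits fused INT8 operators such as QLinearMatMul, while QDQ keeps float operators wrapped in QuantizeLinear/DequantizeLinear pairs. The two configurations differ by a single argument:

from neural_compressor import PostTrainingQuantConfig

# Same quantization recipe, two serialization formats.
qop_config = PostTrainingQuantConfig(approach="static", quant_format="QOperator")
qdq_config = PostTrainingQuantConfig(approach="static", quant_format="QDQ")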