From d837a02e7cf9071512201fb2bae5b67b070a2d02 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Tue, 1 Jul 2025 18:35:12 -0700 Subject: [PATCH] remove unused examples Signed-off-by: Sun, Xuehao --- .pre-commit-config.yaml | 3 - examples/.config/model_params_onnxrt.json | 219 +-------- examples/.config/model_params_onnxrt_win.json | 7 - examples/README.md | 126 ----- ..._Started_Notebook_of_INC_for_Pytorch.ipynb | 329 ------------- examples/notebook/pytorch/benchmark.py | 109 ----- examples/notebook/pytorch/requirements.txt | 11 - .../unet/quantization/ptq_static/README.md | 37 -- .../unet/quantization/ptq_static/main.py | 102 ---- .../quantization/ptq_static/prepare_model.py | 73 --- .../quantization/ptq_static/requirements.txt | 4 - .../quantization/ptq_static/run_benchmark.sh | 40 -- .../unet/quantization/ptq_static/run_quant.sh | 39 -- .../quantization/ptq_dynamic/README.md | 54 --- .../quantization/ptq_dynamic/main.py | 337 -------------- .../quantization/ptq_dynamic/prepare_data.sh | 6 - .../quantization/ptq_dynamic/prepare_model.py | 21 - .../quantization/ptq_dynamic/requirements.txt | 12 - .../quantization/ptq_dynamic/run_benchmark.sh | 46 -- .../ptq_dynamic/run_fine_tuning.sh | 47 -- .../quantization/ptq_dynamic/run_quant.sh | 42 -- .../quantization/ptq_static/README.md | 55 --- .../quantization/ptq_static/main.py | 338 -------------- .../quantization/ptq_static/prepare_data.sh | 6 - .../quantization/ptq_static/prepare_model.py | 21 - .../quantization/ptq_static/requirements.txt | 12 - .../quantization/ptq_static/run_benchmark.sh | 46 -- .../ptq_static/run_fine_tuning.sh | 47 -- .../quantization/ptq_static/run_quant.sh | 46 -- .../quantization/ptq_dynamic/README.md | 69 --- .../quantization/ptq_dynamic/main.py | 431 ----------------- .../quantization/ptq_dynamic/prepare_data.sh | 34 -- .../quantization/ptq_dynamic/prepare_model.py | 97 ---- .../quantization/ptq_dynamic/requirements.txt | 12 - .../quantization/ptq_dynamic/run_benchmark.sh | 105 ----- .../quantization/ptq_dynamic/run_quant.sh | 130 ------ .../quantization/ptq_static/README.md | 72 --- .../quantization/ptq_static/main.py | 435 ------------------ .../quantization/ptq_static/prepare_data.sh | 34 -- .../quantization/ptq_static/prepare_model.py | 96 ---- .../quantization/ptq_static/requirements.txt | 12 - .../quantization/ptq_static/run_benchmark.sh | 105 ----- .../quantization/ptq_static/run_quant.sh | 133 ------ 43 files changed, 1 insertion(+), 3999 deletions(-) delete mode 100644 examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb delete mode 100644 examples/notebook/pytorch/benchmark.py delete mode 100644 examples/notebook/pytorch/requirements.txt delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py delete mode 100644 
examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh delete mode 100644 examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_quant.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b94000b727b..c5ef44e8e66 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -107,8 +107,6 @@ repos: files: (.*\.py)$ exclude: | (?x)^( - neural_compressor/conf/config.py| - neural_compressor/conf/pythonic_config.py| examples/.+| neural_compressor/torch/algorithms/fp8_quant/.+| test/3x/torch/.+ @@ 
-124,7 +122,6 @@ repos: exclude: | (?x)^( examples/.+| - docs/source-app| neural_compressor/torch/algorithms/fp8_quant/.+| test/3x/torch/.+ )$ diff --git a/examples/.config/model_params_onnxrt.json b/examples/.config/model_params_onnxrt.json index e5059e4d9bb..3a23604fb7d 100644 --- a/examples/.config/model_params_onnxrt.json +++ b/examples/.config/model_params_onnxrt.json @@ -483,13 +483,6 @@ "main_script": "main.py", "batch_size": 1 }, - "unet": { - "model_src_dir": "image_recognition/unet/quantization/ptq_static", - "dataset_location": "/tf_dataset2/datasets/imagenet/ImagenetRaw/ILSVRC2012_img_val", - "input_model": "/tf_dataset2/models/onnx/unet/unet-export.onnx", - "main_script": "main.py", - "batch_size": 1 - }, "BiDAF_dynamic": { "model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad/dev-v1.1.json", @@ -497,118 +490,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_bert-base-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-base-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-uncased_dynamic/bert-base-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_roberta-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlm-roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlm-roberta-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_camembert-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_camembert-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx", - "main_script": "main.py", - 
"batch_size": 8 - }, - "hf_MiniLM-L12-H384-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L12-H384-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L12-H384-uncased_dynamic/MiniLM-L12-H384-uncased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_distilbert-base-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_distilbert-base-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_distilbert-base-uncased_dynamic/distilbert-base-uncased-finetuned-sst-2-english.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_albert-base-v2_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_albert-base-v2": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L6-H384-uncased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_MiniLM-L6-H384-uncased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_MiniLM-L6-H384-uncased_dynamic/MiniLM-L6-H384-uncased-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_spanbert_dynamic": { "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", @@ -637,76 +518,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_bert-base-cased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_dynamic/bert-base-cased-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-base-cased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": 
"/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_static/bert-base-cased-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_electra-small-discriminator_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_electra-small-discriminator": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-mini_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bert-mini": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlnet-base-cased_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_xlnet-base-cased": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bart-large_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx", - "main_script": "main.py", - "batch_size": 8 - }, - "hf_bart-large": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_distilbert-base-uncased-distilled_dynamic": { "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", @@ -917,20 +728,6 @@ "main_script": "main.py", "batch_size": 1 }, - "hf_deberta_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_deberta/deberta-v3-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 1 - }, - "hf_deberta": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": 
"/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_deberta/deberta-v3-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 1 - }, "table_transformer_structure_recognition": { "model_src_dir": "object_detection/table_transformer/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/PubTables-1M", @@ -944,20 +741,6 @@ "input_model": "/tf_dataset2/models/onnx/table-transformer/pubtables1m_detection_detr_r18.onnx", "main_script": "patch", "batch_size": 1 - }, - "hf_codebert": { - "model_src_dir": "nlp/huggingface_model/code_detection/quantization/ptq_static", - "dataset_location": "/tf_dataset2/datasets/devign_dataset/valid.jsonl", - "input_model": "/tf_dataset2/models/onnx/hf_codebert/codebert-model.onnx", - "main_script": "main.py", - "batch_size": 1 - }, - "hf_codebert_dynamic": { - "model_src_dir": "nlp/huggingface_model/code_detection/quantization/ptq_dynamic", - "dataset_location": "/tf_dataset2/datasets/devign_dataset/valid.jsonl", - "input_model": "/tf_dataset2/models/onnx/hf_codebert/codebert-model.onnx", - "main_script": "main.py", - "batch_size": 1 - } + } } } diff --git a/examples/.config/model_params_onnxrt_win.json b/examples/.config/model_params_onnxrt_win.json index 8c461f9064b..67921c88031 100644 --- a/examples/.config/model_params_onnxrt_win.json +++ b/examples/.config/model_params_onnxrt_win.json @@ -29,13 +29,6 @@ "input_model": "models/onnx/DUC/ResNet101-DUC-12.onnx", "main_script": "main.py", "batch_size": 1 - }, - "hf_roberta-base_dynamic": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", - "dataset_location": "glue_data/MRPC", - "input_model": "models/onnx/hf_roberta-base_dynamic/roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 } } } \ No newline at end of file diff --git a/examples/README.md b/examples/README.md index d7b2c7ea88a..719873f6b4a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,8 +7,6 @@ Intel® Neural Compressor validated examples with multiple compression technique * [Quick Get Started Notebook of Intel® Neural Compressor for Tensorflow](/examples/notebook/tensorflow/resnet/resnet_quantization.ipynb) -* [Quick Get Started Notebook of Intel® Neural Compressor for Pytorch](/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb) - # Helloworld Examples * [torch_llm](/examples/helloworld/torch_llm): apply the weight-only quantization to LLMs. * [torch_non_llm](/examples/helloworld/torch_non_llm): apply the static quantization to non-LLMs. 
@@ -1109,18 +1107,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Static Quantization qlinearops - - CodeBert - Natural Language Processing - Post-Training Static Quantization - qlinearops - - - CodeBert - Natural Language Processing - Post-Training Dynamic Quantization - integerops - BERT base MRPC Natural Language Processing @@ -1175,118 +1161,6 @@ Intel® Neural Compressor validated examples with multiple compression technique Post-Training Dynamic Quantization integerops - - BERT base uncased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Roberta base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - XLM Roberta base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Camembert base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - MiniLM L12 H384 uncased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - DistilBERT base uncased SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Albert base v2 SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - MiniLM L6 H384 uncased SST-2 (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BERT base cased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Electra small discriminator MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BERT mini MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - Xlnet base cased MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - BART large MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - - - DeBERTa v3 base MRPC (HuggingFace) - Natural Language Processing - Post-Training Dynamic / Static Quantization - - integerops / qlinearops - - Spanbert SQuAD (HuggingFace) Natural Language Processing diff --git a/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb b/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb deleted file mode 100644 index f504aea0b10..00000000000 --- a/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb +++ /dev/null @@ -1,329 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Quick Get Started Notebook of Intel® Neural Compressor for Pytorch\n", - "\n", - "\n", - "This notebook is designed to provide an easy-to-follow guide for getting started with the [Intel® Neural Compressor](https://github.com/intel/neural-compressor) (INC) library for [pytorch](https://github.com/pytorch/pytorch) framework.\n", - "\n", - "In the following sections, we are going to use a DistilBert model fine-tuned on MRPC as an example to show how to apply 
post-training quantization on [transformers](https://github.com/huggingface/transformers) models using the INC library.\n", - "\n", - "\n", - "The main objectives of this notebook are:\n", - "\n", - "1. Prerequisite: Prepare the necessary environment, model, and dataset.\n", - "2. Quantization with INC: Walk through the step-by-step process of applying post-training quantization.\n", - "3. Benchmark with INC: Evaluate the performance of the FP32 and INT8 models.\n", - "\n", - "\n", - "## 1. Prerequisite\n", - "\n", - "### 1.1 Environment\n", - "\n", - "If you have Jupyter Notebook, you may directly run this notebook. We will use pip to install or upgrade [neural-compressor](https://github.com/intel/neural-compressor), [pytorch](https://github.com/pytorch/pytorch), and other required packages.\n", - "\n", - "Otherwise, you can set up a new environment. First, install [Anaconda](https://www.anaconda.com/distribution/). Then open an Anaconda prompt window and run the following commands:\n", - "\n", - "```shell\n", - "conda create -n inc_notebook python==3.8\n", - "conda activate inc_notebook\n", - "pip install jupyter\n", - "jupyter notebook\n", - "```\n", - "The last command will launch Jupyter Notebook, and we can open this notebook in the browser to continue.\n", - "\n", - "Then, let's install the necessary packages." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install neural-compressor from source\n", - "import sys\n", - "!git clone https://github.com/intel/neural-compressor.git\n", - "%cd ./neural-compressor\n", - "!{sys.executable} -m pip install -r requirements.txt\n", - "!{sys.executable} setup.py install\n", - "%cd ..\n", - "\n", - "# or install the stable version from pypi\n", - "!{sys.executable} -m pip install neural-compressor\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install other packages used in this notebook.\n", - "!{sys.executable} -m pip install -r requirements.txt\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.2 Load Dataset\n", - "\n", - "The General Language Understanding Evaluation (GLUE) benchmark is a group of nine classification tasks on sentences or pairs of sentences, which are:\n", - "\n", - "- [CoLA](https://nyu-mll.github.io/CoLA/) (Corpus of Linguistic Acceptability) Determine if a sentence is grammatically correct or not.\n", - "- [MNLI](https://arxiv.org/abs/1704.05426) (Multi-Genre Natural Language Inference) Determine if a sentence entails, contradicts, or is unrelated to a given hypothesis. This dataset has two versions: one with the validation and test sets coming from the same distribution, and another, called mismatched, where the validation and test sets use out-of-domain data.\n", - "- [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398) (Microsoft Research Paraphrase Corpus) Determine if two sentences are paraphrases of one another or not.\n", - "- [QNLI](https://rajpurkar.github.io/SQuAD-explorer/) (Question-answering Natural Language Inference) Determine if the answer to a question is in the second sentence or not.
This dataset is built from the SQuAD dataset.\n", - "- [QQP](https://data.quora.com/First-Quora-Dataset-Release-Question-Pairs) (Quora Question Pairs2) Determine if two questions are semantically equivalent or not.\n", - "- [RTE](https://aclweb.org/aclwiki/Recognizing_Textual_Entailment) (Recognizing Textual Entailment) Determine if a sentence entails a given hypothesis or not.\n", - "- [SST-2](https://nlp.stanford.edu/sentiment/index.html) (Stanford Sentiment Treebank) Determine if the sentence has a positive or negative sentiment.\n", - "- [STS-B](http://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark) (Semantic Textual Similarity Benchmark) Determine the similarity of two sentences with a score from 1 to 5.\n", - "- [WNLI](https://cs.nyu.edu/faculty/davise/papers/WinogradSchemas/WS.html) (Winograd Natural Language Inference) Determine if a sentence with an anonymous pronoun and a sentence with this pronoun replaced are entailed or not. This dataset is built from the Winograd Schema Challenge dataset.\n", - "\n", - "Here, we use MRPC task. We download and load the required dataset from hub." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import datasets\n", - "import numpy as np\n", - "import transformers\n", - "from datasets import load_dataset, load_metric\n", - "from transformers import (\n", - " AutoConfig,\n", - " AutoModelForSequenceClassification,\n", - " AutoTokenizer,\n", - " EvalPrediction,\n", - " Trainer,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "task_name = 'mrpc'\n", - "raw_datasets = load_dataset(\"glue\", task_name)\n", - "label_list = raw_datasets[\"train\"].features[\"label\"].names\n", - "num_labels = len(label_list)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.3 Prepare Model\n", - "Download the pretrained model [textattack/distilbert-base-uncased-MRPC](https://huggingface.co/textattack/distilbert-base-uncased-MRPC) to a pytorch model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = 'textattack/distilbert-base-uncased-MRPC'\n", - "\n", - "config = AutoConfig.from_pretrained(\n", - " model_name,\n", - " num_labels=num_labels,\n", - " finetuning_task=task_name,\n", - " use_auth_token=None,\n", - ")\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(\n", - " model_name,\n", - " use_auth_token=None,\n", - ")\n", - "\n", - "model = AutoModelForSequenceClassification.from_pretrained(\n", - " model_name,\n", - " from_tf=False,\n", - " config=config,\n", - " use_auth_token=None,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1.4 Dataset Preprocessing\n", - "We need to preprocess the raw dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sentence1_key, sentence2_key = (\"sentence1\", \"sentence2\")\n", - "padding = \"max_length\"\n", - "label_to_id = None\n", - "max_seq_length = 128\n", - "\n", - "def preprocess_function(examples):\n", - " args = (\n", - " (examples[sentence1_key], examples[sentence2_key])\n", - " )\n", - " result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True)\n", - " return result\n", - "\n", - "raw_datasets = raw_datasets.map(preprocess_function, batched=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 
Quantization with Intel® Neural Compressor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2.1 Define metric, evaluation function, and dataloader\n", - "\n", - "In this part, we define a GLUE metric and use it to build an evaluation function for INC.\n", - "\n", - "Refer to the doc [metric.md](https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#build-custom-metric-with-python-api) for how to build your own metric.\n", - "Refer to the docs [dataset.md](https://github.com/intel/neural-compressor/blob/master/docs/source/dataset.md#user-specific-dataset) and [dataloader.md](https://github.com/intel/neural-compressor/blob/master/docs/source/dataloader.md#build-custom-dataloader-with-python-apiapi) for how to build your own dataset and dataloader." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_dataset = raw_datasets[\"validation\"]\n", - "metric = load_metric(\"glue\", task_name)\n", - "data_collator = None\n", - "\n", - "def compute_metrics(p: EvalPrediction):\n", - " preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions\n", - " preds = np.argmax(preds, axis=1)\n", - " result = metric.compute(predictions=preds, references=p.label_ids)\n", - " if len(result) > 1:\n", - " result[\"combined_score\"] = np.mean(list(result.values())).item()\n", - " return result\n", - "\n", - "# Initialize our Trainer\n", - "trainer = Trainer(\n", - " model=model,\n", - " train_dataset=None,\n", - " eval_dataset=eval_dataset,\n", - " compute_metrics=compute_metrics,\n", - " tokenizer=tokenizer,\n", - " data_collator=data_collator,\n", - ")\n", - "\n", - "eval_dataloader = trainer.get_eval_dataloader()\n", - "\n", - "# for transformers 4.31.0: accelerate dataloader\n", - "# please use the code below to avoid errors\n", - "if eval_dataloader.batch_size is None:\n", - " def _build_inc_dataloader(dataloader):\n", - " class INCDataLoader:\n", - " __iter__ = dataloader.__iter__\n", - " def __init__(self) -> None:\n", - " self.dataloader = dataloader\n", - " self.batch_size = dataloader.total_batch_size\n", - " return INCDataLoader()\n", - " eval_dataloader = _build_inc_dataloader(eval_dataloader)\n", - "batch_size = eval_dataloader.batch_size\n", - "\n", - "def take_eval_steps(model, trainer, save_metrics=False):\n", - " trainer.model = model\n", - " metrics = trainer.evaluate()\n", - " bert_task_acc_keys = ['eval_f1', 'eval_accuracy', 'eval_matthews_correlation',\n", - " 'eval_pearson', 'eval_mcc', 'eval_spearmanr']\n", - " for key in bert_task_acc_keys:\n", - " if key in metrics.keys():\n", - " throughput = metrics.get(\"eval_samples_per_second\")\n", - " print('Batch size = %d' % batch_size)\n", - " print(\"Final Eval {} Accuracy: {}\".format(key, metrics[key]))\n", - " print(\"Latency: %.3f ms\" % (1000 / throughput))\n", - " print(\"Throughput: {} samples/sec\".format(throughput))\n", - " return metrics[key]\n", - " assert False, \"No metric returned, please check the inference metric!\"\n", - "\n", - "def eval_func(model):\n", - " return take_eval_steps(model, trainer)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2.2 Run Quantization\n", - "\n", - "Now we can finally start to quantize the model.\n", - "\n", - "To start, we need to set the configuration for post-training quantization using the `PostTrainingQuantConfig` class. Once the configuration is set, we can proceed to the next step by calling the `quantization.fit()` function.
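Beyond the basic setup, the tuning loop can also be bounded by an explicit accuracy target in addition to a trial budget. A minimal sketch, assuming the 2.x `TuningCriterion`/`AccuracyCriterion` API (keyword names may vary slightly between releases):

```python
from neural_compressor.config import (
    AccuracyCriterion,
    PostTrainingQuantConfig,
    TuningCriterion,
)

# Try at most 600 configurations, and only accept a quantized model whose
# accuracy drops by no more than 1% relative to the FP32 baseline.
tuning_criterion = TuningCriterion(max_trials=600)
accuracy_criterion = AccuracyCriterion(criterion="relative", tolerable_loss=0.01)
conf = PostTrainingQuantConfig(
    approach="static",
    tuning_criterion=tuning_criterion,
    accuracy_criterion=accuracy_criterion,
)
```

Either way, the assembled `conf` is then handed to `quantization.fit()`.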
This function performs the quantization process on the model and will return the best quantized model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neural_compressor.quantization import fit\n", - "from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion\n", - "tuning_criterion = TuningCriterion(max_trials=600)\n", - "conf = PostTrainingQuantConfig(approach=\"static\", tuning_criterion=tuning_criterion)\n", - "q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)\n", - "q_model.save(\"./saved_results\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Benchmark with Intel® Neural Compressor\n", - "\n", - "INC provides a benchmark feature to measure the model performance with the objective settings." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fp32 benchmark\n", - "!{sys.executable} benchmark.py 2>&1|tee fp32_benchmark.log\n", - "\n", - "# int8 benchmark\n", - "!{sys.executable} benchmark.py --input_model saved_results 2>&1|tee int8_benchmark.log\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.9.12" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/notebook/pytorch/benchmark.py b/examples/notebook/pytorch/benchmark.py deleted file mode 100644 index 71b6a0f04f1..00000000000 --- a/examples/notebook/pytorch/benchmark.py +++ /dev/null @@ -1,109 +0,0 @@ -import datasets -import numpy as np -import transformers -import logging -import argparse -from datasets import load_dataset, load_metric -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer, - EvalPrediction, - Trainer, -) - -from neural_compressor.config import BenchmarkConfig -from neural_compressor import benchmark - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -task_name = 'mrpc' -raw_datasets = load_dataset("glue", task_name) -label_list = raw_datasets["train"].features["label"].names -num_labels = len(label_list) - -sentence1_key, sentence2_key = ("sentence1", "sentence2") -padding = "max_length" -label_to_id = None -max_seq_length = 128 -model_name = 'textattack/distilbert-base-uncased-MRPC' - -config = AutoConfig.from_pretrained( - model_name, - num_labels=num_labels, - finetuning_task=task_name, - use_auth_token=None, -) - -tokenizer = AutoTokenizer.from_pretrained( - model_name, - use_auth_token=None, -) - -model = AutoModelForSequenceClassification.from_pretrained( - model_name, - from_tf=False, - config=config, - use_auth_token=None, -) - -def preprocess_function(examples): - args = ( - (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) - return result - -raw_datasets = raw_datasets.map(preprocess_function, batched=True) -eval_dataset = raw_datasets["validation"] -metric = load_metric("glue", task_name) -data_collator = None - -def compute_metrics(p: EvalPrediction): - preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions - preds = np.argmax(preds, axis=1) - result = metric.compute(predictions=preds, references=p.label_ids) - if 
len(result) > 1: - result["combined_score"] = np.mean(list(result.values())).item() - return result - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(__doc__) - parser.add_argument("--input_model", type=str, required=False, default=None) - args = parser.parse_args() - - # Initialize our Trainer - trainer = Trainer( - model=model, - train_dataset=None, - eval_dataset=eval_dataset, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, - ) - - eval_dataloader = trainer.get_eval_dataloader() - if eval_dataloader.batch_size is None: - def _build_inc_dataloader(dataloader): - class INCDataLoader: - __iter__ = dataloader.__iter__ - def __init__(self) -> None: - self.dataloader = dataloader - self.batch_size = dataloader.total_batch_size - return INCDataLoader() - eval_dataloader = _build_inc_dataloader(eval_dataloader) - batch_size = eval_dataloader.batch_size - - if args.input_model: - from neural_compressor.utils.pytorch import load - model = load(args.input_model, model, dataloader=eval_dataloader) - - b_conf = BenchmarkConfig(warmup=5, - iteration=100, - cores_per_instance=4, - num_of_instance=1) - benchmark.fit(model, b_conf, b_dataloader=eval_dataloader) diff --git a/examples/notebook/pytorch/requirements.txt b/examples/notebook/pytorch/requirements.txt deleted file mode 100644 index aa1af71d2b3..00000000000 --- a/examples/notebook/pytorch/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -torch>=1.9.0 -transformers>=4.16.0 -accelerate -sympy -numpy -sentencepiece!=0.1.92 -protobuf<=3.20.3 -datasets>=1.1.3 -scipy -scikit-learn -Keras-Preprocessing diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md deleted file mode 100644 index d7f119e043c..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# Step-by-Step - -This is an experimental example to quantize unet model. We use dummy data to do quantization and evaluation, so the accuracy is not guaranteed. - -# Prerequisite - -## 1. Environment - -```shell -pip install neural-compressor -pip install -r requirements.txt -``` - -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Model - -```bash -python prepare_model.py --input_model='CompVis/stable-diffusion-v1-4' --output_model='unet-export.onnx' -``` - -# Run - -## 1. Quantization - -```bash -bash run_quant.sh --input_model=path/to/model \ # model path as *.onnx - --output_model=path/to/save -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --batch_size=batch_size \ - --mode=performance -``` diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py deleted file mode 100644 index a949faeef48..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/main.py +++ /dev/null @@ -1,102 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - - -import logging -import argparse - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -class Dataloader: - def __init__(self, batch_size): - self.batch_size = batch_size - shape = [[batch_size, 4, 64, 64], [batch_size], [batch_size, 77, 768]] - dtype = ['float32', 'float32', 'float32'] - self.dataset = [] - for idx in range(0, len(shape)): - tensor = np.random.uniform(size=shape[idx]) - tensor = tensor.astype(dtype[idx]) - self.dataset.append(tensor) - - def __iter__(self): - yield self.dataset, 0 - -if __name__ == "__main__": - logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained model on onnx file" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--quant_format', - type=str, - default='default', - choices=['default', 'QDQ', 'QOperator'], - help="quantization format" - ) - parser.add_argument( - "--batch_size", - default=1, - type=int, - ) - args = parser.parse_args() - - dataloader = Dataloader(args.batch_size) - - if args.benchmark and args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) - fit(args.model_path, conf, b_dataloader=dataloader) - if args.tune: - from neural_compressor import quantization, PostTrainingQuantConfig - config = PostTrainingQuantConfig(quant_format=args.quant_format, recipes={'graph_optimization_level':'ENABLE_EXTENDED'}) - q_model = quantization.fit(args.model_path, config, calib_dataloader=dataloader) - - q_model.save(args.output_model) diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py deleted file mode 100644 index 18e79ff6cd3..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/prepare_model.py +++ /dev/null @@ -1,73 +0,0 @@ -import argparse -import os -import shutil -import subprocess - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default='CompVis/stable-diffusion-v1-4') - parser.add_argument("--output_model", type=str, required=True) - return parser.parse_args() - -def move_and_rename_model(source_folder, destination_folder): - if not 
os.path.exists(source_folder): - raise RuntimeError("{} path does not exist".format(source_folder)) - for file_name in os.listdir(source_folder): - source_file = os.path.join(source_folder, file_name) - destination_file = os.path.join(destination_folder, file_name) - - if os.path.isdir(source_file): - continue - - shutil.move(source_file, destination_file) - - if file_name == "model.onnx": - new_file_name = "unet-export.onnx" - new_file_path = os.path.join(destination_folder, new_file_name) - os.rename(destination_file, new_file_path) - -def prepare_model(input_model, output_model): - # Clone diffusers and run its conversion script to export the Stable Diffusion UNet to ONNX. - print("\nexport model...") - - export_file = "prepare_unet" - subprocess.run( - [ - "git", - "clone", - "https://github.com/huggingface/diffusers.git", - ], - stdout=subprocess.PIPE, - text=True, - ) - subprocess.run( - ["pip", "install", "--upgrade", "diffusers[torch]", "transformers"], - stdout=subprocess.PIPE, - text=True, - ) - subprocess.run( - [ - "python", - "diffusers/scripts/convert_stable_diffusion_checkpoint_to_onnx.py", - "--model_path", - input_model, - "--output_path", - export_file, - ], - stdout=subprocess.PIPE, - text=True, - ) - - move_and_rename_model(os.path.join(export_file, "unet"), os.path.dirname(output_model)) - try: - shutil.rmtree(export_file, ignore_errors=True) - except OSError as e: - raise e - - assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" - - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt deleted file mode 100644 index b0c7e4bab62..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -protobuf==3.20.3 diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh deleted file mode 100644 index 0f5384d8e63..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_path ${input_model} \ - --mode=${mode} \ - --batch_size ${batch_size-1} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh b/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh deleted file mode 100644 index 8e6133f8bb0..00000000000 --- a/examples/onnxrt/image_recognition/unet/quantization/ptq_static/run_quant.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --quant_format ${quant_format-default} \ - --tune -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md deleted file mode 100644 index ea1c76aa43e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/README.md +++ /dev/null @@ -1,54 +0,0 @@ -Step-by-Step -============ - -This example quantizes the [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) model fine-tuned on the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - - -## 2. Prepare Dataset -Run the `prepare_data.sh` script to download the dataset into the `dataset` folder and pre-process it: - -```shell -bash prepare_data.sh -``` -## 3. Prepare Model - -Fine-tune the model on the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. -``` -bash run_fine_tuning.sh --train_dataset_location=./dataset/train.jsonl --dataset_location=./dataset/valid.jsonl --fine_tune -``` - -Export the model to ONNX format. -```bash -# By default, the input model path is `checkpoint-best-acc/`. -python prepare_model.py --input_model=./checkpoint-best-acc --output_model=./codebert-exported-onnx -``` - -# Run - -## 1. Quantization - -Dynamic quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data -``` - -## 2.
Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py deleted file mode 100644 index b9c023458a0..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/main.py +++ /dev/null @@ -1,337 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import argparse -import logging -from typing import List - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) - - -def load_dataset_from_local(file_path, model_name_or_path): - """Load the raw data from local.""" - import json - - import torch - - def read_data(file_path): - texts, labels = [], [] - with open(file_path, "r") as f: - for i, line in enumerate(f): - js = json.loads(line.strip()) - code = " ".join(js["func"].split()) - texts.append(code) - labels.append(js["target"]) - return texts, labels - - texts, labels = read_data(file_path) - - # tokenize the raw data - from transformers import AutoTokenizer - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - encodings = tokenizer( - texts, return_tensors="pt", truncation=True, padding="max_length" - ) - - class CodeDataset(torch.utils.data.Dataset): - def __init__(self, encodings, labels): - self.encodings = encodings - self.labels = labels - - def __getitem__(self, idx): - item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} - item["labels"] = torch.tensor(self.labels[idx]) - return item - - def __len__(self): - return len(self.labels) - - dataset = CodeDataset(encodings, labels) - return dataset - - -# evaluation func for fine-tuning -def evaluate(model, val_loader): - import torch - - print("*** eval model .. 
") - all_labels = [] - all_preds = [] - for idx, batch in enumerate(val_loader): - model.eval() - with torch.no_grad(): - labels = batch.pop("labels") - inputs = batch - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(f"{idx} batch evaluation accuracy: {cur_acc}") - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print("Overall evaluation accuracy: ", cur_acc) - return cur_acc - - -def fine_tune(args): - import os - import numpy as np - import torch - from torch.utils.data import DataLoader - from transformers import AdamW, AutoModelForSequenceClassification - - train_dataset = load_dataset_from_local( - args.train_data_path, args.model_name_or_path - ) - val_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path) - val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True) - train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) - - optim = AdamW(model.parameters(), lr=5e-5) - - results = {"eval_acc": 0} - global_step = -1 - for epoch in range(3): - all_labels = [] - all_preds = [] - for idx, batch in enumerate(train_loader): - global_step += 1 - optim.zero_grad() - labels = batch.pop("labels") - inputs = batch - model.train() - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(" Current acc:%s", round(cur_acc, 4)) - loss.backward() - print(f" Loss: {loss.item()}") - optim.step() - - if global_step % 100 == 0: - best_acc = results["eval_acc"] - cur_acc = evaluate(model, val_loader) - if cur_acc > best_acc: - results["eval_acc"] = cur_acc - best_acc = results["eval_acc"] - print(" Best acc:%s", round(best_acc, 4)) - checkpoint_prefix = "checkpoint-best-acc" - output_dir = os.path.join("{}".format(checkpoint_prefix)) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - model_to_save = model.module if hasattr(model, "module") else model - model.config.to_json_file("{}/config.json".format(checkpoint_prefix)) - output_dir = os.path.join(output_dir, "{}".format("pytorch_model.bin")) - torch.save(model_to_save.state_dict(), output_dir) - print("Saving model checkpoint to %s", output_dir) - - -class ONNXRTDataset: - def __init__(self, model_path, dataset): - self.inputs = [inp.name for inp in onnx.load(model_path).graph.input] - self.dataset = dataset - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = self.dataset[index] - labels = batch["labels"].detach().cpu().numpy() - batch.pop("labels") - inputs = [batch["input_ids"].numpy(), batch["attention_mask"].numpy()] - return inputs, labels - - -def get_dataloader(ort_model_path, dataset): - """Create INC ORT dataloader.""" - dataloader = ONNXRTDataset(ort_model_path, dataset) - return dataloader - - -def main(): - # parse args - parser = argparse.ArgumentParser() - parser.add_argument( - 
"--train_data_path", - default=None, - type=str, - help="An optional input training data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--data_path", - default=None, - type=str, - help="An optional input evaluation data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--model_name_or_path", - default=None, - type=str, - help="The model checkpoint for weights initialization.", - ) - parser.add_argument( - "--model_path", default=None, type=str, help="The onnx model path." - ) - parser.add_argument("--benchmark", action="store_true", default=False) - parser.add_argument( - "--fine_tune", - action="store_true", - default=False, - help="whether fine tune the model", - ) - parser.add_argument( - "--tune", action="store_true", default=False, help="whether quantize the model" - ) - parser.add_argument( - "--output_model", type=str, default=None, help="output model path" - ) - parser.add_argument( - "--mode", type=str, help="benchmark mode of performance or accuracy" - ) - parser.add_argument("--batch_size", default=1, type=int, help="batch size") - parser.add_argument( - "--quant_format", - type=str, - default="QOperator", - choices=["QOperator", "QDQ"], - help="quantization format", - ) - args = parser.parse_args() - - # fine tune - if args.fine_tune: - fine_tune(args) - - - def eval_func(model): - session = ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - all_labels = [] - all_preds = [] - import tqdm - - for idx, (inputs, labels) in tqdm.tqdm(enumerate(dataloader)): - if not isinstance(labels, list): - labels: List[np.array] = [labels] # List[shape: bs] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions: List[np.array] = session.run( - None, ort_inputs - ) # List[# shape, (bs, 2)] - predictions = [np.argmax(p, axis=1) for p in predictions] - - all_labels += labels - all_preds += predictions - np.mean( - np.concatenate(all_labels, 0) == (np.concatenate(all_preds, 0)) - ) # [:,0]>0.5)) - label_flatten = np.concatenate(all_labels, 0) - preds_flatten = np.concatenate(all_preds, 0) - correct_count = np.sum(label_flatten == preds_flatten) - acc = correct_count / len(label_flatten) - return acc - - # tune - if args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - - from neural_compressor.data import DataLoader as INC_DataLoader - - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - - model_type = "bert" - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=12, - hidden_size=768, - optimization_options=opt_options, - ) - model = model_optimizer.model - - # check the optimized model is valid - import onnxruntime as ort - - try: - ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - except Exception as e: - logger.warning("Optimized model is invalid: {}. 
".format(e)) - logger.warning( - "Model optimizer will be skipped. " - "Try to upgrade onnxruntime to avoid this error" - ) - model = onnx.load(args.model_path) - - config = PostTrainingQuantConfig( - approach="dynamic", - quant_level=1, - ) - q_model = quantization.fit( - model, - config, - eval_func=eval_func, - calib_dataloader=dataloader, - ) - q_model.save(args.output_model) - - # benchmark - if args.benchmark: - import onnx - import onnxruntime as ort - from neural_compressor.data import DataLoader as INC_DataLoader - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - model = onnx.load(args.model_path) - if args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - conf = BenchmarkConfig(iteration=100) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == "accuracy": - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - main() diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh deleted file mode 100644 index 81ce2ae3e91..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -git clone https://github.com/microsoft/CodeXGLUE/ -cp -r ./CodeXGLUE/Code-Code/Defect-detection/dataset dataset -cd dataset -pip install gdown -gdown https://drive.google.com/uc?id=1x6hoF7G-tSYxg8AFybggypLZgMGDNHfF -python preprocess.py \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py deleted file mode 100644 index 9d941a0e5e4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/prepare_model.py +++ /dev/null @@ -1,21 +0,0 @@ -import argparse -import os -from optimum.exporters.onnx import main_export - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default="checkpoint-best-acc") - parser.add_argument("--output_model", type=str, required=False, default="codebert-exported-onnx") - return parser.parse_args() - -def prepare_model(input_model, output_model): - print("\nexport model...") - print(f"Try to export model from {input_model} to {output_model}") - main_export(input_model, output=output_model, task="text-classification") - - assert os.path.exists(output_model), f"{output_model} doesn't exist!" 
- - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt deleted file mode 100644 index 9988cdf0329..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh deleted file mode 100644 index 1f514a25368..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_benchmark.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --mode ${mode} \ - --batch_size ${batch_size} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh deleted file mode 100644 index d959b607a14..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_fine_tuning.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_fine_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --train_dataset_location=*) - train_dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_fine_tuning -function run_fine_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --train_data_path ${train_dataset_location} \ - --data_path ${dataset_location} \ - --fine_tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh deleted file mode 100644 index 5f75411f60a..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_dynamic/run_quant.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var 
|cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --output_model ${output_model} \ - --tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md deleted file mode 100644 index 21000ca3aea..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/README.md +++ /dev/null @@ -1,55 +0,0 @@ -Step-by-Step -============ - -This example quantizes the [microsoft/codebert-base](https://huggingface.co/microsoft/codebert-base) fine-tuned on the the [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - - -## 2. Prepare Dataset -Run `prepare_data.sh` script to download dataset from website to `dataset` folder and pre-process it: - -```shell -bash prepare_data.sh -``` -## 3. Prepare Model - -Fine-tuning the model on [code defect detection](https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/Defect-detection#codexglue----defect-detection) task. -``` -bash run_fine_tuning.sh --train_dataset_location=./dataset/train.jsonl --dataset_location=./dataset/valid.jsonl --fine_tune -``` - -Export model to ONNX format. -```bash -# By default, the input model path is `checkpoint-best-acc/`. -python prepare_model.py --input_model=./checkpoint-best-acc --output_model=./codebert-exported-onnx -``` - -# Run - -## 1. Quantization - -Static quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data \ - --quant_format="QOperator" -``` - -## 2. 
Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py deleted file mode 100644 index 6b4db97faca..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/main.py +++ /dev/null @@ -1,338 +0,0 @@ -from __future__ import absolute_import, division, print_function - -import argparse -import logging -from typing import List - -import numpy as np -import onnx - -logger = logging.getLogger(__name__) - - -def load_dataset_from_local(file_path, model_name_or_path): - """Load the raw data from local.""" - import json - - import torch - - def read_data(file_path): - texts, labels = [], [] - with open(file_path, "r") as f: - for i, line in enumerate(f): - js = json.loads(line.strip()) - code = " ".join(js["func"].split()) - texts.append(code) - labels.append(js["target"]) - return texts, labels - - texts, labels = read_data(file_path) - - # tokenize the raw data - from transformers import AutoTokenizer - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - encodings = tokenizer( - texts, return_tensors="pt", truncation=True, padding="max_length" - ) - - class CodeDataset(torch.utils.data.Dataset): - def __init__(self, encodings, labels): - self.encodings = encodings - self.labels = labels - - def __getitem__(self, idx): - item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} - item["labels"] = torch.tensor(self.labels[idx]) - return item - - def __len__(self): - return len(self.labels) - - dataset = CodeDataset(encodings, labels) - return dataset - - -# evaluation func for fine-tuning -def evaluate(model, val_loader): - import torch - - print("*** eval model .. 
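`load_dataset_from_local` above does three things: parse the Defect-detection JSONL, normalize whitespace in each function body, and tokenize to fixed-length tensors. The same pipeline in a few lines (the path and the `TensorDataset` packaging are illustrative; the original wraps the encodings in a custom `CodeDataset`):

```python
import json
import torch
from transformers import AutoTokenizer

texts, labels = [], []
with open("dataset/valid.jsonl") as f:              # hypothetical path
    for line in f:
        js = json.loads(line.strip())
        texts.append(" ".join(js["func"].split()))  # collapse whitespace
        labels.append(js["target"])

tok = AutoTokenizer.from_pretrained("microsoft/codebert-base")
enc = tok(texts, return_tensors="pt", truncation=True, padding="max_length")
dataset = torch.utils.data.TensorDataset(
    enc["input_ids"], enc["attention_mask"], torch.tensor(labels)
)
```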
") - all_labels = [] - all_preds = [] - for idx, batch in enumerate(val_loader): - model.eval() - with torch.no_grad(): - labels = batch.pop("labels") - inputs = batch - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(f"{idx} batch evaluation accuracy: {cur_acc}") - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print("Overall evaluation accuracy: ", cur_acc) - return cur_acc - - -def fine_tune(args): - import os - import numpy as np - import torch - from torch.utils.data import DataLoader - from transformers import AdamW, AutoModelForSequenceClassification - - train_dataset = load_dataset_from_local( - args.train_data_path, args.model_name_or_path - ) - val_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path) - val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True) - train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) - - optim = AdamW(model.parameters(), lr=5e-5) - - results = {"eval_acc": 0} - global_step = -1 - for epoch in range(3): - all_labels = [] - all_preds = [] - for idx, batch in enumerate(train_loader): - global_step += 1 - optim.zero_grad() - labels = batch.pop("labels") - inputs = batch - model.train() - outputs = model(**inputs, labels=labels) - loss = outputs.loss - logits = outputs.logits - all_labels.append(labels.numpy()) - all_preds.append(np.argmax(logits.detach().numpy(), axis=1)) - np.concatenate(all_labels, axis=0) - np.concatenate(all_preds, axis=0) - cur_acc = np.mean( - np.concatenate(all_labels, axis=0) == np.concatenate(all_preds, axis=0) - ) - print(" Current acc:%s", round(cur_acc, 4)) - loss.backward() - print(f" Loss: {loss.item()}") - optim.step() - - if global_step % 100 == 0: - best_acc = results["eval_acc"] - cur_acc = evaluate(model, val_loader) - if cur_acc > best_acc: - results["eval_acc"] = cur_acc - best_acc = results["eval_acc"] - print(" Best acc:%s", round(best_acc, 4)) - checkpoint_prefix = "checkpoint-best-acc" - output_dir = os.path.join("{}".format(checkpoint_prefix)) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - model_to_save = model.module if hasattr(model, "module") else model - model.config.to_json_file("{}/config.json".format(checkpoint_prefix)) - output_dir = os.path.join(output_dir, "{}".format("pytorch_model.bin")) - torch.save(model_to_save.state_dict(), output_dir) - print("Saving model checkpoint to %s", output_dir) - - -class ONNXRTDataset: - def __init__(self, model_path, dataset): - self.inputs = [inp.name for inp in onnx.load(model_path).graph.input] - self.dataset = dataset - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = self.dataset[index] - labels = batch["labels"].detach().cpu().numpy() - batch.pop("labels") - inputs = [batch["input_ids"].numpy(), batch["attention_mask"].numpy()] - return inputs, labels - - -def get_dataloader(ort_model_path, dataset): - """Create INC ORT dataloader.""" - dataloader = ONNXRTDataset(ort_model_path, dataset) - return dataloader - - -def main(): - # parse args - parser = argparse.ArgumentParser() - parser.add_argument( - 
"--train_data_path", - default=None, - type=str, - help="An optional input training data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--data_path", - default=None, - type=str, - help="An optional input evaluation data file to evaluate the perplexity on (a text file).", - ) - parser.add_argument( - "--model_name_or_path", - default=None, - type=str, - help="The model checkpoint for weights initialization.", - ) - parser.add_argument( - "--model_path", default=None, type=str, help="The onnx model path." - ) - parser.add_argument("--benchmark", action="store_true", default=False) - parser.add_argument( - "--fine_tune", - action="store_true", - default=False, - help="whether fine tune the model", - ) - parser.add_argument( - "--tune", action="store_true", default=False, help="whether quantize the model" - ) - parser.add_argument( - "--output_model", type=str, default=None, help="output model path" - ) - parser.add_argument( - "--mode", type=str, help="benchmark mode of performance or accuracy" - ) - parser.add_argument("--batch_size", default=1, type=int, help="batch size") - parser.add_argument( - "--quant_format", - type=str, - default="QOperator", - choices=["QOperator", "QDQ"], - help="quantization format", - ) - args = parser.parse_args() - - # fine tune - if args.fine_tune: - fine_tune(args) - - - def eval_func(model): - session = ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - all_labels = [] - all_preds = [] - import tqdm - - for idx, (inputs, labels) in tqdm.tqdm(enumerate(dataloader)): - if not isinstance(labels, list): - labels: List[np.array] = [labels] # List[shape: bs] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions: List[np.array] = session.run( - None, ort_inputs - ) # List[# shape, (bs, 2)] - predictions = [np.argmax(p, axis=1) for p in predictions] - - all_labels += labels - all_preds += predictions - np.mean( - np.concatenate(all_labels, 0) == (np.concatenate(all_preds, 0)) - ) # [:,0]>0.5)) - label_flatten = np.concatenate(all_labels, 0) - preds_flatten = np.concatenate(all_preds, 0) - correct_count = np.sum(label_flatten == preds_flatten) - acc = correct_count / len(label_flatten) - return acc - - # tune - if args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - - from neural_compressor.data import DataLoader as INC_DataLoader - - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - - model_type = "bert" - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=12, - hidden_size=768, - optimization_options=opt_options, - ) - model = model_optimizer.model - - # check the optimized model is valid - import onnxruntime as ort - - try: - ort.InferenceSession( - model.SerializeToString(), providers=ort.get_available_providers() - ) - except Exception as e: - logger.warning("Optimized model is invalid: {}. 
".format(e)) - logger.warning( - "Model optimizer will be skipped. " - "Try to upgrade onnxruntime to avoid this error" - ) - model = onnx.load(args.model_path) - - config = PostTrainingQuantConfig( - approach="static", - quant_level=1, - quant_format=args.quant_format, - ) - q_model = quantization.fit( - model, - config, - eval_func=eval_func, - calib_dataloader=dataloader, - ) - q_model.save(args.output_model) - - # benchmark - if args.benchmark: - import onnx - import onnxruntime as ort - from neural_compressor.data import DataLoader as INC_DataLoader - - train_dataset = load_dataset_from_local(args.data_path, args.model_name_or_path) - ort_dataset = ONNXRTDataset(args.model_path, train_dataset) - dataloader = INC_DataLoader( - framework="onnxruntime", dataset=ort_dataset, batch_size=args.batch_size - ) - model = onnx.load(args.model_path) - if args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - conf = BenchmarkConfig(iteration=100) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == "accuracy": - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - -if __name__ == "__main__": - main() diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh deleted file mode 100644 index 81ce2ae3e91..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_data.sh +++ /dev/null @@ -1,6 +0,0 @@ -git clone https://github.com/microsoft/CodeXGLUE/ -cp -r ./CodeXGLUE/Code-Code/Defect-detection/dataset dataset -cd dataset -pip install gdown -gdown https://drive.google.com/uc?id=1x6hoF7G-tSYxg8AFybggypLZgMGDNHfF -python preprocess.py \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py deleted file mode 100644 index 9d941a0e5e4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/prepare_model.py +++ /dev/null @@ -1,21 +0,0 @@ -import argparse -import os -from optimum.exporters.onnx import main_export - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default="checkpoint-best-acc") - parser.add_argument("--output_model", type=str, required=False, default="codebert-exported-onnx") - return parser.parse_args() - -def prepare_model(input_model, output_model): - print("\nexport model...") - print(f"Try to export model from {input_model} to {output_model}") - main_export(input_model, output=output_model, task="text-classification") - - assert os.path.exists(output_model), f"{output_model} doesn't exist!" 
- - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt deleted file mode 100644 index 6ebc9f078a4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh deleted file mode 100644 index 1f514a25368..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_benchmark.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --mode ${mode} \ - --batch_size ${batch_size} \ - --benchmark - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh deleted file mode 100644 index d959b607a14..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_fine_tuning.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_fine_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --train_dataset_location=*) - train_dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_fine_tuning -function run_fine_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --train_data_path ${train_dataset_location} \ - --data_path ${dataset_location} \ - --fine_tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh deleted file mode 100644 index c234a7c509e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/code_detection/quantization/ptq_static/run_quant.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_tuning - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --output_model=*) - 
output_model=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --quant_format=*) - quant_format=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - python main.py \ - --model_name_or_path microsoft/codebert-base \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --output_model ${output_model} \ - --quant_format ${quant_format} \ - --tune - -} - -main "$@" diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md deleted file mode 100644 index 7833386e16c..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md +++ /dev/null @@ -1,69 +0,0 @@ -Step-by-Step -============ - -This example load a language translation model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). - -# Prerequisite - -## 1. Environment -```shell -pip install neural-compressor -pip install -r requirements.txt -``` -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Model - -Supported model identifier from [huggingface.co](https://huggingface.co/): - -| Model Identifier | -|:-----------------------------------------------:| -| Intel/bert-base-uncased-mrpc | -| Intel/roberta-base-mrpc | -| Intel/xlm-roberta-base-mrpc | -| Intel/camembert-base-mrpc | -| distilbert-base-uncased-finetuned-sst-2-english | -| Alireza1044/albert-base-v2-sst2 | -| Intel/MiniLM-L12-H384-uncased-mrpc | -| philschmid/MiniLM-L6-H384-uncased-sst2 | -| bert-base-cased-finetuned-mrpc | -| Intel/electra-small-discriminator-mrpc | -| M-FAC/bert-mini-finetuned-mrpc | -| Intel/xlnet-base-cased-mrpc | -| Intel/bart-large-mrpc | -| Intel/deberta-v3-base-mrpc | - -```bash -python prepare_model.py --input_model=Intel/bert-base-uncased-mrpc --output_model=bert-base-uncased-mrpc.onnx -``` - -## 3. Prepare Dataset -Download the GLUE data with `prepare_data.sh` script. - -```shell -export GLUE_DIR=/path/to/glue_data -export TASK_NAME=MRPC # or SST - -bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME -``` - -# Run - -## 1. Quantization - -Dynamic quantization: - -```bash -bash run_quant.sh --input_model=path/to/model \ # model path as *.onnx - --output_model=path/to/model_tune \ # model path as *.onnx - --dataset_location=path/to/glue/data -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py deleted file mode 100644 index 18151ff9884..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py +++ /dev/null @@ -1,431 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - -import logging -import argparse -import onnx -import onnxruntime as ort -import transformers -import os -import torch -import numpy as np -from dataclasses import dataclass -from typing import List, Optional, Union -from neural_compressor.data import DataLoader - - -class ONNXRTBertDataset: - """Dataset used for model Bert. - Args: data_dir (str): The input data dir. - model_name_or_path (str): Path to pre-trained student model or shortcut name, - selected in the list: - max_seq_length (int, default=128): The maximum length after tokenization. - Sequences longer than this will be truncated, - sequences shorter will be padded. - do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. - task (str, default=mrpc): The name of the task to fine-tune. - Choices include mrpc, qqp, qnli, rte, - sts-b, cola, mnli, wnli. - model_type (str, default='bert'): model type, support 'distilbert', 'bert', - 'mobilebert', 'roberta'. - dynamic_length (bool, default=False): Whether to use fixed sequence length. - evaluate (bool, default=True): Whether do evaluation or training. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. 
- """ - def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ - do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ - evaluate=True, transform=None, filter=None): - self.inputs = [inp.name for inp in onnx.load(model).graph.input] - task = task.lower() - model_type = model_type.lower() - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' - assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ - model type' - self.dynamic_length = dynamic_length - self.model_type = model_type - self.max_seq_length = max_seq_length - tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, - do_lower_case=do_lower_case) - self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ - max_seq_length, task, model_type, tokenizer, evaluate) - - def __len__(self): - return len(self.dataset) - - def __getitem__(self, index): - batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) - return batch[:len(self.inputs)], batch[-1] - -def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ - model_type, tokenizer, evaluate): - from torch.utils.data import TensorDataset - - processor = transformers.glue_processors[task]() - output_mode = transformers.glue_output_modes[task] - # Load data features from cache or dataset file - if not os.path.exists("./dataset_cached"): - os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( - 'dev' if evaluate else 'train', - list(filter(None, model_name_or_path.split('/'))).pop(), - str(max_seq_length), - str(task))) - if os.path.exists(cached_features_file): - logger.info("Load features from cached file {}.".format(cached_features_file)) - features = torch.load(cached_features_file) - else: - logger.info("Create features from dataset file at {}.".format(data_dir)) - label_list = processor.get_labels() - examples = processor.get_dev_examples(data_dir) if evaluate else \ - processor.get_train_examples(data_dir) - features = convert_examples_to_features(examples, - tokenizer, - task=task, - label_list=label_list, - max_length=max_seq_length, - output_mode=output_mode, - ) - logger.info("Save features into cached file {}.".format(cached_features_file)) - torch.save(features, cached_features_file) - # Convert to Tensors and build dataset - all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) - all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) - all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) - all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) - if output_mode == "classification": - all_labels = torch.tensor([f.label for f in features], dtype=torch.long) - elif output_mode == "regression": - all_labels = torch.tensor([f.label for f in features], dtype=torch.float) - dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ - all_seq_lengths, all_labels) - return dataset - -def convert_examples_to_features( - examples, - tokenizer, - max_length=128, - task=None, - label_list=None, - output_mode="classification", - pad_token=0, - pad_token_segment_id=0, - mask_padding_with_zero=True, -): - processor = transformers.glue_processors[task]() - if label_list is None: - label_list = processor.get_labels() - logger.info("Use label 
list {} for task {}.".format(label_list, task)) - label_map = {label: i for i, label in enumerate(label_list)} - features = [] - for (ex_index, example) in enumerate(examples): - inputs = tokenizer.encode_plus( - example.text_a, - example.text_b, - add_special_tokens=True, - max_length=max_length, - return_token_type_ids=True, - truncation=True, - ) - input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) - - # Zero-pad up to the sequence length. - seq_length = len(input_ids) - padding_length = max_length - len(input_ids) - - input_ids = input_ids + ([pad_token] * padding_length) - attention_mask = attention_mask + \ - ([0 if mask_padding_with_zero else 1] * padding_length) - token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) - - assert len(input_ids) == max_length, \ - "Error with input_ids length {} vs {}".format( - len(input_ids), max_length) - assert len(attention_mask) == max_length, \ - "Error with attention_mask length {} vs {}".format( - len(attention_mask), max_length - ) - assert len(token_type_ids) == max_length, \ - "Error with token_type_ids length {} vs {}".format( - len(token_type_ids), max_length - ) - if output_mode == "classification": - label = label_map[example.label] - elif output_mode == "regression": - label = float(example.label) - else: - raise KeyError(output_mode) - - feats = InputFeatures( - input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - label=label, - seq_length=seq_length, - ) - features.append(feats) - return features - -@dataclass(frozen=True) -class InputFeatures: - """ - A single set of features of data. - Property names are the same names as the corresponding inputs to a model. - Args: - input_ids: Indices of input sequence tokens in the vocabulary. - attention_mask: Mask to avoid performing attention on padding token indices. - Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, - ``0`` for MASKED (padded) tokens. - token_type_ids: (Optional) Segment token indices to indicate first and second - portions of the inputs. Only some models use them. - label: (Optional) Label corresponding to the input. Int for classification problems, - float for regression problems. - seq_length: (Optional) The length of input sequence before padding. - """ - - input_ids: List[int] - attention_mask: Optional[List[int]] = None - token_type_ids: Optional[List[int]] = None - label: Optional[Union[int, float]] = None - seq_length: Optional[List[int]] = None - -class ONNXRTGLUE: - """Computes GLUE score. - - Args: - task (str, default=mrpc): The name of the task. - Choices include mrpc, qqp, qnli, rte, - sts-b, cola, mnli, wnli. 
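The padding block in `convert_examples_to_features` right-pads `input_ids`, `attention_mask`, and `token_type_ids` out to `max_length` while remembering the pre-padding length. Factored out (a sketch; the original keeps this inline and asserts the final lengths):

```python
def pad_features(input_ids, attention_mask, token_type_ids,
                 max_length=128, pad_token=0, pad_token_segment_id=0):
    """Right-pad all three sequences and return the true length."""
    seq_length = len(input_ids)
    pad = max_length - seq_length
    return (
        input_ids + [pad_token] * pad,
        attention_mask + [0] * pad,          # 0 marks padding positions
        token_type_ids + [pad_token_segment_id] * pad,
        seq_length,
    )
```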
- - """ - def __init__(self, task='mrpc'): - assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' - self.pred_list = None - self.label_list = None - self.task = task - self.return_key = { - "cola": "mcc", - "mrpc": "f1", - "sts-b": "corr", - "qqp": "acc", - "mnli": "mnli/acc", - "qnli": "acc", - "rte": "acc", - "wnli": "acc", - "sst-2": "acc" - } - - def update(self, preds, labels): - """add preds and labels to storage""" - if isinstance(preds, list) and len(preds) == 1: - preds = preds[0] - if isinstance(labels, list) and len(labels) == 1: - labels = labels[0] - if self.pred_list is None: - self.pred_list = preds - self.label_list = labels - else: - self.pred_list = np.append(self.pred_list, preds, axis=0) - self.label_list = np.append(self.label_list, labels, axis=0) - - def reset(self): - """clear preds and labels storage""" - self.pred_list = None - self.label_list = None - - def result(self): - """calculate metric""" - output_mode = transformers.glue_output_modes[self.task] - - if output_mode == "classification": - processed_preds = np.argmax(self.pred_list, axis=1) - elif output_mode == "regression": - processed_preds = np.squeeze(self.pred_list) - result = transformers.glue_compute_metrics(\ - self.task, processed_preds, self.label_list) - return result[self.return_key[self.task]] - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -if __name__ == "__main__": - logger.info('Evaluating ONNXRuntime full precision accuracy and performance:') - parser = argparse.ArgumentParser( - description='BERT fine-tune examples for classification/regression tasks.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained resnet50 model on onnx file" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--config', - type=str, - help="config yaml path" - ) - parser.add_argument( - '--output_model', - type=str, - default=None, - help="output model path" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - '--data_path', - type=str, - help="input data path" - ) - parser.add_argument( - '--batch_size', - default=8, - type=int, - ) - parser.add_argument( - '--model_name_or_path', - type=str, - help="pretrained model name or path" - ) - parser.add_argument( - '--task', - type=str, - choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ - 'mnli', 'wnli', 'sst-2'], - help="GLUE task name" - ) - parser.add_argument( - '--num_heads', - default=12, - type=int, - ) - parser.add_argument( - '--hidden_size', - default=768, - type=int, - ) - parser.add_argument( - '--device', - type=str, - default='cpu', - choices=['cpu', 'npu'], - ) - - args = parser.parse_args() - backend = 'onnxrt_dml_ep' if args.device == 'npu' else 'default' - - dataset = ONNXRTBertDataset(args.model_path, - data_dir=args.data_path, - model_name_or_path=args.model_name_or_path, - task=args.task) - dataloader = DataLoader(framework='onnxruntime', dataset=dataset, batch_size=args.batch_size) - metric = ONNXRTGLUE(args.task) - - def eval_func(model, *args): - metric.reset() - provider = 'DmlExecutionProvider' if backend == 
'onnxrt_dml_ep' else 'CPUExecutionProvider' - session = ort.InferenceSession(model.SerializeToString(), providers=[provider]) - ort_inputs = {} - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - for idx, (inputs, labels) in enumerate(dataloader): - if not isinstance(labels, list): - labels = [labels] - inputs = inputs[:len_inputs] - for i in range(len_inputs): - ort_inputs.update({inputs_names[i]: inputs[i]}) - predictions = session.run(None, ort_inputs) - metric.update(predictions[0], labels) - return metric.result() - - if args.benchmark: - model = onnx.load(args.model_path) - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1, - device=args.device, - backend=backend) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == 'accuracy': - acc_result = eval_func(model) - print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % acc_result) - - - if args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - model_type = 'bart' if args.model_name_or_path == 'Intel/bart-large-mrpc' else 'bert' - opt_options = FusionOptions(model_type) - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - model_type, - num_heads=args.num_heads, - hidden_size=args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. 
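The device plumbing in this script maps `--device npu` to the `onnxrt_dml_ep` backend and the DirectML execution provider, and everything else to the CPU provider. That mapping as a small helper (a sketch; provider availability depends on the installed onnxruntime build):

```python
import onnxruntime as ort

def session_for_device(model_bytes, device="cpu"):
    backend = "onnxrt_dml_ep" if device == "npu" else "default"
    provider = ("DmlExecutionProvider" if backend == "onnxrt_dml_ep"
                else "CPUExecutionProvider")
    return ort.InferenceSession(model_bytes, providers=[provider]), backend
```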
" \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(args.model_path) - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - specific_quant_config = {} - if args.model_name_or_path == 'Alireza1044/albert-base-v2-sst2': - specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False} - config = PostTrainingQuantConfig(approach='dynamic', - device=args.device, - backend=backend, - **specific_quant_config) - q_model = quantization.fit(model, - config, - eval_func=eval_func) - q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh deleted file mode 100644 index 8e434a5c521..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_data.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - download_data - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --data_dir=*) - data_dir=$(echo $var |cut -f2 -d=) - ;; - --task_name=*) - task_name=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function download_data { - wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py - python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} -} - -main "$@" - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py deleted file mode 100644 index be05479d9e3..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py +++ /dev/null @@ -1,97 +0,0 @@ -import argparse -import os - -import torch -from transformers import AutoConfig, AutoModelForSequenceClassification - -def export_onnx_model(args, model): - with torch.no_grad(): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - if args.input_model in [ - 'Intel/roberta-base-mrpc', - 'Intel/xlm-roberta-base-mrpc', - 'Intel/camembert-base-mrpc', - 'distilbert-base-uncased-finetuned-sst-2-english', - 'Intel/xlnet-base-cased-mrpc', - 'Intel/deberta-v3-base-mrpc', - ]: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask'], - output_names=['logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names}) - else: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask'], - 
inputs['token_type_ids']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask', - 'token_type_ids'], - output_names=['logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names, - 'token_type_ids' : symbolic_names}) - assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" - print("ONNX Model exported to {0}".format(args.output_model)) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export huggingface onnx model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--input_model', - type=str, - default='Intel/bert-base-uncased-mrpc', - const='Intel/bert-base-uncased-mrpc', - nargs='?', - choices=['Intel/bert-base-uncased-mrpc', - 'Intel/roberta-base-mrpc', - 'Intel/xlm-roberta-base-mrpc', - 'Intel/camembert-base-mrpc', - 'distilbert-base-uncased-finetuned-sst-2-english', - 'Alireza1044/albert-base-v2-sst2', - 'philschmid/MiniLM-L6-H384-uncased-sst2', - 'Intel/MiniLM-L12-H384-uncased-mrpc', - 'bert-base-cased-finetuned-mrpc', - 'Intel/electra-small-discriminator-mrpc', - 'M-FAC/bert-mini-finetuned-mrpc', - 'Intel/xlnet-base-cased-mrpc', - 'Intel/bart-large-mrpc', - 'Intel/deberta-v3-base-mrpc' - ], - help='pretrained model name or path') - parser.add_argument("--output_model", type=str, required=True) - parser.add_argument( - '--max_len', - type=int, - default=128, - help='Maximum length of the sentence pairs') - args = parser.parse_args() - - model = AutoModelForSequenceClassification.from_pretrained( - args.input_model, - config=AutoConfig.from_pretrained(args.input_model)) - - if args.input_model == 'Intel/bart-large-mrpc': - import shutil - from optimum.exporters.onnx import main_export - - main_export(args.input_model, output="bart-large-mrpc", task="text-classification") - shutil.move("bart-large-mrpc/model.onnx", args.output_model) - else: - export_onnx_model(args, model) diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt deleted file mode 100644 index 9988cdf0329..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -torch -transformers -accelerate -onnx -onnxruntime -coloredlogs -sympy -onnxruntime-extensions; python_version < '3.11' -numpy==1.23.5 -sentencepiece -protobuf<=3.20.3 -optimum diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh deleted file mode 100644 index a45b843f555..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - 
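Stripped of the model-specific branching, the export above amounts to: fixed int64 dummy inputs, opset 14, constant folding, and batch/sequence axes marked dynamic. A runnable miniature with a stand-in module (the real script exports the selected Hugging Face checkpoint instead; for `Intel/bart-large-mrpc` it defers to optimum's `main_export`, as shown):

```python
import torch

class TinyClassifier(torch.nn.Module):
    """Stand-in with the same (input_ids, attention_mask) -> logits
    contract as the Hugging Face models above."""

    def __init__(self, vocab=30522, hidden=16, classes=2):
        super().__init__()
        self.emb = torch.nn.Embedding(vocab, hidden)
        self.head = torch.nn.Linear(hidden, classes)

    def forward(self, input_ids, attention_mask):
        mask = attention_mask.unsqueeze(-1).to(torch.float32)
        pooled = (self.emb(input_ids) * mask).sum(1) / mask.sum(1).clamp(min=1.0)
        return self.head(pooled)

symbolic = {0: "batch_size", 1: "max_seq_len"}  # variable-length axes
dummy = torch.ones(1, 128, dtype=torch.int64)
torch.onnx.export(
    TinyClassifier(), (dummy, dummy), "tiny-classifier.onnx",
    opset_version=14,
    do_constant_folding=True,
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={"input_ids": symbolic, "attention_mask": symbolic},
)
```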
batch_size=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - - if [[ "${input_model}" =~ "bert-base-uncased" ]]; then - model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --mode=${mode} \ - --batch_size=${batch_size} \ - --benchmark - -} - -main "$@" - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh deleted file mode 100644 index 20a6b8b5794..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_quant.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning -} - -# init params -function init_params { - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - - if [[ "${input_model}" =~ "bert-base-uncased" ]]; then - model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - 
model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - num_heads=16 - hidden_size=4096 - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --num_heads ${num_heads} \ - --hidden_size ${hidden_size} \ - --tune -} - -main "$@" - - - diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md deleted file mode 100644 index a4215f6876e..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# Step-by-Step - -This example load a language translation model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). - -# Prerequisite - -## 1. Environment - -```shell -pip install neural-compressor -pip install -r requirements.txt -``` - -> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. 
Prepare Model - -Supported model identifier from [huggingface.co](https://huggingface.co/): - -| Model Identifier | -| :---------------------------------------------: | -| Intel/bert-base-uncased-mrpc | -| Intel/roberta-base-mrpc | -| Intel/xlm-roberta-base-mrpc | -| Intel/camembert-base-mrpc | -| distilbert-base-uncased-finetuned-sst-2-english | -| Alireza1044/albert-base-v2-sst2 | -| Intel/MiniLM-L12-H384-uncased-mrpc | -| philschmid/MiniLM-L6-H384-uncased-sst2 | -| bert-base-cased-finetuned-mrpc | -| Intel/electra-small-discriminator-mrpc | -| M-FAC/bert-mini-finetuned-mrpc | -| Intel/xlnet-base-cased-mrpc | -| Intel/bart-large-mrpc | -| Intel/deberta-v3-base-mrpc | - -```bash -python prepare_model.py --input_model=Intel/bert-base-uncased-mrpc --output_model=bert-base-uncased-mrpc.onnx -``` - -## 3. Prepare Dataset - -Download the GLUE data with `prepare_data.sh` script. - -```shell -export GLUE_DIR=/path/to/glue_data -export TASK_NAME=MRPC # or SST - -bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME -``` - -# Run - -## 1. Quantization - -Static quantization with QOperator format: - -```bash -bash run_quant.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=path/to/glue/data \ - --quant_format="QOperator" -``` - -## 2. Benchmark - -```bash -bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/glue/data \ - --batch_size=batch_size \ - --mode=performance # or accuracy -``` diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py deleted file mode 100644 index bb5bd628f7c..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py +++ /dev/null @@ -1,435 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint:disable=redefined-outer-name,logging-format-interpolation - -import logging -import argparse -import onnx -import onnxruntime as ort -import transformers -import os -import torch -import numpy as np -from dataclasses import dataclass -from typing import List, Optional, Union -from neural_compressor.data import DataLoader - - -class ONNXRTBertDataset: - """Dataset used for model Bert. - Args: data_dir (str): The input data dir. - model_name_or_path (str): Path to pre-trained student model or shortcut name, - selected in the list: - max_seq_length (int, default=128): The maximum length after tokenization. - Sequences longer than this will be truncated, - sequences shorter will be padded. - do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. 
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py
deleted file mode 100644
index bb5bd628f7c..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py
+++ /dev/null
@@ -1,435 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint:disable=redefined-outer-name,logging-format-interpolation
-
-import logging
-import argparse
-import onnx
-import onnxruntime as ort
-import transformers
-import os
-import torch
-import numpy as np
-from dataclasses import dataclass
-from typing import List, Optional, Union
-from neural_compressor.data import DataLoader
-
-
-class ONNXRTBertDataset:
-    """Dataset used for model Bert.
-
-    Args:
-        data_dir (str): The input data dir.
-        model_name_or_path (str): Path to pre-trained student model or shortcut name,
-            selected in the list:
-        max_seq_length (int, default=128): The maximum length after tokenization.
-            Sequences longer than this will be truncated,
-            sequences shorter will be padded.
-        do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing.
-        task (str, default=mrpc): The name of the task to fine-tune.
-            Choices include mrpc, qqp, qnli, rte,
-            sts-b, cola, mnli, wnli.
-        model_type (str, default='bert'): Model type, one of 'distilbert', 'bert',
-            'mobilebert', 'roberta'.
-        dynamic_length (bool, default=False): Whether to use dynamic sequence length.
-        evaluate (bool, default=True): Whether to do evaluation or training.
-        transform (transform object, default=None): Transform to process input data.
-        filter (Filter objects, default=None): Filter out examples according
-            to specific conditions.
-    """
-    def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\
-                 do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\
-                 evaluate=True, transform=None, filter=None):
-        self.inputs = [inp.name for inp in onnx.load(model).graph.input]
-        task = task.lower()
-        model_type = model_type.lower()
-        assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'], 'Unsupported task type'
-        assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \
-            model type'
-        self.dynamic_length = dynamic_length
-        self.model_type = model_type
-        self.max_seq_length = max_seq_length
-        tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path,
-                                                               do_lower_case=do_lower_case)
-        self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \
-            max_seq_length, task, model_type, tokenizer, evaluate)
-
-    def __len__(self):
-        return len(self.dataset)
-
-    def __getitem__(self, index):
-        batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index])
-        return batch[:len(self.inputs)], batch[-1]
-
-def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \
-    model_type, tokenizer, evaluate):
-    from torch.utils.data import TensorDataset
-
-    processor = transformers.glue_processors[task]()
-    output_mode = transformers.glue_output_modes[task]
-    # Load data features from cache or dataset file
-    if not os.path.exists("./dataset_cached"):
-        os.makedirs("./dataset_cached")
-    cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format(
-        'dev' if evaluate else 'train',
-        list(filter(None, model_name_or_path.split('/'))).pop(),
-        str(max_seq_length),
-        str(task)))
-    if os.path.exists(cached_features_file):
-        logger.info("Load features from cached file {}.".format(cached_features_file))
-        features = torch.load(cached_features_file)
-    else:
-        logger.info("Create features from dataset file at {}.".format(data_dir))
-        label_list = processor.get_labels()
-        examples = processor.get_dev_examples(data_dir) if evaluate else \
-            processor.get_train_examples(data_dir)
-        features = convert_examples_to_features(examples,
-                                                tokenizer,
-                                                task=task,
-                                                label_list=label_list,
-                                                max_length=max_seq_length,
-                                                output_mode=output_mode,
-        )
-        logger.info("Save features into cached file {}.".format(cached_features_file))
-        torch.save(features, cached_features_file)
-    # Convert to Tensors and build dataset
-    all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
-    all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
-    all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
-    all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long)
-    if output_mode == "classification":
-        all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
-    elif output_mode == "regression":
-        all_labels = torch.tensor([f.label for f in features], dtype=torch.float)
-    dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \
-        all_seq_lengths, all_labels)
-    return dataset
-
-def convert_examples_to_features(
-    examples,
-    tokenizer,
-    max_length=128,
-    task=None,
-    label_list=None,
-    output_mode="classification",
-    pad_token=0,
-    pad_token_segment_id=0,
-    mask_padding_with_zero=True,
-):
-    processor = transformers.glue_processors[task]()
-    if label_list is None:
-        label_list = processor.get_labels()
-        logger.info("Use label list {} for task {}.".format(label_list, task))
-    label_map = {label: i for i, label in enumerate(label_list)}
-    features = []
-    for (ex_index, example) in enumerate(examples):
-        inputs = tokenizer.encode_plus(
-            example.text_a,
-            example.text_b,
-            add_special_tokens=True,
-            max_length=max_length,
-            return_token_type_ids=True,
-            truncation=True,
-        )
-        input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
-        # The mask has 1 for real tokens and 0 for padding tokens. Only real
-        # tokens are attended to.
-        attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)
-
-        # Zero-pad up to the sequence length.
-        seq_length = len(input_ids)
-        padding_length = max_length - len(input_ids)
-
-        input_ids = input_ids + ([pad_token] * padding_length)
-        attention_mask = attention_mask + \
-            ([0 if mask_padding_with_zero else 1] * padding_length)
-        token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length)
-
-        assert len(input_ids) == max_length, \
-            "Error with input_ids length {} vs {}".format(
-            len(input_ids), max_length)
-        assert len(attention_mask) == max_length, \
-            "Error with attention_mask length {} vs {}".format(
-            len(attention_mask), max_length
-        )
-        assert len(token_type_ids) == max_length, \
-            "Error with token_type_ids length {} vs {}".format(
-            len(token_type_ids), max_length
-        )
-        if output_mode == "classification":
-            label = label_map[example.label]
-        elif output_mode == "regression":
-            label = float(example.label)
-        else:
-            raise KeyError(output_mode)
-
-        feats = InputFeatures(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            label=label,
-            seq_length=seq_length,
-        )
-        features.append(feats)
-    return features
-
-@dataclass(frozen=True)
-class InputFeatures:
-    """
-    A single set of features of data.
-    Property names are the same names as the corresponding inputs to a model.
-    Args:
-        input_ids: Indices of input sequence tokens in the vocabulary.
-        attention_mask: Mask to avoid performing attention on padding token indices.
-            Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED,
-            ``0`` for MASKED (padded) tokens.
-        token_type_ids: (Optional) Segment token indices to indicate first and second
-            portions of the inputs. Only some models use them.
-        label: (Optional) Label corresponding to the input. Int for classification problems,
-            float for regression problems.
-        seq_length: (Optional) The length of input sequence before padding.
-    """
-
-    input_ids: List[int]
-    attention_mask: Optional[List[int]] = None
-    token_type_ids: Optional[List[int]] = None
-    label: Optional[Union[int, float]] = None
-    seq_length: Optional[List[int]] = None
-
-class ONNXRTGLUE:
-    """Computes GLUE score.
-
-    Args:
-        task (str, default=mrpc): The name of the task.
-            Choices include mrpc, qqp, qnli, rte,
-            sts-b, cola, mnli, wnli.
-
-    """
-    def __init__(self, task='mrpc'):
-        assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'], 'Unsupported task type'
-        self.pred_list = None
-        self.label_list = None
-        self.task = task
-        self.return_key = {
-            "cola": "mcc",
-            "mrpc": "f1",
-            "sts-b": "corr",
-            "qqp": "acc",
-            "mnli": "mnli/acc",
-            "qnli": "acc",
-            "rte": "acc",
-            "wnli": "acc",
-            "sst-2": "acc"
-        }
-
-    def update(self, preds, labels):
-        """add preds and labels to storage"""
-        if isinstance(preds, list) and len(preds) == 1:
-            preds = preds[0]
-        if isinstance(labels, list) and len(labels) == 1:
-            labels = labels[0]
-        if self.pred_list is None:
-            self.pred_list = preds
-            self.label_list = labels
-        else:
-            self.pred_list = np.append(self.pred_list, preds, axis=0)
-            self.label_list = np.append(self.label_list, labels, axis=0)
-
-    def reset(self):
-        """clear preds and labels storage"""
-        self.pred_list = None
-        self.label_list = None
-
-    def result(self):
-        """calculate metric"""
-        output_mode = transformers.glue_output_modes[self.task]
-
-        if output_mode == "classification":
-            processed_preds = np.argmax(self.pred_list, axis=1)
-        elif output_mode == "regression":
-            processed_preds = np.squeeze(self.pred_list)
-        result = transformers.glue_compute_metrics(\
-            self.task, processed_preds, self.label_list)
-        return result[self.return_key[self.task]]
-
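ONNXRTGLUE above implements Neural Compressor's update/reset/result metric protocol. A toy walk-through of the accumulate-then-score flow, using synthetic logits and labels rather than real model output:

import numpy as np

# Placeholder data only: four pretend batches of eight examples each.
metric = ONNXRTGLUE(task='sst-2')            # sst-2 reports plain accuracy
for _ in range(4):
    logits = np.random.randn(8, 2)           # shape (batch, num_classes)
    labels = np.random.randint(0, 2, size=8)
    metric.update(logits, labels)
print(metric.result())                       # score over all 32 examples
metric.reset()                               # ready for the next evaluation
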
-logger = logging.getLogger(__name__)
-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-                    datefmt = '%m/%d/%Y %H:%M:%S',
-                    level = logging.WARN)
-
-if __name__ == "__main__":
-    logger.info('Evaluating ONNXRuntime full precision accuracy and performance:')
-    parser = argparse.ArgumentParser(
-        description='BERT fine-tune examples for classification/regression tasks.',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--model_path',
-        type=str,
-        help="Pre-trained model ONNX file"
-    )
-    parser.add_argument(
-        '--benchmark',
-        action='store_true', \
-        default=False
-    )
-    parser.add_argument(
-        '--tune',
-        action='store_true', \
-        default=False,
-        help="whether quantize the model"
-    )
-    parser.add_argument(
-        '--config',
-        type=str,
-        help="config yaml path"
-    )
-    parser.add_argument(
-        '--output_model',
-        type=str,
-        default=None,
-        help="output model path"
-    )
-    parser.add_argument(
-        '--mode',
-        type=str,
-        help="benchmark mode of performance or accuracy"
-    )
-    parser.add_argument(
-        '--data_path',
-        type=str,
-        help="input data path"
-    )
-    parser.add_argument(
-        '--batch_size',
-        default=8,
-        type=int,
-    )
-    parser.add_argument(
-        '--model_name_or_path',
-        type=str,
-        help="pretrained model name or path"
-    )
-    parser.add_argument(
-        '--task',
-        type=str,
-        choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \
-            'mnli', 'wnli', 'sst-2'],
-        help="GLUE task name"
-    )
-    parser.add_argument(
-        '--num_heads',
-        default=12,
-        type=int,
-    )
-    parser.add_argument(
-        '--hidden_size',
-        default=768,
-        type=int,
-    )
-    parser.add_argument(
-        '--quant_format',
-        type=str,
-        default='QOperator',
-        choices=['QOperator', 'QDQ'],
-        help="quantization format"
-    )
-
-    args = parser.parse_args()
-
-    dataset = ONNXRTBertDataset(args.model_path,
-                                data_dir=args.data_path,
-                                model_name_or_path=args.model_name_or_path,
-                                task=args.task)
-    dataloader = DataLoader(framework='onnxruntime', dataset=dataset, batch_size=args.batch_size)
-    metric = ONNXRTGLUE(args.task)
-
-    def eval_func(model, *args):
-        metric.reset()
-        session = ort.InferenceSession(model.SerializeToString(),
-                                       providers=ort.get_available_providers())
-        ort_inputs = {}
-        len_inputs = len(session.get_inputs())
-        inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)]
-        for idx, (inputs, labels) in enumerate(dataloader):
-            if not isinstance(labels, list):
-                labels = [labels]
-            inputs = inputs[:len_inputs]
-            for i in range(len_inputs):
-                ort_inputs.update({inputs_names[i]: inputs[i]})
-            predictions = session.run(None, ort_inputs)
-            metric.update(predictions[0], labels)
-        return metric.result()
-
-    if args.benchmark:
-        model = onnx.load(args.model_path)
-        if args.mode == 'performance':
-            from neural_compressor.benchmark import fit
-            from neural_compressor.config import BenchmarkConfig
-            conf = BenchmarkConfig(iteration=100,
-                                   cores_per_instance=28,
-                                   num_of_instance=1)
-            fit(model, conf, b_dataloader=dataloader)
-        elif args.mode == 'accuracy':
-            acc_result = eval_func(model)
-            print("Batch size = %d" % args.batch_size)
-            print("Accuracy: %.5f" % acc_result)
-
-    if args.tune:
-        # optimize model
-        from onnxruntime.transformers import optimizer
-        from onnxruntime.transformers.fusion_options import FusionOptions
-        model_type = 'bart' if args.model_name_or_path == 'Intel/bart-large-mrpc' else 'bert'
-        opt_options = FusionOptions(model_type)
-        opt_options.enable_embed_layer_norm = False
-
-        model_optimizer = optimizer.optimize_model(
-            args.model_path,
-            model_type,
-            num_heads=args.num_heads,
-            hidden_size=args.hidden_size,
-            optimization_options=opt_options)
-        model = model_optimizer.model
-
-        # check the optimized model is valid
-        try:
-            ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
-        except Exception as e:
-            logger.warning("Optimized model is invalid: {}. ".format(e))
-            logger.warning("Model optimizer will be skipped. " \
-                "Try to upgrade onnxruntime to avoid this error")
-            model = onnx.load(args.model_path)
-
-        from neural_compressor import quantization, PostTrainingQuantConfig
-        from neural_compressor.utils.constant import FP32
-        specific_quant_config = {}
-        if args.model_name_or_path == 'Intel/bart-large-mrpc':
-            fp32_op_names = ['/model/(en|de)coder/layers.*/fc(1|2)/MatMul']
-            specific_quant_config['op_name_dict'] = {op_name: FP32 for op_name in fp32_op_names}
-        elif args.model_name_or_path == 'Alireza1044/albert-base-v2-sst2':
-            specific_quant_config['recipes'] = {'first_conv_or_matmul_quantization': False}
-        elif args.model_name_or_path == 'Intel/deberta-v3-base-mrpc':
-            specific_quant_config['op_type_dict'] = {'^((?!(MatMul|Gather)).)*$': FP32}
-            specific_quant_config['quant_level'] = 1
-        config = PostTrainingQuantConfig(approach='static',
-                                         quant_format=args.quant_format,
-                                         **specific_quant_config)
-        q_model = quantization.fit(model,
-                                   config,
-                                   eval_func=eval_func,
-                                   calib_dataloader=dataloader)
-        q_model.save(args.output_model)
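The model-specific branches above illustrate the general escape hatch for layers that lose too much accuracy under INT8: pin them to FP32 through op_name_dict or op_type_dict. A minimal standalone sketch, with an op-name regex that is illustrative rather than taken from any of these models:

from neural_compressor import PostTrainingQuantConfig
from neural_compressor.utils.constant import FP32

# Illustrative only: ops whose names match the regex stay FP32 while the
# rest of the graph is statically quantized, mirroring the bart branch above.
config = PostTrainingQuantConfig(
    approach="static",
    quant_format="QDQ",
    op_name_dict={"/encoder/layer.*/output/dense/MatMul": FP32},
)
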
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh
deleted file mode 100644
index 8e434a5c521..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_data.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-    init_params "$@"
-    download_data
-
-}
-
-# init params
-function init_params {
-
-    for var in "$@"
-    do
-        case $var in
-            --data_dir=*)
-                data_dir=$(echo $var |cut -f2 -d=)
-            ;;
-            --task_name=*)
-                task_name=$(echo $var |cut -f2 -d=)
-            ;;
-        esac
-    done
-
-}
-
-# download data
-function download_data {
-    wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py
-    python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name}
-}
-
-main "$@"
-
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
deleted file mode 100644
index a8272021d5a..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import argparse
-import os
-
-import torch
-from transformers import AutoConfig, AutoModelForSequenceClassification
-
-def export_onnx_model(args, model):
-    with torch.no_grad():
-        symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
-        if args.input_model in ['Intel/roberta-base-mrpc',
-                                'Intel/xlm-roberta-base-mrpc',
-                                'Intel/camembert-base-mrpc',
-                                'distilbert-base-uncased-finetuned-sst-2-english',
-                                'Intel/xlnet-base-cased-mrpc',
-                                'Intel/deberta-v3-base-mrpc']:
-            inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)}
-            torch.onnx.export(model,                        # model being run
-                              (inputs['input_ids'],         # model input (or a tuple for multiple inputs)
-                               inputs['attention_mask']),
-                              args.output_model,            # where to save the model (can be a file or file-like object)
-                              opset_version=14,             # the ONNX version to export the model
-                              do_constant_folding=True,     # whether to execute constant folding
-                              input_names=['input_ids',     # the model's input names
-                                           'attention_mask'],
-                              output_names=['logits'],
-                              dynamic_axes={'input_ids': symbolic_names,    # variable length axes
-                                            'attention_mask': symbolic_names})
-        else:
-            inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64),
-                      'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)}
-            torch.onnx.export(model,                        # model being run
-                              (inputs['input_ids'],         # model input (or a tuple for multiple inputs)
-                               inputs['attention_mask'],
-                               inputs['token_type_ids']),
-                              args.output_model,            # where to save the model (can be a file or file-like object)
-                              opset_version=14,             # the ONNX version to export the model
-                              do_constant_folding=True,     # whether to execute constant folding
-                              input_names=['input_ids',     # the model's input names
-                                           'attention_mask',
-                                           'token_type_ids'],
-                              output_names=['logits'],
-                              dynamic_axes={'input_ids': symbolic_names,    # variable length axes
-                                            'attention_mask': symbolic_names,
-                                            'token_type_ids': symbolic_names})
-
-    assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!"
-    print("ONNX Model exported to {0}".format(args.output_model))
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description='Export huggingface onnx model',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--input_model',
-        type=str,
-        default='Intel/bert-base-uncased-mrpc',
-        const='Intel/bert-base-uncased-mrpc',
-        nargs='?',
-        choices=['Intel/bert-base-uncased-mrpc',
-                 'Intel/roberta-base-mrpc',
-                 'Intel/xlm-roberta-base-mrpc',
-                 'Intel/camembert-base-mrpc',
-                 'distilbert-base-uncased-finetuned-sst-2-english',
-                 'Alireza1044/albert-base-v2-sst2',
-                 'philschmid/MiniLM-L6-H384-uncased-sst2',
-                 'Intel/MiniLM-L12-H384-uncased-mrpc',
-                 'bert-base-cased-finetuned-mrpc',
-                 'Intel/electra-small-discriminator-mrpc',
-                 'M-FAC/bert-mini-finetuned-mrpc',
-                 'Intel/xlnet-base-cased-mrpc',
-                 'Intel/bart-large-mrpc',
-                 'Intel/deberta-v3-base-mrpc'
-                 ],
-        help='pretrained model name or path')
-    parser.add_argument("--output_model", type=str, required=True)
-    parser.add_argument(
-        '--max_len',
-        type=int,
-        default=128,
-        help='Maximum length of the sentence pairs')
-    args = parser.parse_args()
-
-    model = AutoModelForSequenceClassification.from_pretrained(
-        args.input_model,
-        config=AutoConfig.from_pretrained(args.input_model))
-
-    if args.input_model == 'Intel/bart-large-mrpc':
-        import shutil
-        from optimum.exporters.onnx import main_export
-
-        main_export(args.input_model, output="bart-large-mrpc", task="text-classification")
-        shutil.move("bart-large-mrpc/model.onnx", args.output_model)
-    else:
-        export_onnx_model(args, model)
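The export in prepare_model.py hinges on dynamic_axes, which keeps the batch and sequence dimensions symbolic so one ONNX file serves any batch size and padded length. A self-contained toy export showing the mechanics; TinyClassifier is a made-up stand-in, not one of the supported models:

import torch

# Minimal placeholder module; a real run exports an
# AutoModelForSequenceClassification exactly as prepare_model.py does.
class TinyClassifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = torch.nn.Embedding(100, 16)
        self.head = torch.nn.Linear(16, 2)

    def forward(self, input_ids):
        return self.head(self.embed(input_ids).mean(dim=1))

symbolic_names = {0: "batch_size", 1: "max_seq_len"}
torch.onnx.export(TinyClassifier(),
                  torch.ones(1, 128, dtype=torch.int64),   # dummy input_ids
                  "tiny.onnx",
                  opset_version=14,
                  input_names=["input_ids"],
                  output_names=["logits"],
                  dynamic_axes={"input_ids": symbolic_names})  # both dims stay variable
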
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
deleted file mode 100644
index 9988cdf0329..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-torch
-transformers
-accelerate
-onnx
-onnxruntime
-coloredlogs
-sympy
-onnxruntime-extensions; python_version < '3.11'
-numpy==1.23.5
-sentencepiece
-protobuf<=3.20.3
-optimum
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh
deleted file mode 100644
index a45b843f555..00000000000
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-set -x
-
-function main {
-
-    init_params "$@"
-    run_benchmark
-
-}
-
-# init params
-function init_params {
-    for var in "$@"
-    do
-        case $var in
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --mode=*)
-                mode=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --batch_size=*)
-                batch_size=$(echo $var |cut -f2 -d=)
-            ;;
-        esac
-    done
-
-}
-
-# run_benchmark
-function run_benchmark {
-
-    if [[ "${input_model}" =~ "bert-base-uncased" ]]; then
-        model_name_or_path="Intel/bert-base-uncased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "roberta-base" ]]; then
-        model_name_or_path="Intel/roberta-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then
-        model_name_or_path="Intel/xlm-roberta-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "camembert-base" ]]; then
-        model_name_or_path="Intel/camembert-base-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "distilbert-base" ]]; then
-        model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "albert-base" ]]; then
-        model_name_or_path="Alireza1044/albert-base-v2-sst2"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "MiniLM-L6" ]]; then
-        model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2"
-        TASK_NAME='sst-2'
-    fi
-    if [[ "${input_model}" =~ "MiniLM-L12" ]]; then
-        model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bert-base-cased" ]]; then
-        model_name_or_path="bert-base-cased-finetuned-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then
-        model_name_or_path="Intel/xlnet-base-cased-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bert-mini" ]]; then
-        model_name_or_path="M-FAC/bert-mini-finetuned-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then
-        model_name_or_path="Intel/electra-small-discriminator-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "bart" ]]; then
-        model_name_or_path="Intel/bart-large-mrpc"
-        TASK_NAME='mrpc'
-    fi
-    if [[ "${input_model}" =~ "deberta" ]]; then
-        model_name_or_path="microsoft/deberta-v3-base"
-        TASK_NAME='mrpc'
-    fi
-
-    python main.py \
-           --model_name_or_path ${model_name_or_path} \
-           --model_path ${input_model} \
-           --data_path ${dataset_location} \
-           --task ${TASK_NAME} \
-           --mode=${mode} \
-           --batch_size=${batch_size} \
-           --benchmark
-
-}
-
-main "$@"
-
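run_benchmark.sh lands in the --benchmark branch of main.py, whose performance path reduces to Neural Compressor's benchmark fit. A sketch with a synthetic dataloader; the model path is a placeholder and the instance settings are the values main.py hard-codes:

import numpy as np
import onnx
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.data import DataLoader

class DummySet:
    """Synthetic batches; real runs feed the GLUE dev set instead."""
    def __len__(self):
        return 32
    def __getitem__(self, idx):
        ids = np.ones((128,), dtype=np.int64)
        return (ids, ids, ids), 0

model = onnx.load("bert-base-uncased-mrpc.onnx")   # placeholder model path
dataloader = DataLoader(framework="onnxruntime", dataset=DummySet(), batch_size=8)
conf = BenchmarkConfig(iteration=100,              # timed iterations
                       cores_per_instance=28,      # values hard-coded in main.py
                       num_of_instance=1)
fit(model, conf, b_dataloader=dataloader)          # logs latency and throughput
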
model_name_or_path="Intel/bert-base-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "roberta-base" ]]; then - model_name_or_path="Intel/roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "distilbert-base" ]]; then - model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "MiniLM-L6" ]]; then - model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "MiniLM-L12" ]]; then - model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "bert-base-cased" ]]; then - model_name_or_path="bert-base-cased-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=384 - fi - if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then - model_name_or_path="Intel/xlnet-base-cased-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "bert-mini" ]]; then - model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then - model_name_or_path="Intel/electra-small-discriminator-mrpc" - TASK_NAME='mrpc' - num_heads=4 - hidden_size=256 - fi - if [[ "${input_model}" =~ "bart" ]]; then - model_name_or_path="Intel/bart-large-mrpc" - TASK_NAME='mrpc' - num_heads=16 - hidden_size=4096 - fi - if [[ "${input_model}" =~ "deberta" ]]; then - model_name_or_path="microsoft/deberta-v3-base" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - - python main.py \ - --model_name_or_path ${model_name_or_path} \ - --model_path ${input_model} \ - --output_model ${output_model} \ - --data_path ${dataset_location} \ - --task ${TASK_NAME} \ - --num_heads ${num_heads} \ - --hidden_size ${hidden_size} \ - --tune -} - -main "$@" - - -
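For reference, the --quant_format choice parsed above only changes how the quantized graph is serialized: QOperator emits fused INT8 operators such as QLinearMatMul, while QDQ keeps float operators wrapped in QuantizeLinear/DequantizeLinear pairs. The two configurations differ by a single argument:

from neural_compressor import PostTrainingQuantConfig

# Same quantization recipe, two serialization formats.
qop_config = PostTrainingQuantConfig(approach="static", quant_format="QOperator")
qdq_config = PostTrainingQuantConfig(approach="static", quant_format="QDQ")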