From 4c5459c8edceed4497029b19b579b36a57025140 Mon Sep 17 00:00:00 2001
From: Winter Deng
Date: Thu, 9 Oct 2025 15:55:34 +0800
Subject: [PATCH 1/6] classify flags into 'general', 'linear', and 'nn'
 categories, and reorder some flags in main.py

---
 docs/cli/classifier.py | 137 +++++++++++++++++++++++++++++++++++++++++
 docs/cli/genflags.py   |  50 +++++++++++----
 main.py                | 123 +++++++++++++++++++-----------------
 3 files changed, 242 insertions(+), 68 deletions(-)
 create mode 100644 docs/cli/classifier.py

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
new file mode 100644
index 00000000..31f2e8fe
--- /dev/null
+++ b/docs/cli/classifier.py
@@ -0,0 +1,137 @@
+import os
+import sys
+import glob
+import re
+from pathlib import Path
+from collections import defaultdict
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+lib_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
+sys.path.insert(0, lib_path)
+
+def classify_file_category(path):
+
+    relative_path = Path(path).relative_to(lib_path)
+    return_path = relative_path.as_posix()
+    filename = Path(*relative_path.parts[1:]).as_posix() if len(relative_path.parts) > 1 else return_path
+
+    if filename.startswith("linear"):
+        category = "linear"
+    elif filename.startswith("torch") or filename.startswith("nn"):
+        category = "nn"
+    else:
+        category = "general"
+    return category, return_path
+
+
+def fetch_option_flags(flags):
+    # flags = genflags.parser.flags
+    flag_list = []
+
+    for flag in flags:
+        flag_list.append(
+            {
+                "name": flag["name"].replace("\\", ""),
+                "instruction": flag["name"].split("-")[-1],
+                "description": flag["description"]
+            }
+        )
+
+    return flag_list
+
+
+def fetch_all_files():
+    main_files = [
+        os.path.join(lib_path, "linear_trainer.py"),
+        os.path.join(lib_path, "torch_trainer.py")
+    ]
+    lib_files = glob.glob(os.path.join(lib_path, "libmultilabel/**/*.py"), recursive=True)
+    file_set = set(map(os.path.abspath, main_files + lib_files))
+    return file_set
+
+
+def find_config_usages_in_file(file_path, allowed_keys):
+    pattern = re.compile(r'\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)')
+    detailed_results = {}
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+    except (IOError, UnicodeDecodeError):
+        return []
+
+    category, path = classify_file_category(file_path)
+
+    for i, line in enumerate(lines, start=1):
+        matches = pattern.findall(line)
+        for key in matches:
+            if key in allowed_keys:
+                if key not in detailed_results:
+                    detailed_results[key] = {"file": path, "lines": []}
+                detailed_results[key]["lines"].append(str(i))
+
+    return detailed_results
+
+
+def move_duplicates_together(data, keep):
+    all_keys = list(data.keys())
+    duplicates = set()
+
+    for i, key1 in enumerate(all_keys):
+        for key2 in all_keys[i+1:]:
+            duplicates |= data[key1] & data[key2]
+
+    data[keep] |= duplicates
+
+    for key in all_keys:
+        if key != keep:
+            data[key] -= duplicates
+
+    return data
+
+
+def classify(raw_flags):
+
+    category_set = {"general": set(), "linear": set(), "nn": set()}
+    flags = fetch_option_flags(raw_flags)
+    allowed_keys = set(flag["instruction"] for flag in flags)
+    file_set = fetch_all_files()
+    usage_map = defaultdict(list)
+    collected = {}
+
+    for file_path in file_set:
+        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
+        if detailed_results:
+            usage_map[file_path] = set(detailed_results.keys())
+            for k, v in detailed_results.items():
+                if k not in collected:
+                    collected[k] = []
+                collected[k].append(v)
+
+    for path, keys in usage_map.items():
+        category, path = classify_file_category(path)
+        category_set[category] = category_set[category].union(keys)
+
+    category_set = move_duplicates_together(category_set, "general")
+
+    for flag in flags:
+        for k, v in category_set.items():
+            for i in v:
+                if flag["instruction"] == i:
+                    flag["category"] = k
+        if "category" not in flag:
+            flag["category"] = "general"
+
+    result = {}
+    for flag in flags:
+        if flag["category"] not in result:
+            result[flag["category"]] = []
+        result[flag["category"]].append({"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]})
+
+    result["details"] = []
+    for k, v in collected.items():
+        result["details"].append({"name": k, "file": v[0]["file"], "location": ", ".join(v[0]["lines"])})
+        if len(v) > 1:
+            for i in v[1:]:
+                result["details"].append({"name": "", "file": i["file"], "location": ", ".join(i["lines"])})
+
+    return result
diff --git a/docs/cli/genflags.py b/docs/cli/genflags.py
index 006991e8..e6f409c2 100644
--- a/docs/cli/genflags.py
+++ b/docs/cli/genflags.py
@@ -2,8 +2,11 @@ import os
 
 sys.path.insert(1, os.path.join(sys.path[0], "..", ".."))
 
+
 import main
 
+from classifier import classify
+
 
 class FakeParser(dict):
     def __init__(self):
@@ -29,21 +32,42 @@ def add_argument(
 parser.add_argument("-c", "--config", help="Path to configuration file")
 main.add_all_arguments(parser)
 
+classified = classify(parser.flags)
 
-def width(key):
-    return max(map(lambda f: len(f[key]), parser.flags))
+def width_title(key, title):
+    return max(map(lambda f: len(f[key]), classified[title]))
 
+def print_table(title, flags, intro):
+    print()
+    print(intro)
+    print()
 
-wn = width("name")
-wd = width("description")
+    wn = width_title("name", title)
+    wd = width_title("description", title)
 
-print(
-    """..
-    Do not modify this file. This file is generated by genflags.py.\n"""
+    print("=" * wn, "=" * wd)
+    print("Name".ljust(wn), "Description".ljust(wd))
+    print("=" * wn, "=" * wd)
+    for flag in flags:
+        print(flag["name"].ljust(wn), flag["description"].ljust(wd))
+    print("=" * wn, "=" * wd)
+    print()
+
+print_table(
+    "general",
+    classified["general"],
+    intro="**General options**:\n\
+Common configurations shared across both linear and neural network trainers."
+)
+print_table(
+    "linear",
+    classified["linear"],
+    intro="**Linear options**:\n\
+Configurations specific to linear trainer."
 )
-print("=" * wn, "=" * wd)
-print("Name".ljust(wn), "Description".ljust(wd))
-print("=" * wn, "=" * wd)
-for flag in parser.flags:
-    print(flag["name"].ljust(wn), flag["description"].ljust(wd))
-print("=" * wn, "=" * wd)
+print_table(
+    "nn",
+    classified["nn"],
+    intro="**Neural network options**:\n\
+Configurations specific to torch (neural networks) trainer."
+)
\ No newline at end of file
diff --git a/main.py b/main.py
index 70907edf..7a523f1f 100644
--- a/main.py
+++ b/main.py
@@ -11,21 +11,50 @@ def add_all_arguments(parser):
-    # path / directory
+
     parser.add_argument(
-        "--result_dir", default="./runs", help="The directory to save checkpoints and logs (default: %(default)s)"
+        "-h",
+        "--help",
+        action="help",
+        help="Quickstart: https://www.csie.ntu.edu.tw/~cjlin/libmultilabel/cli/quickstart.html",
     )
+    parser.add_argument("--seed", type=int, help="Random seed (default: %(default)s)")
+
+    # choose model (linear / nn)
+    parser.add_argument("--linear", action="store_true", help="Train linear model")
+
+    # others
+    parser.add_argument("--cpu", action="store_true", help="Disable CUDA")
+    parser.add_argument("--silent", action="store_true", help="Enable silent mode")
+    parser.add_argument(
+        "--data_workers", type=int, default=4, help="Use multi-cpu core for data pre-processing (default: %(default)s)"
+    )
+    parser.add_argument(
+        "--embed_cache_dir",
+        type=str,
+        help="For parameter search only: path to a directory for storing embeddings for multiple runs. (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--eval", action="store_true", help="Only run evaluation on the test set (default: %(default)s)"
+    )
+    parser.add_argument("--checkpoint_path", help="The checkpoint to warm-up with (default: %(default)s)")
+
     # data
-    parser.add_argument("--data_name", default="unnamed_data", help="Dataset name (default: %(default)s)")
+    parser.add_argument(
+        "--data_name",
+        default="unnamed_data",
+        help="Dataset name for generating the output directory (default: %(default)s)",
+    )
     parser.add_argument("--training_file", help="Path to training data (default: %(default)s)")
     parser.add_argument("--val_file", help="Path to validation data (default: %(default)s)")
-    parser.add_argument("--test_file", help="Path to test data (default: %(default)s")
+    parser.add_argument("--test_file", help="Path to test data (default: %(default)s)")
+    parser.add_argument("--label_file", type=str, help="Path to a file holding all labels (default: %(default)s)")
     parser.add_argument(
         "--val_size",
         type=float,
         default=0.2,
-        help="Training-validation split: a ratio in [0, 1] or an integer for the size of the validation set (default: %(default)s).",
+        help="Training-validation split: a ratio in [0, 1] or an integer for the size of the validation set (default: %(default)s)",
     )
     parser.add_argument(
         "--min_vocab_freq",
@@ -67,8 +96,24 @@ def add_all_arguments(parser):
         help="Whether to add the special tokens for inputs of the transformer-based language model. (default: %(default)s)",
     )
 
+    # model
+    parser.add_argument("--model_name", default="unnamed_model", help="Model to be used (default: %(default)s)")
+    parser.add_argument(
+        "--init_weight", default="kaiming_uniform", help="Weight initialization to be used (default: %(default)s)"
+    )
+    parser.add_argument(
+        "--loss_function", default="binary_cross_entropy_with_logits", help="Loss function (default: %(default)s)"
+    )
+
+    # pretrained vocab / embeddings
+    parser.add_argument("--vocab_file", type=str, help="Path to a file holding vocabularies (default: %(default)s)")
+    parser.add_argument(
+        "--embed_file",
+        type=str,
+        help="Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding (default: %(default)s)",
+    )
+
     # train
-    parser.add_argument("--seed", type=int, help="Random seed (default: %(default)s)")
     parser.add_argument(
         "--epochs", type=int, default=10000, help="The number of epochs to train (default: %(default)s)"
     )
@@ -109,15 +154,6 @@ def add_all_arguments(parser):
         help="Whether the embeddings of each word is normalized to a unit vector (default: %(default)s)",
     )
 
-    # model
-    parser.add_argument("--model_name", default="unnamed_model", help="Model to be used (default: %(default)s)")
-    parser.add_argument(
-        "--init_weight", default="kaiming_uniform", help="Weight initialization to be used (default: %(default)s)"
-    )
-    parser.add_argument(
-        "--loss_function", default="binary_cross_entropy_with_logits", help="Loss function (default: %(default)s)"
-    )
-
     # eval
     parser.add_argument(
         "--eval_batch_size", type=int, default=256, help="Size of evaluating batches (default: %(default)s)"
     )
@@ -138,28 +174,6 @@ def add_all_arguments(parser):
         "--val_metric", default="P@1", help="The metric to select the best model for testing (default: %(default)s)"
     )
 
-    # pretrained vocab / embeddings
-    parser.add_argument("--vocab_file", type=str, help="Path to a file holding vocabuaries (default: %(default)s)")
-    parser.add_argument(
-        "--embed_file", type=str, help="Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding (default: %(default)s)"
-    )
-    parser.add_argument("--label_file", type=str, help="Path to a file holding all labels (default: %(default)s)")
-
-    # log
-    parser.add_argument(
-        "--save_k_predictions",
-        type=int,
-        nargs="?",
-        const=100,
-        default=0,
-        help="Save top k predictions on test set. k=%(const)s if not specified. (default: %(default)s)",
-    )
-    parser.add_argument(
-        "--predict_out_path",
-        default="./predictions.txt",
-        help="Path to the output file holding label results (default: %(default)s)",
-    )
-
     # auto-test
     parser.add_argument(
         "--limit_train_batches",
@@ -180,24 +194,27 @@ def add_all_arguments(parser):
         help="Percentage of test dataset to use for auto-testing (default: %(default)s)",
     )
 
-    # others
-    parser.add_argument("--cpu", action="store_true", help="Disable CUDA")
-    parser.add_argument("--silent", action="store_true", help="Enable silent mode")
+    # log
     parser.add_argument(
-        "--data_workers", type=int, default=4, help="Use multi-cpu core for data pre-processing (default: %(default)s)"
+        "--save_k_predictions",
+        type=int,
+        nargs="?",
+        const=100,
+        default=0,
+        help="Save top k predictions on test set. k=%(const)s if not specified. (default: %(default)s)",
    )
     parser.add_argument(
-        "--embed_cache_dir",
-        type=str,
-        help="For parameter search only: path to a directory for storing embeddings for multiple runs. (default: %(default)s)",
+        "--predict_out_path",
+        default="./predictions.txt",
+        help="Path to the output file holding label results (default: %(default)s)",
     )
+
+    # path / directory
     parser.add_argument(
-        "--eval", action="store_true", help="Only run evaluation on the test set (default: %(default)s)"
+        "--result_dir", default="./runs", help="The directory to save checkpoints and logs (default: %(default)s)"
     )
-    parser.add_argument("--checkpoint_path", help="The checkpoint to warm-up with (default: %(default)s)")
 
     # linear options
-    parser.add_argument("--linear", action="store_true", help="Train linear model")
     parser.add_argument(
         "--data_format",
         type=str,
@@ -224,7 +241,10 @@ def add_all_arguments(parser):
         "--tree_max_depth", type=int, default=10, help="Maximum depth of the tree (default: %(default)s)"
     )
     parser.add_argument(
-        "--tree_ensemble_models", type=int, default=1, help="Number of models in the tree ensemble (default: %(default)s)"
+        "--tree_ensemble_models",
+        type=int,
+        default=1,
+        help="Number of models in the tree ensemble (default: %(default)s)",
     )
     parser.add_argument(
         "--beam_width",
@@ -239,13 +259,6 @@ def add_all_arguments(parser):
         default=8,
         help="the maximal number of labels inside a cluster (default: %(default)s)",
     )
-    parser.add_argument(
-        "-h",
-        "--help",
-        action="help",
-        help="If you are trying to specify network config such as dropout or activation or config of the learning rate scheduler, use a yaml file instead. "
-        "See example configs in example_config",
-    )
 
 
 def get_config():
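For readers following along: each print_table call added above emits one reStructuredText "simple table". The following standalone sketch reproduces that table logic on two made-up flag records (the names and descriptions are illustrative, not the real classified output):

    # Sketch of the print_table layout with hypothetical flag records.
    flags = [
        {"name": r"\-\-linear", "description": "Train linear model"},
        {"name": r"\-\-seed", "description": "Random seed (default: %(default)s)"},
    ]
    wn = max(len(f["name"]) for f in flags)         # width of the Name column
    wd = max(len(f["description"]) for f in flags)  # width of the Description column

    print("=" * wn, "=" * wd)
    print("Name".ljust(wn), "Description".ljust(wd))
    print("=" * wn, "=" * wd)
    for flag in flags:
        print(flag["name"].ljust(wn), flag["description"].ljust(wd))
    print("=" * wn, "=" * wd)

which prints roughly:

    ========== ==================================
    Name       Description
    ========== ==================================
    \-\-linear Train linear model
    \-\-seed   Random seed (default: %(default)s)
    ========== ==================================

The escaped \-\- keeps Sphinx's smart-quotes transform from collapsing the leading double dash into a single dash character.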
From 38fc479a9d641f41e6d8ceda42f3304f7cb247e5 Mon Sep 17 00:00:00 2001
From: Winter Deng
Date: Thu, 9 Oct 2025 15:57:12 +0800
Subject: [PATCH 2/6] reformat changed code

---
 docs/cli/classifier.py | 28 ++++++++++++++--------------
 docs/cli/genflags.py   | 11 +++++++----
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
index 31f2e8fe..09d4dc81 100644
--- a/docs/cli/classifier.py
+++ b/docs/cli/classifier.py
@@ -9,6 +9,7 @@ lib_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
 sys.path.insert(0, lib_path)
 
+
 def classify_file_category(path):
 
     relative_path = Path(path).relative_to(lib_path)
@@ -30,28 +31,25 @@ def fetch_option_flags(flags):
 
     for flag in flags:
         flag_list.append(
-            {
-                "name": flag["name"].replace("\\", ""),
-                "instruction": flag["name"].split("-")[-1],
-                "description": flag["description"]
-            }
-        )
+            {
+                "name": flag["name"].replace("\\", ""),
+                "instruction": flag["name"].split("-")[-1],
+                "description": flag["description"],
+            }
+        )
 
     return flag_list
 
 
 def fetch_all_files():
-    main_files = [
-        os.path.join(lib_path, "linear_trainer.py"),
-        os.path.join(lib_path, "torch_trainer.py")
-    ]
+    main_files = [os.path.join(lib_path, "linear_trainer.py"), os.path.join(lib_path, "torch_trainer.py")]
     lib_files = glob.glob(os.path.join(lib_path, "libmultilabel/**/*.py"), recursive=True)
     file_set = set(map(os.path.abspath, main_files + lib_files))
     return file_set
 
 
 def find_config_usages_in_file(file_path, allowed_keys):
-    pattern = re.compile(r'\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)')
+    pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
     detailed_results = {}
     try:
         with open(file_path, "r", encoding="utf-8") as f:
@@ -77,7 +75,7 @@ def move_duplicates_together(data, keep):
     duplicates = set()
 
     for i, key1 in enumerate(all_keys):
-        for key2 in all_keys[i+1:]:
+        for key2 in all_keys[i + 1 :]:
             duplicates |= data[key1] & data[key2]
 
     data[keep] |= duplicates
@@ -99,7 +97,7 @@ def classify(raw_flags):
     collected = {}
 
     for file_path in file_set:
-        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
+        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
         if detailed_results:
             usage_map[file_path] = set(detailed_results.keys())
             for k, v in detailed_results.items():
@@ -125,7 +123,9 @@ def classify(raw_flags):
     for flag in flags:
         if flag["category"] not in result:
             result[flag["category"]] = []
-        result[flag["category"]].append({"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]})
+        result[flag["category"]].append(
+            {"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
+        )
 
     result["details"] = []
     for k, v in collected.items():
diff --git a/docs/cli/genflags.py b/docs/cli/genflags.py
index e6f409c2..c2036a75 100644
--- a/docs/cli/genflags.py
+++ b/docs/cli/genflags.py
@@ -34,9 +34,11 @@ def add_argument(
 
 classified = classify(parser.flags)
 
+
 def width_title(key, title):
     return max(map(lambda f: len(f[key]), classified[title]))
 
+
 def print_table(title, flags, intro):
     print()
     print(intro)
     print()
@@ -53,21 +55,22 @@ def print_table(title, flags, intro):
     print("=" * wn, "=" * wd)
     print()
 
+
 print_table(
     "general",
     classified["general"],
     intro="**General options**:\n\
-Common configurations shared across both linear and neural network trainers."
+Common configurations shared across both linear and neural network trainers.",
 )
 print_table(
     "linear",
     classified["linear"],
     intro="**Linear options**:\n\
-Configurations specific to linear trainer."
+Configurations specific to linear trainer.",
 )
 print_table(
     "nn",
     classified["nn"],
     intro="**Neural network options**:\n\
-Configurations specific to torch (neural networks) trainer."
-)
\ No newline at end of file
+Configurations specific to torch (neural networks) trainer.",
+)
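classify(parser.flags) depends on the FakeParser defined near the top of docs/cli/genflags.py, which these diffs only show as hunk context. As a rough guide, a stand-in consistent with how classifier.py consumes the records (dicts with an escaped "name" and a "description" taken from help=) might look like this; the real definition may differ:

    # Hypothetical stand-in for genflags.py's FakeParser: it records each
    # add_argument() call instead of building a real argparse parser.
    class FakeParser(dict):
        def __init__(self):
            self.flags = []

        def add_argument(self, *names, **kwargs):
            # Keep long options and escape the leading dashes for RST output,
            # matching the "\-\-name" records that fetch_option_flags() unescapes.
            long_names = [n.replace("--", r"\-\-") for n in names if n.startswith("--")]
            if long_names:
                self.flags.append({"name": ", ".join(long_names), "description": kwargs.get("help", "")})

    parser = FakeParser()
    parser.add_argument("--seed", type=int, help="Random seed (default: %(default)s)")
    print(parser.flags)  # [{'name': '\\-\\-seed', 'description': 'Random seed (default: %(default)s)'}]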
From cd7554dc37b7a0f20b69dbb94de26302521a71e7 Mon Sep 17 00:00:00 2001
From: Winter Deng
Date: Sat, 11 Oct 2025 00:18:15 +0800
Subject: [PATCH 3/6] - change settings in docs/conf.py to prevent writing
 sg_execution_times.rst in Sphinx 5
 - optimize code in docs/cli/classifier.py
 - reformat the above scripts

---
 docs/cli/classifier.py | 26 ++++++++++++++++++++++----
 docs/conf.py           |  1 +
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
index 09d4dc81..5fd088f3 100644
--- a/docs/cli/classifier.py
+++ b/docs/cli/classifier.py
@@ -26,7 +26,6 @@ def classify_file_category(path):
 
 
 def fetch_option_flags(flags):
-    # flags = genflags.parser.flags
     flag_list = []
 
     for flag in flags:
@@ -42,7 +41,11 @@ def fetch_option_flags(flags):
 
 
 def fetch_all_files():
-    main_files = [os.path.join(lib_path, "linear_trainer.py"), os.path.join(lib_path, "torch_trainer.py")]
+    main_files = [
+        os.path.join(lib_path, "main.py"),
+        os.path.join(lib_path, "linear_trainer.py"),
+        os.path.join(lib_path, "torch_trainer.py"),
+    ]
     lib_files = glob.glob(os.path.join(lib_path, "libmultilabel/**/*.py"), recursive=True)
     file_set = set(map(os.path.abspath, main_files + lib_files))
     return file_set
@@ -57,7 +60,18 @@ def find_config_usages_in_file(file_path, allowed_keys):
     except (IOError, UnicodeDecodeError):
         return []
 
-    category, path = classify_file_category(file_path)
+    _, path = classify_file_category(file_path)
+
+    if file_path.endswith("main.py"):
+        for idx in range(len(lines)):
+            if lines[idx].startswith("def main("):
+                lines = lines[idx:]
+                main_start = idx
+                break
+        for i, line in enumerate(lines[1:], start=1):
+            if line and line[0] not in (" ", "\t") and line.strip() != "":
+                lines = lines[:i]
+                break
 
     for i, line in enumerate(lines, start=1):
         matches = pattern.findall(line)
@@ -65,7 +79,10 @@ def find_config_usages_in_file(file_path, allowed_keys):
             if key in allowed_keys:
                 if key not in detailed_results:
                     detailed_results[key] = {"file": path, "lines": []}
-                detailed_results[key]["lines"].append(str(i))
+                if file_path.endswith("main.py"):
+                    detailed_results[key]["lines"].append(str(i + main_start))
+                else:
+                    detailed_results[key]["lines"].append(str(i))
 
     return detailed_results
 
@@ -123,6 +140,7 @@ def classify(raw_flags):
     for flag in flags:
         if flag["category"] not in result:
             result[flag["category"]] = []
+
         result[flag["category"]].append(
             {"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
         )
diff --git a/docs/conf.py b/docs/conf.py
index 2d39be73..d07438e1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -49,6 +49,7 @@
     "examples_dirs": "./examples",  # path to your example scripts
     "gallery_dirs": "auto_examples",  # path to where to save gallery generated output
     "plot_gallery": False,
+    "write_computation_times": False,
 }
 
 # bibtex files
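The main.py special case added above narrows the scan to the body of main(), so config.<key> references inside add_all_arguments() are not counted as usages, while main_start keeps the reported line numbers file-absolute. A toy run of the same windowing on made-up file contents:

    # Toy demonstration of the "def main(" windowing from the patch above.
    lines = [
        "def add_all_arguments(parser):\n",     # fake file, line 1
        "    parser.add_argument('--seed')\n",  # line 2
        "def main(config):\n",                  # line 3
        "    print(config.seed)\n",             # line 4
        "def other():\n",                       # line 5: first top-level line after main()
    ]
    for idx in range(len(lines)):
        if lines[idx].startswith("def main("):
            lines = lines[idx:]  # drop everything before main()
            main_start = idx     # 0-based offset for restoring absolute numbers
            break
    for i, line in enumerate(lines[1:], start=1):
        if line and line[0] not in (" ", "\t") and line.strip() != "":
            lines = lines[:i]    # cut at the next top-level statement
            break
    print(lines)       # ['def main(config):\n', '    print(config.seed)\n']
    print(main_start)  # 2, so sliced line i maps back to file line i + main_start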
From b50a14d7be37a36bae8c2737383be67cf8e479cb Mon Sep 17 00:00:00 2001
From: Winter Deng
Date: Mon, 3 Nov 2025 11:08:33 +0800
Subject: [PATCH 4/6] - optimize code in docs/cli/classifier.py (functions:
 classify_file_category, find_config_usages_in_file, move_duplicates_together,
 classify)
 - reformat the above script

---
 docs/cli/classifier.py | 120 ++++++++++++-----------------------------
 1 file changed, 35 insertions(+), 85 deletions(-)

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
index 5fd088f3..ad253cb4 100644
--- a/docs/cli/classifier.py
+++ b/docs/cli/classifier.py
@@ -5,24 +5,20 @@
 from pathlib import Path
 from collections import defaultdict
 
-current_dir = os.path.dirname(os.path.abspath(__file__))
-lib_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
-sys.path.insert(0, lib_path)
+lib_path = Path.cwd().parent
+sys.path.insert(0, str(lib_path))
 
 
 def classify_file_category(path):
 
     relative_path = Path(path).relative_to(lib_path)
-    return_path = relative_path.as_posix()
-    filename = Path(*relative_path.parts[1:]).as_posix() if len(relative_path.parts) > 1 else return_path
+    filename = "/".join(relative_path.parts[1:]) or relative_path.as_posix()
 
     if filename.startswith("linear"):
-        category = "linear"
-    elif filename.startswith("torch") or filename.startswith("nn"):
-        category = "nn"
-    else:
-        category = "general"
-    return category, return_path
+        return "linear"
+    if filename.startswith(("torch", "nn")):
+        return "nn"
+    return "general"
 
 
 def fetch_option_flags(flags):
@@ -51,105 +47,59 @@ def fetch_all_files():
     return file_set
 
 
-def find_config_usages_in_file(file_path, allowed_keys):
+def find_config_usages_in_file(file_path, allowed_keys, category_set):
     pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
-    detailed_results = {}
-    try:
-        with open(file_path, "r", encoding="utf-8") as f:
-            lines = f.readlines()
-    except (IOError, UnicodeDecodeError):
-        return []
 
-    _, path = classify_file_category(file_path)
+    with open(file_path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
 
     if file_path.endswith("main.py"):
         for idx in range(len(lines)):
             if lines[idx].startswith("def main("):
                 lines = lines[idx:]
-                main_start = idx
                 break
-        for i, line in enumerate(lines[1:], start=1):
-            if line and line[0] not in (" ", "\t") and line.strip() != "":
-                lines = lines[:i]
-                break
-
-    for i, line in enumerate(lines, start=1):
-        matches = pattern.findall(line)
-        for key in matches:
-            if key in allowed_keys:
-                if key not in detailed_results:
-                    detailed_results[key] = {"file": path, "lines": []}
-                if file_path.endswith("main.py"):
-                    detailed_results[key]["lines"].append(str(i + main_start))
-                else:
-                    detailed_results[key]["lines"].append(str(i))
-
-    return detailed_results
+    all_str = " ".join(lines)
+    matches = set(pattern.findall(all_str)) & allowed_keys
+    category = classify_file_category(file_path)
+    for key in matches:
+        category_set[category].add(key)
 
 
-def move_duplicates_together(data, keep):
-    all_keys = list(data.keys())
-    duplicates = set()
-
-    for i, key1 in enumerate(all_keys):
-        for key2 in all_keys[i + 1 :]:
-            duplicates |= data[key1] & data[key2]
-
-    data[keep] |= duplicates
-
-    for key in all_keys:
-        if key != keep:
-            data[key] -= duplicates
+def move_duplicates_together(data):
+    duplicates = (data["general"] & data["linear"]) | (data["general"] & data["nn"]) | (data["linear"] & data["nn"])
 
+    data["general"].update(duplicates)
+    data["linear"] -= duplicates
+    data["nn"] -= duplicates
     return data
 
 
 def classify(raw_flags):
-
     category_set = {"general": set(), "linear": set(), "nn": set()}
+
     flags = fetch_option_flags(raw_flags)
     allowed_keys = set(flag["instruction"] for flag in flags)
     file_set = fetch_all_files()
-    usage_map = defaultdict(list)
-    collected = {}
 
     for file_path in file_set:
-        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
-        if detailed_results:
-            usage_map[file_path] = set(detailed_results.keys())
-            for k, v in detailed_results.items():
-                if k not in collected:
-                    collected[k] = []
-                collected[k].append(v)
-
-    for path, keys in usage_map.items():
-        category, path = classify_file_category(path)
-        category_set[category] = category_set[category].union(keys)
+        find_config_usages_in_file(file_path, allowed_keys, category_set)
 
-    category_set = move_duplicates_together(category_set, "general")
+    category_set = move_duplicates_together(category_set)
 
-    for flag in flags:
-        for k, v in category_set.items():
-            for i in v:
-                if flag["instruction"] == i:
-                    flag["category"] = k
-        if "category" not in flag:
-            flag["category"] = "general"
-
-    result = {}
-    for flag in flags:
-        if flag["category"] not in result:
-            result[flag["category"]] = []
+    result = defaultdict(list)
+    for flag in raw_flags:
+        instr = flag["name"].replace("\\", "").split("-")[-1]
+        flag_name = flag["name"].replace("--", r"\-\-")
 
-        result[flag["category"]].append(
-            {"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
-        )
+        matched = False
+        for category, keys in category_set.items():
+            if instr in keys:
+                result[category].append({"name": flag_name, "description": flag["description"]})
+                matched = True
+                break
 
-    result["details"] = []
-    for k, v in collected.items():
-        result["details"].append({"name": k, "file": v[0]["file"], "location": ", ".join(v[0]["lines"])})
-        if len(v) > 1:
-            for i in v[1:]:
-                result["details"].append({"name": "", "file": i["file"], "location": ", ".join(i["lines"])})
+        if not matched:
+            result["general"].append({"name": flag_name, "description": flag["description"]})
 
     return result
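After this rewrite, move_duplicates_together hard-codes the three categories: any key claimed by more than one of them is promoted to "general". A quick check of the pairwise-intersection logic with hypothetical key sets:

    data = {
        "general": {"seed"},
        "linear": {"liblinear_options", "val_size"},
        "nn": {"learning_rate", "val_size"},  # "val_size" is used by both trainers
    }
    duplicates = (data["general"] & data["linear"]) | (data["general"] & data["nn"]) | (data["linear"] & data["nn"])
    data["general"].update(duplicates)
    data["linear"] -= duplicates
    data["nn"] -= duplicates
    print(data["general"])  # {'seed', 'val_size'} (set order may vary)
    print(data["linear"])   # {'liblinear_options'}
    print(data["nn"])       # {'learning_rate'}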
From 41d80569871c744771573925149bc8a6aa0657f8 Mon Sep 17 00:00:00 2001
From: Jie-Jyun Liu
Date: Mon, 10 Nov 2025 14:05:59 +0800
Subject: [PATCH 5/6] Apply suggestions from code review

---
 docs/cli/classifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
index ad253cb4..82490640 100644
--- a/docs/cli/classifier.py
+++ b/docs/cli/classifier.py
@@ -85,7 +85,7 @@ def classify(raw_flags):
     for file_path in file_set:
         find_config_usages_in_file(file_path, allowed_keys, category_set)
 
-    category_set = move_duplicates_together(category_set)
+    move_duplicates_together(category_set)
 
     result = defaultdict(list)
     for flag in raw_flags:

From 3d725f5c6a3243d18a30730422e9284477f6f2a5 Mon Sep 17 00:00:00 2001
From: Winter Deng
Date: Mon, 10 Nov 2025 14:11:38 +0800
Subject: [PATCH 6/6] optimize function

---
 docs/cli/classifier.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/docs/cli/classifier.py b/docs/cli/classifier.py
index 82490640..0ff52ad8 100644
--- a/docs/cli/classifier.py
+++ b/docs/cli/classifier.py
@@ -10,7 +10,6 @@
 
 
 def classify_file_category(path):
-
     relative_path = Path(path).relative_to(lib_path)
     filename = "/".join(relative_path.parts[1:]) or relative_path.as_posix()
 
@@ -72,8 +71,6 @@ def move_duplicates_together(data):
     data["linear"] -= duplicates
     data["nn"] -= duplicates
 
-    return data
-
 
 def classify(raw_flags):
     category_set = {"general": set(), "linear": set(), "nn": set()}
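With the whole series applied, classify() returns a defaultdict(list) keyed by category, and any flag whose extracted key never matches a config.<key> usage falls back to "general". A minimal sketch of the final grouping loop on made-up inputs:

    from collections import defaultdict

    # Made-up flag records and a pre-computed category_set; only the shape of
    # the result matters here.
    raw_flags = [
        {"name": "--seed", "description": "Random seed (default: %(default)s)"},
        {"name": "--linear", "description": "Train linear model"},
        {"name": "--epochs", "description": "The number of epochs to train (default: %(default)s)"},
        {"name": "--config", "description": "Path to configuration file"},  # no config.config usage
    ]
    category_set = {"general": {"seed"}, "linear": {"linear"}, "nn": {"epochs"}}

    result = defaultdict(list)
    for flag in raw_flags:
        instr = flag["name"].replace("\\", "").split("-")[-1]
        flag_name = flag["name"].replace("--", r"\-\-")
        matched = False
        for category, keys in category_set.items():
            if instr in keys:
                result[category].append({"name": flag_name, "description": flag["description"]})
                matched = True
                break
        if not matched:
            result["general"].append({"name": flag_name, "description": flag["description"]})

    print([f["name"] for f in result["general"]])  # ['\\-\\-seed', '\\-\\-config']

genflags.py then feeds result["general"], result["linear"], and result["nn"] to print_table, one RST table per category.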