From 575eaf46aec175736b60b07566eca9c189ae8ebf Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 28 Oct 2025 03:31:19 +0000
Subject: [PATCH 01/23] Add MVP grid search (grid.py, run_exp.py)

---
 grid.py                        | 171 +++++++++++++++++++++++++++++++++
 libmultilabel/linear/linear.py |   1 +
 libmultilabel/linear/tree.py   |  25 ++++-
 linear_trainer.py              |   2 +
 run_exp.py                     |  93 ++++++++++++++++++
 5 files changed, 288 insertions(+), 4 deletions(-)
 create mode 100644 grid.py
 create mode 100644 run_exp.py

diff --git a/grid.py b/grid.py
new file mode 100644
index 00000000..b9769f2c
--- /dev/null
+++ b/grid.py
@@ -0,0 +1,171 @@
+from abc import abstractmethod
+
+import libmultilabel.linear as linear
+import numpy as np
+import math
+
+class Parameter:
+    def __init__(self, **params):
+        self.params = params
+    
+    def tfidf(self):  # pad default value for compatibility
+        return self.params['tfidf']
+
+    def tree(self):
+        return self.params['tree']
+
+    def params(self):
+        return self.params['params']
+
+    def inference(self):
+        return self.params['inference']
+
+
+param = Parameter(tfidf={'min_df': 1, 'max_features': 10000}, tree={'K': 2, 'dmax': 100})
+
+class GridSearch:
+    def __init__(self, data_source, n_folds, search_space, config=None):
+        self.data_source = data_source
+        self.search_space = search_space
+        self.config = config
+        self.n_folds = n_folds
+        self.metrics = ["P@1", "P@3", "P@5"]
+
+    def __call__(self):
+        self.build_data()
+        self.build_fold_idx()
+
+        results = {
+            (str(tfidf_param), str(param)): {metric: 0 for metric in self.metrics}
+            for tfidf_param in self.search_space['tfidf'] for param in self.search_space['params']
+            }
+        # for fold, params in zip(self.fold_space, self.search_space):
+        for tfidf_param in self.search_space['tfidf']:  # param should be an instance of a config class
+            avg_score = {metric: 0 for metric in self.metrics}
+            for i in range(self.n_folds):
+                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
+                    self.get_fold_data(i, tfidf_param)
+                for tree
+                for param in self.search_space['params']:
+                    print(f'\nRunning fold {i}\ntfidf: {tfidf_param}\nparams: {param}')
+                    model = self.get_model(y_train_fold, x_train_fold, param)
+                    cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, param)
+                    print(f'cv_score: {cv_score}\n')
+                    for metric in self.metrics:
+                        results[(str(tfidf_param), str(param))][metric] += cv_score[metric] / self.n_folds
+
+        # TODO: Return a function
+        return sorted(results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
+
+    def build_fold_idx(self):
+        permutation = np.random.permutation(self.num_instances)
+        index_per_fold = [
+            permutation[int(fold * self.num_instances / self.n_folds):int((fold+1) * self.num_instances / self.n_folds)]
+            for fold in range(self.n_folds)
+        ]
+
+        self.fold_idx = {
+            fold: {
+                'train': np.concatenate(index_per_fold[:fold] + index_per_fold[fold+1:]),
+                'valid': index_per_fold[fold]
+                } for fold in range(self.n_folds)
+            }
+
+    @abstractmethod
+    def build_data(self):
+        pass
+
+    @abstractmethod
+    def get_fold_data(self, i, param):
+        pass
+
+    @abstractmethod
+    def get_model(self, y_train_fold, x_train_fold, param):
+        pass
+
+    @abstractmethod
+    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
+        pass
+
+
+class HyperparameterSearch(GridSearch):
+    def __init__(self, data_source, n_folds, search_space, config=None):
+        super().__init__(data_source, n_folds, search_space, config)
+
+    def preprocess_tfidf(self, dataset, param):
+        preprocessor = linear.Preprocessor(tfidf_params=param)
+        return preprocessor.fit_transform(dataset)
+
+    def build_data(self):
+        self.data = {}
+
+        dataset = linear.load_dataset("svm", self.data_source[0], self.data_source[1])
+        self.num_instances = len(dataset["train"]["y"])
+        tfidf_params = self.search_space['tfidf']
+        for param in tfidf_params:
+            print(f'Preprocessing tfidf: {param}..')
+            tfidf_data = self.preprocess_tfidf(dataset, param)
+            self.data[str(param)] = {'dataset': tfidf_data}
+        # use yield? (however, hard to reuse)
+
+    def get_fold_data(self, i, param):
+        dataset = self.data[str(param)]['dataset']["train"]
+        return (
+            dataset["y"][self.fold_idx[i]['train']], dataset["x"][self.fold_idx[i]['train']],
+            dataset["y"][self.fold_idx[i]['valid']], dataset["x"][self.fold_idx[i]['valid']]
+            )
+
+    def get_model(self, y_train_fold, x_train_fold, param):
+        model = linear.train_tree(y_train_fold, x_train_fold, **param)  # train with param and fold data
+        return model
+
+    def metrics_in_batches(self, y, x, model, *args, **kwargs):
+        batch_size = 256
+        num_instances = x.shape[0]
+        num_batches = math.ceil(num_instances / batch_size)
+
+        metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
+
+        for i in range(num_batches):
+            preds = linear.predict_values(model, x[i * batch_size : (i + 1) * batch_size])
+            target = y[i * batch_size : (i + 1) * batch_size].toarray()
+            metrics.update(preds, target)
+
+        return metrics.compute()
+
+    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
+        # calculate the metric with the model
+        score = self.metrics_in_batches(
+            y_valid_fold,
+            x_valid_fold,
+            model,
+            **param
+            )
+        return score
+
+
+class ProbEstimatiteSearch(GridSearch):
+    def __init__(self, data_source, n_folds, search_space, config=None):
+        super().__init__(data_source, n_folds, search_space, config)
+
+    def build_data(self):
+        data = {'unique': {}}
+        unique_data = None  # from libmultilabel preprocessing
+        for i in range(self.n_folds):
+            train_idx, valid_idx = None, None
+            y_train_fold, x_train_fold = unique_data[train_idx]
+            y_valid_fold, x_valid_fold = unique_data[valid_idx]
+            data['unique'][i] = unique_data
+
+        return data
+
+    def get_fold_data(self, data, i, param):
+        return data['unique'][i]
+
+    def get_model(self, y_train_fold, x_train_fold, param):
+        model = None  # train normally with fold data
+        return model
+
+    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
+        score = None  # calculate the metric with the model and the hyperparameter A
+        return score
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 04d25a21..6a47800a 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -198,6 +198,7 @@ def train_1vsrest(
     multiclass: bool = False,
     options: str = "",
     verbose: bool = True,
+    *args, **kwargs,
 ) -> FlatModel:
     """Train a linear model parallel on labels for multi-label data using a one-vs-rest strategy.
 
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 7f1ce851..c4bada55 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -204,6 +204,7 @@ def train_tree(
     K=DEFAULT_K,
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
+    *args, **kwargs,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.
@@ -264,6 +265,18 @@ def visit(node):
     return TreeModel(root, flat_model, node_ptr)
 
 
+import os, sys
+
+class silent_print:
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        sys.stdout = open(os.devnull, 'w')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stdout = self._original_stdout
+
+
 def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
     """Build the tree recursively by kmeans clustering.
 
@@ -284,10 +297,14 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
         else:
             kmeans_algo = LloydKmeans
 
-        kmeans = kmeans_algo(
-            n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=True
-        )
-        metalabels = kmeans.fit(label_representation)
+        if True:
+            metalabels = np.random.randint(0, K, label_representation.shape[0])
+        else:
+            with silent_print():
+                kmeans = kmeans_algo(
+                    n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
+                )
+                metalabels = kmeans.fit(label_representation)
 
         unique_labels = np.unique(metalabels)
         if len(unique_labels) == K:
diff --git a/linear_trainer.py b/linear_trainer.py
index b9133857..1105f62a 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -45,6 +45,7 @@ def linear_train(datasets, config):
     multiclass = is_multiclass_dataset(datasets["train"], "y")
 
     # train
+    # 1
     if config.linear_technique == "tree":
         if multiclass:
             raise ValueError("Tree model should only be used with multilabel datasets.")
@@ -74,6 +75,7 @@ def linear_train(datasets, config):
             multiclass=multiclass,
             options=config.liblinear_options,
         )
+    # 2
     return model
 
 
diff --git a/run_exp.py b/run_exp.py
new file mode 100644
index 00000000..c3c16257
--- /dev/null
+++ b/run_exp.py
@@ -0,0 +1,93 @@
+import libmultilabel.linear as linear
+import grid as grid
+import numpy as np
+
+import time
+import json
+from tqdm import tqdm
+
+
+def run_ovr(dataset, options, *args, **kwargs):
+    training_start = time.time()
+    ovr_model = linear.train_1vsrest(
+        dataset["train"]["y"],
+        dataset["train"]["x"],
+        options=options
+        )
+    training_time = time.time() - training_start
+    return ovr_model, training_time
+
+def run_tree(dataset, options, K, dmax, *args, **kwargs):
+    training_start = time.time()
+    tree_model = linear.train_tree(
+        dataset["train"]["y"],
+        dataset["train"]["x"],
+        options=options,
+        K=K,
+        dmax=dmax
+        )
+    training_time = time.time() - training_start
+    return tree_model, training_time
+
+
+if __name__ == "__main__":
+    import argparse
+    np.random.seed(20250820)
+
+    parser = argparse.ArgumentParser(description="Parse command-line arguments.")
+    parser.add_argument("--dataset", type=str, default="EUR-Lex", help="Dataset name (e.g., AmazonCat-13K, EUR-Lex)")
+    args = parser.parse_args()
+
+    dataset_ = args.dataset
+
+    # dataset = linear.load_dataset("svm", f"data/{dataset_}/train.svm")  # , f"data/{dataset}/test.svm"
+    data_source = [f'data/{dataset_}/train.svm', f'data/{dataset_}/test.svm']
+    search_space = {
+        'tfidf': {
+            'min_df': [1, 2],
+            'max_features': [10000, 320000],
+        },
+        'params': {
+            'C': [1, 2],
+            'K': [2, 100],
+        },
+    }
+    search_space = {
+        'tfidf': [
+            {'max_features': i} for i in [10000]
+        ],
+        'params': [
+            {'K': i} for i in [2, 100]
+        ],
+    }
+    print(search_space)
+    n_folds = 3
+    grid_search = grid.HyperparameterSearch(data_source, n_folds, search_space)
+    results = grid_search()
+    print(results)
+    # if num_classes != -1:
+    #     dataset["train"]["y"] = [[yij % num_classes for yij in yi] for yi in dataset["train"]["y"]]
+
+    # preprocessor = linear.Preprocessor()
+    # dataset = preprocessor.fit_transform(dataset)
+
+    # results = {
+    #     exp_name: {
+    #         t: 0 for t in exp_threads
+    #     }
+    #     for exp_name in exp_names
+    # }
+
+    # for exp_name in exp_names:
+    #     for exp_thread in tqdm(exp_threads, leave=True, colour="blue", desc=exp_name):
+    #         if exp_name == 'Strategy B':
+    #             do_parallel = True
+    #             options = "-m 1"
+    #             num_threads = exp_thread
+    #         else:
+    #             do_parallel = False
+    #             options = f"-m {exp_thread}"
+    #             num_threads = -1
+
+    #         _, training_time = run_ovr(dataset, options, num_threads, do_parallel, use_dedicated_x)
+    #         results[exp_name][exp_thread] = training_time

From 95da0085f601a41272710dc9a1457c2e939aad7c Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 28 Oct 2025 18:38:11 +0000
Subject: [PATCH 02/23] Almost finished GridParameter

---
 grid.py                      | 245 ++++++++++++++++++++++-------------
 libmultilabel/linear/tree.py |  10 +-
 2 files changed, 158 insertions(+), 97 deletions(-)

diff --git a/grid.py b/grid.py
index b9769f2c..fe3af7b4 100644
--- a/grid.py
+++ b/grid.py
@@ -1,27 +1,72 @@
 from abc import abstractmethod
+from dataclasses import make_dataclass, field, fields, asdict
+from typing import Callable
 
 import libmultilabel.linear as linear
 import numpy as np
 import math
 
-class Parameter:
-    def __init__(self, **params):
+
+class GridParameter:
+
+    _tfidf_fields = [
+        ('ngram_range', tuple[int, int], field(default=(1, 1))),
+        ('max_features', int, field(default=None)),
+        ('min_df', float | int, field(default=1)),
+        ('stop_words', str | list, field(default=None)),
+        ('strip_accents', str | Callable, field(default=None)),
+        ('tokenizer', Callable, field(default=None)),
+        ]
+    _tree_fields = [
+        ('dmax', int, field(default=10)),
+        ('K', int, field(default=8)),
+        ]
+    _linear_fields = [
+        ('s', int, field(default=1)),
+        ('c', float, field(default=1)),
+        ('B', int, field(default=-1)),
+        ]
+    _predict_fields = [
+        ('beam_width', int, field(default=10)),
+        ('A', int, field(default=1)),
+        ]
+
+    param_types = {
+        'tfidf': make_dataclass('_TfidfParams', _tfidf_fields, frozen=True, order=True),
+        'tree': make_dataclass('_TreeParams', _tree_fields, frozen=True, order=True),
+        'linear': make_dataclass('_LinearParams', _linear_fields, frozen=True, order=True),
+        'predict': make_dataclass('_PredictParams', _predict_fields, frozen=True, order=True),
+    }
+
+    def __init__(self, params: dict):
         self.params = params
-    
-    def tfidf(self):  # pad default value for compatibility
-        return self.params['tfidf']
+        for param_type, class_name in self.param_types.items():
+            field_names = {f.name for f in fields(class_name)}
+            _params = {k: v for k, v in self.params.items() if k in field_names}
+            setattr(self, param_type, class_name(**_params))
+
+    @property
+    def linear_options(self):
+        options = ''
+        for f in fields(self.linear):
+            options += f" -{f.name} {getattr(self.linear, f.name)}"
+        return options.strip()
 
-    def tree(self):
-        return self.params['tree']
+    def __repr__(self):
+        return str(self.params)
 
-    def params(self):
-        return self.params['params']
+    def __eq__(self, other):
+        return all(getattr(self, t) == getattr(other, t) for t in self.param_types)
 
-    def inference(self):
-        return self.params['inference']
+    def __lt__(self, other):
+        # "<" for tuple is automatically lexicographic ordering
+        my_values = tuple(getattr(self, t) for t in self.param_types)
+        other_values = tuple(getattr(other, t) for t in self.param_types)
+        return my_values < other_values
 
+    def __hash__(self):
+        return hash(tuple(getattr(self, t) for t in self.param_types))
 
-param = Parameter(tfidf={'min_df': 1, 'max_features': 10000}, tree={'K': 2, 'dmax': 100})
 
 class GridSearch:
     def __init__(self, data_source, n_folds, search_space, config=None):
@@ -31,31 +76,8 @@ def __init__(self, data_source, n_folds, search_space, config=None):
         self.n_folds = n_folds
         self.metrics = ["P@1", "P@3", "P@5"]
 
-    def __call__(self):
-        self.build_data()
-        self.build_fold_idx()
-
-        results = {
-            (str(tfidf_param), str(param)): {metric: 0 for metric in self.metrics}
-            for tfidf_param in self.search_space['tfidf'] for param in self.search_space['params']
-            }
-        # for fold, params in zip(self.fold_space, self.search_space):
-        for tfidf_param in self.search_space['tfidf']:  # param should be an instance of a config class
-            avg_score = {metric: 0 for metric in self.metrics}
-            for i in range(self.n_folds):
-                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
-                    self.get_fold_data(i, tfidf_param)
-                for tree
-                for param in self.search_space['params']:
-                    print(f'\nRunning fold {i}\ntfidf: {tfidf_param}\nparams: {param}')
-                    model = self.get_model(y_train_fold, x_train_fold, param)
-                    cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, param)
-                    print(f'cv_score: {cv_score}\n')
-                    for metric in self.metrics:
-                        results[(str(tfidf_param), str(param))][metric] += cv_score[metric] / self.n_folds
-
-        # TODO: Return a function
-        return sorted(results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
+    def sort_search_space(self):
+        self.search_space.sort()
 
     def build_fold_idx(self):
         permutation = np.random.permutation(self.num_instances)
@@ -71,55 +93,13 @@ def build_fold_idx(self):
                 } for fold in range(self.n_folds)
             }
 
-    @abstractmethod
-    def build_data(self):
-        pass
-
-    @abstractmethod
-    def get_fold_data(self, i, param):
-        pass
-
-    @abstractmethod
-    def get_model(self, y_train_fold, x_train_fold, param):
-        pass
-
-    @abstractmethod
-    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
-        pass
-
-
-class HyperparameterSearch(GridSearch):
-    def __init__(self, data_source, n_folds, search_space, config=None):
-        super().__init__(data_source, n_folds, search_space, config)
-
-    def preprocess_tfidf(self, dataset, param):
-        preprocessor = linear.Preprocessor(tfidf_params=param)
-        return preprocessor.fit_transform(dataset)
-
-    def build_data(self):
-        self.data = {}
-
-        dataset = linear.load_dataset("svm", self.data_source[0], self.data_source[1])
-        self.num_instances = len(dataset["train"]["y"])
-        tfidf_params = self.search_space['tfidf']
-        for param in tfidf_params:
-            print(f'Preprocessing tfidf: {param}..')
-            tfidf_data = self.preprocess_tfidf(dataset, param)
-            self.data[str(param)] = {'dataset': tfidf_data}
-        # use yield? (however, hard to reuse)
-
-    def get_fold_data(self, i, param):
-        dataset = self.data[str(param)]['dataset']["train"]
+    def get_fold_data(self, dataset, i, params):
         return (
             dataset["y"][self.fold_idx[i]['train']], dataset["x"][self.fold_idx[i]['train']],
             dataset["y"][self.fold_idx[i]['valid']], dataset["x"][self.fold_idx[i]['valid']]
             )
 
-    def get_model(self, y_train_fold, x_train_fold, param):
-        model = linear.train_tree(y_train_fold, x_train_fold, **param)  # train with param and fold data
-        return model
-
-    def metrics_in_batches(self, y, x, model, *args, **kwargs):
+    def get_cv_score(self, y, x, model, params):
         batch_size = 256
         num_instances = x.shape[0]
         num_batches = math.ceil(num_instances / batch_size)
@@ -133,15 +113,94 @@ def metrics_in_batches(self, y, x, model, *args, **kwargs):
 
         return metrics.compute()
 
-    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
-        # calculate the metric with the model
-        score = self.metrics_in_batches(
-            y_valid_fold,
-            x_valid_fold,
-            model,
-            **param
-            )
-        return score
+    def output(self):  # return sorted params list with scores by default
+        return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
+
+    def __call__(self):
+        self.sort_search_space()
+        self.build_fold_idx()
+
+        self.results = {
+            params: {metric: 0 for metric in self.metrics}
+            for params in self.search_space
+            }
+        # for fold, params in zip(self.fold_space, self.search_space):
+        for params in self.search_space:  # params should be an instance of a config class
+            avg_score = {metric: 0 for metric in self.metrics}
+            dataset = self.get_dataset(params)
+            # should be 000111222... or 012012012... (for same tfidf params but different params)
+            # don't know whether 012012012 waste space (view or new data)?
+            for i in range(self.n_folds):
+                # secretly caching the tree root for each fold..
+                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
+                    self.get_fold_data(dataset, i, params)
+
+                print(f'\nRunning fold {i}\nparams: {params}')
+                self.model = self.get_model(y_train_fold, x_train_fold, params)
+                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
+                print(f'cv_score: {cv_score}\n')
+
+                for metric in self.metrics:
+                    self.results[params][metric] += cv_score[metric] / self.n_folds
+
+        return self.output()
+
+    @abstractmethod
+    def get_dataset(self, params) -> dict[str, np.matrix]:
+        """
+        Get the dataset for the given params.
+
+        Args:
+            params (GridParameter): The params to build the dataset.
+
+        Returns:
+            dict[str, np.matrix]: The keys should be 'y' and 'x'.
+        """
+        pass
+
+    @abstractmethod
+    def get_model(self, y, x, params) -> linear.FlatModel | linear.TreeModel:
+        """
+        Get the model for the given params.
+
+        Args:
+            y (np.matrix): The labels of the training data.
+            x (np.matrix): The features of the training data.
+            params (GridParameter): The params to build the model.
+
+        Returns:
+            linear.FlatModel | linear.TreeModel: The model for the given params.
+        """
+        pass
+
+
+class HyperparameterSearch(GridSearch):
+    def __init__(self, data_source, n_folds, search_space, config=None):
+        super().__init__(data_source, n_folds, search_space, config)
+        self._cached_tfidf_params = None
+        self._cached_tfidf_data = None
+        self._cached_tree_params = None
+        # pass directly in the product code (linear_trainer.py)
+        self.dataset = linear.load_dataset("svm", self.data_source[0], self.data_source[1])
+        self.num_instances = len(self.dataset["train"]["y"])
+
+    def get_dataset(self, params):
+        tfidf_params = params.tfidf
+        if tfidf_params != self._cached_tfidf_params:
+            print(f'Preprocessing tfidf: {tfidf_params}..')
+            self._cached_tfidf_params = tfidf_params
+            self._cached_tfidf_data = linear.Preprocessor(tfidf_params=tfidf_params).fit_transform(self.dataset)
+        return self._cached_tfidf_data
+
+    def get_tree_root(self, y, x, params):
+        label_representation = (y.T * x).tocsr()
+        label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
+        root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
+        root.is_root = True
+
+    def get_model(self, y, x, params):
+        model = linear.train_tree(y, x, **params)  # train with params and fold data
+        return model
 
 
 class ProbEstimatiteSearch(GridSearch):
@@ -159,13 +218,13 @@ def build_data(self):
 
         return data
 
-    def get_fold_data(self, data, i, param):
+    def get_fold_data(self, data, i, params):
         return data['unique'][i]
 
-    def get_model(self, y_train_fold, x_train_fold, param):
+    def get_model(self, y_train_fold, x_train_fold, params):
         model = None  # train normally with fold data
         return model
 
-    def get_cv_score(self, y_valid_fold, x_valid_fold, model, param):
+    def get_cv_score(self, y_valid_fold, x_valid_fold, model, params):
         score = None  # calculate the metric with the model and the hyperparameter A
         return score
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index c4bada55..d2044abe 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -204,6 +204,7 @@ def train_tree(
     K=DEFAULT_K,
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
+    tree_root: Node = None,
     *args, **kwargs,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
@@ -220,10 +221,11 @@ def train_tree(
     Returns:
         TreeModel: A model which can be used in predict_values.
     """
-    label_representation = (y.T * x).tocsr()
-    label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
-    root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
-    root.is_root = True
+    if tree_root is None:
+        label_representation = (y.T * x).tocsr()
+        label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
+        root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
+        root.is_root = True
 
     num_nodes = 0
     # Both type(x) and type(y) are sparse.csr_matrix

From 47063e05419ed07ab102b038065c6d92b55d8515 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 28 Oct 2025 19:08:13 +0000
Subject: [PATCH 03/23] first version implemented

---
 grid.py                      | 62 +++++++++++++++++++++++++++---------
 libmultilabel/linear/tree.py | 25 ++++-----------
 run_exp.py                   | 11 ++-----
 3 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/grid.py b/grid.py
index fe3af7b4..beb79595 100644
--- a/grid.py
+++ b/grid.py
@@ -1,12 +1,27 @@
+import os
+import sys
 from abc import abstractmethod
 from dataclasses import make_dataclass, field, fields, asdict
 from typing import Callable
 
 import libmultilabel.linear as linear
+from libmultilabel.linear.tree import _build_tree, silent_print
+
+import sklearn.preprocessing
 import numpy as np
 import math
 
 
+class silent_print:
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        sys.stdout = open(os.devnull, 'w')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stdout = self._original_stdout
+
+
 class GridParameter:
 
     _tfidf_fields = [
@@ -69,9 +84,9 @@ def __hash__(self):
 
 
 class GridSearch:
-    def __init__(self, data_source, n_folds, search_space, config=None):
+    def __init__(self, data_source: tuple[str, str], n_folds: int, search_space: list[dict], config=None):
         self.data_source = data_source
-        self.search_space = search_space
+        self.search_space = [GridParameter(params) for params in search_space]
         self.config = config
         self.n_folds = n_folds
         self.metrics = ["P@1", "P@3", "P@5"]
@@ -130,14 +145,14 @@ def __call__(self):
             dataset = self.get_dataset(params)
             # should be 000111222... or 012012012... (for same tfidf params but different params)
             # don't know whether 012012012 waste space (view or new data)?
-            for i in range(self.n_folds):
+            for fold in range(self.n_folds):
                 # secretly caching the tree root for each fold..
                 y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
-                    self.get_fold_data(dataset, i, params)
+                    self.get_fold_data(dataset, fold, params)
 
-                print(f'\nRunning fold {i}\nparams: {params}')
-                self.model = self.get_model(y_train_fold, x_train_fold, params)
-                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
+                print(f'\nRunning fold {fold}\nparams: {params}')
+                self.model = self.get_model(y_train_fold, x_train_fold, fold, params)
+                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, self.model, params)
                 print(f'cv_score: {cv_score}\n')
 
                 for metric in self.metrics:
@@ -159,7 +174,7 @@ def get_dataset(self, params) -> dict[str, np.matrix]:
         pass
 
     @abstractmethod
-    def get_model(self, y, x, params) -> linear.FlatModel | linear.TreeModel:
+    def get_model(self, y, x, fold, params) -> linear.FlatModel | linear.TreeModel:
         """
         Get the model for the given params.
 
@@ -180,6 +195,7 @@ def __init__(self, data_source, n_folds, search_space, config=None):
         self._cached_tfidf_params = None
         self._cached_tfidf_data = None
         self._cached_tree_params = None
+        self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
         # pass directly in the product code (linear_trainer.py)
         self.dataset = linear.load_dataset("svm", self.data_source[0], self.data_source[1])
         self.num_instances = len(self.dataset["train"]["y"])
@@ -189,17 +205,33 @@ def get_dataset(self, params):
         if tfidf_params != self._cached_tfidf_params:
             print(f'Preprocessing tfidf: {tfidf_params}..')
             self._cached_tfidf_params = tfidf_params
-            self._cached_tfidf_data = linear.Preprocessor(tfidf_params=tfidf_params).fit_transform(self.dataset)
+            with silent_print():
+                preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
+                self._cached_tfidf_data = preprocessor.fit_transform(self.dataset)['train']
+
         return self._cached_tfidf_data
 
     def get_tree_root(self, y, x, params):
-        label_representation = (y.T * x).tocsr()
-        label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
-        root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
-        root.is_root = True
+        with silent_print():
+            label_representation = (y.T * x).tocsr()
+            label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
+            root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **params)
+            root.is_root = True
+
+        return root
+
+    def get_model(self, y, x, fold, params):
+        tree_params = params.tree
+        if tree_params != self._cached_tree_params:
+            self._cached_tree_params = tree_params
+            self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
+
+        if self._cached_tree_roots[fold] is None:
+            print(f'Preprocessing tree: {tree_params} on fold {fold}..')
+            self._cached_tree_roots[fold] = self.get_tree_root(y, x, asdict(tree_params))
+
+        model = linear.train_tree(y, x, root=self._cached_tree_roots[fold], options=params.linear_options)
 
-    def get_model(self, y, x, params):
-        model = linear.train_tree(y, x, **params)  # train with params and fold data
         return model
 
 
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index d2044abe..777838c1 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -204,7 +204,7 @@ def train_tree(
     K=DEFAULT_K,
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
-    tree_root: Node = None,
+    root: Node = None,
     *args, **kwargs,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
@@ -221,7 +221,7 @@ def train_tree(
     Returns:
         TreeModel: A model which can be used in predict_values.
     """
-    if tree_root is None:
+    if root is None:
         label_representation = (y.T * x).tocsr()
         label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
         root = _build_tree(label_representation, np.arange(y.shape[1]), 0, K, dmax)
@@ -267,18 +267,6 @@ def visit(node):
     return TreeModel(root, flat_model, node_ptr)
 
 
-import os, sys
-
-class silent_print:
-    def __enter__(self):
-        self._original_stdout = sys.stdout
-        sys.stdout = open(os.devnull, 'w')
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        sys.stdout.close()
-        sys.stdout = self._original_stdout
-
-
 def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
     """Build the tree recursively by kmeans clustering.
 
@@ -302,11 +290,10 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
         if True:
             metalabels = np.random.randint(0, K, label_representation.shape[0])
         else:
-            with silent_print():
-                kmeans = kmeans_algo(
-                    n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
-                )
-                metalabels = kmeans.fit(label_representation)
+            kmeans = kmeans_algo(
+                n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
+            )
+            metalabels = kmeans.fit(label_representation)
 
         unique_labels = np.unique(metalabels)
         if len(unique_labels) == K:
diff --git a/run_exp.py b/run_exp.py
index c3c16257..be04591b 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -52,14 +52,9 @@ def run_tree(dataset, options, K, dmax, *args, **kwargs):
             'K': [2, 100],
         },
     }
-    search_space = {
-        'tfidf': [
-            {'max_features': i} for i in [10000]
-        ],
-        'params': [
-            {'K': i} for i in [2, 100]
-        ],
-    }
+    search_space = [
+        {'max_features': i, 'K': j} for i in [10000] for j in [2, 100]
+    ]
     print(search_space)
     n_folds = 3
     grid_search = grid.HyperparameterSearch(data_source, n_folds, search_space)

From 214b28c4a720c80b15289a38249ec33805481e0b Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 6 Nov 2025 05:46:12 +0000
Subject: [PATCH 04/23] update for demo

---
 grid.py                          | 146 +++++++++++++++++--------------
 libmultilabel/linear/__init__.py |   2 +-
 libmultilabel/linear/linear.py   |   2 +-
 libmultilabel/linear/tree.py     |   3 +-
 linear_trainer.py                |  13 ++-
 run_exp.py                       |  88 +++++--------------
 6 files changed, 120 insertions(+), 134 deletions(-)

diff --git a/grid.py b/grid.py
index beb79595..e8c50d65 100644
--- a/grid.py
+++ b/grid.py
@@ -1,25 +1,36 @@
-import os
-import sys
 from abc import abstractmethod
 from dataclasses import make_dataclass, field, fields, asdict
 from typing import Callable
 
+import os
+import sys
+import logging
+
 import libmultilabel.linear as linear
-from libmultilabel.linear.tree import _build_tree, silent_print
+from libmultilabel.linear.tree import _build_tree
 
 import sklearn.preprocessing
 import numpy as np
 import math
 
 
-class silent_print:
+# suppress inevitable outputs from sparsekmeans and sklearn preprocessors
+class _silent_:
+    def __init__(self):
+        self.stderr = os.dup(2)
+        self.devnull = os.open(os.devnull, os.O_WRONLY)
+
     def __enter__(self):
-        self._original_stdout = sys.stdout
+        os.dup2(self.devnull, 2)
+        self.stdout = sys.stdout
         sys.stdout = open(os.devnull, 'w')
 
-    def __exit__(self, exc_type, exc_val, exc_tb):
+    def __exit__(self, type, value, traceback):
+        os.dup2(self.stderr, 2)
+        os.close(self.devnull)
+        os.close(self.stderr)
         sys.stdout.close()
-        sys.stdout = self._original_stdout
+        sys.stdout = self.stdout
 
 
 class GridParameter:
@@ -47,10 +58,10 @@ class GridParameter:
         ]
 
     param_types = {
-        'tfidf': make_dataclass('_TfidfParams', _tfidf_fields, frozen=True, order=True),
-        'tree': make_dataclass('_TreeParams', _tree_fields, frozen=True, order=True),
-        'linear': make_dataclass('_LinearParams', _linear_fields, frozen=True, order=True),
-        'predict': make_dataclass('_PredictParams', _predict_fields, frozen=True, order=True),
+        'tfidf': make_dataclass('TfidfParams', _tfidf_fields, frozen=True, order=True),
+        'tree': make_dataclass('TreeParams', _tree_fields, frozen=True, order=True),
+        'linear': make_dataclass('LinearParams', _linear_fields, frozen=True, order=True),
+        'predict': make_dataclass('PredictParams', _predict_fields, frozen=True, order=True),
     }
 
     def __init__(self, params: dict):
@@ -84,12 +95,20 @@ def __hash__(self):
 
 
 class GridSearch:
-    def __init__(self, data_source: tuple[str, str], n_folds: int, search_space: list[dict], config=None):
-        self.data_source = data_source
+    def __init__(
+        self,
+        datasets: dict[str, np.matrix],
+        n_folds: int,
+        search_space: list[dict],
+        metrics: list[str],
+    ):
+        self.datasets = datasets
         self.search_space = [GridParameter(params) for params in search_space]
-        self.config = config
         self.n_folds = n_folds
-        self.metrics = ["P@1", "P@3", "P@5"]
+        self.metrics = metrics
+        self.results = {
+            params: {metric: 0 for metric in self.metrics} for params in self.search_space
+            }
 
     def sort_search_space(self):
         self.search_space.sort()
@@ -108,13 +127,15 @@ def build_fold_idx(self):
                 } for fold in range(self.n_folds)
             }
 
-    def get_fold_data(self, dataset, i, params):
+    def get_fold_data(self, dataset, fold, params):
         return (
-            dataset["y"][self.fold_idx[i]['train']], dataset["x"][self.fold_idx[i]['train']],
-            dataset["y"][self.fold_idx[i]['valid']], dataset["x"][self.fold_idx[i]['valid']]
+            dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
+            dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
             )
 
     def get_cv_score(self, y, x, model, params):
+        logging.info(f'Scoring params: {params.predict}')
+
         batch_size = 256
         num_instances = x.shape[0]
         num_batches = math.ceil(num_instances / batch_size)
@@ -122,38 +143,32 @@ def get_cv_score(self, y, x, model, params):
         metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
 
         for i in range(num_batches):
-            preds = linear.predict_values(model, x[i * batch_size : (i + 1) * batch_size])
+            preds = model.predict_values(
+                x[i * batch_size : (i + 1) * batch_size],
+                **asdict(params.predict))
             target = y[i * batch_size : (i + 1) * batch_size].toarray()
             metrics.update(preds, target)
 
-        return metrics.compute()
+        scores = metrics.compute()
+        logging.info(f'cv_score: {scores}\n')
+
+        return scores
 
-    def output(self):  # return sorted params list with scores by default
+    def output(self):
         return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
 
     def __call__(self):
         self.sort_search_space()
         self.build_fold_idx()
 
-        self.results = {
-            params: {metric: 0 for metric in self.metrics}
-            for params in self.search_space
-            }
-        # for fold, params in zip(self.fold_space, self.search_space):
-        for params in self.search_space:  # params should be an instance of a config class
-            avg_score = {metric: 0 for metric in self.metrics}
+        for params in self.search_space:
             dataset = self.get_dataset(params)
-            # should be 000111222... or 012012012... (for same tfidf params but different params)
-            # don't know whether 012012012 waste space (view or new data)?
             for fold in range(self.n_folds):
-                # secretly caching the tree root for each fold..
                 y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
                     self.get_fold_data(dataset, fold, params)
 
-                print(f'\nRunning fold {fold}\nparams: {params}')
-                self.model = self.get_model(y_train_fold, x_train_fold, fold, params)
-                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, self.model, params)
-                print(f'cv_score: {cv_score}\n')
+                model = self.get_model(y_train_fold, x_train_fold, fold, params)
+                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
 
                 for metric in self.metrics:
                     self.results[params][metric] += cv_score[metric] / self.n_folds
@@ -190,29 +205,28 @@ def get_model(self, y, x, fold, params) -> linear.FlatModel | linear.TreeModel:
 
 
 class HyperparameterSearch(GridSearch):
-    def __init__(self, data_source, n_folds, search_space, config=None):
-        super().__init__(data_source, n_folds, search_space, config)
+    def __init__(self, datasets, n_folds, search_space, metrics=["P@1", "P@3", "P@5"]):
+        super().__init__(datasets, n_folds, search_space, metrics)
         self._cached_tfidf_params = None
         self._cached_tfidf_data = None
         self._cached_tree_params = None
         self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
-        # pass directly in the product code (linear_trainer.py)
-        self.dataset = linear.load_dataset("svm", self.data_source[0], self.data_source[1])
-        self.num_instances = len(self.dataset["train"]["y"])
+
+        self.num_instances = len(self.datasets["train"]["y"])
 
     def get_dataset(self, params):
         tfidf_params = params.tfidf
         if tfidf_params != self._cached_tfidf_params:
-            print(f'Preprocessing tfidf: {tfidf_params}..')
+            logging.info(f'Preprocessing tfidf: {tfidf_params}..')
             self._cached_tfidf_params = tfidf_params
-            with silent_print():
+            with _silent_():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
-                self._cached_tfidf_data = preprocessor.fit_transform(self.dataset)['train']
+                self._cached_tfidf_data = preprocessor.fit_transform(self.datasets)['train']
 
         return self._cached_tfidf_data
 
     def get_tree_root(self, y, x, params):
-        with silent_print():
+        with _silent_():
             label_representation = (y.T * x).tocsr()
             label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
             root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **params)
@@ -221,13 +235,15 @@ def get_tree_root(self, y, x, params):
         return root
 
     def get_model(self, y, x, fold, params):
+        logging.info(f'\nRunning fold {fold}\nparams: {params}')
+
         tree_params = params.tree
         if tree_params != self._cached_tree_params:
             self._cached_tree_params = tree_params
             self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
 
         if self._cached_tree_roots[fold] is None:
-            print(f'Preprocessing tree: {tree_params} on fold {fold}..')
+            logging.info(f'Preprocessing tree: {tree_params} on fold {fold}..')
             self._cached_tree_roots[fold] = self.get_tree_root(y, x, asdict(tree_params))
 
         model = linear.train_tree(y, x, root=self._cached_tree_roots[fold], options=params.linear_options)
@@ -235,28 +251,28 @@ def get_model(self, y, x, fold, params):
         return model
 
 
-class ProbEstimatiteSearch(GridSearch):
-    def __init__(self, data_source, n_folds, search_space, config=None):
-        super().__init__(data_source, n_folds, search_space, config)
+# class ProbEstimatiteSearch(GridSearch):
+#     def __init__(self, datasets, n_folds, search_space, config=None):
+#         super().__init__(datasets, n_folds, search_space, config)
 
-    def build_data(self):
-        data = {'unique': {}}
-        unique_data = None  # from libmultilabel preprocessing
-        for i in range(self.n_folds):
-            train_idx, valid_idx = None, None
-            y_train_fold, x_train_fold = unique_data[train_idx]
-            y_valid_fold, x_valid_fold = unique_data[valid_idx]
-            data['unique'][i] = unique_data
+#     def build_data(self):
+#         data = {'unique': {}}
+#         unique_data = None  # from libmultilabel preprocessing
+#         for i in range(self.n_folds):
+#             train_idx, valid_idx = None, None
+#             y_train_fold, x_train_fold = unique_data[train_idx]
+#             y_valid_fold, x_valid_fold = unique_data[valid_idx]
+#             data['unique'][i] = unique_data
 
-        return data
+#         return data
 
-    def get_fold_data(self, data, i, params):
-        return data['unique'][i]
+#     def get_fold_data(self, data, i, params):
+#         return data['unique'][i]
 
-    def get_model(self, y_train_fold, x_train_fold, params):
-        model = None  # train normally with fold data
-        return model
+#     def get_model(self, y_train_fold, x_train_fold, params):
+#         model = None  # train normally with fold data
+#         return model
 
-    def get_cv_score(self, y_valid_fold, x_valid_fold, model, params):
-        score = None  # calculate the metric with the model and the hyperparameter A
-        return score
+#     def get_cv_score(self, y_valid_fold, x_valid_fold, model, params):
+#         score = None  # calculate the metric with the model and the hyperparameter A
+#         return score
diff --git a/libmultilabel/linear/__init__.py b/libmultilabel/linear/__init__.py
index 7cdf30bb..efe24120 100644
--- a/libmultilabel/linear/__init__.py
+++ b/libmultilabel/linear/__init__.py
@@ -3,4 +3,4 @@
 from .metrics import *
 from .preprocessor import *
 from .tree import *
-from .utils import *
+from .utils import *
\ No newline at end of file
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 6a47800a..d70620bc 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -44,7 +44,7 @@ def __init__(
         self.thresholds = thresholds
         self.multiclass = multiclass
 
-    def predict_values(self, x: sparse.csr_matrix) -> np.ndarray:
+    def predict_values(self, x: sparse.csr_matrix, *args, **kwargs) -> np.ndarray:
         """Calculate the decision values associated with x.
 
         Args:
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 777838c1..288e3061 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -62,6 +62,7 @@ def predict_values(
         self,
         x: sparse.csr_matrix,
         beam_width: int = 10,
+        *args, **kwargs,
     ) -> np.ndarray:
         """Calculate the probability estimates associated with x.
 
@@ -287,7 +288,7 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
         else:
             kmeans_algo = LloydKmeans
 
-        if True:
+        if False:
             metalabels = np.random.randint(0, K, label_representation.shape[0])
         else:
             kmeans = kmeans_algo(
diff --git a/linear_trainer.py b/linear_trainer.py
index 1105f62a..49cc6401 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -44,8 +44,17 @@ def linear_train(datasets, config):
     # detect task type
     multiclass = is_multiclass_dataset(datasets["train"], "y")
 
-    # train
-    # 1
+    do_grid = False
+    if do_grid:
+        search_space = [
+            {'max_features': i, 'K': j, 'min_df': k, 'c': l}
+            for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
+        ]
+        n_folds = 3
+        grid_search = linear.HyperparameterSearch(datasets, n_folds, search_space)
+        results = grid_search()
+        best_params = results[0]
+
     if config.linear_technique == "tree":
         if multiclass:
             raise ValueError("Tree model should only be used with multilabel datasets.")
diff --git a/run_exp.py b/run_exp.py
index be04591b..84f44328 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -7,31 +7,11 @@
 from tqdm import tqdm
 
 
-def run_ovr(dataset, options, *args, **kwargs):
-    training_start = time.time()
-    ovr_model = linear.train_1vsrest(
-        dataset["train"]["y"],
-        dataset["train"]["x"],
-        options=options
-        )
-    training_time = time.time() - training_start
-    return ovr_model, training_time
-
-def run_tree(dataset, options, K, dmax, *args, **kwargs):
-    training_start = time.time()
-    tree_model = linear.train_tree(
-        dataset["train"]["y"],
-        dataset["train"]["x"],
-        options=options,
-        K=K,
-        dmax=dmax
-        )
-    training_time = time.time() - training_start
-    return tree_model, training_time
-
-
 if __name__ == "__main__":
     import argparse
+    import logging
+
+    logging.basicConfig(level=logging.INFO)
     np.random.seed(20250820)
 
     parser = argparse.ArgumentParser(description="Parse command-line arguments.")
@@ -40,49 +20,29 @@ def run_tree(dataset, options, K, dmax, *args, **kwargs):
 
     dataset_ = args.dataset
 
-    # dataset = linear.load_dataset("svm", f"data/{dataset_}/train.svm")  # , f"data/{dataset}/test.svm"
-    data_source = [f'data/{dataset_}/train.svm', f'data/{dataset_}/test.svm']
-    search_space = {
-        'tfidf': {
-            'min_df': [1, 2],
-            'max_features': [10000, 320000],
-        },
-        'params': {
-            'C': [1, 2],
-            'K': [2, 100],
-        },
-    }
+    datasets = linear.load_dataset("svm", f"data/{dataset_}/train.svm")  # , f"data/{dataset}/test.svm"
+    # data_source = [f'data/{dataset_}/train.svm', f'data/{dataset_}/test.svm']
+    # search_space = {
+    #     'tfidf': {
+    #         'min_df': [1, 2],
+    #         'max_features': [10000, 320000],
+    #     },
+    #     'params': {
+    #         'C': [1, 2],
+    #         'K': [2, 100],
+    #     },
+    # }
     search_space = [
-        {'max_features': i, 'K': j} for i in [10000] for j in [2, 100]
+        {'max_features': i, 'K': j, 'min_df': k, 'c': l}
+        for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
     ]
-    print(search_space)
-    n_folds = 3
-    grid_search = grid.HyperparameterSearch(data_source, n_folds, search_space)
-    results = grid_search()
-    print(results)
-    # if num_classes != -1:
-    #     dataset["train"]["y"] = [[yij % num_classes for yij in yi] for yi in dataset["train"]["y"]]
-
-    # preprocessor = linear.Preprocessor()
-    # dataset = preprocessor.fit_transform(dataset)
 
-    # results = {
-    #     exp_name: {
-    #         t: 0 for t in exp_threads
-    #     }
-    #     for exp_name in exp_names
-    # }
+    for i in search_space:
+        print(i)
 
-    # for exp_name in exp_names:
-    #     for exp_thread in tqdm(exp_threads, leave=True, colour="blue", desc=exp_name):
-    #         if exp_name == 'Strategy B':
-    #             do_parallel = True
-    #             options = "-m 1"
-    #             num_threads = exp_thread
-    #         else:
-    #             do_parallel = False
-    #             options = f"-m {exp_thread}"
-    #             num_threads = -1
+    n_folds = 3
+    grid_search = grid.HyperparameterSearch(datasets, n_folds, search_space)
+    results = grid_search()
 
-    #         _, training_time = run_ovr(dataset, options, num_threads, do_parallel, use_dedicated_x)
-    #         results[exp_name][exp_thread] = training_time
+    for i in results:
+        print(i)

From 6e033e85b14a1e237ba3e7cb2d0e75551bdf517e Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 8 Jan 2026 06:23:46 +0000
Subject: [PATCH 05/23] reset linear_trainer.py to master

---
 linear_trainer.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/linear_trainer.py b/linear_trainer.py
index 49cc6401..b9133857 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -44,17 +44,7 @@ def linear_train(datasets, config):
     # detect task type
     multiclass = is_multiclass_dataset(datasets["train"], "y")
 
-    do_grid = False
-    if do_grid:
-        search_space = [
-            {'max_features': i, 'K': j, 'min_df': k, 'c': l}
-            for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
-        ]
-        n_folds = 3
-        grid_search = linear.HyperparameterSearch(datasets, n_folds, search_space)
-        results = grid_search()
-        best_params = results[0]
-
+    # train
     if config.linear_technique == "tree":
         if multiclass:
             raise ValueError("Tree model should only be used with multilabel datasets.")
@@ -84,7 +74,6 @@ def linear_train(datasets, config):
             multiclass=multiclass,
             options=config.liblinear_options,
         )
-    # 2
     return model
 
 

From 11b6a8369111c694b1a92735ddcbf705855a7921 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 8 Jan 2026 07:01:29 +0000
Subject: [PATCH 06/23] merge HyperparameterSearch into GridSearch

---
 grid.py | 190 +++++++++++++++++++++++---------------------------------
 1 file changed, 77 insertions(+), 113 deletions(-)

diff --git a/grid.py b/grid.py
index e8c50d65..bd54fde4 100644
--- a/grid.py
+++ b/grid.py
@@ -1,4 +1,3 @@
-from abc import abstractmethod
 from dataclasses import make_dataclass, field, fields, asdict
 from typing import Callable
 
@@ -15,7 +14,7 @@
 
 
 # suppress inevitable outputs from sparsekmeans and sklearn preprocessors
-class _silent_:
+class __silent__:
     def __init__(self):
         self.stderr = os.dup(2)
         self.devnull = os.open(os.devnull, os.O_WRONLY)
@@ -51,6 +50,7 @@ class GridParameter:
         ('s', int, field(default=1)),
         ('c', float, field(default=1)),
         ('B', int, field(default=-1)),
+        ('alpha', float, field(default=1))
         ]
     _predict_fields = [
         ('beam_width', int, field(default=10)),
@@ -98,17 +98,23 @@ class GridSearch:
     def __init__(
         self,
         datasets: dict[str, np.matrix],
-        n_folds: int,
-        search_space: list[dict],
-        metrics: list[str],
+        n_folds: int = 3,
+        metrics: list[str] = ["P@1", "P@3", "P@5"],
     ):
         self.datasets = datasets
-        self.search_space = [GridParameter(params) for params in search_space]
         self.n_folds = n_folds
         self.metrics = metrics
-        self.results = {
-            params: {metric: 0 for metric in self.metrics} for params in self.search_space
-            }
+
+        self._cached_tfidf_params = None
+        self._cached_tfidf_data = None
+        self._cached_tree_params = None
+        self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
+
+        self.num_instances = len(self.datasets["train"]["y"])
+
+    def init_tfidf_cache(self, datasets, params):
+        self._cached_tfidf_params = params.tfidf
+        self._cached_tfidf_data = datasets
 
     def sort_search_space(self):
         self.search_space.sort()
@@ -127,56 +133,7 @@ def build_fold_idx(self):
                 } for fold in range(self.n_folds)
             }
 
-    def get_fold_data(self, dataset, fold, params):
-        return (
-            dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
-            dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
-            )
-
-    def get_cv_score(self, y, x, model, params):
-        logging.info(f'Scoring params: {params.predict}')
-
-        batch_size = 256
-        num_instances = x.shape[0]
-        num_batches = math.ceil(num_instances / batch_size)
-
-        metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
-
-        for i in range(num_batches):
-            preds = model.predict_values(
-                x[i * batch_size : (i + 1) * batch_size],
-                **asdict(params.predict))
-            target = y[i * batch_size : (i + 1) * batch_size].toarray()
-            metrics.update(preds, target)
-
-        scores = metrics.compute()
-        logging.info(f'cv_score: {scores}\n')
-
-        return scores
-
-    def output(self):
-        return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
-
-    def __call__(self):
-        self.sort_search_space()
-        self.build_fold_idx()
-
-        for params in self.search_space:
-            dataset = self.get_dataset(params)
-            for fold in range(self.n_folds):
-                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
-                    self.get_fold_data(dataset, fold, params)
-
-                model = self.get_model(y_train_fold, x_train_fold, fold, params)
-                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
-
-                for metric in self.metrics:
-                    self.results[params][metric] += cv_score[metric] / self.n_folds
-
-        return self.output()
-
-    @abstractmethod
-    def get_dataset(self, params) -> dict[str, np.matrix]:
+    def get_dataset(self, params):
         """
         Get the dataset for the given params.
 
@@ -186,55 +143,43 @@ def get_dataset(self, params) -> dict[str, np.matrix]:
         Returns:
             dict[str, np.matrix]: The keys should be 'y' and 'x'.
         """
-        pass
-
-    @abstractmethod
-    def get_model(self, y, x, fold, params) -> linear.FlatModel | linear.TreeModel:
-        """
-        Get the model for the given params.
-
-        Args:
-            y (np.matrix): The labels of the training data.
-            x (np.matrix): The features of the training data.
-            params (GridParameter): The params to build the model.
-
-        Returns:
-            linear.FlatModel | linear.TreeModel: The model for the given params.
-        """
-        pass
-
-
-class HyperparameterSearch(GridSearch):
-    def __init__(self, datasets, n_folds, search_space, metrics=["P@1", "P@3", "P@5"]):
-        super().__init__(datasets, n_folds, search_space, metrics)
-        self._cached_tfidf_params = None
-        self._cached_tfidf_data = None
-        self._cached_tree_params = None
-        self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
-
-        self.num_instances = len(self.datasets["train"]["y"])
-
-    def get_dataset(self, params):
         tfidf_params = params.tfidf
         if tfidf_params != self._cached_tfidf_params:
             logging.info(f'Preprocessing tfidf: {tfidf_params}..')
             self._cached_tfidf_params = tfidf_params
-            with _silent_():
+            with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
                 self._cached_tfidf_data = preprocessor.fit_transform(self.datasets)['train']
 
         return self._cached_tfidf_data
 
-    def get_tree_root(self, y, x, params):
-        with _silent_():
+    def get_fold_data(self, dataset, fold):
+        return (
+            dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
+            dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
+            )
+
+    def get_tree_root(self, y, x, tree_params):
+        with __silent__():
             label_representation = (y.T * x).tocsr()
             label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
-            root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **params)
+            root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params))
             root.is_root = True
 
         return root
 
     def get_model(self, y, x, fold, params):
+        """
+        Get the model for the given params.
+
+        Args:
+            y (np.matrix): The labels of the training data.
+            x (np.matrix): The features of the training data.
+            params (GridParameter): The params to build the model.
+
+        Returns:
+            linear.FlatModel | linear.TreeModel: The model for the given params.
+        """
         logging.info(f'\nRunning fold {fold}\nparams: {params}')
 
         tree_params = params.tree
@@ -244,35 +189,54 @@ def get_model(self, y, x, fold, params):
 
         if self._cached_tree_roots[fold] is None:
             logging.info(f'Preprocessing tree: {tree_params} on fold {fold}..')
-            self._cached_tree_roots[fold] = self.get_tree_root(y, x, asdict(tree_params))
+            self._cached_tree_roots[fold] = self.get_tree_root(y, x, tree_params)
 
         model = linear.train_tree(y, x, root=self._cached_tree_roots[fold], options=params.linear_options)
 
         return model
 
+    def get_cv_score(self, y, x, model, params):
+        logging.info(f'Scoring params: {params.predict}')
 
-# class ProbEstimatiteSearch(GridSearch):
-#     def __init__(self, datasets, n_folds, search_space, config=None):
-#         super().__init__(datasets, n_folds, search_space, config)
+        batch_size = 256
+        num_instances = x.shape[0]
+        num_batches = math.ceil(num_instances / batch_size)
 
-#     def build_data(self):
-#         data = {'unique': {}}
-#         unique_data = None  # from libmultilabel preprocessing
-#         for i in range(self.n_folds):
-#             train_idx, valid_idx = None, None
-#             y_train_fold, x_train_fold = unique_data[train_idx]
-#             y_valid_fold, x_valid_fold = unique_data[valid_idx]
-#             data['unique'][i] = unique_data
+        metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
 
-#         return data
+        for i in range(num_batches):
+            preds = model.predict_values(
+                x[i * batch_size : (i + 1) * batch_size],
+                **asdict(params.predict))
+            target = y[i * batch_size : (i + 1) * batch_size].toarray()
+            metrics.update(preds, target)
 
-#     def get_fold_data(self, data, i, params):
-#         return data['unique'][i]
+        scores = metrics.compute()
+        logging.info(f'cv_score: {scores}\n')
 
-#     def get_model(self, y_train_fold, x_train_fold, params):
-#         model = None  # train normally with fold data
-#         return model
+        return scores
+
+    def output(self):
+        return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
+
+    def __call__(self, search_space):
+        self.search_space = [GridParameter(params) for params in search_space]
+        self.sort_search_space()
+        self.build_fold_idx()
 
-#     def get_cv_score(self, y_valid_fold, x_valid_fold, model, params):
-#         score = None  # calculate the metric with the model and the hyperparameter A
-#         return score
+        self.results = {
+            params: {metric: 0 for metric in self.metrics} for params in self.search_space
+            }
+        for params in self.search_space:
+            dataset = self.get_dataset(params)
+            for fold in range(self.n_folds):
+                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
+                    self.get_fold_data(dataset, fold, params)
+
+                model = self.get_model(y_train_fold, x_train_fold, fold, params)
+                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
+
+                for metric in self.metrics:
+                    self.results[params][metric] += cv_score[metric] / self.n_folds
+
+        return self.output()

From 51f69caa546fbb04cd80fdef0f3b40e0eed0d1b3 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 8 Jan 2026 07:01:59 +0000
Subject: [PATCH 07/23] update examples with pruning and probability estimation

---
 run_exp.py | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/run_exp.py b/run_exp.py
index 84f44328..cdb6a2a3 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -1,10 +1,16 @@
 import libmultilabel.linear as linear
 import grid as grid
 import numpy as np
+from dataclasses import asdict
 
 import time
 import json
 from tqdm import tqdm
+import itertools
+
+
+def prune_model(*args, **kwargs):
+    pass
 
 
 if __name__ == "__main__":
@@ -32,17 +38,49 @@
     #         'K': [2, 100],
     #     },
     # }
+    n_folds = 3
+    retrain = True
+    linear_technique = 'tree'
+    search_space_dict = {
+        'max_features': [10000, 20000],
+        'K': [10, 100],
+        'min_df': [1, 2],
+        'c': [0.1, 0.2],
+    }
+    param_names = search_space_dict.keys()
     search_space = [
-        {'max_features': i, 'K': j, 'min_df': k, 'c': l}
-        for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
+        dict(zip(param_names, param_values))
+        for param_values in itertools.product(*search_space_dict.values())
     ]
+    # search_space = [dict()]  # all default values
+
+    # search_space = [
+    #     {'max_features': i, 'K': j, 'min_df': k, 'c': l}
+    #     for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
+    # ]
 
     for i in search_space:
         print(i)
 
-    n_folds = 3
-    grid_search = grid.HyperparameterSearch(datasets, n_folds, search_space)
-    results = grid_search()
+    search = linear.GridSearch(datasets, n_folds)
+    best_params = search(['hyper'])[0]
 
-    for i in results:
-        print(i)
+    if best_params.tfidf == search._cached_tfidf_params:
+        datasets = search._cached_tfidf_data
+    else:
+        preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
+        datasets = preprocessor.fit_transform(datasets)
+        search.init_tfidf_cache(datasets, best_params)
+
+    best_alpha = search(['alpha'])[0]
+    best_A = search(['A'])[0]
+    # TODO (the fields are frozen)
+    best_params.linear.alpha = best_alpha
+    best_params.linear.A = best_A
+
+    if retrain:
+        model = linear.LINEAR_TECHNIQUES[linear_technique](
+                    datasets["train"]["y"],
+                    datasets["train"]["x"],
+                    **asdict(best_params.linear),
+                )

From b15878af70909dc47022803922e66533ddc13572 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Mon, 12 Jan 2026 09:30:44 +0000
Subject: [PATCH 08/23] update cache logic for search_space

---
 grid.py | 99 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 56 insertions(+), 43 deletions(-)

diff --git a/grid.py b/grid.py
index bd54fde4..e667ec3a 100644
--- a/grid.py
+++ b/grid.py
@@ -50,26 +50,28 @@ class GridParameter:
         ('s', int, field(default=1)),
         ('c', float, field(default=1)),
         ('B', int, field(default=-1)),
-        ('alpha', float, field(default=1))
+        ('alpha', float, field(default=1)),
         ]
     _predict_fields = [
         ('beam_width', int, field(default=10)),
         ('A', int, field(default=1)),
         ]
 
-    param_types = {
+    _param_types = {
         'tfidf': make_dataclass('TfidfParams', _tfidf_fields, frozen=True, order=True),
         'tree': make_dataclass('TreeParams', _tree_fields, frozen=True, order=True),
         'linear': make_dataclass('LinearParams', _linear_fields, frozen=True, order=True),
         'predict': make_dataclass('PredictParams', _predict_fields, frozen=True, order=True),
     }
 
-    def __init__(self, params: dict):
+    def __init__(self, params: dict, fold: int = -1):
         self.params = params
-        for param_type, class_name in self.param_types.items():
+        for param_type, class_name in self._param_types.items():
             field_names = {f.name for f in fields(class_name)}
             _params = {k: v for k, v in self.params.items() if k in field_names}
             setattr(self, param_type, class_name(**_params))
+        self.param_types = dict(self._param_types, fold=-1)
+        self.fold = fold
 
     @property
     def linear_options(self):
@@ -105,10 +107,13 @@ def __init__(
         self.n_folds = n_folds
         self.metrics = metrics
 
-        self._cached_tfidf_params = None
+        self._cached_params = GridParameter()
+        for param_type in self._cached_params.param_types:
+            self._cached_params[param_type] = None
         self._cached_tfidf_data = None
-        self._cached_tree_params = None
-        self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
+        self._cached_tree_root = None
+        self._cached_fold_data = None
+        self._cached_model = None
 
         self.num_instances = len(self.datasets["train"]["y"])
 
@@ -144,31 +149,42 @@ def get_dataset(self, params):
             dict[str, np.matrix]: The keys should be 'y' and 'x'.
         """
         tfidf_params = params.tfidf
-        if tfidf_params != self._cached_tfidf_params:
+        if tfidf_params != self._cached_params.tfidf:
             logging.info(f'Preprocessing tfidf: {tfidf_params}..')
-            self._cached_tfidf_params = tfidf_params
             with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
+                self._cached_params.tfidf = tfidf_params
                 self._cached_tfidf_data = preprocessor.fit_transform(self.datasets)['train']
 
         return self._cached_tfidf_data
 
-    def get_fold_data(self, dataset, fold):
-        return (
-            dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
-            dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
-            )
+    def get_fold_data(self, dataset, params):
+        fold = params.fold
+        if params.tfidf != self._cached_params.tfidf or fold != self._cached_params.fold:
+            logging.info(f'Preprocessing fold: {fold} for tfidf: {params.tfidf}..')
+            self._cached_params.fold = fold
+            self._cached_fold_data = (
+                dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
+                dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
+                )
 
-    def get_tree_root(self, y, x, tree_params):
-        with __silent__():
-            label_representation = (y.T * x).tocsr()
-            label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
-            root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params))
-            root.is_root = True
+        return self._cached_fold_data
 
-        return root
+    def get_tree_root(self, y, x, params):
+        tree_params = params.tree
+        if params.tfidf != self._cached_params.tfidf or tree_params != self._cached_params.tree or \
+            params.fold != self._cached_params.fold:
+            logging.info(f'Preprocessing tree: {tree_params} on fold {params.fold} for tfidf: {params.tfidf}..')
+            with __silent__():
+                label_representation = (y.T * x).tocsr()
+                label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
+                self._cached_params.tree = tree_params
+                self._cached_tree_root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params))
+                self._cached_tree_root.is_root = True
+
+        return self._cached_tree_root
 
-    def get_model(self, y, x, fold, params):
+    def get_model(self, y, x, params):
         """
         Get the model for the given params.
 
@@ -180,20 +196,17 @@ def get_model(self, y, x, fold, params):
         Returns:
             linear.FlatModel | linear.TreeModel: The model for the given params.
         """
-        logging.info(f'\nRunning fold {fold}\nparams: {params}')
-
-        tree_params = params.tree
-        if tree_params != self._cached_tree_params:
-            self._cached_tree_params = tree_params
-            self._cached_tree_roots = {fold: None for fold in range(self.n_folds)}
-
-        if self._cached_tree_roots[fold] is None:
-            logging.info(f'Preprocessing tree: {tree_params} on fold {fold}..')
-            self._cached_tree_roots[fold] = self.get_tree_root(y, x, tree_params)
+        logging.info(f'\nRunning fold {params.fold}\nparams: {params}')
 
-        model = linear.train_tree(y, x, root=self._cached_tree_roots[fold], options=params.linear_options)
+        linear_params = params.linear
+        if params.tfidf != self._cached_params.tfidf or params.tree != self._cached_params.tree or \
+            linear_params != self._cached_params.linear or params.fold != self._cached_params.fold:
+            logging.info(f'Preprocessing linear: {linear_params}, tree: {params.tree} on fold {params.fold} for tfidf: {params.tfidf}..')
+            root = self.get_tree_root(y, x, params)
+            self._cached_params.linear = linear_params
+            self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
 
-        return model
+        return self._cached_model
 
     def get_cv_score(self, y, x, model, params):
         logging.info(f'Scoring params: {params.predict}')
@@ -220,23 +233,23 @@ def output(self):
         return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
 
     def __call__(self, search_space):
-        self.search_space = [GridParameter(params) for params in search_space]
+        self.search_space = [GridParameter(params, fold) for params in search_space for fold in range(self.n_folds)]
         self.sort_search_space()
         self.build_fold_idx()
 
         self.results = {
-            params: {metric: 0 for metric in self.metrics} for params in self.search_space
+            GridParameter(params): {metric: 0 for metric in self.metrics} for params in search_space
             }
         for params in self.search_space:
             dataset = self.get_dataset(params)
-            for fold in range(self.n_folds):
-                y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
-                    self.get_fold_data(dataset, fold, params)
+            y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
+                self.get_fold_data(dataset, params)
 
-                model = self.get_model(y_train_fold, x_train_fold, fold, params)
-                cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
+            model = self.get_model(y_train_fold, x_train_fold, params)
+            cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
 
-                for metric in self.metrics:
-                    self.results[params][metric] += cv_score[metric] / self.n_folds
+            params.fold = -1
+            for metric in self.metrics:
+                self.results[params][metric] += cv_score[metric] / self.n_folds
 
         return self.output()

From 7300a5fdd16fc6438c8c62674006cf94dfa82e5a Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Mon, 19 Jan 2026 18:07:44 +0000
Subject: [PATCH 09/23] improve sorting logic

---
 grid.py                          | 28 ++++++++++++++---------
 libmultilabel/linear/__init__.py |  2 +-
 run_exp.py                       | 38 ++++++++++++++++++--------------
 3 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/grid.py b/grid.py
index e667ec3a..c4b89f50 100644
--- a/grid.py
+++ b/grid.py
@@ -50,28 +50,34 @@ class GridParameter:
         ('s', int, field(default=1)),
         ('c', float, field(default=1)),
         ('B', int, field(default=-1)),
-        ('alpha', float, field(default=1)),
+        # ('alpha', float, field(default=1)),
         ]
     _predict_fields = [
         ('beam_width', int, field(default=10)),
         ('A', int, field(default=1)),
         ]
 
-    _param_types = {
+    param_types = {
         'tfidf': make_dataclass('TfidfParams', _tfidf_fields, frozen=True, order=True),
+        'fold': lambda fold: fold,
         'tree': make_dataclass('TreeParams', _tree_fields, frozen=True, order=True),
         'linear': make_dataclass('LinearParams', _linear_fields, frozen=True, order=True),
         'predict': make_dataclass('PredictParams', _predict_fields, frozen=True, order=True),
     }
 
-    def __init__(self, params: dict, fold: int = -1):
-        self.params = params
-        for param_type, class_name in self._param_types.items():
-            field_names = {f.name for f in fields(class_name)}
-            _params = {k: v for k, v in self.params.items() if k in field_names}
-            setattr(self, param_type, class_name(**_params))
-        self.param_types = dict(self._param_types, fold=-1)
-        self.fold = fold
+    def __init__(self, params: dict | None = None, fold: int = -1):
+        self.params = params or {}
+
+        params_set = set(self.params)
+        for param_type, class_name in self.param_types.items():
+            if param_type == 'fold':
+                filtered_params = {'fold': fold}
+            else:
+                field_names = {f.name for f in fields(class_name)}
+                filtered_keys = params_set & field_names
+                params_set -= field_names
+                filtered_params = {k: self.params[k] for k in filtered_keys}
+            setattr(self, param_type, class_name(**filtered_params))
 
     @property
     def linear_options(self):
@@ -109,7 +115,7 @@ def __init__(
 
         self._cached_params = GridParameter()
         for param_type in self._cached_params.param_types:
-            self._cached_params[param_type] = None
+            setattr(self._cached_params, param_type, None)
         self._cached_tfidf_data = None
         self._cached_tree_root = None
         self._cached_fold_data = None
diff --git a/libmultilabel/linear/__init__.py b/libmultilabel/linear/__init__.py
index efe24120..7cdf30bb 100644
--- a/libmultilabel/linear/__init__.py
+++ b/libmultilabel/linear/__init__.py
@@ -3,4 +3,4 @@
 from .metrics import *
 from .preprocessor import *
 from .tree import *
-from .utils import *
\ No newline at end of file
+from .utils import *
diff --git a/run_exp.py b/run_exp.py
index cdb6a2a3..b359b8a7 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -1,5 +1,6 @@
 import libmultilabel.linear as linear
-import grid as grid
+import grid
+
 import numpy as np
 from dataclasses import asdict
 
@@ -39,12 +40,13 @@ def prune_model(*args, **kwargs):
     #     },
     # }
     n_folds = 3
-    retrain = True
+    retrain = False
     linear_technique = 'tree'
     search_space_dict = {
         'max_features': [10000, 20000],
         'K': [10, 100],
         'min_df': [1, 2],
+        'A': [2, 3],
         'c': [0.1, 0.2],
     }
     param_names = search_space_dict.keys()
@@ -62,21 +64,23 @@ def prune_model(*args, **kwargs):
     for i in search_space:
         print(i)
 
-    search = linear.GridSearch(datasets, n_folds)
-    best_params = search(['hyper'])[0]
-
-    if best_params.tfidf == search._cached_tfidf_params:
-        datasets = search._cached_tfidf_data
-    else:
-        preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
-        datasets = preprocessor.fit_transform(datasets)
-        search.init_tfidf_cache(datasets, best_params)
-
-    best_alpha = search(['alpha'])[0]
-    best_A = search(['A'])[0]
-    # TODO (the fields are frozen)
-    best_params.linear.alpha = best_alpha
-    best_params.linear.A = best_A
+    search = grid.GridSearch(datasets, n_folds)
+    best_params = search(search_space)
+    print(best_params)
+    breakpoint()
+
+    # if best_params.tfidf == search._cached_tfidf_params:
+    #     datasets = search._cached_tfidf_data
+    # else:
+    #     preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
+    #     datasets = preprocessor.fit_transform(datasets)
+    #     search.init_tfidf_cache(datasets, best_params)
+
+    # best_alpha = search(['alpha'])[0]
+    # best_A = search(['A'])[0]
+    # # TODO (the fields are frozen)
+    # best_params.linear.alpha = best_alpha
+    # best_params.linear.A = best_A
 
     if retrain:
         model = linear.LINEAR_TECHNIQUES[linear_technique](

From 9412d8cc8cfc95114d7a812894708928b2867279 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Mon, 19 Jan 2026 18:08:07 +0000
Subject: [PATCH 10/23] update the parameter A

---
 libmultilabel/linear/linear.py |  3 +--
 libmultilabel/linear/tree.py   | 22 +++++++++++++---------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index d70620bc..04d25a21 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -44,7 +44,7 @@ def __init__(
         self.thresholds = thresholds
         self.multiclass = multiclass
 
-    def predict_values(self, x: sparse.csr_matrix, *args, **kwargs) -> np.ndarray:
+    def predict_values(self, x: sparse.csr_matrix) -> np.ndarray:
         """Calculate the decision values associated with x.
 
         Args:
@@ -198,7 +198,6 @@ def train_1vsrest(
     multiclass: bool = False,
     options: str = "",
     verbose: bool = True,
-    *args, **kwargs,
 ) -> FlatModel:
     """Train a linear model parallel on labels for multi-label data using a one-vs-rest strategy.
 
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 288e3061..96681cbd 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import scipy.sparse as sparse
+from scipy.special import log_expit
 from sparsekmeans import LloydKmeans, ElkanKmeans
 import sklearn.preprocessing
 from tqdm import tqdm
@@ -62,7 +63,7 @@ def predict_values(
         self,
         x: sparse.csr_matrix,
         beam_width: int = 10,
-        *args, **kwargs,
+        A: int = 3,
     ) -> np.ndarray:
         """Calculate the probability estimates associated with x.
 
@@ -73,6 +74,7 @@ def predict_values(
         Returns:
             np.ndarray: A matrix with dimension number of instances * number of classes.
         """
+        sigmoid_A = lambda x: log_expit(A * x)
         if beam_width >= len(self.root.children):
             # Beam_width is sufficiently large; pruning not applied.
             # Calculates decision values for all nodes.
@@ -82,8 +84,8 @@ def predict_values(
             if not self._model_separated:
                 self._separate_model_for_pruning_tree()
                 self._model_separated = True
-            all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels)
-        return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])])
+            all_preds = self._prune_tree_and_predict_values(x, beam_width, sigmoid_A) # number of instances * (number of labels + total number of metalabels)
+        return np.vstack([self._beam_search(all_preds[i], beam_width, sigmoid_A) for i in range(all_preds.shape[0])])
 
     def _separate_model_for_pruning_tree(self):
         """
@@ -114,7 +116,7 @@ def _separate_model_for_pruning_tree(self):
             )
             self.subtree_models.append(subtree_flatmodel)
         
-    def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray:
+    def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, sigmoid_A: Callable) -> np.ndarray:
         """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees.
 
         Only subtrees corresponding to the top beam_width candidates from the root are evaluated,
@@ -133,7 +135,8 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int)
 
         # Calculate root decision values and scores
         root_preds = linear.predict_values(self.root_model, x)
-        children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
+        # children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
+        children_scores = 0.0 + self.sigmoid_A(root_preds)
 
         slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
         all_preds[slice] = root_preds
@@ -160,7 +163,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int)
 
         return all_preds
 
-    def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray:
+    def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: Callable) -> np.ndarray:
         """Predict with beam search using cached probability estimates for a single instance.
 
         Args:
@@ -183,7 +186,8 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
                     continue
                 slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
                 pred = instance_preds[slice]
-                children_score = score - np.square(np.maximum(0, 1 - pred))
+                # children_score = score - np.square(np.maximum(0, 1 - pred))
+                children_score = score + self.sigmoid_A(pred)
                 next_level.extend(zip(node.children, children_score.tolist()))
 
             cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width]
@@ -194,7 +198,8 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra
         for node, score in cur_level:
             slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
             pred = instance_preds[slice]
-            scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
+            # scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
+            scores[node.label_map] = np.exp(score + self.sigmoid_A(pred))
         return scores
 
 
@@ -206,7 +211,6 @@ def train_tree(
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
     root: Node = None,
-    *args, **kwargs,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.

From ba3d1a3ce5b757b79dcc801c397a1f71fad04e36 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Mon, 19 Jan 2026 18:11:18 +0000
Subject: [PATCH 11/23] fix bug for the parameter A (use sigmoid_A argument, not self.sigmoid_A)

---
 libmultilabel/linear/tree.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 96681cbd..87e7242e 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -136,7 +136,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int,
         # Calculate root decision values and scores
         root_preds = linear.predict_values(self.root_model, x)
         # children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
-        children_scores = 0.0 + self.sigmoid_A(root_preds)
+        children_scores = 0.0 + sigmoid_A(root_preds)
 
         slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
         all_preds[slice] = root_preds
@@ -187,7 +187,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: C
                 slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
                 pred = instance_preds[slice]
                 # children_score = score - np.square(np.maximum(0, 1 - pred))
-                children_score = score + self.sigmoid_A(pred)
+                children_score = score + sigmoid_A(pred)
                 next_level.extend(zip(node.children, children_score.tolist()))
 
             cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width]
@@ -199,7 +199,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: C
             slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
             pred = instance_preds[slice]
             # scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
-            scores[node.label_map] = np.exp(score + self.sigmoid_A(pred))
+            scores[node.label_map] = np.exp(score + sigmoid_A(pred))
         return scores
 
 

From 7c3abb8c3c87068ae73d4c2e1846fa46eb139a9d Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 22 Jan 2026 15:48:24 +0000
Subject: [PATCH 12/23] use self.no_cache to control the cache

---
 grid.py | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/grid.py b/grid.py
index c4b89f50..937bb68c 100644
--- a/grid.py
+++ b/grid.py
@@ -120,12 +120,13 @@ def __init__(
         self._cached_tree_root = None
         self._cached_fold_data = None
         self._cached_model = None
+        self.no_cache = True
 
         self.num_instances = len(self.datasets["train"]["y"])
 
-    def init_tfidf_cache(self, datasets, params):
-        self._cached_tfidf_params = params.tfidf
-        self._cached_tfidf_data = datasets
+    # def init_tfidf_cache(self, datasets, params):
+    #     self._cached_tfidf_params = params.tfidf
+    #     self._cached_tfidf_data = datasets
 
     def sort_search_space(self):
         self.search_space.sort()
@@ -155,7 +156,8 @@ def get_dataset(self, params):
             dict[str, np.matrix]: The keys should be 'y' and 'x'.
         """
         tfidf_params = params.tfidf
-        if tfidf_params != self._cached_params.tfidf:
+        self.no_cache = (tfidf_params != self._cached_params.tfidf)
+        if self.no_cache:
             logging.info(f'Preprocessing tfidf: {tfidf_params}..')
             with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
@@ -166,7 +168,8 @@ def get_dataset(self, params):
 
     def get_fold_data(self, dataset, params):
         fold = params.fold
-        if params.tfidf != self._cached_params.tfidf or fold != self._cached_params.fold:
+        self.no_cache |= (fold != self._cached_params.fold)
+        if self.no_cache:
             logging.info(f'Preprocessing fold: {fold} for tfidf: {params.tfidf}..')
             self._cached_params.fold = fold
             self._cached_fold_data = (
@@ -178,8 +181,8 @@ def get_fold_data(self, dataset, params):
 
     def get_tree_root(self, y, x, params):
         tree_params = params.tree
-        if params.tfidf != self._cached_params.tfidf or tree_params != self._cached_params.tree or \
-            params.fold != self._cached_params.fold:
+        self.no_cache |= (tree_params != self._cached_params.tree)
+        if self.no_cache:
             logging.info(f'Preprocessing tree: {tree_params} on fold {params.fold} for tfidf: {params.tfidf}..')
             with __silent__():
                 label_representation = (y.T * x).tocsr()
@@ -204,24 +207,26 @@ def get_model(self, y, x, params):
         """
         logging.info(f'\nRunning fold {params.fold}\nparams: {params}')
 
+        root = self.get_tree_root(y, x, params)
+
         linear_params = params.linear
-        if params.tfidf != self._cached_params.tfidf or params.tree != self._cached_params.tree or \
-            linear_params != self._cached_params.linear or params.fold != self._cached_params.fold:
+        self.no_cache |= (linear_params != self._cached_params.linear)
+        if self.no_cache:
             logging.info(f'Preprocessing linear: {linear_params}, tree: {params.tree} on fold {params.fold} for tfidf: {params.tfidf}..')
-            root = self.get_tree_root(y, x, params)
-            self._cached_params.linear = linear_params
-            self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
+            with __silent__():
+                self._cached_params.linear = linear_params
+                self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
 
         return self._cached_model
 
-    def get_cv_score(self, y, x, model, params):
+    def get_cv_score(self, y, x, model, params, metrics):
         logging.info(f'Scoring params: {params.predict}')
 
         batch_size = 256
         num_instances = x.shape[0]
         num_batches = math.ceil(num_instances / batch_size)
 
-        metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
+        # metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
 
         for i in range(num_batches):
             preds = model.predict_values(
@@ -230,6 +235,7 @@ def get_cv_score(self, y, x, model, params):
             target = y[i * batch_size : (i + 1) * batch_size].toarray()
             metrics.update(preds, target)
 
+        # return metrics
         scores = metrics.compute()
         logging.info(f'cv_score: {scores}\n')
 
@@ -244,18 +250,20 @@ def __call__(self, search_space):
         self.build_fold_idx()
 
         self.results = {
-            GridParameter(params): {metric: 0 for metric in self.metrics} for params in search_space
+            GridParameter(params): linear.get_metrics(self.metrics, num_classes=y.shape[1]) for params in search_space
             }
         for params in self.search_space:
+            # for fold in self.n_folds:
             dataset = self.get_dataset(params)
             y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
                 self.get_fold_data(dataset, params)
 
             model = self.get_model(y_train_fold, x_train_fold, params)
-            cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params)
-
             params.fold = -1
-            for metric in self.metrics:
-                self.results[params][metric] += cv_score[metric] / self.n_folds
+            cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params, self.results[params])
+
+            # params.fold = -1
+            # for metric in self.metrics:
+            #     # self.results[params][metric] += cv_score[metric] / self.n_folds
 
         return self.output()

From f4a44d8e08fb834b8acc474dd751f2e81e25e317 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Thu, 22 Jan 2026 17:54:59 +0000
Subject: [PATCH 13/23] rewrite fold and tfidf for correctness

---
 grid.py    | 207 ++++++++++++++++++++++++++---------------------------
 run_exp.py |  55 +++-----------
 2 files changed, 113 insertions(+), 149 deletions(-)

diff --git a/grid.py b/grid.py
index 937bb68c..c25d7940 100644
--- a/grid.py
+++ b/grid.py
@@ -3,6 +3,7 @@
 
 import os
 import sys
+import itertools
 import logging
 
 import libmultilabel.linear as linear
@@ -22,7 +23,7 @@ def __init__(self):
     def __enter__(self):
         os.dup2(self.devnull, 2)
         self.stdout = sys.stdout
-        sys.stdout = open(os.devnull, 'w')
+        sys.stdout = open(os.devnull, "w")
 
     def __exit__(self, type, value, traceback):
         os.dup2(self.stderr, 2)
@@ -35,34 +36,37 @@ def __exit__(self, type, value, traceback):
 class GridParameter:
 
     _tfidf_fields = [
-        ('ngram_range', tuple[int, int], field(default=(1, 1))),
-        ('max_features', int, field(default=None)),
-        ('min_df', float | int, field(default=1)),
-        ('stop_words', str | list, field(default=None)),
-        ('strip_accents', str | Callable, field(default=None)),
-        ('tokenizer', Callable, field(default=None)),
+        ("ngram_range", tuple[int, int], field(default=(1, 1))),
+        ("max_features", int, field(default=None)),
+        ("min_df", float | int, field(default=1)),
+        ("stop_words", str | list, field(default=None)),
+        ("strip_accents", str | Callable, field(default=None)),
+        ("tokenizer", Callable, field(default=None)),
         ]
     _tree_fields = [
-        ('dmax', int, field(default=10)),
-        ('K', int, field(default=8)),
+        ("dmax", int, field(default=10)),
+        ("K", int, field(default=8)),
         ]
     _linear_fields = [
-        ('s', int, field(default=1)),
-        ('c', float, field(default=1)),
-        ('B', int, field(default=-1)),
-        # ('alpha', float, field(default=1)),
+        ("s", int, field(default=1)),
+        ("c", float, field(default=1)),
+        ("B", int, field(default=-1)),
+        # ("alpha", float, field(default=1)),
         ]
     _predict_fields = [
-        ('beam_width', int, field(default=10)),
-        ('A', int, field(default=1)),
+        ("beam_width", int, field(default=10)),
+        ("A", int, field(default=1)),
         ]
 
     param_types = {
-        'tfidf': make_dataclass('TfidfParams', _tfidf_fields, frozen=True, order=True),
-        'fold': lambda fold: fold,
-        'tree': make_dataclass('TreeParams', _tree_fields, frozen=True, order=True),
-        'linear': make_dataclass('LinearParams', _linear_fields, frozen=True, order=True),
-        'predict': make_dataclass('PredictParams', _predict_fields, frozen=True, order=True),
+        "tfidf": make_dataclass("TfidfParams", _tfidf_fields, frozen=True, order=True),
+        "tree": make_dataclass("TreeParams", _tree_fields, frozen=True, order=True),
+        "linear": make_dataclass("LinearParams", _linear_fields, frozen=True, order=True),
+        "predict": make_dataclass("PredictParams", _predict_fields, frozen=True, order=True),
+    }
+    _param_field_names = {
+        param_type: {f.name for f in fields(class_name)}
+        for param_type, class_name in param_types.items()
     }
 
     def __init__(self, params: dict | None = None, fold: int = -1):
@@ -70,18 +74,16 @@ def __init__(self, params: dict | None = None, fold: int = -1):
 
         params_set = set(self.params)
         for param_type, class_name in self.param_types.items():
-            if param_type == 'fold':
-                filtered_params = {'fold': fold}
-            else:
-                field_names = {f.name for f in fields(class_name)}
-                filtered_keys = params_set & field_names
-                params_set -= field_names
-                filtered_params = {k: self.params[k] for k in filtered_keys}
+            field_names = self._param_field_names[param_type]
+            filtered_keys = params_set & field_names
+            params_set -= field_names
+
+            filtered_params = {k: self.params[k] for k in filtered_keys}
             setattr(self, param_type, class_name(**filtered_params))
 
     @property
     def linear_options(self):
-        options = ''
+        options = ""
         for f in fields(self.linear):
             options += f" -{f.name} {getattr(self.linear, f.name)}"
         return options.strip()
@@ -107,16 +109,17 @@ def __init__(
         self,
         datasets: dict[str, np.matrix],
         n_folds: int = 3,
-        metrics: list[str] = ["P@1", "P@3", "P@5"],
+        monitor_metrics: list[str] = ["P@1", "P@3", "P@5"],
     ):
         self.datasets = datasets
         self.n_folds = n_folds
-        self.metrics = metrics
+        self.monitor_metrics = monitor_metrics
+        self.param_metrics = dict()
 
         self._cached_params = GridParameter()
         for param_type in self._cached_params.param_types:
             setattr(self._cached_params, param_type, None)
-        self._cached_tfidf_data = None
+        self._cached_transformed_dataset = None
         self._cached_tree_root = None
         self._cached_fold_data = None
         self._cached_model = None
@@ -124,66 +127,53 @@ def __init__(
 
         self.num_instances = len(self.datasets["train"]["y"])
 
-    # def init_tfidf_cache(self, datasets, params):
-    #     self._cached_tfidf_params = params.tfidf
-    #     self._cached_tfidf_data = datasets
-
-    def sort_search_space(self):
-        self.search_space.sort()
-
-    def build_fold_idx(self):
-        permutation = np.random.permutation(self.num_instances)
-        index_per_fold = [
-            permutation[int(fold * self.num_instances / self.n_folds):int((fold+1) * self.num_instances / self.n_folds)]
-            for fold in range(self.n_folds)
-        ]
-
-        self.fold_idx = {
-            fold: {
-                'train': np.concatenate(index_per_fold[:fold] + index_per_fold[fold+1:]),
-                'valid': index_per_fold[fold]
-                } for fold in range(self.n_folds)
+    def get_fold_dataset(self, train_idx, valid_idx):
+        def take(data, idx):
+            if isinstance(data, list):
+                return [data[i] for i in idx]
+            else:
+                return data[idx]
+
+        return {
+            "data_format": self.datasets["data_format"],
+            "train": {
+                "y": take(self.datasets["train"]["y"], train_idx),
+                "x": take(self.datasets["train"]["x"], train_idx)
+            },
+            "test": {
+                "y": take(self.datasets["train"]["y"], valid_idx),
+                "x": take(self.datasets["train"]["x"], valid_idx)
             }
+        }
 
-    def get_dataset(self, params):
+    def get_transformed_dataset(self, dataset, params):
         """
-        Get the dataset for the given params.
+        Get the dataset for the given tf-idf params.
 
         Args:
             params (GridParameter): The params to build the dataset.
 
         Returns:
-            dict[str, np.matrix]: The keys should be 'y' and 'x'.
+            dict[str, np.matrix]: The keys should be "y" and "x".
         """
         tfidf_params = params.tfidf
         self.no_cache = (tfidf_params != self._cached_params.tfidf)
         if self.no_cache:
-            logging.info(f'Preprocessing tfidf: {tfidf_params}..')
+            logging.info(f"Preprocessing tfidf: {tfidf_params}")
+            if self.datasets["data_format"] not in {"txt", "dataframe"}:
+                logging.info('The TF-IDF parameters are only meaningful for the “txt” and “dataframe” data formats.')
             with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
                 self._cached_params.tfidf = tfidf_params
-                self._cached_tfidf_data = preprocessor.fit_transform(self.datasets)['train']
+                self._cached_transformed_dataset = preprocessor.fit_transform(dataset)
 
-        return self._cached_tfidf_data
-
-    def get_fold_data(self, dataset, params):
-        fold = params.fold
-        self.no_cache |= (fold != self._cached_params.fold)
-        if self.no_cache:
-            logging.info(f'Preprocessing fold: {fold} for tfidf: {params.tfidf}..')
-            self._cached_params.fold = fold
-            self._cached_fold_data = (
-                dataset["y"][self.fold_idx[fold]['train']], dataset["x"][self.fold_idx[fold]['train']],
-                dataset["y"][self.fold_idx[fold]['valid']], dataset["x"][self.fold_idx[fold]['valid']]
-                )
-
-        return self._cached_fold_data
+        return self._cached_transformed_dataset
 
     def get_tree_root(self, y, x, params):
         tree_params = params.tree
         self.no_cache |= (tree_params != self._cached_params.tree)
         if self.no_cache:
-            logging.info(f'Preprocessing tree: {tree_params} on fold {params.fold} for tfidf: {params.tfidf}..')
+            logging.info(f"Preprocessing tree: {tree_params}")
             with __silent__():
                 label_representation = (y.T * x).tocsr()
                 label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
@@ -205,65 +195,72 @@ def get_model(self, y, x, params):
         Returns:
             linear.FlatModel | linear.TreeModel: The model for the given params.
         """
-        logging.info(f'\nRunning fold {params.fold}\nparams: {params}')
-
         root = self.get_tree_root(y, x, params)
 
         linear_params = params.linear
         self.no_cache |= (linear_params != self._cached_params.linear)
         if self.no_cache:
-            logging.info(f'Preprocessing linear: {linear_params}, tree: {params.tree} on fold {params.fold} for tfidf: {params.tfidf}..')
+            logging.info(f"Training: {linear_params}")
             with __silent__():
                 self._cached_params.linear = linear_params
                 self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
 
         return self._cached_model
 
-    def get_cv_score(self, y, x, model, params, metrics):
-        logging.info(f'Scoring params: {params.predict}')
+    def compute_scores(self, y, x, model, params):
+        logging.info(f"Scoring: {params.predict}")
 
         batch_size = 256
         num_instances = x.shape[0]
         num_batches = math.ceil(num_instances / batch_size)
 
-        # metrics = linear.get_metrics(self.metrics, num_classes=y.shape[1])
+        if params not in self.param_metrics.keys():
+            self.param_metrics[params] = linear.get_metrics(self.monitor_metrics, num_classes=y.shape[1])
 
         for i in range(num_batches):
             preds = model.predict_values(
                 x[i * batch_size : (i + 1) * batch_size],
                 **asdict(params.predict))
             target = y[i * batch_size : (i + 1) * batch_size].toarray()
-            metrics.update(preds, target)
-
-        # return metrics
-        scores = metrics.compute()
-        logging.info(f'cv_score: {scores}\n')
-
-        return scores
-
-    def output(self):
-        return sorted(self.results.items(), key=lambda x: x[1][self.metrics[0]], reverse=True)
+            self.param_metrics[params].update(preds, target)
 
-    def __call__(self, search_space):
-        self.search_space = [GridParameter(params, fold) for params in search_space for fold in range(self.n_folds)]
-        self.sort_search_space()
-        self.build_fold_idx()
+        logging.info(f"cv_score: {self.param_metrics[params].compute()}\n")
 
-        self.results = {
-            GridParameter(params): linear.get_metrics(self.metrics, num_classes=y.shape[1]) for params in search_space
-            }
-        for params in self.search_space:
-            # for fold in self.n_folds:
-            dataset = self.get_dataset(params)
-            y_train_fold, x_train_fold, y_valid_fold, x_valid_fold = \
-                self.get_fold_data(dataset, params)
+    def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, dict[str, float]]:
+        self.param_metrics.clear()
 
-            model = self.get_model(y_train_fold, x_train_fold, params)
-            params.fold = -1
-            cv_score = self.get_cv_score(y_valid_fold, x_valid_fold, model, params, self.results[params])
+        param_names = search_space_dict.keys()
+        self.search_space = sorted([
+            GridParameter(dict(zip(param_names, param_values)))
+            for param_values in itertools.product(*search_space_dict.values())
+        ])
 
-            # params.fold = -1
-            # for metric in self.metrics:
-            #     # self.results[params][metric] += cv_score[metric] / self.n_folds
+        permutation = np.random.permutation(self.num_instances)
+        index_per_fold = [
+            permutation[int(fold * self.num_instances / self.n_folds):int((fold+1) * self.num_instances / self.n_folds)]
+            for fold in range(self.n_folds)
+        ]
 
-        return self.output()
+        for fold in range(self.n_folds):
+            train_idx = np.concatenate(index_per_fold[:fold] + index_per_fold[fold+1:])
+            valid_idx = index_per_fold[fold]
+            fold_dataset = self.get_fold_dataset(train_idx, valid_idx)
+
+            self._cached_params.tfidf = None
+            for params in self.search_space:
+                logging.info(f"Running fold {fold}, params: {params}")
+
+                transformed_dataset = self.get_transformed_dataset(fold_dataset, params)
+                model = self.get_model(
+                    transformed_dataset["train"]["y"],
+                    transformed_dataset["train"]["x"],
+                    params
+                    )
+                self.compute_scores(
+                    transformed_dataset["test"]["y"],
+                    transformed_dataset["test"]["x"],
+                    model,
+                    params
+                    )
+
+        return {params: metrics.compute() for params, metrics in self.param_metrics.items()}
diff --git a/run_exp.py b/run_exp.py
index b359b8a7..88d376bf 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -23,22 +23,11 @@ def prune_model(*args, **kwargs):
 
     parser = argparse.ArgumentParser(description="Parse command-line arguments.")
     parser.add_argument("--dataset", type=str, default="EUR-Lex", help="Dataset name (e.g., AmazonCat-13K, EUR-Lex)")
+    parser.add_argument("--data_format", type=str, default="txt", help="Data format.")
     args = parser.parse_args()
 
-    dataset_ = args.dataset
+    dataset = linear.load_dataset(args.data_format, f"data/{args.dataset}/train.{args.data_format}")  # , f"data/{dataset}/test.{args.data_format}"
 
-    datasets = linear.load_dataset("svm", f"data/{dataset_}/train.svm")  # , f"data/{dataset}/test.svm"
-    # data_source = [f'data/{dataset_}/train.svm', f'data/{dataset_}/test.svm']
-    # search_space = {
-    #     'tfidf': {
-    #         'min_df': [1, 2],
-    #         'max_features': [10000, 320000],
-    #     },
-    #     'params': {
-    #         'C': [1, 2],
-    #         'K': [2, 100],
-    #     },
-    # }
     n_folds = 3
     retrain = False
     linear_technique = 'tree'
@@ -49,42 +38,20 @@ def prune_model(*args, **kwargs):
         'A': [2, 3],
         'c': [0.1, 0.2],
     }
-    param_names = search_space_dict.keys()
-    search_space = [
-        dict(zip(param_names, param_values))
-        for param_values in itertools.product(*search_space_dict.values())
-    ]
-    # search_space = [dict()]  # all default values
 
-    # search_space = [
-    #     {'max_features': i, 'K': j, 'min_df': k, 'c': l}
-    #     for i in [10000, 20000] for j in [10, 100] for k in [1, 2] for l in [0.1, 0.2]
-    # ]
+    # for i in search_space:
+    #     print(i)
 
-    for i in search_space:
-        print(i)
-
-    search = grid.GridSearch(datasets, n_folds)
-    best_params = search(search_space)
-    print(best_params)
+    search = grid.GridSearch(dataset, n_folds)
+    scores = search(search_space_dict)
+    print(scores)
     breakpoint()
 
-    # if best_params.tfidf == search._cached_tfidf_params:
-    #     datasets = search._cached_tfidf_data
-    # else:
-    #     preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
-    #     datasets = preprocessor.fit_transform(datasets)
-    #     search.init_tfidf_cache(datasets, best_params)
-
-    # best_alpha = search(['alpha'])[0]
-    # best_A = search(['A'])[0]
-    # # TODO (the fields are frozen)
-    # best_params.linear.alpha = best_alpha
-    # best_params.linear.A = best_A
-
     if retrain:
+        # TODO
+        best_params = None
         model = linear.LINEAR_TECHNIQUES[linear_technique](
-                    datasets["train"]["y"],
-                    datasets["train"]["x"],
+                    dataset["train"]["y"],
+                    dataset["train"]["x"],
                     **asdict(best_params.linear),
                 )

From 4942b805f749546aa62aa95d755818df4baf494f Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Fri, 23 Jan 2026 06:06:17 +0000
Subject: [PATCH 14/23] update retrain example

---
 run_exp.py | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/run_exp.py b/run_exp.py
index 88d376bf..b38fccf9 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -4,11 +4,6 @@
 import numpy as np
 from dataclasses import asdict
 
-import time
-import json
-from tqdm import tqdm
-import itertools
-
 
 def prune_model(*args, **kwargs):
     pass
@@ -19,7 +14,7 @@ def prune_model(*args, **kwargs):
     import logging
 
     logging.basicConfig(level=logging.INFO)
-    np.random.seed(20250820)
+    np.random.seed(20260123)
 
     parser = argparse.ArgumentParser(description="Parse command-line arguments.")
     parser.add_argument("--dataset", type=str, default="EUR-Lex", help="Dataset name (e.g., AmazonCat-13K, EUR-Lex)")
@@ -28,9 +23,9 @@ def prune_model(*args, **kwargs):
 
     dataset = linear.load_dataset(args.data_format, f"data/{args.dataset}/train.{args.data_format}")  # , f"data/{dataset}/test.{args.data_format}"
 
+    retrain = True
     n_folds = 3
-    retrain = False
-    linear_technique = 'tree'
+    monitor_metrics = ["P@1", "P@3", "P@5"]
     search_space_dict = {
         'max_features': [10000, 20000],
         'K': [10, 100],
@@ -39,19 +34,21 @@ def prune_model(*args, **kwargs):
         'c': [0.1, 0.2],
     }
 
-    # for i in search_space:
-    #     print(i)
-
-    search = grid.GridSearch(dataset, n_folds)
-    scores = search(search_space_dict)
-    print(scores)
-    breakpoint()
+    search = grid.GridSearch(dataset, n_folds, monitor_metrics)
+    cv_scores = search(search_space_dict)
+    sorted_cv_scores = sorted(cv_scores.items(), key=lambda x: x[1][monitor_metrics[0]], reverse=True)
+    print(sorted_cv_scores)
 
     if retrain:
-        # TODO
-        best_params = None
-        model = linear.LINEAR_TECHNIQUES[linear_technique](
-                    dataset["train"]["y"],
-                    dataset["train"]["x"],
-                    **asdict(best_params.linear),
+        # TODO: test set
+        best_params, best_cv_scores = list(sorted_cv_scores)[0]
+        print(best_params, best_cv_scores)
+
+        preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
+        transformed_dataset = preprocessor.fit_transform(dataset)
+        model = linear.train_tree(
+                    transformed_dataset["train"]["y"],
+                    transformed_dataset["train"]["x"],
+                    best_params.linear_options,
+                    **asdict(best_params.tree),
                 )

From 5815e3ec680ca48249b1be8b61b4de644d7f555a Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Fri, 23 Jan 2026 06:07:21 +0000
Subject: [PATCH 15/23] make the logging info prettier

---
 grid.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/grid.py b/grid.py
index c25d7940..5593644e 100644
--- a/grid.py
+++ b/grid.py
@@ -159,13 +159,15 @@ def get_transformed_dataset(self, dataset, params):
         tfidf_params = params.tfidf
         self.no_cache = (tfidf_params != self._cached_params.tfidf)
         if self.no_cache:
-            logging.info(f"Preprocessing tfidf: {tfidf_params}")
+            logging.info(f"TFIDF  - Preprocessing: {tfidf_params}")
             if self.datasets["data_format"] not in {"txt", "dataframe"}:
                 logging.info('The TF-IDF parameters are only meaningful for the “txt” and “dataframe” data formats.')
             with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
                 self._cached_params.tfidf = tfidf_params
                 self._cached_transformed_dataset = preprocessor.fit_transform(dataset)
+        else:
+            logging.info(f"TFIDF  - Using cached data: {tfidf_params}")
 
         return self._cached_transformed_dataset
 
@@ -173,13 +175,15 @@ def get_tree_root(self, y, x, params):
         tree_params = params.tree
         self.no_cache |= (tree_params != self._cached_params.tree)
         if self.no_cache:
-            logging.info(f"Preprocessing tree: {tree_params}")
+            logging.info(f"Tree   - Preprocessing: {tree_params}")
             with __silent__():
                 label_representation = (y.T * x).tocsr()
                 label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
                 self._cached_params.tree = tree_params
                 self._cached_tree_root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params))
                 self._cached_tree_root.is_root = True
+        else:
+            logging.info(f"Tree   - Using cached data: {tree_params}")
 
         return self._cached_tree_root
 
@@ -200,15 +204,17 @@ def get_model(self, y, x, params):
         linear_params = params.linear
         self.no_cache |= (linear_params != self._cached_params.linear)
         if self.no_cache:
-            logging.info(f"Training: {linear_params}")
+            logging.info(f"Model  - Training: {linear_params}")
             with __silent__():
                 self._cached_params.linear = linear_params
                 self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
+        else:
+            logging.info(f"Model  - Using cached data: {linear_params}")
 
         return self._cached_model
 
     def compute_scores(self, y, x, model, params):
-        logging.info(f"Scoring: {params.predict}")
+        logging.info(f"Metric - Scoring: {params.predict}\n")
 
         batch_size = 256
         num_instances = x.shape[0]
@@ -224,8 +230,6 @@ def compute_scores(self, y, x, model, params):
             target = y[i * batch_size : (i + 1) * batch_size].toarray()
             self.param_metrics[params].update(preds, target)
 
-        logging.info(f"cv_score: {self.param_metrics[params].compute()}\n")
-
     def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, dict[str, float]]:
         self.param_metrics.clear()
 
@@ -248,7 +252,7 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
 
             self._cached_params.tfidf = None
             for params in self.search_space:
-                logging.info(f"Running fold {fold}, params: {params}")
+                logging.info(f"Status - Running fold {fold}, params: {params}")
 
                 transformed_dataset = self.get_transformed_dataset(fold_dataset, params)
                 model = self.get_model(

From 1e284cecd77e90eff7cab1cd4cb1f5bd808a013a Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 27 Jan 2026 16:44:40 +0000
Subject: [PATCH 16/23] update weights pruning

- the implementation is based on code from Zhi-Bao's repo.
---
 libmultilabel/linear/linear.py | 41 ++++++++++++++++++++++++++++++++--
 libmultilabel/linear/tree.py   |  8 +++++--
 linear_trainer.py              |  1 +
 main.py                        |  6 +++++
 4 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 04d25a21..2e7d6611 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -92,6 +92,37 @@ def _to_dense_array(self, matrix: np.matrix | sparse.csr_matrix) -> np.ndarray:
             return np.asarray(matrix)
 
 
+def _pruning_weights(weights: np.ndarray, pruning_alpha: float) -> np.ndarray:
+    """Prune the weights of the linear model.
+
+    Args:
+        weights (np.ndarray): Linear model weights.
+        pruning_alpha (float): Fraction of weights to keep after pruning.
+
+    Returns:
+        np.ndarray: The pruned weights.
+    """
+    pruning_ratio = 1-pruning_alpha
+
+    if 0 >= pruning_ratio:
+        return weights
+    elif pruning_ratio >= 1:
+        return np.zeros_like(weights)
+    else:
+        # Perform pruning algorithm
+        # Reduce the number of nonzero features per column by a factor of pruning_ratio.
+        nonzero_indices = np.flatnonzero(weights)
+        num_nonzeros = nonzero_indices.size
+        # Threshold
+        k = np.clip(int(pruning_ratio * num_nonzeros), 0, num_nonzeros)
+        k_nonzero_indices = np.argpartition(np.abs(weights[nonzero_indices]), kth=k-1)[:k]
+
+        pruned_indices = nonzero_indices[k_nonzero_indices]
+        weights[pruned_indices] = 0
+
+        return weights
+
+
 class ParallelOVRTrainer(threading.Thread):
     """A trainer for parallel 1vsrest training."""
 
@@ -103,6 +134,7 @@ class ParallelOVRTrainer(threading.Thread):
     weights: np.ndarray
     pbar: tqdm
     queue: queue.SimpleQueue
+    pruning_alpha: float
 
     def __init__(self):
         threading.Thread.__init__(self)
@@ -114,6 +146,7 @@ def init_trainer(
         x: sparse.csr_matrix,
         options: str,
         verbose: bool,
+        pruning_alpha: float,
     ):
         """Initialize the parallel trainer by setting y, x, parameter and threading related
         variables as class variables of ParallelOVRTrainer.
@@ -123,11 +156,13 @@ def init_trainer(
             x (sparse.csr_matrix): A matrix with dimensions number of instances * number of features.
             options (str): The option string passed to liblinear.
             verbose (bool): Output extra progress information.
+            pruning_alpha (float): Fraction of weights to keep after pruning.
         """
         x, options, bias = _prepare_options(x, options)
         cls.y = y.tocsc()
         cls.x = x
         cls.bias = bias
+        cls.pruning_alpha = pruning_alpha
         num_instances, num_classes = cls.y.shape
         num_features = cls.x.shape[1]
         cls.prob = problem(np.ones((num_instances,)), cls.x)
@@ -187,7 +222,7 @@ def run(self):
             except queue.Empty:
                 break
             yi = self.y[:, label_idx].toarray().reshape(-1)
-            self.weights[:, label_idx] = self._do_parallel_train(2 * yi - 1).ravel()
+            self.weights[:, label_idx] = _pruning_weights(self._do_parallel_train(2 * yi - 1).ravel(), self.pruning_alpha)
 
             self.pbar.update()
 
@@ -198,6 +233,7 @@ def train_1vsrest(
     multiclass: bool = False,
     options: str = "",
     verbose: bool = True,
+    pruning_alpha: float = 1.0,
 ) -> FlatModel:
     """Train a linear model parallel on labels for multi-label data using a one-vs-rest strategy.
 
@@ -207,12 +243,13 @@ def train_1vsrest(
         multiclass (bool, optional): A flag indicating if the dataset is multiclass.
         options (str, optional): The option string passed to liblinear. Defaults to ''.
         verbose (bool, optional): Output extra progress information. Defaults to True.
+        pruning_alpha (float, optional): Fraction of weights to keep after pruning. Defaults to 1.0 (no pruning).
 
     Returns:
         A model which can be used in predict_values.
     """
     # Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
-    ParallelOVRTrainer.init_trainer(y, x, options, verbose)
+    ParallelOVRTrainer.init_trainer(y, x, options, verbose, pruning_alpha)
     num_threads = psutil.cpu_count(logical=False)
     trainers = [ParallelOVRTrainer() for _ in range(num_threads)]
     for trainer in trainers:
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 87e7242e..485540df 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -211,6 +211,7 @@ def train_tree(
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
     root: Node = None,
+    pruning_alpha: float = 1,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.
@@ -222,6 +223,8 @@ def train_tree(
         K (int, optional): Maximum degree of nodes in the tree. Defaults to 100.
         dmax (int, optional): Maximum depth of the tree. Defaults to 10.
         verbose (bool, optional): Output extra progress information. Defaults to True.
+        root (Node, optional): Pre-built tree root. Defaults to None.
+        pruning_alpha (float optional): Fraction of weights to keep after pruning. Defaults to 1.0 (no pruning).
 
     Returns:
         TreeModel: A model which can be used in predict_values.
@@ -242,6 +245,7 @@ def count(node):
         nonlocal num_nodes
         num_nodes += 1
         node.num_features_used = np.count_nonzero(features_used_perlabel[:, node.label_map].sum(axis=1))
+        node.pruning_alpha = pruning_alpha
 
     root.dfs(count)
 
@@ -344,14 +348,14 @@ def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node:
         node (Node): Node to be trained.
     """
     if node.isLeaf():
-        node.model = linear.train_1vsrest(y[:, node.label_map], x, False, options, False)
+        node.model = linear.train_1vsrest(y[:, node.label_map], x, False, options, False, node.pruning_alpha)
     else:
         # meta_y[i, j] is 1 if the ith instance is relevant to the jth child.
         # getnnz returns an ndarray of shape number of instances.
         # This must be reshaped into number of instances * 1 to be interpreted as a column.
         meta_y = [y[:, child.label_map].getnnz(axis=1)[:, np.newaxis] > 0 for child in node.children]
         meta_y = sparse.csr_matrix(np.hstack(meta_y))
-        node.model = linear.train_1vsrest(meta_y, x, False, options, False)
+        node.model = linear.train_1vsrest(meta_y, x, False, options, False, node.pruning_alpha)
 
     node.model.weights = sparse.csc_matrix(node.model.weights)
 
diff --git a/linear_trainer.py b/linear_trainer.py
index b9133857..d84991f3 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -66,6 +66,7 @@ def linear_train(datasets, config):
                 options=config.liblinear_options,
                 K=config.tree_degree,
                 dmax=config.tree_max_depth,
+                pruning_alpha=config.pruning_alpha,
             )
     else:
         model = LINEAR_TECHNIQUES[config.linear_technique](
diff --git a/main.py b/main.py
index 70907edf..47bbea92 100644
--- a/main.py
+++ b/main.py
@@ -217,6 +217,12 @@ def add_all_arguments(parser):
         action="store_true",
         help="Save all the predictions with decision value larger then 0. If used, the save_k_predictions must be set to 0",
     )
+    parser.add_argument(
+        "--pruning_alpha",
+        type=float,
+        default=1.0,
+        help="Fraction of weights to keep after pruning (1.0 means no pruning)."
+    )
 
     # tree options
     parser.add_argument("--tree_degree", type=int, default=100, help="Degree of the tree (default: %(default)s)")

From 487e20eb0362c75a3c2995c7a011581c3320d905 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 27 Jan 2026 17:06:39 +0000
Subject: [PATCH 17/23] update prob estimation

- the implementation is based on code from Guan-Ting's PR.
---
 grid.py                      |  2 +-
 libmultilabel/linear/tree.py | 36 ++++++++++++++++++++++++------------
 linear_trainer.py            |  1 +
 main.py                      |  8 +++++++-
 run_exp.py                   |  2 +-
 5 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/grid.py b/grid.py
index 5593644e..d6a1b2f9 100644
--- a/grid.py
+++ b/grid.py
@@ -55,7 +55,7 @@ class GridParameter:
         ]
     _predict_fields = [
         ("beam_width", int, field(default=10)),
-        ("A", int, field(default=1)),
+        ("prob_A", int, field(default=1)),
         ]
 
     param_types = {
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 485540df..b5c2ca20 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -59,22 +59,35 @@ def __init__(
         self.multiclass = False
         self._model_separated = False # Indicates whether the model has been separated for pruning tree.
 
+    def sigmoid_A(self, x: np.ndarray, prob_A: int):
+        """
+        Calculate log(sigmoid(prob_A * x)).
+
+        Args:
+            x (np.ndarray): A matrix with dimension number of instances * number of classes.
+            prob_A (int): The tunable parameter of probability estimation function, that is sigmoid(prob_A * preds).
+
+        Returns:
+            np.ndarray: A matrix with dimension number of instances * number of classes.
+        """
+        return log_expit(prob_A * x)
+
     def predict_values(
         self,
         x: sparse.csr_matrix,
         beam_width: int = 10,
-        A: int = 3,
+        prob_A: int = 3,
     ) -> np.ndarray:
         """Calculate the probability estimates associated with x.
 
         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
             beam_width (int, optional): Number of candidates considered during beam search. Defaults to 10.
+            prob_A (int, optional): The tunable parameter of probability estimation function, that is sigmoid(prob_A * preds). Defaults to 3.
 
         Returns:
             np.ndarray: A matrix with dimension number of instances * number of classes.
         """
-        sigmoid_A = lambda x: log_expit(A * x)
         if beam_width >= len(self.root.children):
             # Beam_width is sufficiently large; pruning not applied.
             # Calculates decision values for all nodes.
@@ -84,8 +97,8 @@ def predict_values(
             if not self._model_separated:
                 self._separate_model_for_pruning_tree()
                 self._model_separated = True
-            all_preds = self._prune_tree_and_predict_values(x, beam_width, sigmoid_A) # number of instances * (number of labels + total number of metalabels)
-        return np.vstack([self._beam_search(all_preds[i], beam_width, sigmoid_A) for i in range(all_preds.shape[0])])
+            all_preds = self._prune_tree_and_predict_values(x, beam_width, prob_A) # number of instances * (number of labels + total number of metalabels)
+        return np.vstack([self._beam_search(all_preds[i], beam_width, prob_A) for i in range(all_preds.shape[0])])
 
     def _separate_model_for_pruning_tree(self):
         """
@@ -116,7 +129,7 @@ def _separate_model_for_pruning_tree(self):
             )
             self.subtree_models.append(subtree_flatmodel)
         
-    def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, sigmoid_A: Callable) -> np.ndarray:
+    def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, prob_A: int) -> np.ndarray:
         """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees.
 
         Only subtrees corresponding to the top beam_width candidates from the root are evaluated,
@@ -125,6 +138,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int,
         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
             beam_width (int): Number of top candidate branches considered for prediction.
+            prob_A (int, optional): The tunable parameter of probability estimation function, that is sigmoid(prob_A * preds).
 
         Returns:
             np.ndarray: A matrix with dimension number of instances * (number of labels + total number of metalabels).
@@ -135,8 +149,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int,
 
         # Calculate root decision values and scores
         root_preds = linear.predict_values(self.root_model, x)
-        # children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
-        children_scores = 0.0 + sigmoid_A(root_preds)
+        children_scores = 0.0 + self.sigmoid_A(root_preds, prob_A)
 
         slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
         all_preds[slice] = root_preds
@@ -163,12 +176,13 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int,
 
         return all_preds
 
-    def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: Callable) -> np.ndarray:
+    def _beam_search(self, instance_preds: np.ndarray, beam_width: int, prob_A: int) -> np.ndarray:
         """Predict with beam search using cached probability estimates for a single instance.
 
         Args:
             instance_preds (np.ndarray): A vector of cached probability estimates of each node, has dimension number of labels + total number of metalabels.
             beam_width (int): Number of candidates considered.
+            prob_A (int, optional): The tunable parameter of probability estimation function, that is sigmoid(prob_A * preds).
 
         Returns:
             np.ndarray: A vector with dimension number of classes.
@@ -186,8 +200,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: C
                     continue
                 slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
                 pred = instance_preds[slice]
-                # children_score = score - np.square(np.maximum(0, 1 - pred))
-                children_score = score + sigmoid_A(pred)
+                children_score = score + self.sigmoid_A(pred, prob_A)
                 next_level.extend(zip(node.children, children_score.tolist()))
 
             cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width]
@@ -198,8 +211,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, sigmoid_A: C
         for node, score in cur_level:
             slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
             pred = instance_preds[slice]
-            # scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
-            scores[node.label_map] = np.exp(score + sigmoid_A(pred))
+            scores[node.label_map] = np.exp(score + self.sigmoid_A(pred, prob_A))
         return scores
 
 
diff --git a/linear_trainer.py b/linear_trainer.py
index d84991f3..1ff18584 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -24,6 +24,7 @@ def linear_test(config, model, datasets, label_mapping):
     predict_kwargs = {}
     if isinstance(model, (TreeModel, EnsembleTreeModel)):
         predict_kwargs["beam_width"] = config.beam_width
+        predict_kwargs["prob_A"] = config.prob_A
 
     for i in tqdm(range(ceil(num_instance / config.eval_batch_size))):
         slice = np.s_[i * config.eval_batch_size : (i + 1) * config.eval_batch_size]
diff --git a/main.py b/main.py
index 47bbea92..490c633b 100644
--- a/main.py
+++ b/main.py
@@ -221,7 +221,7 @@ def add_all_arguments(parser):
         "--pruning_alpha",
         type=float,
         default=1.0,
-        help="Fraction of weights to keep after pruning (1.0 means no pruning)."
+        help="Fraction of weights to keep after pruning (1.0 means no pruning, default: %(default)s)"
     )
 
     # tree options
@@ -238,6 +238,12 @@ def add_all_arguments(parser):
         default=10,
         help="The width of the beam search (default: %(default)s)",
     )
+    parser.add_argument(
+        "--prob_A",
+        type=int,
+        default=3,
+        help="The tunable parameter of probability estimation function, that is sigmoid(prob_A * preds) (default: %(default)s)",
+    )
     # AttentionXML
     parser.add_argument(
         "--cluster_size",
diff --git a/run_exp.py b/run_exp.py
index b38fccf9..b277d3c5 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -30,7 +30,7 @@ def prune_model(*args, **kwargs):
         'max_features': [10000, 20000],
         'K': [10, 100],
         'min_df': [1, 2],
-        'A': [2, 3],
+        'prob_A': [2, 3],
         'c': [0.1, 0.2],
     }
 

From d525c8568993bf21772d058ed83158012120faa0 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 27 Jan 2026 19:21:10 +0000
Subject: [PATCH 18/23] update grid search for pruning_alpha

---
 grid.py                        | 65 +++++++++++++++++++++++++++-------
 libmultilabel/linear/linear.py |  5 ++-
 libmultilabel/linear/tree.py   | 11 +++---
 run_exp.py                     |  1 +
 4 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/grid.py b/grid.py
index d6a1b2f9..6862b494 100644
--- a/grid.py
+++ b/grid.py
@@ -7,10 +7,12 @@
 import logging
 
 import libmultilabel.linear as linear
-from libmultilabel.linear.tree import _build_tree
+from libmultilabel.linear.tree import TreeModel, _build_tree
+from libmultilabel.linear.linear import _pruning_weights
 
 import sklearn.preprocessing
 import numpy as np
+import scipy.sparse as sparse
 import math
 
 
@@ -51,11 +53,11 @@ class GridParameter:
         ("s", int, field(default=1)),
         ("c", float, field(default=1)),
         ("B", int, field(default=-1)),
-        # ("alpha", float, field(default=1)),
+        ("pruning_alpha", float, field(default=1)),
         ]
     _predict_fields = [
         ("beam_width", int, field(default=10)),
-        ("prob_A", int, field(default=1)),
+        ("prob_A", int, field(default=3)),
         ]
 
     param_types = {
@@ -84,8 +86,9 @@ def __init__(self, params: dict | None = None, fold: int = -1):
     @property
     def linear_options(self):
         options = ""
-        for f in fields(self.linear):
-            options += f" -{f.name} {getattr(self.linear, f.name)}"
+        linear_field_names = (self._param_field_names['linear'] - {'pruning_alpha'})
+        for field_name in linear_field_names:
+            options += f" -{field_name} {getattr(self.linear, field_name)}"
         return options.strip()
 
     def __repr__(self):
@@ -104,6 +107,27 @@ def __hash__(self):
         return hash(tuple(getattr(self, t) for t in self.param_types))
 
 
+def pruning_flat_model(flat_model: linear.FlatModel, pruning_alpha: float) -> np.ndarray:
+    """Prune the weights of the flat model.
+
+    Args:
+        flat_model (linear.FlatModel): The flat model.
+        pruning_alpha (float): Fraction of weights to keep after pruning.
+
+    Returns:
+        np.ndarray: The flat model with the pruned weights.
+    """
+    num_classes = flat_model.weights.shape[1]
+    weights = []
+
+    for i in range(num_classes):
+        weight = flat_model.weights[:, i].toarray().ravel()
+        weights.append(sparse.csc_matrix(_pruning_weights(weight, pruning_alpha)))
+
+    flat_model.weights = sparse.hstack(weights, "csc")
+    return flat_model
+
+
 class GridSearch:
     def __init__(
         self,
@@ -202,12 +226,29 @@ def get_model(self, y, x, params):
         root = self.get_tree_root(y, x, params)
 
         linear_params = params.linear
-        self.no_cache |= (linear_params != self._cached_params.linear)
-        if self.no_cache:
-            logging.info(f"Model  - Training: {linear_params}")
-            with __silent__():
-                self._cached_params.linear = linear_params
-                self._cached_model = linear.train_tree(y, x, root=root, options=params.linear_options)
+        pruning_alpha = linear_params.pruning_alpha
+
+        if self.no_cache or (linear_params != self._cached_params.linear):
+            if not self.no_cache and params.linear_options == self._cached_params.linear_options:
+                # The y, x, and linear_options are the same, which means the pruning_alpha is different.
+                # We prune the weights in-place, and the pruning_alpha is sorted in decreasing order.
+                # Therefore, we must divide by the previous pruning_alpha.
+                previous_alpha = self._cached_params.linear.pruning_alpha
+                pruning_alpha /= previous_alpha
+                logging.info(f"Model  - Pruning: {linear_params}, alpha: {pruning_alpha}")
+                self._cached_model.flat_model = pruning_flat_model(self._cached_model.flat_model, pruning_alpha)
+            else:
+                logging.info(f"Model  - Training: {linear_params}")
+                with __silent__():
+                    self._cached_model = linear.train_tree(
+                        y,
+                        x,
+                        root=root,
+                        options=params.linear_options,
+                        pruning_alpha=pruning_alpha
+                        )
+
+            self._cached_params.linear = linear_params
         else:
             logging.info(f"Model  - Using cached data: {linear_params}")
 
@@ -237,7 +278,7 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
         self.search_space = sorted([
             GridParameter(dict(zip(param_names, param_values)))
             for param_values in itertools.product(*search_space_dict.values())
-        ])
+        ], reverse=True)
 
         permutation = np.random.permutation(self.num_instances)
         index_per_fold = [
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 2e7d6611..60431e5b 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -186,14 +186,14 @@ def del_trainer(cls):
         for key in list(cls.__annotations__):
             delattr(cls, key)
 
-    def _do_parallel_train(self, y: np.ndarray) -> np.matrix:
+    def _do_parallel_train(self, y: np.ndarray) -> np.ndarray:
         """Wrap around liblinear.liblinearutil.train.
 
         Args:
             y (np.ndarray): A +1/-1 array with dimensions number of instances * 1.
 
         Returns:
-            np.matrix: The weights.
+            np.ndarray: The weights.
         """
         if y.shape[0] == 0:
             return np.matrix(np.zeros((self.prob.n, 1)))
@@ -203,7 +203,6 @@ def _do_parallel_train(self, y: np.ndarray) -> np.matrix:
         model = train(prob, self.param)
 
         w = np.ctypeslib.as_array(model.w, (self.prob.n, 1))
-        w = np.asmatrix(w)
         # When all labels are -1, we must flip the sign of the weights
         # because LIBLINEAR treats the first label as positive, which
         # is -1 in this case. But for our usage we need them to be negative.
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index b5c2ca20..a66826b8 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -308,13 +308,10 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
         else:
             kmeans_algo = LloydKmeans
 
-        if False:
-            metalabels = np.random.randint(0, K, label_representation.shape[0])
-        else:
-            kmeans = kmeans_algo(
-                n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
-            )
-            metalabels = kmeans.fit(label_representation)
+        kmeans = kmeans_algo(
+            n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
+        )
+        metalabels = kmeans.fit(label_representation)
 
         unique_labels = np.unique(metalabels)
         if len(unique_labels) == K:
diff --git a/run_exp.py b/run_exp.py
index b277d3c5..602dd78e 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -32,6 +32,7 @@ def prune_model(*args, **kwargs):
         'min_df': [1, 2],
         'prob_A': [2, 3],
         'c': [0.1, 0.2],
+        'pruning_alpha': [0.9, 0.7],
     }
 
     search = grid.GridSearch(dataset, n_folds, monitor_metrics)

From 6cd10e4b44e685fd9fa1a8f60c293e0bd332e93e Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Wed, 28 Jan 2026 05:26:50 +0000
Subject: [PATCH 19/23] fix the column dimension of the weights when pruning

---
 grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grid.py b/grid.py
index 6862b494..5f07571c 100644
--- a/grid.py
+++ b/grid.py
@@ -122,7 +122,7 @@ def pruning_flat_model(flat_model: linear.FlatModel, pruning_alpha: float) -> np
 
     for i in range(num_classes):
         weight = flat_model.weights[:, i].toarray().ravel()
-        weights.append(sparse.csc_matrix(_pruning_weights(weight, pruning_alpha)))
+        weights.append(sparse.csc_matrix(_pruning_weights(weight, pruning_alpha)[:, None]))
 
     flat_model.weights = sparse.hstack(weights, "csc")
     return flat_model

From 9f496a7d7240ee9fdc2479a0e8f6f8fa2b238977 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 10 Feb 2026 06:12:10 +0000
Subject: [PATCH 20/23] remove weights pruning code

---
 grid.py                        | 58 +++++++---------------------------
 libmultilabel/linear/linear.py | 41 ++----------------------
 libmultilabel/linear/tree.py   |  7 ++--
 linear_trainer.py              |  1 -
 main.py                        |  6 ----
 5 files changed, 15 insertions(+), 98 deletions(-)

diff --git a/grid.py b/grid.py
index 5f07571c..d5736988 100644
--- a/grid.py
+++ b/grid.py
@@ -7,8 +7,7 @@
 import logging
 
 import libmultilabel.linear as linear
-from libmultilabel.linear.tree import TreeModel, _build_tree
-from libmultilabel.linear.linear import _pruning_weights
+from libmultilabel.linear.tree import _build_tree
 
 import sklearn.preprocessing
 import numpy as np
@@ -53,7 +52,6 @@ class GridParameter:
         ("s", int, field(default=1)),
         ("c", float, field(default=1)),
         ("B", int, field(default=-1)),
-        ("pruning_alpha", float, field(default=1)),
         ]
     _predict_fields = [
         ("beam_width", int, field(default=10)),
@@ -86,8 +84,7 @@ def __init__(self, params: dict | None = None, fold: int = -1):
     @property
     def linear_options(self):
         options = ""
-        linear_field_names = (self._param_field_names['linear'] - {'pruning_alpha'})
-        for field_name in linear_field_names:
+        for field_name in self._param_field_names['linear']:
             options += f" -{field_name} {getattr(self.linear, field_name)}"
         return options.strip()
 
@@ -107,27 +104,6 @@ def __hash__(self):
         return hash(tuple(getattr(self, t) for t in self.param_types))
 
 
-def pruning_flat_model(flat_model: linear.FlatModel, pruning_alpha: float) -> np.ndarray:
-    """Prune the weights of the flat model.
-
-    Args:
-        flat_model (linear.FlatModel): The flat model.
-        pruning_alpha (float): Fraction of weights to keep after pruning.
-
-    Returns:
-        np.ndarray: The flat model with the pruned weights.
-    """
-    num_classes = flat_model.weights.shape[1]
-    weights = []
-
-    for i in range(num_classes):
-        weight = flat_model.weights[:, i].toarray().ravel()
-        weights.append(sparse.csc_matrix(_pruning_weights(weight, pruning_alpha)[:, None]))
-
-    flat_model.weights = sparse.hstack(weights, "csc")
-    return flat_model
-
-
 class GridSearch:
     def __init__(
         self,
@@ -226,29 +202,17 @@ def get_model(self, y, x, params):
         root = self.get_tree_root(y, x, params)
 
         linear_params = params.linear
-        pruning_alpha = linear_params.pruning_alpha
 
         if self.no_cache or (linear_params != self._cached_params.linear):
-            if not self.no_cache and params.linear_options == self._cached_params.linear_options:
-                # The y, x, and linear_options are the same, which means the pruning_alpha is different.
-                # We prune the weights in-place, and the pruning_alpha is sorted in decreasing order.
-                # Therefore, we must divide by the previous pruning_alpha.
-                previous_alpha = self._cached_params.linear.pruning_alpha
-                pruning_alpha /= previous_alpha
-                logging.info(f"Model  - Pruning: {linear_params}, alpha: {pruning_alpha}")
-                self._cached_model.flat_model = pruning_flat_model(self._cached_model.flat_model, pruning_alpha)
-            else:
-                logging.info(f"Model  - Training: {linear_params}")
-                with __silent__():
-                    self._cached_model = linear.train_tree(
-                        y,
-                        x,
-                        root=root,
-                        options=params.linear_options,
-                        pruning_alpha=pruning_alpha
-                        )
-
-            self._cached_params.linear = linear_params
+            logging.info(f"Model  - Training: {linear_params}")
+            with __silent__():
+                self._cached_params.linear = linear_params
+                self._cached_model = linear.train_tree(
+                    y,
+                    x,
+                    root=root,
+                    options=params.linear_options,
+                    )
         else:
             logging.info(f"Model  - Using cached data: {linear_params}")
 
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 60431e5b..2ecade65 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -92,37 +92,6 @@ def _to_dense_array(self, matrix: np.matrix | sparse.csr_matrix) -> np.ndarray:
             return np.asarray(matrix)
 
 
-def _pruning_weights(weights: np.ndarray, pruning_alpha: float) -> np.ndarray:
-    """Prune the weights of the linear model.
-
-    Args:
-        weights (np.ndarray): Linear model weights.
-        pruning_alpha (float): Fraction of weights to keep after pruning.
-
-    Returns:
-        np.ndarray: The pruned weights.
-    """
-    pruning_ratio = 1-pruning_alpha
-
-    if 0 >= pruning_ratio:
-        return weights
-    elif pruning_ratio >= 1:
-        return np.zeros_like(weights)
-    else:
-        # Perform pruning algorithm
-        # Reduce the number of nonzero features per column by a factor of pruning_ratio.
-        nonzero_indices = np.flatnonzero(weights)
-        num_nonzeros = nonzero_indices.size
-        # Threshold
-        k = np.clip(int(pruning_ratio * num_nonzeros), 0, num_nonzeros)
-        k_nonzero_indices = np.argpartition(np.abs(weights[nonzero_indices]), kth=k-1)[:k]
-
-        pruned_indices = nonzero_indices[k_nonzero_indices]
-        weights[pruned_indices] = 0
-
-        return weights
-
-
 class ParallelOVRTrainer(threading.Thread):
     """A trainer for parallel 1vsrest training."""
 
@@ -134,7 +103,6 @@ class ParallelOVRTrainer(threading.Thread):
     weights: np.ndarray
     pbar: tqdm
     queue: queue.SimpleQueue
-    pruning_alpha: float
 
     def __init__(self):
         threading.Thread.__init__(self)
@@ -146,7 +114,6 @@ def init_trainer(
         x: sparse.csr_matrix,
         options: str,
         verbose: bool,
-        pruning_alpha: float,
     ):
         """Initialize the parallel trainer by setting y, x, parameter and threading related
         variables as class variables of ParallelOVRTrainer.
@@ -156,13 +123,11 @@ def init_trainer(
             x (sparse.csr_matrix): A matrix with dimensions number of instances * number of features.
             options (str): The option string passed to liblinear.
             verbose (bool): Output extra progress information.
-            pruning_alpha (float): Fraction of weights to keep after pruning.
         """
         x, options, bias = _prepare_options(x, options)
         cls.y = y.tocsc()
         cls.x = x
         cls.bias = bias
-        cls.pruning_alpha = pruning_alpha
         num_instances, num_classes = cls.y.shape
         num_features = cls.x.shape[1]
         cls.prob = problem(np.ones((num_instances,)), cls.x)
@@ -221,7 +186,7 @@ def run(self):
             except queue.Empty:
                 break
             yi = self.y[:, label_idx].toarray().reshape(-1)
-            self.weights[:, label_idx] = _pruning_weights(self._do_parallel_train(2 * yi - 1).ravel(), self.pruning_alpha)
+            self.weights[:, label_idx] = self._do_parallel_train(2 * yi - 1).ravel()
 
             self.pbar.update()
 
@@ -232,7 +197,6 @@ def train_1vsrest(
     multiclass: bool = False,
     options: str = "",
     verbose: bool = True,
-    pruning_alpha: float = 1.0,
 ) -> FlatModel:
     """Train a linear model parallel on labels for multi-label data using a one-vs-rest strategy.
 
@@ -242,13 +206,12 @@ def train_1vsrest(
         multiclass (bool, optional): A flag indicating if the dataset is multiclass.
         options (str, optional): The option string passed to liblinear. Defaults to ''.
         verbose (bool, optional): Output extra progress information. Defaults to True.
-        pruning_alpha (float, optional): Fraction of weights to keep after pruning. Defaults to 1.0 (no pruning).
 
     Returns:
         A model which can be used in predict_values.
     """
     # Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
-    ParallelOVRTrainer.init_trainer(y, x, options, verbose, pruning_alpha)
+    ParallelOVRTrainer.init_trainer(y, x, options, verbose)
     num_threads = psutil.cpu_count(logical=False)
     trainers = [ParallelOVRTrainer() for _ in range(num_threads)]
     for trainer in trainers:
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index a66826b8..9dba351e 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -223,7 +223,6 @@ def train_tree(
     dmax=DEFAULT_DMAX,
     verbose: bool = True,
     root: Node = None,
-    pruning_alpha: float = 1,
 ) -> TreeModel:
     """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.
@@ -236,7 +235,6 @@ def train_tree(
         dmax (int, optional): Maximum depth of the tree. Defaults to 10.
         verbose (bool, optional): Output extra progress information. Defaults to True.
         root (Node, optional): Pre-built tree root. Defaults to None.
-        pruning_alpha (float optional): Fraction of weights to keep after pruning. Defaults to 1.0 (no pruning).
 
     Returns:
         TreeModel: A model which can be used in predict_values.
@@ -257,7 +255,6 @@ def count(node):
         nonlocal num_nodes
         num_nodes += 1
         node.num_features_used = np.count_nonzero(features_used_perlabel[:, node.label_map].sum(axis=1))
-        node.pruning_alpha = pruning_alpha
 
     root.dfs(count)
 
@@ -357,14 +354,14 @@ def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node:
         node (Node): Node to be trained.
     """
     if node.isLeaf():
-        node.model = linear.train_1vsrest(y[:, node.label_map], x, False, options, False, node.pruning_alpha)
+        node.model = linear.train_1vsrest(y[:, node.label_map], x, False, options, False)
     else:
         # meta_y[i, j] is 1 if the ith instance is relevant to the jth child.
         # getnnz returns an ndarray of shape number of instances.
         # This must be reshaped into number of instances * 1 to be interpreted as a column.
         meta_y = [y[:, child.label_map].getnnz(axis=1)[:, np.newaxis] > 0 for child in node.children]
         meta_y = sparse.csr_matrix(np.hstack(meta_y))
-        node.model = linear.train_1vsrest(meta_y, x, False, options, False, node.pruning_alpha)
+        node.model = linear.train_1vsrest(meta_y, x, False, options, False)
 
     node.model.weights = sparse.csc_matrix(node.model.weights)
 
diff --git a/linear_trainer.py b/linear_trainer.py
index 1ff18584..f5f374fa 100644
--- a/linear_trainer.py
+++ b/linear_trainer.py
@@ -67,7 +67,6 @@ def linear_train(datasets, config):
                 options=config.liblinear_options,
                 K=config.tree_degree,
                 dmax=config.tree_max_depth,
-                pruning_alpha=config.pruning_alpha,
             )
     else:
         model = LINEAR_TECHNIQUES[config.linear_technique](
diff --git a/main.py b/main.py
index 490c633b..b330b8b7 100644
--- a/main.py
+++ b/main.py
@@ -217,12 +217,6 @@ def add_all_arguments(parser):
         action="store_true",
         help="Save all the predictions with decision value larger then 0. If used, the save_k_predictions must be set to 0",
     )
-    parser.add_argument(
-        "--pruning_alpha",
-        type=float,
-        default=1.0,
-        help="Fraction of weights to keep after pruning (1.0 means no pruning, default: %(default)s)"
-    )
 
     # tree options
     parser.add_argument("--tree_degree", type=int, default=100, help="Degree of the tree (default: %(default)s)")

From 464aa593eb5e9d8252a7dd0e35a7f9ac92969488 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 10 Feb 2026 06:37:29 +0000
Subject: [PATCH 21/23] move example code into main() in grid.py

---
 grid.py    | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 run_exp.py | 55 --------------------------------------------
 2 files changed, 66 insertions(+), 56 deletions(-)
 delete mode 100644 run_exp.py

diff --git a/grid.py b/grid.py
index d5736988..3ffbb23a 100644
--- a/grid.py
+++ b/grid.py
@@ -4,14 +4,15 @@
 import os
 import sys
 import itertools
+import argparse
 import logging
 
 import libmultilabel.linear as linear
 from libmultilabel.linear.tree import _build_tree
+from libmultilabel.common_utils import timer
 
 import sklearn.preprocessing
 import numpy as np
-import scipy.sparse as sparse
 import math
 
 
@@ -273,3 +274,67 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
                     )
 
         return {params: metrics.compute() for params, metrics in self.param_metrics.items()}
+
+
+@timer
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--seed",
+        type=int,
+        help="Random seed."
+    )
+    parser.add_argument(
+        "--training_file",
+        help="Path to training data."
+    )
+    parser.add_argument(
+        "--test_file",
+        help="Path to test data."
+    )
+    parser.add_argument(
+        "--data_format",
+        type=str,
+        default="txt",
+        help="'svm' for SVM format or 'txt' for LibMultiLabel format."
+    )
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO)
+    if args.seed is not None:
+        np.random.seed(args.seed)
+
+    dataset = linear.load_dataset(
+        args.data_format,
+        args.training_file,
+        args.test_file,
+    )
+
+    retrain = True
+    n_folds = 3
+    monitor_metrics = ["P@1", "P@3", "P@5"]
+    search_space_dict = {
+        'max_features': [10000]
+    }
+
+    search = GridSearch(dataset, n_folds, monitor_metrics)
+    cv_scores = search(search_space_dict)
+    sorted_cv_scores = sorted(cv_scores.items(), key=lambda x: x[1][monitor_metrics[0]], reverse=True)
+    print(sorted_cv_scores)
+
+    if retrain:
+        best_params, best_cv_scores = list(sorted_cv_scores)[0]
+        print(best_params, best_cv_scores)
+
+        preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
+        transformed_dataset = preprocessor.fit_transform(dataset)
+        model = linear.train_tree(
+                    transformed_dataset["train"]["y"],
+                    transformed_dataset["train"]["x"],
+                    best_params.linear_options,
+                    **asdict(best_params.tree),
+                )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/run_exp.py b/run_exp.py
deleted file mode 100644
index 602dd78e..00000000
--- a/run_exp.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import libmultilabel.linear as linear
-import grid
-
-import numpy as np
-from dataclasses import asdict
-
-
-def prune_model(*args, **kwargs):
-    pass
-
-
-if __name__ == "__main__":
-    import argparse
-    import logging
-
-    logging.basicConfig(level=logging.INFO)
-    np.random.seed(20260123)
-
-    parser = argparse.ArgumentParser(description="Parse command-line arguments.")
-    parser.add_argument("--dataset", type=str, default="EUR-Lex", help="Dataset name (e.g., AmazonCat-13K, EUR-Lex)")
-    parser.add_argument("--data_format", type=str, default="txt", help="Data format.")
-    args = parser.parse_args()
-
-    dataset = linear.load_dataset(args.data_format, f"data/{args.dataset}/train.{args.data_format}")  # , f"data/{dataset}/test.{args.data_format}"
-
-    retrain = True
-    n_folds = 3
-    monitor_metrics = ["P@1", "P@3", "P@5"]
-    search_space_dict = {
-        'max_features': [10000, 20000],
-        'K': [10, 100],
-        'min_df': [1, 2],
-        'prob_A': [2, 3],
-        'c': [0.1, 0.2],
-        'pruning_alpha': [0.9, 0.7],
-    }
-
-    search = grid.GridSearch(dataset, n_folds, monitor_metrics)
-    cv_scores = search(search_space_dict)
-    sorted_cv_scores = sorted(cv_scores.items(), key=lambda x: x[1][monitor_metrics[0]], reverse=True)
-    print(sorted_cv_scores)
-
-    if retrain:
-        # TODO: test set
-        best_params, best_cv_scores = list(sorted_cv_scores)[0]
-        print(best_params, best_cv_scores)
-
-        preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
-        transformed_dataset = preprocessor.fit_transform(dataset)
-        model = linear.train_tree(
-                    transformed_dataset["train"]["y"],
-                    transformed_dataset["train"]["x"],
-                    best_params.linear_options,
-                    **asdict(best_params.tree),
-                )

From d7ffd2971f94c2d86a295dbbf45d85a9a311f818 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 10 Feb 2026 06:44:53 +0000
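Subject: [PATCH 22/23] apply black formatter

Besides the formatting changes, this patch also changes behavior:
_do_parallel_train in libmultilabel/linear/linear.py now converts the
weights with np.asmatrix and is annotated as returning np.matrix, and the
k-means call in _build_tree (libmultilabel/linear/tree.py) switches
verbose from False to True.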

---
 grid.py                        | 100 +++++++++++++--------------------
 libmultilabel/linear/linear.py |   5 +-
 libmultilabel/linear/tree.py   |   2 +-
 3 files changed, 44 insertions(+), 63 deletions(-)

diff --git a/grid.py b/grid.py
index 3ffbb23a..e219bca3 100644
--- a/grid.py
+++ b/grid.py
@@ -44,20 +44,20 @@ class GridParameter:
         ("stop_words", str | list, field(default=None)),
         ("strip_accents", str | Callable, field(default=None)),
         ("tokenizer", Callable, field(default=None)),
-        ]
+    ]
     _tree_fields = [
         ("dmax", int, field(default=10)),
         ("K", int, field(default=8)),
-        ]
+    ]
     _linear_fields = [
         ("s", int, field(default=1)),
         ("c", float, field(default=1)),
         ("B", int, field(default=-1)),
-        ]
+    ]
     _predict_fields = [
         ("beam_width", int, field(default=10)),
         ("prob_A", int, field(default=3)),
-        ]
+    ]
 
     param_types = {
         "tfidf": make_dataclass("TfidfParams", _tfidf_fields, frozen=True, order=True),
@@ -66,8 +66,7 @@ class GridParameter:
         "predict": make_dataclass("PredictParams", _predict_fields, frozen=True, order=True),
     }
     _param_field_names = {
-        param_type: {f.name for f in fields(class_name)}
-        for param_type, class_name in param_types.items()
+        param_type: {f.name for f in fields(class_name)} for param_type, class_name in param_types.items()
     }
 
     def __init__(self, params: dict | None = None, fold: int = -1):
@@ -85,7 +84,7 @@ def __init__(self, params: dict | None = None, fold: int = -1):
     @property
     def linear_options(self):
         options = ""
-        for field_name in self._param_field_names['linear']:
+        for field_name in self._param_field_names["linear"]:
             options += f" -{field_name} {getattr(self.linear, field_name)}"
         return options.strip()
 
@@ -139,12 +138,12 @@ def take(data, idx):
             "data_format": self.datasets["data_format"],
             "train": {
                 "y": take(self.datasets["train"]["y"], train_idx),
-                "x": take(self.datasets["train"]["x"], train_idx)
+                "x": take(self.datasets["train"]["x"], train_idx),
             },
             "test": {
                 "y": take(self.datasets["train"]["y"], valid_idx),
-                "x": take(self.datasets["train"]["x"], valid_idx)
-            }
+                "x": take(self.datasets["train"]["x"], valid_idx),
+            },
         }
 
     def get_transformed_dataset(self, dataset, params):
@@ -158,11 +157,11 @@ def get_transformed_dataset(self, dataset, params):
             dict[str, np.matrix]: The keys should be "y" and "x".
         """
         tfidf_params = params.tfidf
-        self.no_cache = (tfidf_params != self._cached_params.tfidf)
+        self.no_cache = tfidf_params != self._cached_params.tfidf
         if self.no_cache:
             logging.info(f"TFIDF  - Preprocessing: {tfidf_params}")
             if self.datasets["data_format"] not in {"txt", "dataframe"}:
-                logging.info('The TF-IDF parameters are only meaningful for the “txt” and “dataframe” data formats.')
+                logging.info("The TF-IDF parameters are only meaningful for the “txt” and “dataframe” data formats.")
             with __silent__():
                 preprocessor = linear.Preprocessor(tfidf_params=asdict(tfidf_params))
                 self._cached_params.tfidf = tfidf_params
@@ -174,14 +173,16 @@ def get_transformed_dataset(self, dataset, params):
 
     def get_tree_root(self, y, x, params):
         tree_params = params.tree
-        self.no_cache |= (tree_params != self._cached_params.tree)
+        self.no_cache |= tree_params != self._cached_params.tree
         if self.no_cache:
             logging.info(f"Tree   - Preprocessing: {tree_params}")
             with __silent__():
                 label_representation = (y.T * x).tocsr()
                 label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
                 self._cached_params.tree = tree_params
-                self._cached_tree_root = _build_tree(label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params))
+                self._cached_tree_root = _build_tree(
+                    label_representation, np.arange(y.shape[1]), 0, **asdict(tree_params)
+                )
                 self._cached_tree_root.is_root = True
         else:
             logging.info(f"Tree   - Using cached data: {tree_params}")
@@ -213,7 +214,7 @@ def get_model(self, y, x, params):
                     x,
                     root=root,
                     options=params.linear_options,
-                    )
+                )
         else:
             logging.info(f"Model  - Using cached data: {linear_params}")
 
@@ -230,9 +231,7 @@ def compute_scores(self, y, x, model, params):
             self.param_metrics[params] = linear.get_metrics(self.monitor_metrics, num_classes=y.shape[1])
 
         for i in range(num_batches):
-            preds = model.predict_values(
-                x[i * batch_size : (i + 1) * batch_size],
-                **asdict(params.predict))
+            preds = model.predict_values(x[i * batch_size : (i + 1) * batch_size], **asdict(params.predict))
             target = y[i * batch_size : (i + 1) * batch_size].toarray()
             self.param_metrics[params].update(preds, target)
 
@@ -240,19 +239,24 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
         self.param_metrics.clear()
 
         param_names = search_space_dict.keys()
-        self.search_space = sorted([
-            GridParameter(dict(zip(param_names, param_values)))
-            for param_values in itertools.product(*search_space_dict.values())
-        ], reverse=True)
+        self.search_space = sorted(
+            [
+                GridParameter(dict(zip(param_names, param_values)))
+                for param_values in itertools.product(*search_space_dict.values())
+            ],
+            reverse=True,
+        )
 
         permutation = np.random.permutation(self.num_instances)
         index_per_fold = [
-            permutation[int(fold * self.num_instances / self.n_folds):int((fold+1) * self.num_instances / self.n_folds)]
+            permutation[
+                int(fold * self.num_instances / self.n_folds) : int((fold + 1) * self.num_instances / self.n_folds)
+            ]
             for fold in range(self.n_folds)
         ]
 
         for fold in range(self.n_folds):
-            train_idx = np.concatenate(index_per_fold[:fold] + index_per_fold[fold+1:])
+            train_idx = np.concatenate(index_per_fold[:fold] + index_per_fold[fold + 1 :])
             valid_idx = index_per_fold[fold]
             fold_dataset = self.get_fold_dataset(train_idx, valid_idx)
 
@@ -261,17 +265,8 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
                 logging.info(f"Status - Running fold {fold}, params: {params}")
 
                 transformed_dataset = self.get_transformed_dataset(fold_dataset, params)
-                model = self.get_model(
-                    transformed_dataset["train"]["y"],
-                    transformed_dataset["train"]["x"],
-                    params
-                    )
-                self.compute_scores(
-                    transformed_dataset["test"]["y"],
-                    transformed_dataset["test"]["x"],
-                    model,
-                    params
-                    )
+                model = self.get_model(transformed_dataset["train"]["y"], transformed_dataset["train"]["x"], params)
+                self.compute_scores(transformed_dataset["test"]["y"], transformed_dataset["test"]["x"], model, params)
 
         return {params: metrics.compute() for params, metrics in self.param_metrics.items()}
 
@@ -279,24 +274,11 @@ def __call__(self, search_space_dict: dict[str, list]) -> dict[GridParameter, di
 @timer
 def main():
     parser = argparse.ArgumentParser()
+    parser.add_argument("--seed", type=int, help="Random seed.")
+    parser.add_argument("--training_file", help="Path to training data.")
+    parser.add_argument("--test_file", help="Path to test data.")
     parser.add_argument(
-        "--seed",
-        type=int,
-        help="Random seed."
-    )
-    parser.add_argument(
-        "--training_file",
-        help="Path to training data."
-    )
-    parser.add_argument(
-        "--test_file",
-        help="Path to test data."
-    )
-    parser.add_argument(
-        "--data_format",
-        type=str,
-        default="txt",
-        help="'svm' for SVM format or 'txt' for LibMultiLabel format."
+        "--data_format", type=str, default="txt", help="'svm' for SVM format or 'txt' for LibMultiLabel format."
     )
     args = parser.parse_args()
 
@@ -313,9 +295,7 @@ def main():
     retrain = True
     n_folds = 3
     monitor_metrics = ["P@1", "P@3", "P@5"]
-    search_space_dict = {
-        'max_features': [10000]
-    }
+    search_space_dict = {"max_features": [10000]}
 
     search = GridSearch(dataset, n_folds, monitor_metrics)
     cv_scores = search(search_space_dict)
@@ -329,11 +309,11 @@ def main():
         preprocessor = linear.Preprocessor(tfidf_params=asdict(best_params.tfidf))
         transformed_dataset = preprocessor.fit_transform(dataset)
         model = linear.train_tree(
-                    transformed_dataset["train"]["y"],
-                    transformed_dataset["train"]["x"],
-                    best_params.linear_options,
-                    **asdict(best_params.tree),
-                )
+            transformed_dataset["train"]["y"],
+            transformed_dataset["train"]["x"],
+            best_params.linear_options,
+            **asdict(best_params.tree),
+        )
 
 
 if __name__ == "__main__":
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
index 2ecade65..04d25a21 100644
--- a/libmultilabel/linear/linear.py
+++ b/libmultilabel/linear/linear.py
@@ -151,14 +151,14 @@ def del_trainer(cls):
         for key in list(cls.__annotations__):
             delattr(cls, key)
 
-    def _do_parallel_train(self, y: np.ndarray) -> np.ndarray:
+    def _do_parallel_train(self, y: np.ndarray) -> np.matrix:
         """Wrap around liblinear.liblinearutil.train.
 
         Args:
             y (np.ndarray): A +1/-1 array with dimensions number of instances * 1.
 
         Returns:
-            np.ndarray: The weights.
+            np.matrix: The weights.
         """
         if y.shape[0] == 0:
             return np.matrix(np.zeros((self.prob.n, 1)))
@@ -168,6 +168,7 @@ def _do_parallel_train(self, y: np.ndarray) -> np.ndarray:
         model = train(prob, self.param)
 
         w = np.ctypeslib.as_array(model.w, (self.prob.n, 1))
+        w = np.asmatrix(w)
         # When all labels are -1, we must flip the sign of the weights
         # because LIBLINEAR treats the first label as positive, which
         # is -1 in this case. But for our usage we need them to be negative.
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
index 9dba351e..eb4934eb 100644
--- a/libmultilabel/linear/tree.py
+++ b/libmultilabel/linear/tree.py
@@ -306,7 +306,7 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
             kmeans_algo = LloydKmeans
 
         kmeans = kmeans_algo(
-            n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=False
+            n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=True
         )
         metalabels = kmeans.fit(label_representation)
 

From 70fdfcb6d52cb3f0abe1c0b37b549f464e527a81 Mon Sep 17 00:00:00 2001
From: chcwww <chcwww1@gmail.com>
Date: Tue, 10 Feb 2026 09:02:09 +0000
Subject: [PATCH 23/23] update search_space_dict in the grid search example

---
 grid.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/grid.py b/grid.py
index e219bca3..2a83b3c6 100644
--- a/grid.py
+++ b/grid.py
@@ -295,7 +295,13 @@ def main():
     retrain = True
     n_folds = 3
     monitor_metrics = ["P@1", "P@3", "P@5"]
-    search_space_dict = {"max_features": [10000]}
+    search_space_dict = {
+        "max_features": [10000, 20000, 100000],
+        "K": [10, 50, 100],
+        "min_df": [1, 2],
+        "prob_A": [2, 3, 4],
+        "c": [0.1, 0.2, 1, 10],
+    }
 
     search = GridSearch(dataset, n_folds, monitor_metrics)
     cv_scores = search(search_space_dict)