Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions hparams/hparams.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
"blocks_per_window": 65,
"windows_per_weights": 5,
"momentum_decay": 0.95,
"topk_compression": 32,
"topk_compression": 128,
"target_chunk": 64,
"use_dct": false,
"binary_score_ma_alpha": 0.05,
"moving_average_window": 5,
"model_size": "70B",
Expand Down
1 change: 1 addition & 0 deletions neurons/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import bittensor as bt
import torch
import torch.distributed as dist
from lm_eval import simple_evaluate
from torch.cuda import device_count as _cuda_device_count
from torch.utils.data import DataLoader
from torchtitan.components.loss import cross_entropy_loss
Expand Down
3 changes: 1 addition & 2 deletions neurons/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,7 @@ def __init__(self):
)

enc = self.transformer.encode(
torch.empty(p.shape, dtype=torch.float16, device=self.device),
use_dct=self.hparams.use_dct,
torch.empty(p.shape, dtype=torch.float16, device=self.device)
)
_, _, xshape, totalk, _ = self.compressor.compress(
enc,
Expand Down
1 change: 0 additions & 1 deletion neurons/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,6 @@ def outer_step(self, gather_result):
device=str(self.device),
is_master=self.is_master,
world_size=self.world_size,
use_dct=self.hparams.use_dct,
)
return

Expand Down
32 changes: 26 additions & 6 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,41 @@ def __init__(self):
self.xshapes = {}
self.totalks = {}
# Use bare_model like the miner does to ensure consistent parameter iteration
import time
total_compress_time = 0.0
total_encode_time = 0.0

# Enable debug timing in compressor
self.compressor._debug_timing = True

for n, p in self.model.named_parameters():
# Use the same approach as miner for creating xshapes and totalks
encode_start = time.time()
enc = self.transformer.encode(
torch.empty(p.shape, dtype=torch.float16, device=self.device),
use_dct=self.hparams.use_dct,
torch.empty(p.shape, dtype=torch.float16, device=self.device)
)
Comment on lines 272 to 274
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

API update to drop use_dct — LGTM.

ChunkingTransformer.encode/decode now called without use_dct; matches the new signatures.

Run to confirm no stale use_dct remains:

Also applies to: 2805-2810


🏁 Script executed:

#!/bin/bash
rg -nP --hidden --glob '!*site-packages*' '\buse_dct\b|outer_step\s*\([^)]*use_dct' -C2

Length of output: 390


Remove stale use_dct reference in test config

  • In scripts/local_miner_test.py (line 179), remove or update the "use_dct": False entry so tests align with the updated ChunkingTransformer API.
🤖 Prompt for AI Agents
In neurons/validator.py around lines 264 to 266 (and the related test at
scripts/local_miner_test.py line 179), there's a stale test config entry
"use_dct": False that no longer matches the updated ChunkingTransformer API;
remove the "use_dct" key from the test config (or replace it with the current
API-equivalent option if the new transformer expects an explicit setting), then
run the tests and adjust any assertions that depended on the previous behavior
so they reflect the transformer’s current chunking behavior.

encode_time = time.time() - encode_start

compress_start = time.time()
_, _, xshape, totalk, _ = self.compressor.compress(
enc,
self.hparams.topk_compression,
)
compress_time = time.time() - compress_start

self.xshapes[n] = xshape
self.totalks[n] = totalk

total_encode_time += encode_time
total_compress_time += compress_time

# Log timing for each layer
tplr.logger.info(f"[COMPRESS TIMING] {n}: encode={encode_time:.3f}s, compress={compress_time:.3f}s, shape={p.shape}")

tplr.logger.info(f"[COMPRESS TIMING TOTAL] encode={total_encode_time:.3f}s, compress={total_compress_time:.3f}s")

# Disable debug timing after initialization
self.compressor._debug_timing = False

self.openskill_model = PlackettLuce(
beta=self.hparams.openskill_beta, tau=self.hparams.openskill_tau
Expand Down Expand Up @@ -1698,7 +1721,6 @@ async def run(self):
device=cast(str, self.device),
is_master=self.is_master,
world_size=self.world_size,
use_dct=self.hparams.use_dct,
wandb_run=self.wandb if self.is_master else None,
global_step=self.global_step,
)
Expand Down Expand Up @@ -2804,9 +2826,7 @@ def update_model_with_gradient(
quant_params,
)

full_grad_src = self.transformer.decode(
decompressed, use_dct=self.hparams.use_dct
)
full_grad_src = self.transformer.decode(decompressed)
# Single conversion to target dtype+device to avoid extra temporaries
full_grad_src = full_grad_src.to(
dtype=p.dtype, device=p.device, non_blocking=True
Expand Down
103 changes: 59 additions & 44 deletions src/tplr/comms.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import bittensor as bt
import boto3
import botocore
import numpy as np
import torch
import torch.distributed as dist
from aiobotocore.client import AioBaseClient
Expand All @@ -48,7 +49,7 @@

import tplr
from tplr.chain import ChainManager
from tplr.compress import TopKCompressor, unpack_12bit_indices
from tplr.compress import TopKCompressor, decode_batch_rows
from tplr.config import BUCKET_SECRETS, client_config
from tplr.schemas import Bucket, CommsGetResult

Expand Down Expand Up @@ -2622,68 +2623,82 @@ def check_compressed_indices(
"""
Validates the integrity and format of compressed gradient indices.

This is a crucial security and stability check to ensure that gradients
received from peers are well-formed. It verifies that indices are within
the expected bounds and that the compression format (e.g., 12-bit packing)
is correctly applied.
This ensures indices are within bounds and that the **new Rice/bitmap**
codec payload matches the provided values tensor shape (top‑k).

Args:
param_name (str): The name of the parameter being checked.
idxs (torch.Tensor): The tensor of indices.
totalk (int): The total number of elements in the original uncompressed tensor.
allowed_topk (int | None, optional): The expected number of top-k values.
Defaults to the hparams configuration.
vals (torch.Tensor | None, optional): The corresponding values tensor,
required for validating 12-bit packed indices. Defaults to None.
vals (torch.Tensor | None, optional): The corresponding values tensor.

Raises:
ValueError: If any validation check fails, such as out-of-bounds
indices, incorrect data types, or malformed packed data.
indices, incorrect data types, or malformed payload.
"""
allowed_topk = (
min(self.hparams.topk_compression, totalk)
if allowed_topk is None
else min(allowed_topk, totalk)
)

def _bounds_check(t: torch.Tensor):
"""fast min/max bounds check"""
if t.numel() == 0:
raise ValueError(f"[{param_name}] empty index list")
if t.min().item() < 0 or t.max().item() >= totalk:
bad = t[(t < 0) | (t >= totalk)][0].item()
raise ValueError(
f"[{param_name}] Index {bad} out of bounds (totalk = {totalk})"
)
if not isinstance(idxs, torch.Tensor):
raise ValueError(
f"[{param_name}] Expected tensor for indices, got {type(idxs)}"
)
if vals is None:
raise ValueError(
f"[{param_name}] Values tensor required for index validation"
)
if idxs.dtype != torch.uint8:
raise ValueError(
f"[{param_name}] Expected uint8 (Rice/bitmap payload), got {idxs.dtype}"
)
if idxs.numel() == 0:
raise ValueError(f"[{param_name}] Empty indices payload")

# Handle 12-bit packed index format only
if isinstance(idxs, torch.Tensor):
if idxs.dtype != torch.uint8:
raise ValueError(
f"[{param_name}] Expected uint8 for 12-bit packed indices, got {idxs.dtype}"
)
# 12-bit packed format is the only supported format
if vals is None:
raise ValueError(
f"[{param_name}] Values tensor required to validate 12-bit packed indices"
)
if idxs.numel() == 0:
raise ValueError(f"[{param_name}] Empty packed indices tensor")
# Decode (CPU) and perform structural checks
try:
payload_bytes = idxs.detach().cpu().numpy().tobytes()
rows_list, C, N = decode_batch_rows(payload_bytes)
except Exception as e:
raise ValueError(f"[{param_name}] Failed to decode indices payload: {e}")

# Unpack using the values shape
try:
unpacked = unpack_12bit_indices(idxs, vals.shape)
# Validate that the last dimension matches allowed_topk
if unpacked.shape[-1] != allowed_topk:
raise ValueError(
f"[{param_name}] Invalid topk dimension: "
f"shape[-1]={unpacked.shape[-1]} but expected {allowed_topk}"
)
_bounds_check(unpacked)
except Exception as e:
raise ValueError(f"[{param_name}] Failed to unpack 12-bit indices: {e}")
else:
raise ValueError(f"[{param_name}] Expected tensor but got {type(idxs)}")
if C != totalk:
raise ValueError(
f"[{param_name}] Payload column size C={C} but expected {totalk}"
)

# compute expected rows from values shape (flatten all but last dim)
if vals.ndim == 0:
raise ValueError(f"[{param_name}] Values tensor has no top‑k dimension")
expected_rows = int(np.prod(vals.shape[:-1])) if vals.ndim > 1 else 1
if N != expected_rows:
raise ValueError(
f"[{param_name}] Payload rows N={N} but values imply {expected_rows}"
)

k = vals.shape[-1]
if k != allowed_topk:
raise ValueError(
f"[{param_name}] Values top‑k={k} but allowed_topk={allowed_topk}"
)
if any(len(r) != k for r in rows_list):
raise ValueError(
f"[{param_name}] At least one row has mismatched top‑k size"
)

# bounds check without materialising full tensor
max_idx = max((max(r) if len(r) > 0 else -1) for r in rows_list)
min_idx = (
min((min(r) if len(r) > 0 else 0) for r in rows_list) if rows_list else 0
)
if min_idx < 0 or max_idx >= totalk:
raise ValueError(
f"[{param_name}] Index out of bounds (min={min_idx}, max={max_idx}, totalk={totalk})"
)

Comment on lines +2626 to 2702
Copy link
Copy Markdown

@coderabbitai coderabbitai bot Aug 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

check_compressed_indices rejects legacy/int64 indices; add backward‑compat path.

Today this function hard‑fails unless idxs is uint8 (Rice/bitmap). Older miners or historical payloads may still send int64 index tensors. Decompression paths already handle both; validation should too to avoid skipping otherwise valid peers.

Apply:

@@
-        if idxs.dtype != torch.uint8:
-            raise ValueError(
-                f"[{param_name}] Expected uint8 (Rice/bitmap payload), got {idxs.dtype}"
-            )
+        if idxs.dtype == torch.uint8:
+            pass  # Rice/bitmap fast-path below
+        elif idxs.dtype in (torch.int64, torch.long):
+            # Legacy/unpacked path: shape and bounds checks only
+            if vals is None:
+                raise ValueError(f"[{param_name}] Values tensor required for index validation")
+            if tuple(idxs.shape) != tuple(vals.shape):
+                raise ValueError(f"[{param_name}] Indices shape {tuple(idxs.shape)} != values shape {tuple(vals.shape)}")
+            k = vals.shape[-1] if vals.ndim > 0 else 0
+            if k != allowed_topk:
+                raise ValueError(f"[{param_name}] Values top‑k={k} but allowed_topk={allowed_topk}")
+            if idxs.numel() > 0:
+                i_min = int(idxs.min().item())
+                i_max = int(idxs.max().item())
+                if i_min < 0 or i_max >= totalk:
+                    raise ValueError(f"[{param_name}] Index out of bounds (min={i_min}, max={i_max}, totalk={totalk})")
+            return  # legacy path validated
+        else:
+            raise ValueError(
+                f"[{param_name}] Expected uint8 (Rice/bitmap payload), got {idxs.dtype}"
+            )
@@
-            rows_list, C, N = decode_batch_rows(payload_bytes)
+            rows_list, C, N = decode_batch_rows(payload_bytes)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
This ensures indices are within bounds and that the **new Rice/bitmap**
codec payload matches the provided values tensor shape (top-k).
Args:
param_name (str): The name of the parameter being checked.
idxs (torch.Tensor): The tensor of indices.
totalk (int): The total number of elements in the original uncompressed tensor.
allowed_topk (int | None, optional): The expected number of top-k values.
Defaults to the hparams configuration.
vals (torch.Tensor | None, optional): The corresponding values tensor,
required for validating 12-bit packed indices. Defaults to None.
vals (torch.Tensor | None, optional): The corresponding values tensor.
Raises:
ValueError: If any validation check fails, such as out-of-bounds
indices, incorrect data types, or malformed packed data.
indices, incorrect data types, or malformed payload.
"""
allowed_topk = (
min(self.hparams.topk_compression, totalk)
if allowed_topk is None
else min(allowed_topk, totalk)
)
def _bounds_check(t: torch.Tensor):
"""fast min/max bounds check"""
if t.numel() == 0:
raise ValueError(f"[{param_name}] empty index list")
if t.min().item() < 0 or t.max().item() >= totalk:
bad = t[(t < 0) | (t >= totalk)][0].item()
raise ValueError(
f"[{param_name}] Index {bad} out of bounds (totalk = {totalk})"
)
if not isinstance(idxs, torch.Tensor):
raise ValueError(
f"[{param_name}] Expected tensor for indices, got {type(idxs)}"
)
if vals is None:
raise ValueError(
f"[{param_name}] Values tensor required for index validation"
)
if idxs.dtype != torch.uint8:
raise ValueError(
f"[{param_name}] Expected uint8 (Rice/bitmap payload), got {idxs.dtype}"
)
if idxs.numel() == 0:
raise ValueError(f"[{param_name}] Empty indices payload")
# Handle 12-bit packed index format only
if isinstance(idxs, torch.Tensor):
if idxs.dtype != torch.uint8:
raise ValueError(
f"[{param_name}] Expected uint8 for 12-bit packed indices, got {idxs.dtype}"
)
# 12-bit packed format is the only supported format
if vals is None:
raise ValueError(
f"[{param_name}] Values tensor required to validate 12-bit packed indices"
)
if idxs.numel() == 0:
raise ValueError(f"[{param_name}] Empty packed indices tensor")
# Decode (CPU) and perform structural checks
try:
payload_bytes = idxs.detach().cpu().numpy().tobytes()
rows_list, C, N = decode_batch_rows(payload_bytes)
except Exception as e:
raise ValueError(f"[{param_name}] Failed to decode indices payload: {e}")
# Unpack using the values shape
try:
unpacked = unpack_12bit_indices(idxs, vals.shape)
# Validate that the last dimension matches allowed_topk
if unpacked.shape[-1] != allowed_topk:
raise ValueError(
f"[{param_name}] Invalid topk dimension: "
f"shape[-1]={unpacked.shape[-1]} but expected {allowed_topk}"
)
_bounds_check(unpacked)
except Exception as e:
raise ValueError(f"[{param_name}] Failed to unpack 12-bit indices: {e}")
else:
raise ValueError(f"[{param_name}] Expected tensor but got {type(idxs)}")
if C != totalk:
raise ValueError(
f"[{param_name}] Payload column size C={C} but expected {totalk}"
)
# compute expected rows from values shape (flatten all but last dim)
if vals.ndim == 0:
raise ValueError(f"[{param_name}] Values tensor has no top‑k dimension")
expected_rows = int(np.prod(vals.shape[:-1])) if vals.ndim > 1 else 1
if N != expected_rows:
raise ValueError(
f"[{param_name}] Payload rows N={N} but values imply {expected_rows}"
)
k = vals.shape[-1]
if k != allowed_topk:
raise ValueError(
f"[{param_name}] Values top‑k={k} but allowed_topk={allowed_topk}"
)
if any(len(r) != k for r in rows_list):
raise ValueError(
f"[{param_name}] At least one row has mismatched top‑k size"
)
# bounds check without materialising full tensor
max_idx = max((max(r) if len(r) > 0 else -1) for r in rows_list)
min_idx = (
min((min(r) if len(r) > 0 else 0) for r in rows_list) if rows_list else 0
)
if min_idx < 0 or max_idx >= totalk:
raise ValueError(
f"[{param_name}] Index out of bounds (min={min_idx}, max={max_idx}, totalk={totalk})"
)
if not isinstance(idxs, torch.Tensor):
raise ValueError(
f"[{param_name}] Expected tensor for indices, got {type(idxs)}"
)
if vals is None:
raise ValueError(
f"[{param_name}] Values tensor required for index validation"
)
if idxs.dtype == torch.uint8:
pass # Rice/bitmap fast-path below
elif idxs.dtype in (torch.int64, torch.long):
# Legacy/unpacked path: shape and bounds checks only
if vals is None:
raise ValueError(f"[{param_name}] Values tensor required for index validation")
if tuple(idxs.shape) != tuple(vals.shape):
raise ValueError(f"[{param_name}] Indices shape {tuple(idxs.shape)} != values shape {tuple(vals.shape)}")
k = vals.shape[-1] if vals.ndim > 0 else 0
if k != allowed_topk:
raise ValueError(f"[{param_name}] Values top-k={k} but allowed_topk={allowed_topk}")
if idxs.numel() > 0:
i_min = int(idxs.min().item())
i_max = int(idxs.max().item())
if i_min < 0 or i_max >= totalk:
raise ValueError(f"[{param_name}] Index out of bounds (min={i_min}, max={i_max}, totalk={totalk})")
return # legacy path validated
else:
raise ValueError(
f"[{param_name}] Expected uint8 (Rice/bitmap payload), got {idxs.dtype}"
)
if idxs.numel() == 0:
raise ValueError(f"[{param_name}] Empty indices payload")
# Decode (CPU) and perform structural checks
try:
payload_bytes = idxs.detach().cpu().numpy().tobytes()
rows_list, C, N = decode_batch_rows(payload_bytes)
except Exception as e:
raise ValueError(f"[{param_name}] Failed to decode indices payload: {e}")
🤖 Prompt for AI Agents
In src/tplr/comms.py around lines 2626 to 2702, the validation currently rejects
idxs unless dtype is torch.uint8; update it to accept legacy torch.int64 index
tensors as well by adding a backward-compat branch: if idxs.dtype is
torch.int64, skip the Rice/bitmap decode and instead interpret the tensor as raw
integer indices (reshape/flatten consistent with vals shape and top-k), build
rows_list from those indices, compute C, N, min/max bounds from that legacy
format, and run the same consistency checks (C vs totalk, N vs expected_rows, k
vs allowed_topk, per-row length, bounds). Keep the existing uint8 path unchanged
and only raise for other dtypes, ensuring all error messages remain
param-prefixed.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want the backwards compability

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@joellidin, understood. I'll respect your decision not to include backward compatibility for the legacy index formats. The current implementation correctly enforces that all peers must use the new Rice/bitmap encoding format (uint8 payload), which keeps the validation logic clean and ensures consistency across the network.


✏️ Learnings added
Learnt from: joellidin
PR: tplr-ai/templar#553
File: src/tplr/comms.py:2626-2702
Timestamp: 2025-08-31T15:02:03.568Z
Learning: The user joellidin has explicitly decided not to implement backward compatibility for legacy int64 index tensors in the check_compressed_indices function in src/tplr/comms.py. The function should only accept uint8 Rice/bitmap payload format and reject other formats to enforce network-wide migration to the new compression scheme.

Note: Learnings are effective only in the context of similar code segments. To apply general coding guidelines, please configure review instructions. You can manage existing learnings in the app.

Comment on lines +2626 to 2702
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Tighten validation: int-cast topk, guard vals type, drop numpy by using math.prod.

Prevents surprising float topk from misconfigured hparams, avoids attribute errors if vals isn’t a tensor, and removes an unnecessary dependency.

Apply:

- import numpy as np
+ # (numpy no longer needed here)

@@
-        allowed_topk = (
-            min(self.hparams.topk_compression, totalk)
-            if allowed_topk is None
-            else min(allowed_topk, totalk)
-        )
+        allowed_topk = (
+            min(self.hparams.topk_compression, totalk)
+            if allowed_topk is None
+            else min(allowed_topk, totalk)
+        )
+        try:
+            allowed_topk = int(allowed_topk)
+        except Exception:
+            raise ValueError(f"[{param_name}] allowed_topk must be an integer-compatible value")

@@
-        if vals is None:
+        if vals is None:
             raise ValueError(
                 f"[{param_name}] Values tensor required for index validation"
             )
+        if not torch.is_tensor(vals):
+            raise ValueError(f"[{param_name}] Values must be a torch.Tensor, got {type(vals)}")

@@
-        expected_rows = int(np.prod(vals.shape[:-1])) if vals.ndim > 1 else 1
+        expected_rows = int(math.prod(vals.shape[:-1])) if vals.ndim > 1 else 1

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In src/tplr/comms.py around lines 2626 to 2702, tighten validation by (1)
enforcing vals is a torch.Tensor before using .ndim/.shape (raise a ValueError
if not), (2) casting allowed_topk/k to int to avoid float surprises, and (3)
replace numpy.prod usage with math.prod over vals.shape[:-1] (import math) to
remove the numpy dependency; keep all existing structural and bounds checks but
perform the type guard early and use int(...) when comparing top-k values.

async def s3_get_object_size(self, bucket: Bucket, key: str) -> int | None:
"""
Expand Down
33 changes: 33 additions & 0 deletions src/tplr/compress/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# The MIT License (MIT)
# © 2025 tplr.ai
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from .bits import (
decode_batch_rows, # decoder (CPU)
encode_batch_rows, # GPU-accelerated encoder → bytes + perm + meta
)
from .pack12 import pack_12bit_indices, unpack_12bit_indices # legacy
from .topk import ChunkingTransformer, TopKCompressor

__all__ = [
# High level
"TopKCompressor",
"ChunkingTransformer",
"encode_batch_rows",
"decode_batch_rows",
"pack_12bit_indices",
"unpack_12bit_indices",
]
Loading
Loading