From d27b456b63c70c19149e723305d571952fd3919c Mon Sep 17 00:00:00 2001
From: Samanvya Tripathi
Date: Wed, 25 Mar 2026 20:44:26 -0400
Subject: [PATCH] fix(inference): read max_seq_length and load_in_4bit from
 config instead of hardcoding

_load_unsloth hardcoded max_seq_length=2048 and load_in_4bit=True instead
of reading from InferenceConfig. Added both fields to InferenceConfig with
matching defaults. Users training with max_seq_length=4096 can now set it
for inference too.

Fixes #27
---
 src/alignrl/inference.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/alignrl/inference.py b/src/alignrl/inference.py
index 6f39c31..f00a938 100644
--- a/src/alignrl/inference.py
+++ b/src/alignrl/inference.py
@@ -14,6 +14,8 @@ class InferenceConfig(BaseModel):
     max_tokens: int = 512
     top_p: float = 0.9
     backend: str = "unsloth" # "unsloth", "vllm", or "mlx"
+    max_seq_length: int = 2048
+    load_in_4bit: bool = True
 
 
 def build_prompt(user_message: str, system: str | None = None) -> list[dict[str, str]]:
@@ -48,8 +50,8 @@ def _load_unsloth(self) -> None:
         model_path = self.config.adapter_path or self.config.model_name
         self._model, self._tokenizer = FastLanguageModel.from_pretrained(
             model_name=model_path,
-            max_seq_length=2048,
-            load_in_4bit=True,
+            max_seq_length=self.config.max_seq_length,
+            load_in_4bit=self.config.load_in_4bit,
         )
 
         from alignrl.config import ensure_chat_template