From d27b456b63c70c19149e723305d571952fd3919c Mon Sep 17 00:00:00 2001
From: Samanvya Tripathi
Date: Wed, 25 Mar 2026 20:44:26 -0400
Subject: [PATCH] fix(inference): read max_seq_length and load_in_4bit from
 config instead of hardcoding

_load_unsloth hardcoded max_seq_length=2048 and load_in_4bit=True instead
of reading from InferenceConfig. Added both fields to InferenceConfig with
matching defaults. Users training with max_seq_length=4096 can now set it
for inference too.

Fixes #27
---
 src/alignrl/inference.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/alignrl/inference.py b/src/alignrl/inference.py
index 6f39c31..f00a938 100644
--- a/src/alignrl/inference.py
+++ b/src/alignrl/inference.py
@@ -14,6 +14,8 @@ class InferenceConfig(BaseModel):
     max_tokens: int = 512
     top_p: float = 0.9
     backend: str = "unsloth" # "unsloth", "vllm", or "mlx"
+    max_seq_length: int = 2048
+    load_in_4bit: bool = True
 
 
 def build_prompt(user_message: str, system: str | None = None) -> list[dict[str, str]]:
@@ -48,8 +50,8 @@ def _load_unsloth(self) -> None:
         model_path = self.config.adapter_path or self.config.model_name
         self._model, self._tokenizer = FastLanguageModel.from_pretrained(
             model_name=model_path,
-            max_seq_length=2048,
-            load_in_4bit=True,
+            max_seq_length=self.config.max_seq_length,
+            load_in_4bit=self.config.load_in_4bit,
         )
 
         from alignrl.config import ensure_chat_template