Commit 61d0b82

Update MLX model patterns and reduce max_tokens in eval script
Added '-mlx-' to the list of MLX model patterns in should_use_mlx for broader matching. Reduced max_tokens from 32768 to 8192 in get_llm_response within eval_math500_benchmark.py to limit token usage.
1 parent e004b2e commit 61d0b82

2 files changed (+3, -2 lines)

optillm/inference.py

Lines changed: 2 additions & 1 deletion

@@ -189,7 +189,8 @@ def should_use_mlx(model_id: str) -> bool:
     # Models that should use MLX
     mlx_patterns = [
         "mlx-community/",
-        "mlx-"
+        "mlx-",
+        "-mlx-"
     ]

     # Known problematic models that should prefer MLX on Apple Silicon
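Since the hunk shows only the pattern list, here is a minimal sketch of how such a list is plausibly consumed. This is illustrative, not the verbatim optillm code: the function-body details below (the lowercasing and the substring test) are assumptions.

    # Hedged sketch, not the actual optillm implementation.
    def should_use_mlx_sketch(model_id: str) -> bool:
        # Mirrors the pattern list from the diff above.
        mlx_patterns = [
            "mlx-community/",
            "mlx-",
            "-mlx-",
        ]
        model_lower = model_id.lower()
        # Assumption: a pattern occurring anywhere in the ID selects MLX.
        return any(pattern in model_lower for pattern in mlx_patterns)

Under this assumed substring test, an ID such as "someorg/Qwen2.5-7B-mlx-4bit" (hypothetical) is matched via "-mlx-". Note that a plain "in" check on "mlx-" would already cover that case, so the real check is likely more position-sensitive than this sketch; the diff alone does not show it.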

scripts/eval_math500_benchmark.py

Lines changed: 1 addition & 1 deletion

@@ -692,7 +692,7 @@ def get_llm_response(problem: str, model: str) -> str:
         messages=[
             {"role": "user", "content": SYSTEM_PROMPT + "\n" + problem}
         ],
-        max_tokens=32768, # for thinking models, we need to use a lot more tokens
+        max_tokens=8192, # for thinking models, we need to use a lot more tokens
         # extra_body = {
         #     "decoding" : "thinkdeeper",
         # }
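For context, here is a hedged reconstruction of the call being tuned. It assumes the script uses the official openai Python client against an OpenAI-compatible endpoint and that SYSTEM_PROMPT is defined elsewhere in the script (the hunk references it); client setup and error handling are simplified.

    # Hedged sketch of the surrounding call, not the verbatim script code.
    from openai import OpenAI

    client = OpenAI()  # assumption: an OpenAI-compatible endpoint
    SYSTEM_PROMPT = "..."  # placeholder; the real prompt lives elsewhere in the script

    def get_llm_response_sketch(problem: str, model: str) -> str:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "user", "content": SYSTEM_PROMPT + "\n" + problem}
            ],
            max_tokens=8192,  # this commit lowers the cap from 32768
        )
        return response.choices[0].message.content

Lowering max_tokens bounds per-problem cost and latency; the trade-off, which the inline comment in the diff alludes to, is that long "thinking" completions can now be truncated at 8192 tokens.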
