6 changes: 6 additions & 0 deletions README.md
@@ -129,6 +129,12 @@ make sure you have `uv` installed.
```
This will install additional dependencies for development and building documentation.
The (experimental) auto-kernel application is also under a separate group for now.
4. *(Optional)* Install support for MLX-optimised LLMs:
```bash
uv sync --group group_name .... --extra apple-silicon --prerelease=allow
```
Select all the groups you need and append `--extra apple-silicon --prerelease=allow` (for example, `uv sync --group dev --group docs --extra apple-silicon --prerelease=allow`) to install the libraries that enable MLX-optimised LLMs to be used in iohblade.

## 💻 Quick Start

8 changes: 7 additions & 1 deletion docs/Installation.rst
@@ -30,4 +30,10 @@ You can also install the package from source using **uv** (0.7.9).

.. code-block:: bash

uv install --group dev --group docs
uv sync --group dev --group docs

4. (Optional) Install support for running MLX-optimised LLMs locally on Apple silicon machines:

.. code-block:: bash

uv sync --group dev --group docs --extra apple-silicon --prerelease=allow
22 changes: 14 additions & 8 deletions iohblade/benchmarks/geometry/heilbronn_triangle.py
@@ -56,8 +56,7 @@ def __init__(
- (None, points) where points is ndarray (n,2) interpreted inside a default unit-area triangle, or
- (triangle, points): with triangle of shape (3,2); both are rescaled so that the triangle has area 1.
- After scaling, the points must lie inside the triangle, within the given tolerance.
- The solution is scored as minimum triangle area formed by picking 3 of the n points.
- The optimisation goal is to maximise the score.
- The optimisation goal is to maximise the area of the smallest triangle formed by picking 3 of the n points.
"""
self.task_prompt += (
f"- The tolerence of the solution is set to {self.tolerance}"
@@ -119,12 +118,15 @@ def evaluate(self, solution, explogger=None):
safe = prepare_namespace(code, self.dependencies)
local_ns = {}
exec(code, safe, local_ns)
local_ns = clean_local_namespace(local_ns, safe)
cls = next(v for v in local_ns.values() if isinstance(v, type))
try:
triangle, points = cls(self.n_points)()
except:
cls = local_ns[solution.name]
if self.best_solution is None:
triangle, points = cls(self.n_points)()
else:
triangle, points = cls(
self.n_points,
best_known_configuration=self.best_solution,
in_triangle=self.triangle_best_solution,
)()
except Exception as e:
# tb = e.__traceback__
solution.set_scores(
@@ -154,7 +156,11 @@ def evaluate(self, solution, explogger=None):
f"Area of Smallest Triangle={min_area:.6g}, best known={self.best_known}",
)
except Exception as e:
solution.set_scores(float("-inf"), f"calc-error {e}", "calc-failed")
solution.set_scores(
float("-inf"),
f"calc-error {e}.",
f"Values Returned: Triangle {triangle}, points: {points}",
)
return solution

def test(self, solution):
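For intuition about the scoring described in this benchmark's docstring, here is a minimal standalone sketch (illustrative only, not the benchmark's own evaluator) of the quantity being maximised: the area of the smallest triangle spanned by any 3 of the n points.

```python
from itertools import combinations

import numpy as np


def smallest_triangle_area(points: np.ndarray) -> float:
    """Area of the smallest triangle formed by any 3 of the given (n, 2) points."""
    best = float("inf")
    for a, b, c in combinations(points, 3):
        # Shoelace formula for the area of triangle (a, b, c).
        area = 0.5 * abs(
            (b[0] - a[0]) * (c[1] - a[1]) - (c[0] - a[0]) * (b[1] - a[1])
        )
        best = min(best, area)
    return best  # The optimisation goal is to maximise this value.
```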
150 changes: 150 additions & 0 deletions iohblade/llm.py
@@ -13,6 +13,16 @@
import anthropic
import ollama
import openai
try:
    import lmstudio as lms  # Optional, platform-dependent dependency.
except ImportError:
    lms = None
try:
    from mlx_lm import load, generate  # Optional, platform-dependent dependency.
except ImportError:
    load = None
    generate = None

from google import genai
from tokencost import (
calculate_completion_cost,
@@ -681,6 +691,146 @@ def __deepcopy__(self, memo):
return new


class LMStudio_LLM(LLM):
"""A manager for running MLX-Optimised LLM locally."""

def __init__(self, model, config=None, **kwargs):
"""
Initialises the LMStudio LLM interface.

:param model: Name of the model to load for interaction.
:param config: Configuration to use for LLM chat requests.
:param kwargs: Keyword arguments passed to the base LLM class.
"""
super().__init__(api_key="", model=model, **kwargs)
self.llm = lms.llm(model)
self.config = config

def _query(self, session: list[dict[str, str]], max_tries: int = 5) -> str:
"""
Query implementation for the LMStudio_LLM class.

## Parameters
`session: list[dict[str, str]]`: A list of {'role': 'user'|'system', 'content': '...'} messages used to make the LLM request.
`max_tries: int`: Maximum number of attempts to get a response.
"""
request = session[-1]["content"]
for _ in range(max_tries):
try:
if self.config is not None:
response = self.llm.respond(request, config=self.config)
else:
response = self.llm.respond(request)
response = re.sub( # Remove thinking section, if available.
r"<think>.*?</think>", "", str(response), flags=re.DOTALL
)
return response
except Exception:
pass # Retry; give up after max_tries attempts.
return ""

def __getstate__(self):
state = self.__dict__.copy()
state.pop("llm", None)
return state

def __setstate__(self, state):
self.__dict__.update(state)
self.llm = lms.llm(self.model)

def __deepcopy__(self, memo):
cls = self.__class__
new = cls.__new__(cls)
memo[id(self)] = new
for k, v in self.__dict__.items():
if k == "llm":
continue
setattr(new, k, copy.deepcopy(v, memo))
new.llm = self.llm
return new
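
A brief usage sketch for the class above; the model name is an example (not taken from this repository) and a running local LM Studio instance with that model downloaded is assumed:

```python
# Example only: requires the optional `lmstudio` dependency and a local LM Studio server.
llm = LMStudio_LLM("qwen2.5-coder-7b-instruct")
session = [{"role": "user", "content": "Write a one-line Python sum of a list."}]
print(llm._query(session))  # Called directly here for illustration.
```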


class MLX_LM_LLM(LLM):
"""An mlx_lm implementation for running large LLMs locally."""

def __init__(self, model, config=None, max_tokens: int = 12000, **kwargs):
"""
Initialises the MLX_LM LLM interface.

:param model: Name of the model to load for interaction.
:param config: Model configuration passed to mlx_lm's load().
:param max_tokens: Maximum number of tokens to generate per request.
:param kwargs: Keyword arguments passed to the base LLM class.
"""
super().__init__(api_key="", model=model, **kwargs)
if config is not None:
llm, tokenizer = load(model, model_config=config)
else:
llm, tokenizer = load(model)
self.llm = llm
self.tokenizer = tokenizer

self.config = config
self.max_tokens = max_tokens

def __getstate__(self) -> object:
state = self.__dict__.copy() # Copy so popping keys does not mutate the live instance.
state.pop("tokenizer", None)
state.pop("llm", None)
return state

def __setstate__(self, state):
self.__dict__.update(state)
if self.config is None:
llm, tokenizer = load(self.model)
else:
llm, tokenizer = load(self.model, model_config=self.config)
self.llm = llm
self.tokenizer = tokenizer

def __deepcopy__(self, memo):
cls = self.__class__
new = cls.__new__(cls)
memo[id(self)] = new
for k, v in self.__dict__.items():
if k in ["llm", "tokenizer"]:
continue
setattr(new, k, copy.deepcopy(v, memo))
new.llm = self.llm # Copy by reference: the underlying model object is too large to deep-copy.
new.tokenizer = self.tokenizer
return new

def _query(
self, session: list, max_tries: int = 5, add_generation_prompt: bool = False
):
"""
Query implementation for the MLX_LM_LLM class.

## Parameters
`session: list[dict[str, str]]`: A list of {'role': 'user'|'system', 'content': '...'} messages used to make the LLM request.
`max_tries: int`: Maximum number of attempts to get a response.
`add_generation_prompt: bool`: Whether to append the generation prompt when applying the chat template.
"""
prompt = self.tokenizer.apply_chat_template(
session, add_generation_prompt=add_generation_prompt
)
for _ in range(max_tries):
try:
response = generate(
self.llm,
self.tokenizer,
prompt,
max_tokens=self.max_tokens, # Cap the number of generated tokens per request.
)
response = re.sub( # Remove thinking section, if available.
r"<think>.*?</think>", "", str(response), flags=re.DOTALL
)
return response
except Exception:
pass # Retry; give up after max_tries attempts.
return ""


class Dummy_LLM(LLM):
def __init__(self, model="DUMMY", **kwargs):
"""
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -63,6 +63,10 @@ methods = [
"eoh",
"reevo",
]
apple-silicon = [
"lmstudio>=1.5.0,<2; platform_system == 'Darwin' and platform_machine == 'arm64'",
"mlx_lm>=0.30.2,<1; platform_system == 'Darwin' and platform_machine == 'arm64'",
]
kerneltuner = [
"kernel-tuner",
"autotuning_methodology",
5 changes: 2 additions & 3 deletions run_benchmarks/heilbronn_triangle.py
@@ -1,7 +1,7 @@
from os import environ

from iohblade.experiment import Experiment
from iohblade.llm import Gemini_LLM, Ollama_LLM
from iohblade.llm import Gemini_LLM
from iohblade.methods import LLaMEA
from iohblade.loggers import ExperimentLogger

@@ -13,11 +13,10 @@

api_key = environ.get("GOOGLE_API_KEY")

ollama_llm = Ollama_LLM()
gemini_llm = Gemini_LLM(api_key=api_key)

# Heilbronn n11 benchmark.
heilbronn_triangle = get_heilbronn_triangle_problems(True)[0]
heilbronn_triangle = get_heilbronn_triangle_problems(False)[0]

methods = []
for llm in [gemini_llm]: