From 084fa941b3a71556be5f9559a3103847eeba7c79 Mon Sep 17 00:00:00 2001
From: Liana Mikaelyan
Date: Thu, 16 Jan 2025 21:28:07 +0000
Subject: [PATCH] Enable phi3.5 and upgrade Transformers

---
 README.md                             | 1 +
 experiments/bo_options.py             | 2 +-
 pyproject.toml                        | 2 +-
 src/slicegpt/adapters/phi3_adapter.py | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a5b8f601..063188ad 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ Notes:
 The following models from Hugging Face hub are currently supported
 - [microsoft/phi-2](https://huggingface.co/microsoft/phi-2)
 - [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct)
+- [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct)
 - [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b)
 - [meta-llama/Llama-2-13b-hf](https://huggingface.co/meta-llama/Llama-2-13b)
 - [meta-llama/Llama-2-70b-hf](https://huggingface.co/meta-llama/Llama-2-70b)

diff --git a/experiments/bo_options.py b/experiments/bo_options.py
index dea363c7..6f35ddb5 100644
--- a/experiments/bo_options.py
+++ b/experiments/bo_options.py
@@ -81,7 +81,7 @@ def lora_target_map(model: str):
                 'lm_head',
             ],
         }
-        case 'microsoft/Phi-3-mini-4k-instruct':
+        case 'microsoft/Phi-3-mini-4k-instruct' | 'microsoft/Phi-3.5-mini-instruct':
             return {
                 'qkv_proj': ['qkv_proj'],
                 'attn_head': ['qkv_proj', 'o_proj'],

diff --git a/pyproject.toml b/pyproject.toml
index f18dffa9..eefeb851 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ dependencies = [
     "numpy",
     "torch",
     "tqdm",
-    "transformers==4.41.0",
+    "transformers==4.46.3",
 ]

 [project.optional-dependencies]

diff --git a/src/slicegpt/adapters/phi3_adapter.py b/src/slicegpt/adapters/phi3_adapter.py
index ccd268da..42c30e9d 100644
--- a/src/slicegpt/adapters/phi3_adapter.py
+++ b/src/slicegpt/adapters/phi3_adapter.py
@@ -237,7 +237,7 @@ def _from_pretrained(
         local_files_only: bool = False,
         token: str | bool | None = None,
     ) -> ModelAdapter | None:
-        if not model_name.startswith("microsoft/Phi-3-mini-4k-instruct"):
+        if (not model_name.startswith("microsoft/Phi-3-mini-4k-instruct") and not model_name.startswith("microsoft/Phi-3.5-mini-instruct")):
             return None
         model = Phi3ForCausalLM.from_pretrained(