From 996e70a06e2c6d4379a3ec276f322d86dafb66a0 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 28 Mar 2025 15:09:32 +0000 Subject: [PATCH 1/6] Pass lora_modules into server --- src/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine.py b/src/engine.py index 88a0101..1169e8b 100644 --- a/src/engine.py +++ b/src/engine.py @@ -143,7 +143,7 @@ async def _initialize_engines(self): engine_client=self.llm, model_config=self.model_config, base_model_paths=self.base_model_paths, - lora_modules=None, + lora_modules=lora_modules, prompt_adapters=None, ) From b027fed0e87587b7514424c6ce54bc3dfc2b6146 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 28 Mar 2025 22:25:04 +0000 Subject: [PATCH 2/6] Force init static lora adapters --- src/engine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/engine.py b/src/engine.py index 1169e8b..952e78f 100644 --- a/src/engine.py +++ b/src/engine.py @@ -147,6 +147,8 @@ async def _initialize_engines(self): prompt_adapters=None, ) + await self.serving_models.init_static_loras() + self.chat_engine = OpenAIServingChat( engine_client=self.llm, model_config=self.model_config, From 4080fbed6500b3a0a380f38bbc9275a0aa5c00a0 Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 1 Apr 2025 13:34:52 +0100 Subject: [PATCH 3/6] Add download for fine-tuned model --- Dockerfile | 16 ++++++++++++ src/download_lora_adapter.py | 48 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/download_lora_adapter.py diff --git a/Dockerfile b/Dockerfile index e48a00d..d420ca5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,5 +46,21 @@ RUN --mount=type=secret,id=HF_TOKEN,required=false \ python3 /src/download_model.py; \ fi +# Customisations for LoRA adapters in wasabi +RUN --mount=type=cache,target=/root/.cache/pip \ + python3 -m pip install boto3 + +#ENV WASABI_LORA_ADAPTER_PATH=$WASABI_LORA_ADAPTER_PATH +#ENV AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID +#ENV AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY + +RUN mkdir /model_adapter +# using credentials file +#RUN --mount=type=secret,id=aws,target=/root/.aws/credentials \ +# python3 /src/download_lora_adapter.py + +# using secrets injected as ENV vars +RUN python3 /src/download_lora_adapter.py + # Start the handler CMD ["python3", "/src/handler.py"] \ No newline at end of file diff --git a/src/download_lora_adapter.py b/src/download_lora_adapter.py new file mode 100644 index 0000000..81cc3db --- /dev/null +++ b/src/download_lora_adapter.py @@ -0,0 +1,48 @@ +import os +from pathlib import Path +from urllib.parse import urlparse + +import boto3 + +# Use the following code to connect using Wasabi profile from .aws/credentials file +# session = boto3.Session(profile_name="default") +# credentials = session.get_credentials() +# +# aws_access_key_id = credentials.access_key +# aws_secret_access_key = credentials.secret_key + +aws_access_key_id = os.getenv("WASABI_ACCESS_KEY") +aws_secret_access_key = os.getenv("WASABI_SECRET_ACCESS_KEY") +# Endpoint is determined when bucket is created +ENDPOINT_URL = 'https://s3.eu-west-1.wasabisys.com' + +s3 = boto3.client('s3', + endpoint_url=ENDPOINT_URL, # s3.wasabisys.com ? + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key) + + +def download_s3_folder(s3_uri, local_dir=None): + """ + Download the contents of a folder directory + Args: + s3_uri: the s3 uri to the top level of the files you wish to download + local_dir: a relative or absolute directory path in the local file system + """ + s3 = boto3.resource("s3", + endpoint_url=ENDPOINT_URL, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key) + bucket = s3.Bucket(urlparse(s3_uri).hostname) + s3_path = urlparse(s3_uri).path.lstrip('/') + if local_dir is not None: + local_dir = Path(local_dir) + for obj in bucket.objects.filter(Prefix=s3_path): + target = Path(obj.key) if local_dir is None else local_dir / Path(obj.key).relative_to(s3_path) + target.parent.mkdir(parents=True, exist_ok=True) + if obj.key[-1] == '/': + continue + bucket.download_file(obj.key, str(target)) + +if __name__ == "__main__": + download_s3_folder(os.getenv("WASABI_LORA_ADAPTER_PATH")) \ No newline at end of file From 9c65e02628b95af104bc5a81659a5817083f947d Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 1 Apr 2025 14:35:17 +0100 Subject: [PATCH 4/6] Log env vars --- src/download_lora_adapter.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/download_lora_adapter.py b/src/download_lora_adapter.py index 81cc3db..880b144 100644 --- a/src/download_lora_adapter.py +++ b/src/download_lora_adapter.py @@ -13,6 +13,12 @@ aws_access_key_id = os.getenv("WASABI_ACCESS_KEY") aws_secret_access_key = os.getenv("WASABI_SECRET_ACCESS_KEY") +adapter_path = os.getenv("WASABI_LORA_ADAPTER_PATH") + +print("WASABI_ACCESS_KEY: ", aws_access_key_id) +print("WASABI_SECRET_ACCESS_KEY: ", aws_secret_access_key) +print("WASABI_LORA_ADAPTER_PATH: ", adapter_path) + # Endpoint is determined when bucket is created ENDPOINT_URL = 'https://s3.eu-west-1.wasabisys.com' @@ -45,4 +51,4 @@ def download_s3_folder(s3_uri, local_dir=None): bucket.download_file(obj.key, str(target)) if __name__ == "__main__": - download_s3_folder(os.getenv("WASABI_LORA_ADAPTER_PATH")) \ No newline at end of file + download_s3_folder(adapter_path) \ No newline at end of file From b7a07e59d17b6d9b852bcc09ed2e966e13ca9180 Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 1 Apr 2025 17:26:33 +0100 Subject: [PATCH 5/6] Revert to building from credentials --- Dockerfile | 10 ++++++---- src/download_lora_adapter.py | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index d420ca5..2ec6677 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,17 +50,19 @@ RUN --mount=type=secret,id=HF_TOKEN,required=false \ RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install boto3 -#ENV WASABI_LORA_ADAPTER_PATH=$WASABI_LORA_ADAPTER_PATH +ARG WASABI_LORA_ADAPTER_PATH + +ENV WASABI_LORA_ADAPTER_PATH=$WASABI_LORA_ADAPTER_PATH #ENV AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID #ENV AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY RUN mkdir /model_adapter # using credentials file -#RUN --mount=type=secret,id=aws,target=/root/.aws/credentials \ -# python3 /src/download_lora_adapter.py +RUN --mount=type=secret,id=credentials,target=/root/.aws/credentials \ + python3 /src/download_lora_adapter.py # using secrets injected as ENV vars -RUN python3 /src/download_lora_adapter.py +# RUN python3 /src/download_lora_adapter.py # Start the handler CMD ["python3", "/src/handler.py"] \ No newline at end of file diff --git a/src/download_lora_adapter.py b/src/download_lora_adapter.py index 880b144..befa34a 100644 --- a/src/download_lora_adapter.py +++ b/src/download_lora_adapter.py @@ -5,14 +5,14 @@ import boto3 # Use the following code to connect using Wasabi profile from .aws/credentials file -# session = boto3.Session(profile_name="default") -# credentials = session.get_credentials() +session = boto3.Session(profile_name="default") +credentials = session.get_credentials() # -# aws_access_key_id = credentials.access_key -# aws_secret_access_key = credentials.secret_key +aws_access_key_id = credentials.access_key +aws_secret_access_key = credentials.secret_key -aws_access_key_id = os.getenv("WASABI_ACCESS_KEY") -aws_secret_access_key = os.getenv("WASABI_SECRET_ACCESS_KEY") +# aws_access_key_id = os.getenv("WASABI_ACCESS_KEY") +# aws_secret_access_key = os.getenv("WASABI_SECRET_ACCESS_KEY") adapter_path = os.getenv("WASABI_LORA_ADAPTER_PATH") print("WASABI_ACCESS_KEY: ", aws_access_key_id) From ee3c290a41acc22289bcc34ad705887091b597e4 Mon Sep 17 00:00:00 2001 From: paul Date: Fri, 11 Apr 2025 17:23:42 +0100 Subject: [PATCH 6/6] Add logging --- src/handler.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/handler.py b/src/handler.py index 176ec7e..d6fd431 100644 --- a/src/handler.py +++ b/src/handler.py @@ -1,12 +1,17 @@ -import os +import logging + import runpod -from utils import JobInput + from engine import vLLMEngine, OpenAIvLLMEngine +from utils import JobInput + +log = logging.getLogger(__name__) vllm_engine = vLLMEngine() OpenAIvLLMEngine = OpenAIvLLMEngine(vllm_engine) async def handler(job): + log.info("handle(job=%s)", job) job_input = JobInput(job["input"]) engine = OpenAIvLLMEngine if job_input.openai_route else vllm_engine results_generator = engine.generate(job_input)