diff --git a/src/engine.py b/src/engine.py
index ce82016..e0a7292 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -125,6 +125,25 @@ def __init__(self, vllm_engine):
         super().__init__(vllm_engine)
         self.served_model_name = os.getenv("OPENAI_SERVED_MODEL_NAME_OVERRIDE") or self.engine_args.model
         self.response_role = os.getenv("OPENAI_RESPONSE_ROLE") or "assistant"
+
+        # Optional LoRA adapters, supplied as a JSON array via LORA_MODULES.
+        adapters = os.getenv("LORA_MODULES", "[]")
+
+        try:
+            adapters = json.loads(adapters)
+        except Exception as e:
+            logging.error(f"--- Failed to parse LORA_MODULES as JSON: {e}")
+            adapters = []
+
+        self.lora_adapters = []
+        for adapter in adapters:
+            try:
+                lora = LoRAModulePath(**adapter)
+                self.lora_adapters.append(lora)
+                logging.info(f"--- Initialized LoRA adapter: {adapter}")
+            except Exception as e:
+                logging.error(f"--- Failed to initialize LoRA adapter {adapter}: {e}")
+
         asyncio.run(self._initialize_engines())
         self.raw_openai_output = bool(int(os.getenv("RAW_OPENAI_OUTPUT", 1)))
 
@@ -134,19 +153,11 @@ async def _initialize_engines(self):
             BaseModelPath(name=self.engine_args.model, model_path=self.engine_args.model)
         ]
 
-        lora_modules = os.getenv('LORA_MODULES', None)
-        if lora_modules is not None:
-            try:
-                lora_modules = json.loads(lora_modules)
-                lora_modules = [LoRAModulePath(**lora_modules)]
-            except:
-                lora_modules = None
-
         self.serving_models = OpenAIServingModels(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
-            lora_modules=None,
+            lora_modules=self.lora_adapters,
             prompt_adapters=None,
         )
 
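Note on behavior: the removed block in `_initialize_engines` only handled a single adapter object (it called `LoRAModulePath(**lora_modules)` on the whole parsed value), while the new loop in `__init__` accepts a JSON array and skips entries that fail to construct. Below is a minimal sketch of the expected `LORA_MODULES` shape, assuming each object's keys map onto the constructor fields of vLLM's `LoRAModulePath` (`name`, `path`, and optionally `base_model_name`); the adapter name and path are hypothetical, not values from this PR:

```python
import json
import os

# Hypothetical example value: a JSON array of adapter specs whose keys
# are assumed to match LoRAModulePath's fields (name, path).
os.environ["LORA_MODULES"] = json.dumps(
    [{"name": "my-lora", "path": "/models/loras/my-lora"}]
)

# Mirrors the parsing added in __init__: invalid JSON or a bad entry
# degrades to "no adapters" rather than crashing worker startup.
adapters = json.loads(os.getenv("LORA_MODULES", "[]"))
for adapter in adapters:
    print(adapter["name"], "->", adapter["path"])
```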