@@ -122,31 +122,40 @@ def __init__(self, vllm_engine):
         super().__init__(vllm_engine)
         self.served_model_name = os.getenv("OPENAI_SERVED_MODEL_NAME_OVERRIDE") or self.engine_args.model
         self.response_role = os.getenv("OPENAI_RESPONSE_ROLE") or "assistant"
+        self.lora_adapters = self._load_lora_adapters()
         asyncio.run(self._initialize_engines())
         self.raw_openai_output = bool(int(os.getenv("RAW_OPENAI_OUTPUT", 1)))
-
+
+    def _load_lora_adapters(self):
+        adapters = []
+        try:
+            adapters = json.loads(os.getenv("LORA_MODULES", '[]'))
+        except Exception as e:
+            logging.info(f"---LORA_MODULES JSON load error: {e}")
+
+        for i, adapter in enumerate(adapters):
+            try:
+                adapters[i] = LoRAModulePath(**adapter)
+                logging.info(f"---Initialized adapter: {adapter}")
+            except Exception as e:
+                logging.info(f"---Failed to initialize adapter: {e}")
+                continue
+        return adapters
+
     async def _initialize_engines(self):
         self.model_config = await self.llm.get_model_config()
         self.base_model_paths = [
             BaseModelPath(name=self.engine_args.model, model_path=self.engine_args.model)
         ]

-        lora_modules = os.getenv('LORA_MODULES', None)
-        if lora_modules is not None:
-            try:
-                lora_modules = json.loads(lora_modules)
-                lora_modules = [LoRAModulePath(**lora_modules)]
-            except:
-                lora_modules = None
-
         self.serving_models = OpenAIServingModels(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
-            lora_modules=None,
+            lora_modules=self.lora_adapters,
             prompt_adapters=None,
         )
-
+        await self.serving_models.init_static_loras()
         self.chat_engine = OpenAIServingChat(
             engine_client=self.llm,
             model_config=self.model_config,