Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/lighteval/models/vllm/vllm_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,7 @@ class AsyncVLLMModel(VLLMModel):
is_async = True

def cleanup(self):
self.model.shutdown()
gc.collect()
destroy_distributed_environment()
torch.cuda.empty_cache()
Expand Down Expand Up @@ -627,7 +628,6 @@ async def _async_batch(self, docs: list[Doc], generative: bool) -> list:
results = await asyncio.gather(*processed_requests)
return results

@cached(SamplingMethod.GENERATIVE)
async def greedy_until(
self,
docs: list[Doc],
Expand Down Expand Up @@ -661,7 +661,6 @@ async def greedy_until(

return results

@cached(SamplingMethod.LOGPROBS)
async def loglikelihood(
self,
docs: list[Doc],
Expand Down
7 changes: 4 additions & 3 deletions src/lighteval/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ async def _run_model_async(self):
model_outputs = await self.model.loglikelihood(docs)
outputs[sampling_method] = model_outputs

self.model.cleanup()

return outputs

def _run_model_sync(self):
Expand All @@ -327,6 +329,8 @@ def _run_model_sync(self):
model_outputs = self.model.loglikelihood_rolling(docs)
outputs[sampling_method] = model_outputs

self.model.cleanup()

return outputs

def _run_model(self):
Expand All @@ -339,9 +343,6 @@ def _run_model(self):
else:
outputs = self._run_model_sync()

# Cleaning up the model before running metrics
self.model.cleanup()

return outputs

def _post_process_outputs(self, sampling_method_responses: dict[str, list[ModelResponse]]):
Expand Down