-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
28 lines (28 loc) · 762 Bytes
/
docker-compose.yml
File metadata and controls
28 lines (28 loc) · 762 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Compose definition for a single GPU-backed service, `l1-nexus`, which runs
# the vllm/vllm-openai image and serves one model on port 8000.
services:
  l1-nexus:
    # Pinned image tag so restarts do not silently pick up a new vLLM version.
    image: vllm/vllm-openai:v0.8.5
    # NOTE(review): `runtime: nvidia` and the `deploy.resources` device
    # reservation below both request GPU access; one may be redundant
    # depending on the Docker / NVIDIA container toolkit setup -- confirm.
    runtime: nvidia
    ports:
      # host:container. Quoted so YAML never reinterprets the mapping.
      - "8000:8000"
    environment:
      # Falls back to a well-known default key when VLLM_API_KEY is unset on
      # the host; set a real key for anything beyond local use.
      - VLLM_API_KEY=${VLLM_API_KEY:-specsmith-local-key}
      # Passed through from the host environment (empty if unset there).
      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
    volumes:
      # Share the host's Hugging Face cache directory with the container.
      - ~/.cache/huggingface:/root/.cache/huggingface
    # Use the host IPC namespace.
    # NOTE(review): presumably for shared-memory access, as recommended in
    # the vLLM Docker deployment docs -- confirm.
    ipc: host
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU visible to Docker for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Arguments for the image's entrypoint. `>-` folds the lines below into a
    # single space-separated string with no trailing newline; keep every line
    # at the same indent so the folding stays a plain join.
    command: >-
      Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8
      --served-model-name l1-nexus
      --gpu-memory-utilization 0.92
      --max-model-len 16384
      --trust-remote-code
      --generation-config vllm
      --enable-auto-tool-choice
      --tool-call-parser hermes