-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
65 lines (53 loc) · 2.85 KB
/
docker-compose.yml
File metadata and controls
65 lines (53 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Compose definition for the "scheduler" service. The container is built from
# the local Dockerfile and bind-mounts the host's Slurm client binaries,
# Slurm configuration and Munge directories so it can call Slurm commands.
services:
  scheduler:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: llm-scheduler
    restart: unless-stopped

    # ── Port mapping ────────────────────────────────────────────────
    # Left side  = host port (what users + Slurm jobs connect to)
    # Right side = container port (always 9000)
    #
    # IMPORTANT: ROUTER_PORT in .env MUST match the left side!
    # e.g., if you map 8080:9000, set ROUTER_PORT=8080 in .env
    ports:
      - "${ROUTER_PORT:-9000}:9000"

    env_file:
      - .env

    # Values set here take precedence over the same keys loaded from .env,
    # so inside the container these always point at the in-container paths.
    environment:
      - DATABASE_URL=sqlite:////app/data/router.db
      - VLLM_LOG_DIR=/app/logs
      - SBATCH_TEMPLATE_PATH=/app/templates/vllm_job.sh

    # ── Extra hosts ─────────────────────────────────────────────────
    # Allow the container to reach the host via "host.docker.internal".
    # Not strictly needed when PUBLIC_HOSTNAME is set to the machine's real
    # IP/hostname, but it helps for local testing.
    extra_hosts:
      - "host.docker.internal:host-gateway"

    volumes:
      # ── Persistent data ──────────────────────────────────────────
      # Named volume (declared at the bottom of this file).
      - scheduler-data:/app/data
      # ── Model catalog ────────────────────────────────────────────
      - ./config/models.yaml:/app/config/models.yaml:ro
      # ── Slurm logs (shared with host) ────────────────────────────
      # Slurm jobs write stdout/stderr here on the HOST.
      # The scheduler reads them via /admin/leases/{id}/logs.
      # These MUST be the same physical directory.
      # The host side is interpolated by Compose (shell environment or .env);
      # it falls back to ./logs when VLLM_LOG_DIR is unset or empty.
      - "${VLLM_LOG_DIR:-./logs}:/app/logs"
      # ── Slurm binaries (custom build at /usr/bin) ────────────────
      - /usr/bin/sbatch:/usr/bin/sbatch:ro
      - /usr/bin/scancel:/usr/bin/scancel:ro
      - /usr/bin/scontrol:/usr/bin/scontrol:ro
      - /usr/bin/squeue:/usr/bin/squeue:ro
      - /usr/bin/sacct:/usr/bin/sacct:ro
      # ── Slurm shared library ─────────────────────────────────────
      - /usr/lib/x86_64-linux-gnu/slurm:/usr/lib/x86_64-linux-gnu/slurm:ro
      # ── Slurm config + Munge auth ───────────────────────────────
      - /etc/slurm:/etc/slurm:ro
      # NOTE(review): /etc/munge normally holds the Munge key; confirm the
      # container really needs it in addition to the socket directory below.
      - /etc/munge:/etc/munge:ro
      # Mounted read-write (no :ro), unlike the other Slurm/Munge mounts.
      - /var/run/munge:/var/run/munge

    # Run as the same UID/GID that has Slurm submit permissions.
    # NOTE: UID and GID must be exported in the shell or defined in .env;
    # if Compose cannot see them, the defaults apply and the container
    # runs as 0:0 (root).
    user: "${UID:-0}:${GID:-0}"

volumes:
  scheduler-data: