-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
123 lines (115 loc) · 2.97 KB
/
docker-compose.yml
File metadata and controls
123 lines (115 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Compose stack: a vLLM inference server, an agent server that talks to it,
# a frontend, and a pgvector-enabled Postgres with a pgAdmin UI.
services:
  # vLLM server exposing an OpenAI-style HTTP API (/v1/...) on port 8001.
  vllm:
    # NOTE(review): `latest` is a moving tag; consider pinning a version so
    # rebuilds are reproducible.
    image: vllm/vllm-openai:latest
    container_name: vllm
    restart: unless-stopped
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # Every cache/download variable points at the directory that is
      # bind-mounted from ./hf-cache below, so downloads persist on the host.
      - HF_HOME=/root/.cache/huggingface
      - HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface
      - TORCH_HOME=/root/.cache/huggingface
      - VLLM_DOWNLOAD_DIR=/root/.cache/huggingface
      - HF_HUB_ENABLE_HF_TRANSFER=0
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Arguments passed to the image entrypoint; numeric values are quoted so
    # they stay strings.
    command:
      - --model
      - Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4
      - --gpu-memory-utilization
      - "0.75"
      - --max-model-len
      - "20000"
      - --host
      - "0.0.0.0"
      - --port
      - "8001"
      - --enable-auto-tool-choice
      - --tool-call-parser
      - hermes
    ports:
      # Host port is overridable via VLLM_HOST_PORT (default 18001).
      - "${VLLM_HOST_PORT:-18001}:8001"
    volumes:
      - ./hf-cache:/root/.cache/huggingface
    ipc: host
    healthcheck:
      # Healthy once the model listing endpoint answers.
      # NOTE(review): this relies on `wget` being present in the image —
      # confirm, otherwise the service can never become healthy.
      test: ["CMD-SHELL", "wget -qO- http://127.0.0.1:8001/v1/models >/dev/null 2>&1"]
      interval: 20s
      timeout: 5s
      retries: 60
      # Long grace period before failed probes count against `retries`.
      start_period: 600s

  # Agent server built from ./agent_server; starts only after both the LLM
  # and the database report healthy.
  agent_server:
    build:
      context: ./agent_server
    depends_on:
      vllm:
        condition: service_healthy
      postgres:
        condition: service_healthy
    env_file:
      - .env
    environment:
      # Both variables receive the same value: LLM_BASE_URL from the host
      # environment/.env, falling back to the in-network vLLM endpoint.
      LLM_BASE_URL: ${LLM_BASE_URL:-http://vllm:8001/v1}
      BASE_URL: ${LLM_BASE_URL:-http://vllm:8001/v1}
      RESULT_FILE: /results/result.txt
    volumes:
      # RESULT_FILE lives under this bind mount, so it is visible on the host.
      - ./results:/results
    healthcheck:
      # Probes the service's own /health endpoint on port 8000 using Python.
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')\""]
      interval: 15s
      timeout: 5s
      retries: 5
      start_period: 20s

  # Frontend built from ./frontend, published on host port 8501.
  frontend:
    build:
      context: ./frontend
    depends_on:
      agent_server:
        condition: service_healthy
    env_file:
      - .env
    environment:
      # NOTE(review): the default has no port, while agent_server's health
      # probe targets port 8000 — confirm the frontend adds the port itself
      # or that SERVER_URL is always set in .env.
      SERVER_URL: ${SERVER_URL:-http://agent_server}
      HOME: /app
      STREAMLIT_BROWSER_GATHERUSAGESTATS: "false"
      STREAMLIT_CONFIG_DIR: /app/.streamlit
    ports:
      - "8501:8501"

  # Postgres (pgvector image) with schema and seed SQL mounted into the
  # image's init directory.
  postgres:
    # NOTE(review): no tag given, so this resolves to `latest`; consider
    # pinning.
    image: ankane/pgvector
    container_name: postgres
    env_file:
      - .env
    restart: unless-stopped
    ports:
      - "${POSTGRES_PORT:-5432}:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
      - ./data/candidates.csv:/data/candidates.csv:ro
      # Numeric prefixes order the scripts: schema first, then rows.
      - ./db/schema.sql:/docker-entrypoint-initdb.d/01-schema.sql:ro
      - ./db/add_rows.sql:/docker-entrypoint-initdb.d/02-add_rows.sql:ro
    healthcheck:
      # `$$` escapes Compose interpolation, so the variables are expanded by
      # the shell inside the container (populated from env_file) instead of
      # on the host, where an unset variable would become an empty string.
      # Fallbacks follow the postgres image defaults: user `postgres`,
      # database named after the user.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER:-postgres} -d $${POSTGRES_DB:-$${POSTGRES_USER:-postgres}}"]
      interval: 10s
      timeout: 5s
      retries: 5

  # pgAdmin web UI; container port 80 is published on PGADMIN_PORT
  # (default 8080).
  pgadmin:
    # NOTE(review): no tag given, so this resolves to `latest`; consider
    # pinning.
    image: dpage/pgadmin4
    container_name: pgadmin
    env_file:
      - .env
    restart: unless-stopped
    ports:
      - "${PGADMIN_PORT:-8080}:80"
    depends_on:
      # Short form: waits for postgres to be started, not for it to be
      # healthy.
      - postgres
    volumes:
      - pgadmin_data:/var/lib/pgadmin

# Named volumes holding database and pgAdmin state across container
# re-creation.
volumes:
  pgdata:
  pgadmin_data: