# docker-compose.yml for the SQL-HR stack (source: JGSnapp/SQL-HR, master branch)
services:
  # vLLM OpenAI-compatible inference server; reserves all NVIDIA GPUs.
  vllm:
    image: vllm/vllm-openai:latest
    container_name: vllm
    restart: unless-stopped
    # Use the host's IPC namespace instead of a private one.
    ipc: host
    environment:
      # GPU visibility and driver capabilities for the NVIDIA runtime.
      NVIDIA_VISIBLE_DEVICES: 'all'
      NVIDIA_DRIVER_CAPABILITIES: 'compute,utility'
      # Every cache/download location points at the bind-mounted directory
      # declared under `volumes` below.
      HF_HOME: '/root/.cache/huggingface'
      HUGGINGFACE_HUB_CACHE: '/root/.cache/huggingface'
      TORCH_HOME: '/root/.cache/huggingface'
      VLLM_DOWNLOAD_DIR: '/root/.cache/huggingface'
      HF_HUB_ENABLE_HF_TRANSFER: '0'
    volumes:
      - './hf-cache:/root/.cache/huggingface'
    ports:
      # Host port is overridable via VLLM_HOST_PORT; the container side is
      # the --port value passed in `command`.
      - '${VLLM_HOST_PORT:-18001}:8001'
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    # Arguments handed to the image's entrypoint, one flag/value per item.
    command:
      # Model selection and memory budget.
      - '--model'
      - 'Qwen/Qwen2.5-7B-Instruct-GPTQ-Int4'
      - '--gpu-memory-utilization'
      - '0.75'
      - '--max-model-len'
      - '20000'
      # Listen address inside the container.
      - '--host'
      - '0.0.0.0'
      - '--port'
      - '8001'
      # Tool-calling options.
      - '--enable-auto-tool-choice'
      - '--tool-call-parser'
      - 'hermes'
    healthcheck:
      # Healthy once the models endpoint answers; start_period leaves ten
      # minutes before failed probes count against `retries`.
      test:
        - CMD-SHELL
        - 'wget -qO- http://127.0.0.1:8001/v1/models >/dev/null 2>&1'
      start_period: 600s
      interval: 20s
      timeout: 5s
      retries: 60

  # Agent backend built from ./agent_server; starts only after both the
  # vllm and postgres services report healthy.
  agent_server:
    build: ./agent_server
    depends_on:
      postgres:
        condition: service_healthy
      vllm:
        condition: service_healthy
    env_file:
      - .env
    environment:
      # LLM_BASE_URL and BASE_URL are both filled from the same
      # ${LLM_BASE_URL} substitution, defaulting to the vllm service.
      - 'LLM_BASE_URL=${LLM_BASE_URL:-http://vllm:8001/v1}'
      - 'BASE_URL=${LLM_BASE_URL:-http://vllm:8001/v1}'
      - 'RESULT_FILE=/results/result.txt'
    volumes:
      # RESULT_FILE above lives inside this bind mount.
      - './results:/results'
    healthcheck:
      # Probe the local /health endpoint on port 8000 using Python's
      # urllib; the probe command fails if the request raises.
      test:
        - CMD-SHELL
        - >-
          python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health')"
      start_period: 20s
      interval: 15s
      timeout: 5s
      retries: 5

  # Web frontend built from ./frontend (Streamlit settings are passed via
  # the environment); waits for agent_server to become healthy.
  frontend:
    build: ./frontend
    depends_on:
      agent_server:
        condition: service_healthy
    env_file:
      - .env
    environment:
      # NOTE(review): the default URL carries no port, while the
      # agent_server healthcheck probes port 8000 — confirm the frontend
      # adds the port itself or that SERVER_URL is set in .env.
      - 'SERVER_URL=${SERVER_URL:-http://agent_server}'
      - 'HOME=/app'
      - 'STREAMLIT_BROWSER_GATHERUSAGESTATS=false'
      - 'STREAMLIT_CONFIG_DIR=/app/.streamlit'
    ports:
      - '8501:8501'

  # PostgreSQL (pgvector image). Credentials come from .env; the schema and
  # row-insert SQL files are mounted into /docker-entrypoint-initdb.d and
  # the candidates CSV is mounted read-only at /data/candidates.csv.
  postgres:
    image: ankane/pgvector
    container_name: postgres
    env_file:
      - .env
    restart: unless-stopped
    ports:
      # Host port is overridable via POSTGRES_PORT.
      - "${POSTGRES_PORT:-5432}:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
      - ./data/candidates.csv:/data/candidates.csv:ro
      - ./db/schema.sql:/docker-entrypoint-initdb.d/01-schema.sql:ro
      - ./db/add_rows.sql:/docker-entrypoint-initdb.d/02-add_rows.sql:ro
    healthcheck:
      # `$$` escapes Compose's own interpolation so the variables are
      # expanded by the shell inside the container, i.e. from the values the
      # container actually received through env_file. A single `$` would be
      # substituted on the host at parse time and collapse to an empty
      # string (with only a warning) when the variable is not set there.
      # Fallbacks: user defaults to `postgres`, database defaults to the
      # user name.
      test:
        - CMD-SHELL
        - >-
          pg_isready
          -U "$${POSTGRES_USER:-postgres}"
          -d "$${POSTGRES_DB:-$${POSTGRES_USER:-postgres}}"
      interval: 10s
      timeout: 5s
      retries: 5

  # pgAdmin web console for the postgres service; settings come from .env.
  pgadmin:
    image: dpage/pgadmin4
    container_name: pgadmin
    restart: unless-stopped
    env_file:
      - .env
    depends_on:
      # Long form of the plain list entry: only start ordering is required,
      # not a passing healthcheck.
      postgres:
        condition: service_started
    ports:
      # Host port is overridable via PGADMIN_PORT; the container serves on 80.
      - '${PGADMIN_PORT:-8080}:80'
    volumes:
      - 'pgadmin_data:/var/lib/pgadmin'

# Named volumes referenced by the postgres and pgadmin services; an empty
# mapping means no extra options are set.
volumes:
  pgadmin_data: {}
  pgdata: {}