diff --git a/gpu-validation/defaults/main.yaml b/gpu-validation/defaults/main.yaml
index 59f8ba3..a12b4c4 100644
--- a/gpu-validation/defaults/main.yaml
+++ b/gpu-validation/defaults/main.yaml
@@ -75,3 +75,5 @@ gpu_validation_workload_userns:  # "--userns=keep-id:uid=1001" for RHAIIS
 gpu_validation_workload_shm_size: "--shm-size=4g"
 # [string] Additional environment variables
 gpu_validation_workload_additional_env: "--env=HF_HUB_OFFLINE=0 --env=VLLM_NO_USAGE_STATS=1"
+# [string] Base URL of the vLLM API (no trailing slash; endpoint paths such as /health are appended)
+gpu_validation_vllm_api_url: "http://127.0.0.1:8000"
diff --git a/gpu-validation/tasks/model_download_and_serve.yaml b/gpu-validation/tasks/model_download_and_serve.yaml
index fa802df..275316e 100644
--- a/gpu-validation/tasks/model_download_and_serve.yaml
+++ b/gpu-validation/tasks/model_download_and_serve.yaml
@@ -57,7 +57,7 @@
 
 - name: Wait for vllm API to appear
   ansible.builtin.uri:
-    url: http://localhost:8000/health
+    url: "{{ gpu_validation_vllm_api_url }}/health"
   register: api_result
   until: ("status" in api_result) and (api_result.status == 200)
   retries: 180  # 180 * 10 = 30 mins - this includes the time to download the model
@@ -66,7 +66,7 @@
 
 - name: Wait for vllm metrics endpoint to appear
   ansible.builtin.uri:
-    url: http://localhost:8000/metrics/
+    url: "{{ gpu_validation_vllm_api_url }}/metrics/"
   register: metrics_result
   until: ("status" in metrics_result) and (metrics_result.status == 200)
   retries: 12  # 12 * 10 = 2 mins after API appears
diff --git a/gpu-validation/tasks/model_performance.yaml b/gpu-validation/tasks/model_performance.yaml
index 5b73208..ee45e60 100644
--- a/gpu-validation/tasks/model_performance.yaml
+++ b/gpu-validation/tasks/model_performance.yaml
@@ -2,6 +2,7 @@
 - name: Run the model performance check script
   ansible.builtin.command: /tmp/scripts/model_performance_check.sh
   environment:
+    URL: "{{ gpu_validation_vllm_api_url }}"
     MODEL_NAME: "{{ gpu_validation_model_name }}"
   register: performance_script_output
   changed_when: false