Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions gpu-validation/defaults/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,32 +45,36 @@
10de:27b8: 1

gpu_validation_model_tests_enabled: true # Can be disabled for lighter CUDA-only sanity testing
gpu_validation_model_name: TinyLlama/TinyLlama-1.1B-Chat-v1.0 # RedHatAI/Llama-3.2-1B-Instruct-FP8 for RHAIIS
gpu_validation_model_name: RedHatAI/Llama-3.2-1B-Instruct-FP8
# [string] (optional) HuggingFace token if required for download
gpu_validation_model_download_hf_token:
# [string] (optional) URL to download pre-packaged model from RH intranet (e.g., http://file-server/models/tinyllama.tar.gz)
gpu_validation_model_intranet_url:
# [string] (optional) Local path where pre-downloaded model is located (overrides HuggingFace download)
gpu_validation_model_local_path:

# [float] Performance thresholds
gpu_validation_model_perf_max_avg_time_per_tok: !!float "0.03"
gpu_validation_model_perf_max_avg_time_to_first_tok: !!float "0.3"

# [string] Container image to use for model serving
gpu_validation_workload_container_image: "docker.io/vllm/vllm-openai:latest" # "registry.redhat.io/rhaiis/vllm-cuda-rhel9:3.0.0" for RHAIIS
gpu_validation_workload_container_image: "registry.redhat.io/rhaiis/vllm-cuda-rhel9:3.0.0"
# [string] (optional) registry username if required for container image download
gpu_validation_workload_registry_username:
# [string] (optional) registry password if required for container image download
gpu_validation_workload_registry_password:
# [string] Cache directory path for model downloads
gpu_validation_workload_cache_dir: "/home/cloud-user/.cache/workload"
# [string] Cache mount path inside container
gpu_validation_workload_cache_mount_path: "/root/.cache/huggingface" # "/opt/app-root/src/.cache" for RHAIIS
gpu_validation_workload_cache_mount_path: "/opt/app-root/src/.cache"

Check failure on line 69 in gpu-validation/defaults/main.yaml

View workflow job for this annotation

GitHub Actions / Ansible Lint

yaml[colons]

Too many spaces after colon
# [string] Container Device options
gpu_validation_workload_device_opts: "--device nvidia.com/gpu=all"
# [string] Container security options
gpu_validation_workload_security_opts: "--security-opt=label=disable"
# [string] Container options
gpu_validation_workload_additional_opts: "--rm -it"
# [string] Container user namespace option
gpu_validation_workload_userns: # "--userns=keep-id:uid=1001" for RHAIIS
gpu_validation_workload_userns: "--userns=keep-id:uid=1001"
# [string] Shared memory size
gpu_validation_workload_shm_size: "--shm-size=4g"
# [string] Additional environment variables
Expand Down
6 changes: 6 additions & 0 deletions main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
- name: GPU Validation VM setup
hosts: control_node
gather_facts: false
# pre_tasks run before the play's regular tasks; this one installs the Galaxy
# roles listed in requirements.yaml.
pre_tasks:
- name: Install Ansible Galaxy role requirements
ansible.builtin.command:
# --force overwrites roles that are already installed, so every run reinstalls them.
cmd: ansible-galaxy role install -r requirements.yaml --force
# requirements.yaml is resolved relative to this directory.
# NOTE(review): the command executes on the play's hosts (control_node), while
# playbook_dir is the playbook's directory as seen by the controller — confirm
# both refer to the same machine, or delegate this task to localhost.
chdir: "{{ playbook_dir }}"
# The command's output is not inspected, so the task always reports "changed".
changed_when: true
tasks:
- name: VM Pre-tasks
ansible.builtin.include_role:
Expand Down
Loading