diff --git a/gpu-validation/defaults/main.yaml b/gpu-validation/defaults/main.yaml index a12b4c4..c66aa02 100644 --- a/gpu-validation/defaults/main.yaml +++ b/gpu-validation/defaults/main.yaml @@ -45,16 +45,20 @@ gpu_validation_pci_devices: 10de:27b8: 1 gpu_validation_model_tests_enabled: true # Can be disabled for lighter CUDA-only sanity testing -gpu_validation_model_name: TinyLlama/TinyLlama-1.1B-Chat-v1.0 # RedHatAI/Llama-3.2-1B-Instruct-FP8 for RHAIIS +gpu_validation_model_name: RedHatAI/Llama-3.2-1B-Instruct-FP8 # [string] (optional) HuggingFace token if required for download gpu_validation_model_download_hf_token: +# [string] (optional) URL to download pre-packaged model from RH intranet (e.g., http://file-server/models/tinyllama.tar.gz) +gpu_validation_model_intranet_url: +# [string] (optional) Local path where pre-downloaded model is located (overrides HuggingFace download) +gpu_validation_model_local_path: # [float] Performance threshholds gpu_validation_model_perf_max_avg_time_per_tok: !!float "0.03" gpu_validation_model_perf_max_avg_time_to_first_tok: !!float "0.3" # [string] Container image to use for model serving -gpu_validation_workload_container_image: "docker.io/vllm/vllm-openai:latest" # "registry.redhat.io/rhaiis/vllm-cuda-rhel9:3.0.0" for RHAIIS +gpu_validation_workload_container_image: "registry.redhat.io/rhaiis/vllm-cuda-rhel9:3.0.0" # [string] (optional) registry username if required for container image download gpu_validation_workload_registry_username: # [string] (optional) registry password if required for container image download @@ -62,7 +66,7 @@ gpu_validation_workload_registry_password: # [string] Cache directory path for model downloads gpu_validation_workload_cache_dir: "/home/cloud-user/.cache/workload" # [string] Cache mount path inside container -gpu_validation_workload_cache_mount_path: "/root/.cache/huggingface" # "/opt/app-root/src/.cache" for RHAIIS +gpu_validation_workload_cache_mount_path: "/opt/app-root/src/.cache" # 
[string] Container Device options gpu_validation_workload_device_opts: "--device nvidia.com/gpu=all" # [string] Container security options @@ -70,7 +74,7 @@ gpu_validation_workload_security_opts: "--security-opt=label=disable" # [string] Container options gpu_validation_workload_additional_opts: "--rm -it" # [string] Container user namespace option -gpu_validation_workload_userns: # "--userns=keep-id:uid=1001" for RHAIIS +gpu_validation_workload_userns: "--userns=keep-id:uid=1001" # [string] Shared memory size gpu_validation_workload_shm_size: "--shm-size=4g" # [string] Additional environment variables diff --git a/main.yaml b/main.yaml index fc9b74c..7ec1513 100644 --- a/main.yaml +++ b/main.yaml @@ -2,6 +2,12 @@ - name: GPU Validation VM setup hosts: control_node gather_facts: false + pre_tasks: + - name: Install Ansible Galaxy role requirements + ansible.builtin.command: + cmd: ansible-galaxy role install -r requirements.yaml --force + chdir: "{{ playbook_dir }}" + changed_when: true tasks: - name: VM Pre-tasks ansible.builtin.include_role: