Add an ansible-lint CI workflow and make the gpu-validation tasks lint-clean.

Summary of what the hunks below do:
  * .github/workflows/ansible-lint.yml: new workflow that runs ansible-lint on
    pull requests targeting main. The action is pinned to the same release as
    the pre-commit hook so CI and local hooks lint with one version.
  * .pre-commit-config.yaml: bump the ansible-lint hook from v6.22.2 to v25.9.1.
  * .yamllint: add comments, comments-indentation, braces and octal-values rules.
  * gpu-validation/tasks/*.yaml: add "---" document starts and final newlines,
    use fully-qualified module names, put spaces inside Jinja braces, indent
    the tasks under "block:", set an explicit file mode on blacklist.conf, and
    use real booleans for the yum_repository flags.
  * requirements.yaml: add community.general (setup.yaml now calls
    community.general.alternatives).

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy. Hunk line counts match the @@ headers, but
exact indentation depths and whitespace-only differences (for example any
trailing spaces on removed lines) are assumptions; apply with
--ignore-whitespace or regenerate from the branch if a hunk is rejected.
NOTE(review): the post-image blob ids on the "index" lines of
.github/workflows/ansible-lint.yml and gpu-validation/tasks/nvidia.yaml
predate the edits made to their added lines and are therefore stale.

diff --git a/.github/workflows/ansible-lint.yml b/.github/workflows/ansible-lint.yml
new file mode 100644
index 0000000..b562ed4
--- /dev/null
+++ b/.github/workflows/ansible-lint.yml
@@ -0,0 +1,16 @@
+---
+name: ansible-lint
+on:
+  pull_request:
+    branches: ["main"]
+jobs:
+  build:
+    name: Ansible Lint
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run ansible-lint
+        uses: ansible/ansible-lint@v25.9.1  # keep in sync with rev in .pre-commit-config.yaml
+        with:
+          setup_python: "true"
+          requirements_file: "requirements.yaml"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2c9751a..72ac80b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
     hooks:
       - id: shellcheck
   - repo: https://github.com/ansible/ansible-lint.git
-    rev: v6.22.2
+    rev: v25.9.1
     hooks:
       - id: ansible-lint
         files: \.(yaml|yml)$
diff --git a/.yamllint b/.yamllint
index 01cd9dd..f3e2758 100644
--- a/.yamllint
+++ b/.yamllint
@@ -3,3 +3,11 @@ extends: default
 rules:
   line-length:
     max: 160
+  comments:
+    min-spaces-from-content: 1
+  comments-indentation: false
+  braces:
+    max-spaces-inside: 1
+  octal-values:
+    forbid-implicit-octal: true
+    forbid-explicit-octal: true
diff --git a/gpu-validation/tasks/cuda_assertions.yaml b/gpu-validation/tasks/cuda_assertions.yaml
index c0c2db0..4db9528 100644
--- a/gpu-validation/tasks/cuda_assertions.yaml
+++ b/gpu-validation/tasks/cuda_assertions.yaml
@@ -1,6 +1,6 @@
-
+---
 - name: TEST[CUDA] Run the CUDA Sanity Check
   ansible.builtin.command: python /tmp/scripts/cuda_sanity_check.py
   register: cuda_sanity_result
   failed_when: cuda_sanity_result.rc != 0
-  changed_when: false
\ No newline at end of file
+  changed_when: false
diff --git a/gpu-validation/tasks/main.yaml b/gpu-validation/tasks/main.yaml
index 4004320..328c385 100644
--- a/gpu-validation/tasks/main.yaml
+++ b/gpu-validation/tasks/main.yaml
@@ -17,7 +17,7 @@
 - name: Check CUDA libs
   ansible.builtin.import_tasks: cuda.yaml
 
-- name: CUDA Assertions
+- name: CUDA Assertions # noqa: ignore-errors
   ansible.builtin.import_tasks: cuda_assertions.yaml
   ignore_errors: true
 
diff --git a/gpu-validation/tasks/model_performance.yaml b/gpu-validation/tasks/model_performance.yaml
index 35aaf34..5b73208 100644
--- a/gpu-validation/tasks/model_performance.yaml
+++ b/gpu-validation/tasks/model_performance.yaml
@@ -10,4 +10,3 @@
 - name: Parse the JSON output from performance check script
   ansible.builtin.set_fact:
     performance_json: "{{ performance_script_output.stdout | from_json }}"
-
diff --git a/gpu-validation/tasks/model_performance_assertions.yaml b/gpu-validation/tasks/model_performance_assertions.yaml
index 2254695..4426fac 100644
--- a/gpu-validation/tasks/model_performance_assertions.yaml
+++ b/gpu-validation/tasks/model_performance_assertions.yaml
@@ -1,3 +1,4 @@
+---
 - name: TEST[model_performance]Check the performance thresholds
   ansible.builtin.debug:
     var: performance_json
@@ -5,4 +6,4 @@
     (performance_json.avg_time_per_tok | float) == 0
     or (performance_json.avg_time_per_tok | float) > gpu_validation_model_perf_max_avg_time_per_tok
     or (performance_json.avg_time_to_first_tok | float) == 0
-    or (performance_json.avg_time_to_first_tok | float) > gpu_validation_model_perf_max_avg_time_to_first_tok
\ No newline at end of file
+    or (performance_json.avg_time_to_first_tok | float) > gpu_validation_model_perf_max_avg_time_to_first_tok
diff --git a/gpu-validation/tasks/nvidia.yaml b/gpu-validation/tasks/nvidia.yaml
index b0db81d..0936baf 100644
--- a/gpu-validation/tasks/nvidia.yaml
+++ b/gpu-validation/tasks/nvidia.yaml
@@ -10,53 +10,55 @@
 - name: Install nvidia-driver RPM if needed
   when: _install_nvidia_driver
   block:
-  - name: Blacklist nouveau kernel module
-    become: true
-    ansible.builtin.blockinfile:
-      block: |
-        blacklist nouveau
-      path: /etc/modprobe.d/blacklist.conf
-      create: true
+    - name: Blacklist nouveau kernel module
+      become: true
+      ansible.builtin.blockinfile:
+        block: |
+          blacklist nouveau
+        path: /etc/modprobe.d/blacklist.conf
+        create: true
+        mode: '0644'
 
-  - name: Remove nouveau kernel module if loaded
-    become: true
-    ansible.builtin.command: modprobe -r nouveau
-    ignore_errors: true
+    - name: Remove nouveau kernel module if loaded
+      become: true
+      ansible.builtin.command: modprobe -r nouveau
+      failed_when: false
+      changed_when: false
 
-  - name: Add nvidia CUDA repo
-    become: true
-    yum_repository:
-      name: nvidia-cuda-rhel9
-      description: NVIDIA CUDA repo for RHEL 9
-      baseurl: "{{gpu_validation_nvidia_repo_url}}/$basearch/"
-      gpgcheck: yes
-      gpgkey: "{{gpu_validation_nvidia_repo_url}}/$basearch/D42D0685.pub"
+    - name: Add nvidia CUDA repo
+      become: true
+      ansible.builtin.yum_repository:
+        name: nvidia-cuda-rhel9
+        description: NVIDIA CUDA repo for RHEL 9
+        baseurl: "{{ gpu_validation_nvidia_repo_url }}/$basearch/"
+        gpgcheck: true
+        gpgkey: "{{ gpu_validation_nvidia_repo_url }}/$basearch/D42D0685.pub"
 
-  - name: Add EPEL repository for DKMS support
-    become: true
-    ansible.builtin.yum_repository:
-      name: epel
-      description: EPEL YUM repo
-      baseurl: https://download.fedoraproject.org/pub/epel/9/Everything/$basearch/
-      enabled: 1
-      gpgcheck: 1
-      gpgkey: https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9
+    - name: Add EPEL repository for DKMS support
+      become: true
+      ansible.builtin.yum_repository:
+        name: epel
+        description: EPEL YUM repo
+        baseurl: https://download.fedoraproject.org/pub/epel/9/Everything/$basearch/
+        enabled: true
+        gpgcheck: true
+        gpgkey: https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9
 
-  - name: Enable nvidia-driver RPM module
-    become: true
-    ansible.builtin.dnf:
-      name: "@nvidia-driver:latest-dkms"
-      state: present
+    - name: Enable nvidia-driver RPM module
+      become: true
+      ansible.builtin.dnf:
+        name: "@nvidia-driver:latest-dkms"
+        state: present
 
-  - name: Install the nvidia driver
-    become: true
-    ansible.builtin.dnf:
-      name: cuda-drivers
-      state: present
+    - name: Install the nvidia driver
+      become: true
+      ansible.builtin.dnf:
+        name: cuda-drivers
+        state: present
 
-  - name: Refresh package facts after driver installation
-    ansible.builtin.package_facts:
-      manager: rpm
+    - name: Refresh package facts after driver installation
+      ansible.builtin.package_facts:
+        manager: rpm
 
 - name: Run nvidia-smi to list NVIDIA GPUs and count them
   ansible.builtin.shell: set -o pipefail && nvidia-smi --list-gpus | wc -l
diff --git a/gpu-validation/tasks/setup.yaml b/gpu-validation/tasks/setup.yaml
index 9971b79..178838c 100644
--- a/gpu-validation/tasks/setup.yaml
+++ b/gpu-validation/tasks/setup.yaml
@@ -25,12 +25,12 @@
     creates: /etc/alternatives/python3
 
 - name: Set python3 alternative to /usr/bin/python3.12
-  ansible.builtin.alternatives:
+  community.general.alternatives:
     name: python3
     path: /usr/bin/python3.12
 
 - name: Set ansible to use python3.12
-  set_fact:
+  ansible.builtin.set_fact:
     ansible_python_interpreter: /usr/bin/python3.12
 
 - name: Ensure python3.12-setuptools is installed
diff --git a/gpu-validation/tasks/vllm.yaml b/gpu-validation/tasks/vllm.yaml
index da884a2..1eb9de7 100644
--- a/gpu-validation/tasks/vllm.yaml
+++ b/gpu-validation/tasks/vllm.yaml
@@ -1,4 +1,5 @@
-- name: PIP; install vllm
+---
+- name: PIP; install vllm
   ansible.builtin.pip:
     name: vllm
     state: present
@@ -6,5 +7,5 @@
 - ansible.builtin.import_tasks: vllm_config.yaml # noqa: name[missing]
 - ansible.builtin.import_tasks: model_download_and_serve.yaml # noqa: name[missing]
 - ansible.builtin.import_tasks: model_performance.yaml # noqa: name[missing]
-- ansible.builtin.import_tasks: model_performance_assertions.yaml # noqa: name[missing]
-  ignore_errors: true
\ No newline at end of file
+- ansible.builtin.import_tasks: model_performance_assertions.yaml # noqa: name[missing] ignore-errors
+  ignore_errors: true
diff --git a/gpu-validation/tasks/vm_image.yaml b/gpu-validation/tasks/vm_image.yaml
index 37a187a..14172f8 100644
--- a/gpu-validation/tasks/vm_image.yaml
+++ b/gpu-validation/tasks/vm_image.yaml
@@ -1,3 +1,4 @@
+---
 - name: Download VM image
   ansible.builtin.get_url:
     url: "{{ gpu_validation_image_url }}"
@@ -6,7 +7,7 @@
 
 - name: Create glance image from GPU validation download source
   openstack.cloud.image:
-    name: "{{gpu_validation_image_name}}"
+    name: "{{ gpu_validation_image_name }}"
     container_format: bare
     disk_format: qcow2
     filename: "/tmp/{{ gpu_validation_image_url | basename }}"
@@ -15,12 +16,12 @@
 
 - name: Create flavor for GPU validation
   openstack.cloud.compute_flavor:
-    name: "{{gpu_validation_flavor_name}}"
-    ram: "{{gpu_validation_flavor_ram}}"
-    vcpus: "{{gpu_validation_flavor_vcpus}}"
-    disk: "{{gpu_validation_flavor_disk}}"
+    name: "{{ gpu_validation_flavor_name }}"
+    ram: "{{ gpu_validation_flavor_ram }}"
+    vcpus: "{{ gpu_validation_flavor_vcpus }}"
+    disk: "{{ gpu_validation_flavor_disk }}"
     extra_specs:
-      "pci_passthrough:alias": "gpu-l4:{{gpu_validation_flavor_gpus}}"
+      "pci_passthrough:alias": "gpu-l4:{{ gpu_validation_flavor_gpus }}"
       "hw:pci_numa_affinity_policy": "preferred"
       "hw:hide_hypervisor_id": "true"
     ca_cert: "{{ gpu_validation_ca_cert_path }}"
diff --git a/gpu-validation/tasks/vm_net.yaml b/gpu-validation/tasks/vm_net.yaml
index e6898ee..c1b6898 100644
--- a/gpu-validation/tasks/vm_net.yaml
+++ b/gpu-validation/tasks/vm_net.yaml
@@ -72,7 +72,7 @@
   register: existing_fip_info
 
 - name: Set floating IP creation flag
-  set_fact:
+  ansible.builtin.set_fact:
     create_floating_ip: "{{ (existing_fip_info.floating_ips | length) == 0 }}"
 
 - name: Create floating ip address on public network
diff --git a/requirements.yaml b/requirements.yaml
index 6f193e0..f3163f8 100644
--- a/requirements.yaml
+++ b/requirements.yaml
@@ -2,3 +2,5 @@
 collections:
   - name: openstack.cloud
     version: ">=2.4.1"
+  - name: community.general
+    version: ">=10.0.0"