From bf12402dda5fb85d87da850ed37c3a25ec80910a Mon Sep 17 00:00:00 2001
From: Luis Chamberlain
Date: Wed, 9 Jul 2025 11:39:11 -0700
Subject: [PATCH] Refine steady state workflow defaults and docs

Add an SSD steady state workflow: a Kconfig menu, a steady_state
ansible role with two fio job templates (IOPS and bandwidth), a
playbook, and a "steady-state" make target. Also fix the
"targetting" typo in kconfigs/workflows/Kconfig.

The role writes the two fio job files into the steady state data
directory, prefills the target device by running the fio commands
emitted by scripts/workflows/precondition/prefill-fio-jobs.sh, runs
both fio steady state jobs, and pulls the data directory back to
workflows/steady_state/results/<host>/. The prefill, fio and copy
steps only run when kdevops_run_ssd_steady_state is true, which the
"steady-state" make target passes through --extra-vars.

Review fixes folded in:

  - fio has no "threads" option, so both job files were rejected.
    Use "thread".

  - KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE was defined twice and
    its only prompt lived in a file that is sourced under
    "if KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE", so the symbol
    gated its own prompt and could never be enabled. The prompt now
    sits on the definition in kconfigs/workflows/Kconfig, and
    workflows/steady_state/Kconfig only carries the options. The
    nested, duplicated menu is gone as well.

  - The role read steady_state_* / precondition_* variables while
    the Kconfig symbols are named SSD_STEADY_STATE_*, so nothing
    chosen in the menu reached the role. The role defaults now read
    the ssd_steady_state_* variables and fall back to the previous
    literals when they are undefined. This assumes "output yaml"
    exports each symbol under its lower-cased name; please confirm
    against the yaml generator.

  - The prefill task piped "grep | bash" without pipefail, so a
    failing helper or an empty command list was reported as success.
    The script now starts with "set -eo pipefail".

  - Use fully qualified module names for template, shell and
    command, matching the other tasks in the role.

Open items, not addressed here:

  - The prefill loop count is defined in Kconfig and in the role
    defaults, but no task passes it to the prefill helper.

  - No task creates workflows/steady_state/results/ on the
    controller before the results are pulled into it.

---
Reviewer note: the blob ids on the "index" lines below were not
regenerated after the review fixes, so they are stale for the
edited files.

 kconfigs/workflows/Kconfig                    |  23 +++-
 .../roles/steady_state/defaults/main.yml      |  17 +++
 playbooks/roles/steady_state/tasks/main.yaml  |  74 +++++++++++
 .../roles/steady_state/templates/ss_bw.ini.j2 |  31 +++++
 .../steady_state/templates/ss_iops.ini.j2     |  31 +++++
 playbooks/steady_state.yml                    |   4 +
 workflows/Makefile                            |   4 +
 workflows/steady_state/Kconfig                | 104 ++++++++++++++++++
 workflows/steady_state/Makefile               |  14 +++
 9 files changed, 299 insertions(+), 3 deletions(-)
 create mode 100644 playbooks/roles/steady_state/defaults/main.yml
 create mode 100644 playbooks/roles/steady_state/tasks/main.yaml
 create mode 100644 playbooks/roles/steady_state/templates/ss_bw.ini.j2
 create mode 100644 playbooks/roles/steady_state/templates/ss_iops.ini.j2
 create mode 100644 playbooks/steady_state.yml
 create mode 100644 workflows/steady_state/Kconfig
 create mode 100644 workflows/steady_state/Makefile

diff --git a/kconfigs/workflows/Kconfig b/kconfigs/workflows/Kconfig
index 4f969c60c..bc4a536c7 100644
--- a/kconfigs/workflows/Kconfig
+++ b/kconfigs/workflows/Kconfig
@@ -26,14 +26,14 @@ choice
 config WORKFLOW_LINUX_DISTRO
 	bool "Distro kernel"
 	help
-	  If you are targetting a workflow to run on a distribution kernel
+	  If you are targeting a workflow to run on a distribution kernel
 	  enable this.
 
 config WORKFLOW_LINUX_CUSTOM
 	bool "Upstream Linux or custom kernel"
 	select BOOTLINUX
 	help
-	  If you are targetting a workflow to run on a vanilla upstream
+	  If you are targeting a workflow to run on a vanilla upstream
 	  linux, linux-stable, linux-next, or a custom kernel you want to
 	  build and install enable this.
 
@@ -80,7 +80,7 @@ config WORKFLOWS_LINUX_TESTS
 if WORKFLOWS_LINUX_TESTS
 
 config WORKFLOWS_DEDICATED_WORKFLOW
-	bool "Are you only targetting one subsystem test?"
+	bool "Are you only targeting one subsystem test?"
 	default y
 	help
 	  Enable this to if you are only wanting to test one main Linux
@@ -390,6 +390,23 @@ source "workflows/sysbench/Kconfig"
 endmenu
 endif # KDEVOPS_WORKFLOW_ENABLE_SYSBENCH
 
+config KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE
+	bool "Enable SSD steady state workflow"
+	output yaml
+	default n
+	help
+	  Steady state is a state where performance is considered stable.
+	  SNIA guidelines recommend purging and pre-conditioning a device
+	  before collecting performance data. Enable this workflow if you
+	  want to run the fio steady state scripts prior to other workflows
+	  to verify the target drive has reached steady state.
+
+if KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE
+menu "Configure SSD steady state workflow"
+source "workflows/steady_state/Kconfig"
+endmenu
+endif # KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE
+
 config KDEVOPS_WORKFLOW_GIT_CLONES_KDEVOPS_GIT
 	bool
 	default y if KDEVOPS_WORKFLOW_ENABLE_FSTESTS || KDEVOPS_WORKFLOW_ENABLE_BLKTESTS
diff --git a/playbooks/roles/steady_state/defaults/main.yml b/playbooks/roles/steady_state/defaults/main.yml
new file mode 100644
index 000000000..ac5635af0
--- /dev/null
+++ b/playbooks/roles/steady_state/defaults/main.yml
@@ -0,0 +1,17 @@
+---
+steady_state_data: "{{ data_path }}/steady_state"
+steady_state_device: "{{ ssd_steady_state_device | default('/dev/nvme0n1') }}"
+precondition_blocksize: "{{ ssd_steady_state_prefill_blocksize | default('128k') }}"
+precondition_iodepth: "{{ ssd_steady_state_iodepth | default('32') }}"
+precondition_numjobs: "{{ ssd_steady_state_numjobs | default('4') }}"
+precondition_prefill_loop: "{{ ssd_steady_state_prefill_loop | default(2) }}"
+steady_state_runtime: "{{ ssd_steady_state_runtime | default('6h') }}"
+steady_state_iops_mean_limit: "{{ ssd_steady_state_iops_mean_limit | default('20%') }}"
+steady_state_iops_mean_dur: "{{ ssd_steady_state_iops_mean_dur | default('4h') }}"
+steady_state_iops_slope: "{{ ssd_steady_state_iops_slope | default('10%') }}"
+steady_state_iops_slope_dur: "{{ ssd_steady_state_iops_slope_dur | default('4h') }}"
+steady_state_bw_mean_limit: "{{ ssd_steady_state_bw_mean_limit | default('20%') }}"
+steady_state_bw_mean_dur: "{{ ssd_steady_state_bw_mean_dur | default('2h') }}"
+steady_state_bw_slope: "{{ ssd_steady_state_bw_slope | default('10%') }}"
+steady_state_bw_slope_dur: "{{ ssd_steady_state_bw_slope_dur | default('2h') }}"
+kdevops_run_ssd_steady_state: False
diff --git a/playbooks/roles/steady_state/tasks/main.yaml b/playbooks/roles/steady_state/tasks/main.yaml
new file mode 100644
index 000000000..d94ff9877
--- /dev/null
+++ b/playbooks/roles/steady_state/tasks/main.yaml
@@ -0,0 +1,74 @@
+---
+- name: Import optional extra_args file
+  ansible.builtin.include_vars:
+    file: "{{ item }}"
+  with_first_found:
+    - files:
+        - "../extra_vars.yml"
+        - "../extra_vars.yaml"
+        - "../extra_vars.json"
+      skip: true
+  failed_when: false
+  tags: vars
+
+- name: Ensure steady state directory exists
+  become: yes
+  become_method: sudo
+  ansible.builtin.file:
+    path: "{{ steady_state_data }}"
+    state: directory
+  tags: ['setup']
+
+- name: Generate fio steady state configs
+  become: yes
+  become_method: sudo
+  ansible.builtin.template:
+    src: "{{ item }}.j2"
+    dest: "{{ steady_state_data }}/{{ item }}"
+    mode: '0644'
+  loop:
+    - ss_iops.ini
+    - ss_bw.ini
+  tags: ['setup']
+
+- name: Run prefill helper and execute fio commands
+  become: yes
+  become_method: sudo
+  ansible.builtin.shell: |
+    set -eo pipefail
+    {{ topdir_path }}/scripts/workflows/precondition/prefill-fio-jobs.sh \
+      --target {{ steady_state_device }} \
+      --blocksize {{ precondition_blocksize }} \
+      --jobs {{ precondition_numjobs }} \
+      --verbose > {{ steady_state_data }}/prefill.cmd
+    grep '^fio' {{ steady_state_data }}/prefill.cmd | bash
+  args:
+    executable: /bin/bash
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['prefill']
+
+- name: Run fio steady state for iops
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: fio {{ steady_state_data }}/ss_iops.ini
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['steady_state']
+
+- name: Run fio steady state for bw
+  become: yes
+  become_method: sudo
+  ansible.builtin.command: fio {{ steady_state_data }}/ss_bw.ini
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['steady_state']
+
+- name: Copy steady state results to controller
+  ansible.posix.synchronize:
+    src: "{{ steady_state_data }}/"
+    dest: "{{ topdir_path }}/workflows/steady_state/results/{{ inventory_hostname }}/"
+    mode: pull
+    recursive: true
+    rsync_opts:
+      - "--ignore-existing"
+  delegate_to: localhost
+  when: kdevops_run_ssd_steady_state|bool
+  tags: ['results']
diff --git a/playbooks/roles/steady_state/templates/ss_bw.ini.j2 b/playbooks/roles/steady_state/templates/ss_bw.ini.j2
new file mode 100644
index 000000000..e54eb9688
--- /dev/null
+++ b/playbooks/roles/steady_state/templates/ss_bw.ini.j2
@@ -0,0 +1,31 @@
+[global]
+name=Workload dependent steady state bw random pre-conditioning
+thread
+group_reporting=1
+time_based
+ioengine=io_uring
+direct=1
+buffered=0
+norandommap
+refill_buffers
+
+bs={{ precondition_blocksize }}
+iodepth={{ precondition_iodepth }}
+numjobs={{ precondition_numjobs }}
+filename={{ steady_state_device }}
+
+exitall_on_error
+continue_on_error=none
+
+rw=randwrite
+
+runtime={{ steady_state_runtime }}
+[steady-state-mean-bw]
+ss=bw:{{ steady_state_bw_mean_limit }}
+ss_dur={{ steady_state_bw_mean_dur }}
+
+[steady-state-slope-bw]
+new_group
+group_reporting
+ss=bw_slope:{{ steady_state_bw_slope }}
+ss_dur={{ steady_state_bw_slope_dur }}
diff --git a/playbooks/roles/steady_state/templates/ss_iops.ini.j2 b/playbooks/roles/steady_state/templates/ss_iops.ini.j2
new file mode 100644
index 000000000..8410fefa7
--- /dev/null
+++ b/playbooks/roles/steady_state/templates/ss_iops.ini.j2
@@ -0,0 +1,31 @@
+[global]
+name=Workload dependent steady state iops random pre-conditioning
+thread
+group_reporting=1
+time_based
+ioengine=io_uring
+direct=1
+buffered=0
+norandommap
+refill_buffers
+
+bs={{ precondition_blocksize }}
+iodepth={{ precondition_iodepth }}
+numjobs={{ precondition_numjobs }}
+filename={{ steady_state_device }}
+
+exitall_on_error
+continue_on_error=none
+
+rw=randwrite
+
+runtime={{ steady_state_runtime }}
+[steady-state-mean-iops]
+ss=iops:{{ steady_state_iops_mean_limit }}
+ss_dur={{ steady_state_iops_mean_dur }}
+
+[steady-state-slope-iops]
+new_group
+group_reporting
+ss=iops_slope:{{ steady_state_iops_slope }}
+ss_dur={{ steady_state_iops_slope_dur }}
diff --git a/playbooks/steady_state.yml b/playbooks/steady_state.yml
new file mode 100644
index 000000000..0dd400d55
--- /dev/null
+++ b/playbooks/steady_state.yml
@@ -0,0 +1,4 @@
+---
+- hosts: all
+  roles:
+    - role: steady_state
diff --git a/workflows/Makefile b/workflows/Makefile
index d2b31d1a4..89f6d3171 100644
--- a/workflows/Makefile
+++ b/workflows/Makefile
@@ -54,6 +54,10 @@ ifeq (y,$(CONFIG_KDEVOPS_WORKFLOW_ENABLE_SYSBENCH))
 include workflows/sysbench/Makefile
 endif # CONFIG_KDEVOPS_WORKFLOW_ENABLE_SYSBENCH == y
 
+ifeq (y,$(CONFIG_KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE))
+include workflows/steady_state/Makefile
+endif # CONFIG_KDEVOPS_WORKFLOW_ENABLE_SSD_STEADY_STATE == y
+
 ANSIBLE_EXTRA_ARGS += $(WORKFLOW_ARGS)
 ANSIBLE_EXTRA_ARGS_SEPARATED += $(WORKFLOW_ARGS_SEPARATED)
 ANSIBLE_EXTRA_ARGS_DIRECT += $(WORKFLOW_ARGS_DIRECT)
diff --git a/workflows/steady_state/Kconfig b/workflows/steady_state/Kconfig
new file mode 100644
index 000000000..cb378d59f
--- /dev/null
+++ b/workflows/steady_state/Kconfig
@@ -0,0 +1,104 @@
+config SSD_STEADY_STATE_DEVICE
+	string "Device to pre-condition"
+	output yaml
+	default "/dev/disk/by-id/nvme-QEMU_NVMe_Ctrl_kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_NVME
+	default "/dev/disk/by-id/virtio-kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_VIRTIO
+	default "/dev/disk/by-id/ata-QEMU_HARDDISK_kdevops1" if LIBVIRT && LIBVIRT_EXTRA_STORAGE_DRIVE_IDE
+	default "/dev/nvme2n1" if TERRAFORM_AWS_INSTANCE_M5AD_4XLARGE
+	default "/dev/nvme1n1" if TERRAFORM_GCE
+	default "/dev/sdd" if TERRAFORM_AZURE
+	default TERRAFORM_OCI_SPARSE_VOLUME_DEVICE_FILE_NAME if TERRAFORM_OCI
+	help
+	  Block device to operate on for steady state.
+
+config SSD_STEADY_STATE_PREFILL_BLOCKSIZE
+	string "Prefill blocksize"
+	output yaml
+	default "128k"
+	help
+	  Block size used during the prefill step before steady
+	  state verification.
+
+config SSD_STEADY_STATE_IODEPTH
+	string "Prefill iodepth"
+	output yaml
+	default "32"
+	help
+	  Queue depth used for the prefill workload.
+
+config SSD_STEADY_STATE_NUMJOBS
+	string "Prefill number jobs"
+	output yaml
+	default "4"
+	help
+	  Number of fio jobs to spawn for the prefill step.
+
+config SSD_STEADY_STATE_PREFILL_LOOP
+	int "Prefill loop count"
+	output yaml
+	default 2
+	help
+	  How many iterations of the prefill helper to run.
+
+config SSD_STEADY_STATE_RUNTIME
+	string "Steady state runtime"
+	output yaml
+	default "6h"
+	help
+	  Maximum runtime allowed for each steady state check.
+
+config SSD_STEADY_STATE_IOPS_MEAN_LIMIT
+	string "IOPS steady state mean limit"
+	output yaml
+	default "20%"
+	help
+	  fio ss=iops value defining the IOPS mean limit criteria.
+
+config SSD_STEADY_STATE_IOPS_MEAN_DUR
+	string "IOPS steady state mean duration"
+	output yaml
+	default "4h"
+	help
+	  Duration the IOPS mean limit must be sustained.
+
+config SSD_STEADY_STATE_IOPS_SLOPE
+	string "IOPS steady state slope"
+	output yaml
+	default "10%"
+	help
+	  fio ss=iops_slope percentage for slope detection.
+
+config SSD_STEADY_STATE_IOPS_SLOPE_DUR
+	string "IOPS steady state slope duration"
+	output yaml
+	default "4h"
+	help
+	  Duration the IOPS slope criterion must hold.
+
+config SSD_STEADY_STATE_BW_MEAN_LIMIT
+	string "BW steady state mean limit"
+	output yaml
+	default "20%"
+	help
+	  fio ss=bw value defining the throughput mean limit.
+
+config SSD_STEADY_STATE_BW_MEAN_DUR
+	string "BW steady state mean duration"
+	output yaml
+	default "2h"
+	help
+	  Duration the throughput mean limit must be sustained.
+
+config SSD_STEADY_STATE_BW_SLOPE
+	string "BW steady state slope"
+	output yaml
+	default "10%"
+	help
+	  fio ss=bw_slope percentage for slope detection.
+
+config SSD_STEADY_STATE_BW_SLOPE_DUR
+	string "BW steady state slope duration"
+	output yaml
+	default "2h"
+	help
+	  Duration the throughput slope criterion must hold.
diff --git a/workflows/steady_state/Makefile b/workflows/steady_state/Makefile
new file mode 100644
index 000000000..56897111f
--- /dev/null
+++ b/workflows/steady_state/Makefile
@@ -0,0 +1,14 @@
+PHONY += steady-state steady-state-help-menu
+
+SSD_STEADY_STATE_DYNAMIC_RUNTIME_VARS := "kdevops_run_ssd_steady_state": True
+
+steady-state:
+	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
+		-i $(KDEVOPS_HOSTFILE) playbooks/steady_state.yml \
+		--extra-vars '{ $(SSD_STEADY_STATE_DYNAMIC_RUNTIME_VARS) }' $(LIMIT_HOSTS)
+
+steady-state-help-menu:
+	@echo "steady-state                      - Prefill and run fio steady state"
+	@echo ""
+
+HELP_TARGETS += steady-state-help-menu