diff --git a/aci-preupgrade-validation-script.py b/aci-preupgrade-validation-script.py index f29c66b..81aeb48 100644 --- a/aci-preupgrade-validation-script.py +++ b/aci-preupgrade-validation-script.py @@ -6053,6 +6053,35 @@ def auto_firmware_update_on_switch_check(cversion, tversion, **kwargs): return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url) + +@check_wrapper(check_title="APIC Storage Inode Check (F4388, F4389, F4390 equipment-full)") +def apic_storage_inode_check(**kwargs): + result = FAIL_UF + headers = ['Fault', 'Pod', 'Node', 'Mount Point', 'Usage %', 'Recommended Action'] + data = [] + unformatted_headers = ['Fault', 'Fault DN', 'Recommended Action'] + unformatted_data = [] + recommended_action = 'Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault' + doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#apic-storage-inode-check' + dn_regex = node_regex + r'/.+p-\[(?P<mountpoint>.+)\]-f' + desc_regex = r'is (?P<usage>\d{2,3}%) full for Inodes' + faultInsts = icurl('class', 'faultInst.json?query-target-filter=or(eq(faultInst.code,"F4388"),eq(faultInst.code,"F4389"),eq(faultInst.code,"F4390"))') + for faultInst in faultInsts: + lc = faultInst['faultInst']['attributes']['lc'] + if lc not in ["raised", "soaking"]: + continue + fc = faultInst['faultInst']['attributes']['code'] + dn = re.search(dn_regex, faultInst['faultInst']['attributes']['dn']) + desc = re.search(desc_regex, faultInst['faultInst']['attributes']['descr']) + if dn and desc: + data.append([fc, dn.group('pod'), dn.group('node'), dn.group('mountpoint'), desc.group('usage'), recommended_action]) + else: + unformatted_data.append([fc, faultInst['faultInst']['attributes']['dn'], recommended_action]) + if not data and not unformatted_data: + result = PASS + return Result(result=result, headers=headers, data=data, unformatted_headers=unformatted_headers, 
unformatted_data=unformatted_data, recommended_action=recommended_action, doc_url=doc_url) + + # ---- Script Execution ---- @@ -6162,6 +6191,7 @@ class CheckManager: fabric_port_down_check, equipment_disk_limits_exceeded, apic_vmm_inventory_sync_faults_check, + apic_storage_inode_check, # Configurations vpc_paired_switches_check, diff --git a/docs/docs/validations.md b/docs/docs/validations.md index f46e03d..ba63890 100644 --- a/docs/docs/validations.md +++ b/docs/docs/validations.md @@ -82,7 +82,7 @@ Items | Faults | This Script [Fabric Port Status][f19] | F1394: ethpm-if-port-down-fabric | :white_check_mark: | :no_entry_sign: [Equipment Disk Limits][f20] | F1820: 80% -minor
F1821: -major
F1822: -critical | :white_check_mark: | :no_entry_sign: [VMM Inventory Partially Synced][f21] | F0132: comp-ctrlr-operational-issues | :white_check_mark: | :no_entry_sign: - +[APIC Storage Inode Check][f22] | F4388: 75% - 85% -warning
F4389: 85% - 90% -major
F4390: 90% or more -critical | :white_check_mark: | :no_entry_sign: [f1]: #apic-disk-space-usage [f2]: #standby-apic-disk-space-usage @@ -105,6 +105,7 @@ Items | Faults | This Script [f19]: #fabric-port-status [f20]: #equipment-disk-limits [f21]: #vmm-inventory-partially-synced +[f22]: #apic-storage-inode-check ### Configuration Checks @@ -1551,6 +1552,56 @@ EPGs using the `pre-provision` resolution immediacy do not rely on the VMM inven This check returns a `MANUAL` result as there are many reasons for a partial inventory sync to be reported. The goal is to ensure that the VMM inventory sync has fully completed before triggering the APIC upgrade to reduce any chance for unexpected inventory changes to occur. + +### APIC Storage Inode Check + +If a Cisco APIC is running low on inode capacity for any reason, the Cisco APIC upgrade can fail. The Cisco APIC will raise three different faults depending on inode utilization. If any of these faults are raised on the system, the issue should be resolved prior to performing the upgrade. + +* **F4388**: A warning level fault for Cisco APIC storage inode utilization. This is raised when utilization is between 75% and 85%. + +* **F4389**: A major level fault for Cisco APIC storage inode utilization. This is raised when utilization is between 85% and 90%. + +* **F4390**: A critical level fault for Cisco APIC storage inode utilization. This is raised when utilization is 90% or more. + +Even when the filesystem has adequate free storage space, inode capacity can still run out. This happens when a large number of small files or directories are created. + +Recommended Action: + +To recover from this fault, try the following actions: + +1. Free up space on the affected disk partition. +2. TAC may be required to analyze and clean up certain directories due to filesystem permissions. Cleanup of `/` is one such example. + +!!! 
example "Fault Example (F4390: Critical fault for APIC Inode Utilisation)" + ``` + moquery -c faultInst -f 'fault.Inst.code=="F4390"' + Total Objects shown: 1 + + # faultInst + ack : yes + alert : no + cause : equipment-full + changeSet : available (Old: 19408344, New: 19407972), inodesFree (Old: 263915, New: 263842), inodesUsed (Old: 2357525, New: 2357598), + used (Old: 19436092, New: 19436464) + code : F4390 + created : 2024-08-05T05:42:31.975+02:00 + delegated : no + descr : Storage unit /scratch-writes on node 3 with hostname b001nnc000003 mounted at /scratch-writes is 90% full for Inodes + dn : topology/pod-2/node-3/sys/ch/p-[/scratch-writes]-f-[/dev/mapper/atx-scratch]/fault-F4390 + domain : infra + highestSeverity : critical + lastTransition : 2024-08-05T09:41:18.152+02:00 + lc : raised + occur : 2 + origSeverity : critical + prevSeverity : cleared + rule : eqpt-storage-inode-critical + severity : critical + subject : equipment-full + type : operational + ``` + + ## Configuration Check Details ### VPC-paired Leaf switches diff --git a/tests/checks/apic_storage_inode_full_check/Fault_combination.json b/tests/checks/apic_storage_inode_full_check/Fault_combination.json new file mode 100644 index 0000000..5f1bb26 --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/Fault_combination.json @@ -0,0 +1,80 @@ +[ + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.579+00:00", + "delegated": "no", + "descr": "Storage unit /data/admin/bin/avread on Node 1 mounted at /data/admin/bin/avread is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/data/admin/bin/avread]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", 
+ "lastTransition": "2026-03-06T11:58:43.579+00:00", + "lc": "raised", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.587+00:00", + "delegated": "no", + "descr": "Storage unit /etc/hosts on Node 1 mounted at /etc/hosts is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/etc/hosts]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.587+00:00", + "lc": "soaking", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.595+00:00", + "delegated": "no", + "descr": "Storage unit /scratch-writes on Node 1 mounted at /scratch-writes is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/scratch-writes]-f-[/dev/mapper/atx-scratch]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.595+00:00", + "lc": "raised-clearing", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": 
"eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + } +] \ No newline at end of file diff --git a/tests/checks/apic_storage_inode_full_check/Fault_exists_not_raised.json b/tests/checks/apic_storage_inode_full_check/Fault_exists_not_raised.json new file mode 100644 index 0000000..468facd --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/Fault_exists_not_raised.json @@ -0,0 +1,54 @@ +[ + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.579+00:00", + "delegated": "no", + "descr": "Storage unit /data/admin/bin/avread on Node 1 mounted at /data/admin/bin/avread is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/data/admin/bin/avread]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.579+00:00", + "lc": "cleared", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.587+00:00", + "delegated": "no", + "descr": "Storage unit /etc/hosts on Node 1 mounted at /etc/hosts is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/etc/hosts]-f-[overlayfs]/fault-F4388", + "domain": "infra", + 
"highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.587+00:00", + "lc": "retaining", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + } +] diff --git a/tests/checks/apic_storage_inode_full_check/Fault_raised.json b/tests/checks/apic_storage_inode_full_check/Fault_raised.json new file mode 100644 index 0000000..ec5760a --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/Fault_raised.json @@ -0,0 +1,80 @@ +[ + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.579+00:00", + "delegated": "no", + "descr": "Storage unit /data/admin/bin/avread on Node 1 mounted at /data/admin/bin/avread is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/data/admin/bin/avread]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T12:00:53.560+00:00", + "lc": "raised", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.587+00:00", + "delegated": "no", + "descr": "Storage unit /etc/hosts on Node 1 mounted at /etc/hosts is 82% 
full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/etc/hosts]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T12:00:53.560+00:00", + "lc": "raised", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.602+00:00", + "delegated": "no", + "descr": "Storage unit / on Node 1 mounted at / is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T12:00:53.560+00:00", + "lc": "raised", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + } + ] diff --git a/tests/checks/apic_storage_inode_full_check/Fault_soaking.json b/tests/checks/apic_storage_inode_full_check/Fault_soaking.json new file mode 100644 index 0000000..3d4bf80 --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/Fault_soaking.json @@ -0,0 +1,81 @@ +[ + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.579+00:00", + "delegated": "no", + "descr": 
"Storage unit /data/admin/bin/avread on Node 1 mounted at /data/admin/bin/avread is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/data/admin/bin/avread]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.579+00:00", + "lc": "soaking", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.587+00:00", + "delegated": "no", + "descr": "Storage unit /etc/hosts on Node 1 mounted at /etc/hosts is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/etc/hosts]-f-[overlayfs]/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.587+00:00", + "lc": "soaking", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + }, + + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.602+00:00", + "delegated": "no", + "descr": "Storage unit / on Node 1 mounted at / is 82% full for Inodes", + "dn": "topology/pod-1/node-1/sys/ch/p-[/]-f-[overlayfs]/fault-F4388", + "domain": "infra", + 
"highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.602+00:00", + "lc": "soaking", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + } + ] diff --git a/tests/checks/apic_storage_inode_full_check/Fault_unformatted_data.json b/tests/checks/apic_storage_inode_full_check/Fault_unformatted_data.json new file mode 100644 index 0000000..0c6463c --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/Fault_unformatted_data.json @@ -0,0 +1,28 @@ +[ + { + "faultInst": { + "attributes": { + "ack": "no", + "alert": "no", + "cause": "equipment-full", + "changeSet": "available (Old: 37868344, New: 37859228), inodesFree (Old: 810163, New: 479339), inodesUsed (Old: 1811277, New: 2142101), inodesUtilized (Old: 70, New: 82), used (Old: 976092, New: 985208)", + "code": "F4388", + "created": "2026-03-06T11:58:43.602+00:00", + "delegated": "no", + "descr": "Storage unit /unknown on Node 1 mounted at /unknown is 82% full", + "dn": "topology/pod-1/node-1/sys/ch/invalid/fault-F4388", + "domain": "infra", + "highestSeverity": "warning", + "lastTransition": "2026-03-06T11:58:43.602+00:00", + "lc": "raised", + "occur": "1", + "origSeverity": "warning", + "prevSeverity": "warning", + "rule": "eqpt-storage-inode-warning", + "severity": "warning", + "subject": "equipment-full", + "type": "operational" + } + } + } + ] diff --git a/tests/checks/apic_storage_inode_full_check/test_apic_storage_inode_full_check.py b/tests/checks/apic_storage_inode_full_check/test_apic_storage_inode_full_check.py new file mode 100644 index 0000000..7a5e58c --- /dev/null +++ b/tests/checks/apic_storage_inode_full_check/test_apic_storage_inode_full_check.py @@ -0,0 +1,76 @@ +import os +import pytest +import logging +import importlib +from helpers.utils import read_data + +script = importlib.import_module("aci-preupgrade-validation-script") + 
+log = logging.getLogger(__name__) +dir = os.path.dirname(os.path.abspath(__file__)) +test_function = "apic_storage_inode_check" +faultInst_api = 'faultInst.json' +faultInst_api += '?query-target-filter=or(eq(faultInst.code,"F4388"),eq(faultInst.code,"F4389"),eq(faultInst.code,"F4390"))' + +@pytest.mark.parametrize( + "icurl_outputs, expected_result, expected_data", + [ + # PASS - No raised faults + ( + {faultInst_api: []}, + script.PASS, + [], + ), + # FAIL - Soaking faults + ( + {faultInst_api: read_data(dir, "Fault_soaking.json")}, + script.FAIL_UF, + [ + ["F4388", "1", "1", "/data/admin/bin/avread", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ["F4388", "1", "1", "/etc/hosts", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ["F4388", "1", "1", "/", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ], + ), + # FAIL - Raised faults + ( + {faultInst_api: read_data(dir, "Fault_raised.json")}, + script.FAIL_UF, + [ + ["F4388", "1", "1", "/data/admin/bin/avread", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ["F4388", "1", "1", "/etc/hosts", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ["F4388", "1", "1", "/", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ], + ), + # PASS - Faults exist but not raised nor soaking (cleared) + ( + {faultInst_api: read_data(dir, "Fault_exists_not_raised.json")}, + script.PASS, + [], + ), + # FAIL - Raised faults with multiple status - Cleared and Active + ( + {faultInst_api: read_data(dir, "Fault_combination.json")}, + script.FAIL_UF, + [ + ["F4388", "1", "1", "/data/admin/bin/avread", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the 
fault"], + ["F4388", "1", "1", "/etc/hosts", "82%", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ], + ), + # FAIL - Raised faults with unknown mount point (unformatted data) + ( + {faultInst_api: read_data(dir, "Fault_unformatted_data.json")}, + script.FAIL_UF, + [ + ["F4388", "topology/pod-1/node-1/sys/ch/invalid/fault-F4388", "Contact Cisco TAC to remove the files in the mount point to free up space and clear the fault"], + ], + ), + ], +) + +def test_logic(run_check, mock_icurl, expected_result, expected_data): + result = run_check() + assert result.result == expected_result + if result.data: + assert result.data == expected_data + else: + assert result.unformatted_data == expected_data \ No newline at end of file