Skip to content

Commit f5d9ac9

Browse files
authored
Adds improved handling for Metrics over Time (#772)
This adds new and improved handling for Metrics over Time. The overall execution time of the background tasks has been reduced, and memory management is considerably more efficient.

In addition to the improved core handling of the feature, performance metrics for Metrics over Time will NO LONGER BE WRITTEN. This considerably improves the performance of graphing solutions such as InfluxDB, while monitoring by using the "-ThresholdInterval" argument is still possible.

```powershell
PS> Invoke-IcingaCheckCPU -Warning '5%' -ThresholdInterval '10m';
[WARNING] CPU Load [WARNING] Overall Load, Socket #0
\_ [WARNING] Overall Load: Value 6.546175% is greater than threshold 5% (10m Avg.)
\_ [WARNING] Socket #0
   \_ [WARNING] Core 0: Value 18.391566% is greater than threshold 5% (10m Avg.)
   \_ [WARNING] Core 1: Value 14.100505% is greater than threshold 5% (10m Avg.)
   \_ [WARNING] Core Total: Value 6.546175% is greater than threshold 5% (10m Avg.)
| totalload::ifw_cpu::load=5.804053;5;;0;100 0_0::ifw_cpu::load=18.03764;5;;0;100 0_1::ifw_cpu::load=9.36611;5;;0;100 0_2::ifw_cpu::load=5.830669;5;;0;100 0_3::ifw_cpu::load=0.646737;5;;0;100 0_4::ifw_cpu::load=0.926955;5;;0;100 0_5::ifw_cpu::load=0.016205;5;;0;100 0_total::ifw_cpu::load=5.804053;5;;0;100
```
1 parent 08bea53 commit f5d9ac9

12 files changed

+271
-113
lines changed

doc/100-General/10-Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ Released closed milestones can be found on [GitHub](https://github.com/Icinga/ic
1313

1414
* [#759](https://github.com/Icinga/icinga-powershell-framework/pull/759) Fixes maximum cache duration for service daemons to the right value
1515

16+
### Enhancements
17+
18+
* [#772](https://github.com/Icinga/icinga-powershell-framework/pull/772) Adds new Metrics over Time handling
19+
1620
## 1.13.0 Beta-2 (2024-09-19)
1721

1822
[Issues and PRs](https://github.com/Icinga/icinga-powershell-framework/milestone/36)

lib/core/framework/New-IcingaEnvironmentVariable.psm1

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,18 @@ function New-IcingaEnvironmentVariable()
3737
$Global:Icinga.Private.Add(
3838
'Scheduler',
3939
@{
40+
'CheckCommand' = '';
4041
'CheckData' = @{ };
4142
'ThresholdCache' = @{ };
4243
'CheckResults' = @();
4344
'PerformanceData' = '';
4445
'PluginException' = $null;
4546
'ExitCode' = $null;
4647
'PerfDataWriter' = @{
47-
'Cache' = @{};
48-
'Storage' = (New-Object System.Text.StringBuilder);
48+
'Cache' = @{ };
49+
'Storage' = (New-Object System.Text.StringBuilder);
50+
'Daemon' = @{ };
51+
'MetricsOverTime' = '';
4952
}
5053
}
5154
);

lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1

Lines changed: 68 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,14 @@ function Add-IcingaServiceCheckTask()
1717
# Read our check result store data from disk for this service check
1818
Read-IcingaCheckResultStore -CheckCommand $CheckCommand;
1919

20-
[int]$CheckInterval = ConvertTo-Seconds $Interval;
20+
$MetricCacheFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'metrics') -ChildPath ([string]::Format('{0}.xml', $CheckCommand));
21+
[int]$CheckInterval = ConvertTo-Seconds $Interval;
22+
[hashtable]$CheckDataCache = @{ };
23+
[array]$PerfDataEntries = @();
24+
25+
if (Test-Path -Path $MetricCacheFile) {
26+
$CheckDataCache = [System.Management.Automation.PSSerializer]::Deserialize((Get-Content -Path $MetricCacheFile -Raw -Encoding UTF8));
27+
}
2128

2229
while ($TRUE) {
2330
if ($Global:Icinga.Private.Daemons.ServiceCheck.PassedTime -lt $CheckInterval) {
@@ -29,6 +36,9 @@ function Add-IcingaServiceCheckTask()
2936

3037
$Global:Icinga.Private.Daemons.ServiceCheck.PassedTime = 0;
3138

39+
# Clear possible previous performance data from the daemon cache
40+
$Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Clear();
41+
3242
# Execute our check with possible arguments
3343
try {
3444
& $CheckCommand @Arguments | Out-Null;
@@ -45,93 +55,79 @@ function Add-IcingaServiceCheckTask()
4555

4656
$UnixTime = Get-IcingaUnixTime;
4757

48-
try {
49-
foreach ($result in $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys) {
50-
[string]$HashIndex = $result;
51-
$Global:Icinga.Private.Daemons.ServiceCheck.SortedResult = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$HashIndex].GetEnumerator() | Sort-Object name -Descending;
52-
53-
Add-IcingaHashtableItem `
54-
-Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache `
55-
-Key $HashIndex `
56-
-Value @{ } | Out-Null;
58+
foreach ($PerfLabel in $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Keys) {
59+
$PerfValue = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Value;
60+
$PerfUnit = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Unit;
5761

58-
foreach ($timeEntry in $Global:Icinga.Private.Daemons.ServiceCheck.SortedResult) {
59-
60-
if ((Test-Numeric $timeEntry.Value) -eq $FALSE) {
61-
continue;
62-
}
62+
if ($CheckDataCache.ContainsKey($PerfLabel) -eq $FALSE) {
63+
$CheckDataCache.Add($PerfLabel, (New-Object System.Collections.ArrayList));
64+
}
6365

64-
foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
65-
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]$timeEntry.Key) {
66-
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $timeEntry.Value;
67-
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1;
68-
}
69-
}
70-
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -le [int]$timeEntry.Key) {
71-
Add-IcingaHashtableItem `
72-
-Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache[$HashIndex] `
73-
-Key ([string]$timeEntry.Key) `
74-
-Value ([string]$timeEntry.Value) | Out-Null;
75-
}
66+
$CheckDataCache[$PerfLabel].Add(
67+
@{
68+
'Time' = $UnixTime;
69+
'Value' = $PerfValue;
70+
'Unit' = $PerfUnit;
71+
}
72+
) | Out-Null;
73+
74+
[int]$IndexCount = $CheckDataCache[$PerfLabel].Count;
75+
[int]$RemoveIndex = 0;
76+
for ($i = 0; $i -lt $IndexCount; $i++) {
77+
# In case we store more values than we require for our max time range, remove the oldest one
78+
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]($CheckDataCache[$PerfLabel][$i].Time)) {
79+
$RemoveIndex += 1;
80+
continue;
7681
}
7782

83+
# Calculate the average value for our performance data based on the remaining data
7884
foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
79-
if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) {
80-
$AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count);
81-
[string]$MetricMultiName = [string]::Format('::{0}::Interval{1}', (Format-IcingaPerfDataLabel -PerfData $HashIndex -MultiOutput), $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time);
82-
$Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'] | Add-Member -MemberType NoteProperty -Name $MetricMultiName -Value $AverageValue -Force;
85+
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]($CheckDataCache[$PerfLabel][$i].Time)) {
86+
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $CheckDataCache[$PerfLabel][$i].Value;
87+
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1;
8388
}
89+
}
90+
}
8491

85-
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0;
86-
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0;
92+
# Remove older entries more efficiently. As we store the data in an ArrayList, the oldest entries are at the beginning
93+
# Therefore we can just remove a range of entries from the beginning of the list or clear the list if we need to remove all entries
94+
if ($RemoveIndex -gt 0) {
95+
if ($RemoveIndex -ge $IndexCount) {
96+
$CheckDataCache[$PerfLabel].Clear() | Out-Null;
97+
} else {
98+
$CheckDataCache[$PerfLabel].RemoveRange(0, $RemoveIndex) | Out-Null;
8799
}
100+
$RemoveIndex = 0;
88101
}
89102

90-
Write-IcingaDebugMessage `
91-
-Message 'Object dump of service check daemon' `
92-
-Objects @(
93-
$CheckCommand,
94-
'Average Calc',
95-
($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation | Out-String),
96-
'PerformanceCache',
97-
$Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache,
98-
'Max Time in Seconds',
99-
$Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds,
100-
'Unix Time',
101-
$UnixTime
102-
);
103-
104-
# Flush data we no longer require in our cache to free memory
105-
[array]$CheckStores = $Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys;
106-
107-
foreach ($CheckStore in $CheckStores) {
108-
[string]$CheckKey = $CheckStore;
109-
[array]$CheckTimeStamps = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey].Keys;
110-
111-
foreach ($TimeSample in $CheckTimeStamps) {
112-
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]$TimeSample) {
113-
Remove-IcingaHashtableItem -Hashtable $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey] -Key ([string]$TimeSample);
114-
}
103+
# Now calculate the average values for our performance data
104+
foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
105+
if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) {
106+
$AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count);
107+
[string]$MetricMultiName = [string]::Format('{0}::Interval{1}={2}{3}', $PerfLabel, $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time, (Format-IcingaPerfDataValue $AverageValue), $PerfUnit);
108+
# Write our performance data label
109+
$PerfDataEntries += $MetricMultiName;
115110
}
111+
112+
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0;
113+
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0;
116114
}
115+
}
117116

118-
Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult' -KeyName $CheckCommand -Value $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'];
117+
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $PerfDataEntries -Join ' ';
118+
$PerfDataEntries = @();
119119

120-
# Make the performance data available for all threads
121-
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'];
122-
# Write collected metrics to disk in case we reload the daemon. We will load them back into the module after reload then
123-
Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult_store' -KeyName $CheckCommand -Value $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache;
120+
$PerformanceLabelFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'performance_labels') -ChildPath ([string]::Format('{0}.db', $CheckCommand));
121+
$CheckCacheXMLObj = [System.Management.Automation.PSSerializer]::Serialize($CheckDataCache);
124122

125-
} catch {
126-
Write-IcingaEventMessage -EventId 1452 -Namespace 'Framework' -ExceptionObject $_ -Objects $CheckCommand, ($Arguments | Out-String), (Get-IcingaInternalPluginOutput);
123+
if ((Test-Path -Path $PerformanceLabelFile) -eq $FALSE) {
124+
New-Item -Path $PerformanceLabelFile -ItemType File -Force | Out-Null;
125+
}
126+
if ((Test-Path -Path $MetricCacheFile) -eq $FALSE) {
127+
New-Item -Path $MetricCacheFile -ItemType File -Force | Out-Null;
127128
}
128129

129-
# Always ensure our check data is cleared regardless of possible
130-
# exceptions which might occur
131-
Clear-IcingaCheckSchedulerEnvironment;
132-
# Reset certain values from the scheduler environment
133-
Clear-IcingaServiceCheckDaemonEnvironment;
134-
# Force Icinga for Windows Garbage Collection
135-
Optimize-IcingaForWindowsMemory -ClearErrorStack -SmartGC;
130+
Set-Content -Path $PerformanceLabelFile -Value $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] -Force -Encoding UTF8;
131+
Set-Content -Path $MetricCacheFile -Value $CheckCacheXMLObj -Force -Encoding UTF8;
136132
}
137133
}

lib/daemons/ServiceCheckDaemon/tools/New-IcingaServiceCheckDaemonEnvironment.psm1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ function New-IcingaServiceCheckDaemonEnvironment()
88

99
if ($Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.ContainsKey($CheckCommand) -eq $FALSE) {
1010
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.Add(
11-
$CheckCommand, @{ }
11+
$CheckCommand, ''
1212
);
1313
}
1414

lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ function Compare-IcingaPluginThresholds()
104104
# Fix possible numeric value comparison issues
105105
$TestInput = Test-IcingaDecimal $InputValue;
106106
$BaseInput = Test-IcingaDecimal $BaseValue;
107+
$MoTData = @{
108+
'Label' = $PerfDataLabel;
109+
'Interval' = $TimeInterval;
110+
};
107111

108112
if ($TestInput.Decimal) {
109113
[decimal]$InputValue = [decimal]$TestInput.Value;
@@ -132,17 +136,17 @@ function Compare-IcingaPluginThresholds()
132136
$CheckResult = $null;
133137

134138
if ($Matches) {
135-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches;
139+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches -MetricsOverTime $MoTData;
136140
} elseif ($NotMatches) {
137-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.NotMatches;
141+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.NotMatches -MetricsOverTime $MoTData;
138142
} elseif ($IsBetween) {
139-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between;
143+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between -MetricsOverTime $MoTData;
140144
} elseif ($IsLowerEqual) {
141-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual;
145+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual -MetricsOverTime $MoTData;
142146
} elseif ($IsGreaterEqual) {
143-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual;
147+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual -MetricsOverTime $MoTData;
144148
} else {
145-
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation;
149+
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -MetricsOverTime $MoTData;
146150
}
147151

148152
$IcingaThresholds.Message = $CheckResult.Message;

0 commit comments

Comments
 (0)