From 60a820e2d500240e647b69cbffd908af85e1c4d9 Mon Sep 17 00:00:00 2001 From: Petr Muller Date: Fri, 9 Jan 2026 15:37:53 +0100 Subject: [PATCH 1/2] TRT-2487: job-run-aggregator: fail on minimum of 3 failures Our current aggregation logic is too sensitive, leading to the rejection of payloads for non-regressions. Analysis shows that a significant portion of rejected payloads are failing due to infrastructure noise or existing flakes rather than genuine code regressions. We have component readiness as a backstop to identify regressions with greater sample sizes. --- .../jobrunaggregatoranalyzer/pass_fail.go | 45 ++- .../pass_fail_test.go | 306 +++++++++++++++++- 2 files changed, 331 insertions(+), 20 deletions(-) diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go index 50c3370d316..c7c607b3eae 100644 --- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go +++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go @@ -501,10 +501,15 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace( numberOfPasses := len(successJobRunIDs) numberOfFailures := len(failureJobRunIDs) workingPercentage := thresholdPercentile // the percentile is our success percentage - requiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage] - // TODO try to tighten this after we can keep the test in for about a week. - // We need to come back and revisit the possibility of removing this adjustment. 
- requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one + + strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage] + requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses) + + if requiredNumberOfPasses == strictRequiredNumberOfPasses { + // TODO try to tighten this after we can keep the test in for about a week. + // We need to come back and revisit the possibility of removing this adjustment. + requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one + } if requiredNumberOfPasses <= 0 { message := fmt.Sprintf("Current percentile is so low that we cannot latch, skipping (P%d=%.2fs successes=%v failures=%v)", thresholdPercentile, threshold, successRuns, failureRuns) @@ -538,9 +543,16 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace( return requiredNumberOfPasses, failureJobRunIDs, successJobRunIDs, testCaseFailed, summary } - summary := fmt.Sprintf("Passed: Passed %d times, failed %d times. (P%d=%.2fs %srequiredPasses=%d successes=%v failures=%v)", + if numberOfPasses < strictRequiredNumberOfPasses { + pityFactorMsg = fmt.Sprintf(" (%s)", pityFactorMsg) + } else { + pityFactorMsg = "" + } + + summary := fmt.Sprintf("Passed: Passed %d times, failed %d times%s. 
(P%d=%.2fs %srequiredPasses=%d successes=%v failures=%v)", numberOfPasses, numberOfFailures, + pityFactorMsg, thresholdPercentile, threshold, graceAdded, requiredNumberOfPasses, successRuns, failureRuns, @@ -614,7 +626,9 @@ func (a *weeklyAverageFromTenDays) CheckFailed(ctx context.Context, jobName stri workingPercentage = int(averageTestResult.WorkingPercentage) } - requiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage] + strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage] + requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses) + if numberOfPasses < requiredNumberOfPasses { summary := fmt.Sprintf("Failed: Passed %d times, failed %d times. The historical pass rate is %d%%. The required number of passes is %d.", numberOfPasses, @@ -625,14 +639,31 @@ func (a *weeklyAverageFromTenDays) CheckFailed(ctx context.Context, jobName stri return testCaseFailed, summary, nil } - return testCasePassed, fmt.Sprintf("Passed: Passed %d times, failed %d times. The historical pass rate is %d%%. The required number of passes is %d.", + if numberOfPasses < strictRequiredNumberOfPasses { + pityFactorMsg = fmt.Sprintf(" (%s)", pityFactorMsg) + } else { + pityFactorMsg = "" + } + + return testCasePassed, fmt.Sprintf("Passed: Passed %d times, failed %d times. The historical pass rate is %d%%. The required number of passes is %d%s.", numberOfPasses, numberOfFailures, workingPercentage, requiredNumberOfPasses, + pityFactorMsg, ), nil } +// pityFactor relaxes required success rate to always pass on 2 or fewer failures to reduce aggregation failures caused by +// e.g. infrastructure noise. We can afford to relax because component readiness will find genuine regressions over +// a larger sample size. 
+func pityFactor(numberOfAttempts int, strictRequiredNumberOfPasses int) (int, string) { + const failurePityFactor = 2 + maxRequiredNumberOfPasses := max(0, numberOfAttempts-failurePityFactor) + requiredNumberOfPasses := min(maxRequiredNumberOfPasses, strictRequiredNumberOfPasses) + return requiredNumberOfPasses, fmt.Sprintf("strict required number of passes is %d but %d failures are allowed as pity factor", strictRequiredNumberOfPasses, failurePityFactor) +} + var testsRequiringHistoryRewrite = make(map[testCoordinates]string) type testCoordinates struct { diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go index 80dca30b1f8..dfc01133b4b 100644 --- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go +++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/openshift/ci-tools/pkg/jobrunaggregator/jobrunaggregatorapi" "github.com/openshift/ci-tools/pkg/jobrunaggregator/jobrunaggregatorlib" ) @@ -57,33 +58,33 @@ func TestCheckPercentileDisruption(t *testing.T) { supportsFuzziness: true, }, { - // Required Passes for 95th percentile is 6 + // Required Passes for 95th percentile is 7 // 5 Natural Passes // graceSeconds = 2 // The Disruption value that == 7 gets flipped to pass name: "Test 95th Percentile Fuzzy Pass", - disruptions: []int{5, 5, 5, 6, 6, 7, 9, 9, 9, 9}, + disruptions: []int{5, 5, 5, 6, 6, 7, 7, 9, 9, 9}, thresholdPercentile: 95, graceSeconds: 2, historicalDisruption: 6, status: testCasePassed, - failedCount: 4, - successCount: 6, + failedCount: 3, + successCount: 7, supportsFuzziness: true, }, { - // Required Passes for 95th percentile is 6 + // Required Passes for 95th percentile is 7 // 4 Natural Passes // graceSeconds = 1 // The Disruption values that == 7 get flipped to passes name: "Test 95th Percentile Multi Fuzzy Pass", - disruptions: []int{5, 5, 5, 6, 7, 7, 
8, 8, 8, 86}, + disruptions: []int{5, 5, 5, 6, 7, 7, 7, 8, 8, 86}, thresholdPercentile: 95, graceSeconds: 1, historicalDisruption: 6, status: testCasePassed, - failedCount: 4, - successCount: 6, + failedCount: 3, + successCount: 7, supportsFuzziness: true, }, { @@ -204,16 +205,16 @@ func TestCheckPercentileDisruption(t *testing.T) { supportsFuzziness: false, }, { - // Required Passes for 80th percentile is 4 - // 4 Natural Passes + // Required Passes for 80th percentile is 5 + // 5 Natural Passes name: "Test 80th Percentile Pass", - disruptions: []int{0, 0, 1, 1, 2, 2, 2, 2, 2, 2}, + disruptions: []int{0, 0, 1, 1, 1, 2, 2, 2, 2, 2}, thresholdPercentile: 80, graceSeconds: 0, historicalDisruption: 1, status: testCasePassed, - failedCount: 6, - successCount: 4, + failedCount: 5, + successCount: 5, supportsFuzziness: true, }, { @@ -290,3 +291,282 @@ func TestCheckPercentileDisruption(t *testing.T) { }) } } + +func TestCheckFailedWithPityFactor(t *testing.T) { + tests := []struct { + name string + passes int + failures int + skips int + workingPercentage int + expectedStatus testCaseStatus + description string + }{ + // Tests with 95% working percentage (high reliability test) + { + name: "10 attempts, 95% working: 7 passes, 3 failures - passes strict", + passes: 7, + failures: 3, + skips: 0, + workingPercentage: 95, + expectedStatus: testCasePassed, + description: "Strict=7, pity=min(10-2,7)=7. 7 passes meets requirement", + }, + { + name: "10 attempts, 95% working: 6 passes, 4 failures - fails", + passes: 6, + failures: 4, + skips: 0, + workingPercentage: 95, + expectedStatus: testCaseFailed, + description: "Strict=7, pity=min(10-2,7)=7. 6 passes fails to meet requirement of 7", + }, + { + name: "12 attempts, 95% working: 9 passes, 3 failures - should pass", + passes: 9, + failures: 3, + skips: 0, + workingPercentage: 95, + expectedStatus: testCasePassed, + description: "Strict=9, pity=min(12-2,9)=9. 
9 passes meets requirement", + }, + { + name: "12 attempts, 95% working: 8 passes, 4 failures - should fail", + passes: 8, + failures: 4, + skips: 0, + workingPercentage: 95, + expectedStatus: testCaseFailed, + description: "Strict=9, pity=min(12-2,9)=9. 8 passes fails to meet requirement", + }, + + // Tests with 80% working percentage (moderate reliability test) + { + name: "10 attempts, 80% working: 5 passes, 5 failures - passes strict", + passes: 5, + failures: 5, + skips: 0, + workingPercentage: 80, + expectedStatus: testCasePassed, + description: "Strict=5, pity=min(10-2,5)=5. 5 passes meets requirement", + }, + { + name: "10 attempts, 80% working: 4 passes, 6 failures - should fail", + passes: 4, + failures: 6, + skips: 0, + workingPercentage: 80, + expectedStatus: testCaseFailed, + description: "Strict=5, pity=min(10-2,5)=5. 4 passes fails to meet requirement", + }, + + // Tests with 70% working percentage (lower reliability test) + { + name: "10 attempts, 70% working: 3 passes, 7 failures - passes strict", + passes: 3, + failures: 7, + skips: 0, + workingPercentage: 70, + expectedStatus: testCasePassed, + description: "Strict=3, pity=min(10-2,3)=3. 3 passes meets requirement", + }, + { + name: "10 attempts, 70% working: 2 passes, 8 failures - should fail", + passes: 2, + failures: 8, + skips: 0, + workingPercentage: 70, + expectedStatus: testCaseFailed, + description: "Strict=3, pity=min(10-2,3)=3. 2 passes fails to meet requirement", + }, + + // Tests where pity factor doesn't relax (strict requirement is already at or below pity limit) + { + name: "5 attempts, 95% working: 3 passes, 2 failures - passes strict (no relaxation)", + passes: 3, + failures: 2, + skips: 0, + workingPercentage: 95, + expectedStatus: testCasePassed, + description: "Strict=3, pity=min(5-2,3)=3. 
No relaxation, 3 passes meets requirement", + }, + { + name: "5 attempts, 95% working: 1 pass, 4 failures - should fail", + passes: 1, + failures: 4, + skips: 0, + workingPercentage: 95, + expectedStatus: testCaseFailed, + description: "Strict=3, pity=min(5-2,3)=3. 1 pass fails requirement even with pity factor", + }, + + // More tests showing pity factor behavior + { + name: "10 attempts, 90% working: 6 passes, 4 failures - passes strict (no relaxation)", + passes: 6, + failures: 4, + skips: 0, + workingPercentage: 90, + expectedStatus: testCasePassed, + description: "Strict=6, pity=min(10-2,6)=6. No relaxation, 6 passes meets requirement", + }, + + // Tests with 100% working percentage (perfect reliability expectation) + { + name: "10 attempts, 100% working: 10 passes, 0 failures - passes naturally", + passes: 10, + failures: 0, + skips: 0, + workingPercentage: 100, + expectedStatus: testCasePassed, + description: "Strict=9, pity=min(10-2,9)=8. 10 passes exceeds strict requirement", + }, + { + name: "10 attempts, 100% working: 8 passes, 2 failures - passes with pity factor", + passes: 8, + failures: 2, + skips: 0, + workingPercentage: 100, + expectedStatus: testCasePassed, + description: "Strict=9, pity=min(10-2,9)=8. 8 passes meets pity requirement (relaxed from 9)", + }, + { + name: "10 attempts, 100% working: 7 passes, 3 failures - should fail", + passes: 7, + failures: 3, + skips: 0, + workingPercentage: 100, + expectedStatus: testCaseFailed, + description: "Strict=9, pity=min(10-2,9)=8. 
7 passes fails to meet requirement of 8", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + aggregatedTestRuns := map[TestKey]jobrunaggregatorapi.AggregatedTestRunRow{ + {TestCaseName: "test-case", CombinedTestSuiteName: "test-suite"}: { + WorkingPercentage: float64(test.workingPercentage), + }, + } + + baseline := &weeklyAverageFromTenDays{ + aggregatedTestRunsByName: aggregatedTestRuns, + } + // Mark query as already done to avoid nil bigQueryClient access + baseline.queryTestRunsOnce.Do(func() {}) + + testCaseDetails := &jobrunaggregatorlib.TestCaseDetails{ + Name: "test-case", + TestSuiteName: "test-suite", + Passes: make([]jobrunaggregatorlib.TestCasePass, test.passes), + Failures: make([]jobrunaggregatorlib.TestCaseFailure, test.failures), + Skips: make([]jobrunaggregatorlib.TestCaseSkip, test.skips), + } + + // Create unique job run IDs for passes and failures + for i := 0; i < test.passes; i++ { + testCaseDetails.Passes[i] = jobrunaggregatorlib.TestCasePass{ + JobRunID: fmt.Sprintf("pass-%d", i), + } + } + for i := 0; i < test.failures; i++ { + testCaseDetails.Failures[i] = jobrunaggregatorlib.TestCaseFailure{ + JobRunID: fmt.Sprintf("fail-%d", i), + } + } + + status, message, err := baseline.CheckFailed(nil, "test-job", []string{"suite"}, testCaseDetails) + + assert.NoError(t, err, "Should not error for: %s", test.name) + assert.NotEmpty(t, message, "Should have a message for: %s", test.name) + assert.Equal(t, test.expectedStatus, status, "%s: %s", test.description, message) + t.Logf("Test: %s\nStatus: %s\nMessage: %s", test.name, status, message) + }) + } +} + +func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { + weeklyAverage := &weeklyAverageFromTenDays{} + + tests := []struct { + name string + disruptions []int + thresholdPercentile int // also used as workingPercentage in innerCheckPercentileDisruptionWithGrace + historicalDisruption float64 + graceSeconds int + expectedStatus testCaseStatus + 
expectedMinPasses int + description string + }{ + // Note: In innerCheckPercentileDisruptionWithGrace, workingPercentage = thresholdPercentile + // So a P95 test uses 95% as the working percentage for calculating required passes + // The code also has -1 adjustment when pity factor doesn't relax the requirement + { + name: "10 attempts, P95 (95% working): 8 passes, 2 failures - exceeds requirement", + disruptions: []int{0, 0, 0, 1, 1, 1, 1, 1, 5, 5}, + thresholdPercentile: 95, + historicalDisruption: 2.0, + graceSeconds: 0, + expectedStatus: testCasePassed, + expectedMinPasses: 6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation) + description: "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 8 passes exceeds requirement", + }, + { + name: "10 attempts, P95 (95% working): 7 passes, 3 failures - meets requirement", + disruptions: []int{0, 0, 0, 1, 1, 1, 1, 5, 5, 5}, + thresholdPercentile: 95, + historicalDisruption: 2.0, + graceSeconds: 0, + expectedStatus: testCasePassed, + expectedMinPasses: 6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation) + description: "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 7 passes exceeds requirement", + }, + { + name: "6 attempts, P80 (80% working): 4 passes, 2 failures - exceeds requirement", + disruptions: []int{0, 0, 1, 1, 5, 5}, + thresholdPercentile: 80, + historicalDisruption: 2.0, + graceSeconds: 0, + expectedStatus: testCasePassed, + expectedMinPasses: 1, // strict=2, pity=min(6-2,2)=2, then 2-1=1 (no relaxation) + description: "Strict=2, pity=min(6-2,2)=2, -1 adj=1. 4 passes exceeds requirement", + }, + { + name: "12 attempts, P95 (95% working): 10 passes, 2 failures - exceeds requirement", + disruptions: []int{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 5, 5}, + thresholdPercentile: 95, + historicalDisruption: 2.0, + graceSeconds: 0, + expectedStatus: testCasePassed, + expectedMinPasses: 8, // strict=9, pity=min(12-2,9)=9, then 9-1=8 (no relaxation) + description: "Strict=9, pity=min(12-2,9)=9, -1 adj=8. 
10 passes exceeds requirement", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + jobRunIDToAvailabilityResultForBackend := createJobRunIDToAvailabilityResultForBackend(test.disruptions) + historicalDisruptionStatistic := backendDisruptionStats{ + percentileByIndex: make([]float64, 100), + } + historicalDisruptionStatistic.percentileByIndex[test.thresholdPercentile] = test.historicalDisruption + + requiredPasses, failureJobRunIDs, successJobRunIDs, status, summary := + weeklyAverage.innerCheckPercentileDisruptionWithGrace( + jobRunIDToAvailabilityResultForBackend, + test.historicalDisruption, + test.thresholdPercentile, + test.graceSeconds, + ) + + t.Logf("Test: %s\nRequired: %d, Passes: %d, Failures: %d\nStatus: %s\nSummary: %s", + test.name, requiredPasses, len(successJobRunIDs), len(failureJobRunIDs), status, summary) + + assert.Equal(t, test.expectedStatus, status, "%s: %s", test.description, summary) + assert.Equal(t, test.expectedMinPasses, requiredPasses, + "Required passes should match expected for: %s", test.name) + assert.Equal(t, len(test.disruptions), len(successJobRunIDs)+len(failureJobRunIDs), + "Total attempts should equal successes + failures") + }) + } +} From f180675792f2d5a8d7311db3c89764eee5a6086f Mon Sep 17 00:00:00 2001 From: Petr Muller Date: Mon, 12 Jan 2026 17:52:18 +0100 Subject: [PATCH 2/2] TRT-2487: job-run-aggregator: remove relaxing in disruption aggregation I do not _entirely_ understand why the relaxation for disruption tests exists here, but the existing TODO suggests this may be worth tightening. The `pityFactor` will relax the threshold in some cases (high attempts, high pass rate), so the additional relaxation would only apply in the remaining cases. 
--- .../jobrunaggregatoranalyzer/pass_fail.go | 6 ------ .../jobrunaggregatoranalyzer/pass_fail_test.go | 17 ++++++++--------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go index c7c607b3eae..0afd31786ec 100644 --- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go +++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go @@ -505,12 +505,6 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace( strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage] requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses) - if requiredNumberOfPasses == strictRequiredNumberOfPasses { - // TODO try to tighten this after we can keep the test in for about a week. - // We need to come back and revisit the possibility of removing this adjustment. 
- requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one - } - if requiredNumberOfPasses <= 0 { message := fmt.Sprintf("Current percentile is so low that we cannot latch, skipping (P%d=%.2fs successes=%v failures=%v)", thresholdPercentile, threshold, successRuns, failureRuns) failureJobRunIDs = sets.StringKeySet(jobRunIDToAvailabilityResultForBackend).List() diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go index dfc01133b4b..ba221e2efe7 100644 --- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go +++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go @@ -500,7 +500,6 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { }{ // Note: In innerCheckPercentileDisruptionWithGrace, workingPercentage = thresholdPercentile // So a P95 test uses 95% as the working percentage for calculating required passes - // The code also has -1 adjustment when pity factor doesn't relax the requirement { name: "10 attempts, P95 (95% working): 8 passes, 2 failures - exceeds requirement", disruptions: []int{0, 0, 0, 1, 1, 1, 1, 1, 5, 5}, @@ -508,8 +507,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { historicalDisruption: 2.0, graceSeconds: 0, expectedStatus: testCasePassed, - expectedMinPasses: 6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation) - description: "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 8 passes exceeds requirement", + expectedMinPasses: 7, // strict=7, pity=min(10-2,7)=7 (no relaxation) + description: "Strict=7, pity=min(10-2,7)=7. 
8 passes exceeds requirement", }, { name: "10 attempts, P95 (95% working): 7 passes, 3 failures - meets requirement", @@ -518,8 +517,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { historicalDisruption: 2.0, graceSeconds: 0, expectedStatus: testCasePassed, - expectedMinPasses: 6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation) - description: "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 7 passes exceeds requirement", + expectedMinPasses: 7, // strict=7, pity=min(10-2,7)=7 (no relaxation) + description: "Strict=7, pity=min(10-2,7)=7. 7 passes meets requirement exactly", }, { name: "6 attempts, P80 (80% working): 4 passes, 2 failures - exceeds requirement", @@ -528,8 +527,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { historicalDisruption: 2.0, graceSeconds: 0, expectedStatus: testCasePassed, - expectedMinPasses: 1, // strict=2, pity=min(6-2,2)=2, then 2-1=1 (no relaxation) - description: "Strict=2, pity=min(6-2,2)=2, -1 adj=1. 4 passes exceeds requirement", + expectedMinPasses: 2, // strict=2, pity=min(6-2,2)=2 (no relaxation) + description: "Strict=2, pity=min(6-2,2)=2. 4 passes exceeds requirement", }, { name: "12 attempts, P95 (95% working): 10 passes, 2 failures - exceeds requirement", @@ -538,8 +537,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) { historicalDisruption: 2.0, graceSeconds: 0, expectedStatus: testCasePassed, - expectedMinPasses: 8, // strict=9, pity=min(12-2,9)=9, then 9-1=8 (no relaxation) - description: "Strict=9, pity=min(12-2,9)=9, -1 adj=8. 10 passes exceeds requirement", + expectedMinPasses: 9, // strict=9, pity=min(12-2,9)=9 (no relaxation) + description: "Strict=9, pity=min(12-2,9)=9. 10 passes exceeds requirement", }, }