From 60a820e2d500240e647b69cbffd908af85e1c4d9 Mon Sep 17 00:00:00 2001
From: Petr Muller <muller@redhat.com>
Date: Fri, 9 Jan 2026 15:37:53 +0100
Subject: [PATCH 1/2] TRT-2487: job-run-aggregator: fail on minimum of 3
 failures

Our current aggregation logic is too sensitive, leading to the rejection
of payloads for non-regressions. Analysis shows that a significant
portion of rejected payloads are failing due to infrastructure noise or
existing flakes rather than genuine code regressions.

We have component readiness as a backstop to identify regressions with
greater sample sizes.
---
 .../jobrunaggregatoranalyzer/pass_fail.go     |  45 ++-
 .../pass_fail_test.go                         | 306 +++++++++++++++++-
 2 files changed, 331 insertions(+), 20 deletions(-)

diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
index 50c3370d316..c7c607b3eae 100644
--- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
+++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
@@ -501,10 +501,15 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace(
 	numberOfPasses := len(successJobRunIDs)
 	numberOfFailures := len(failureJobRunIDs)
 	workingPercentage := thresholdPercentile // the percentile is our success percentage
-	requiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage]
-	// TODO try to tighten this after we can keep the test in for about a week.
-	// We need to come back and revisit the possibility of removing this adjustment.
-	requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one
+
+	strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage]
+	requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses)
+
+	if requiredNumberOfPasses == strictRequiredNumberOfPasses {
+		// TODO try to tighten this after we can keep the test in for about a week.
+		// We need to come back and revisit the possibility of removing this adjustment.
+		requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one
+	}
 
 	if requiredNumberOfPasses <= 0 {
 		message := fmt.Sprintf("Current percentile is so low that we cannot latch, skipping (P%d=%.2fs successes=%v failures=%v)", thresholdPercentile, threshold, successRuns, failureRuns)
@@ -538,9 +543,16 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace(
 		return requiredNumberOfPasses, failureJobRunIDs, successJobRunIDs, testCaseFailed, summary
 	}
 
-	summary := fmt.Sprintf("Passed: Passed %d times, failed %d times.  (P%d=%.2fs %srequiredPasses=%d successes=%v failures=%v)",
+	if numberOfPasses < strictRequiredNumberOfPasses {
+		pityFactorMsg = fmt.Sprintf(" (%s)", pityFactorMsg)
+	} else {
+		pityFactorMsg = ""
+	}
+
+	summary := fmt.Sprintf("Passed: Passed %d times, failed %d times%s.  (P%d=%.2fs %srequiredPasses=%d successes=%v failures=%v)",
 		numberOfPasses,
 		numberOfFailures,
+		pityFactorMsg,
 		thresholdPercentile, threshold, graceAdded,
 		requiredNumberOfPasses,
 		successRuns, failureRuns,
@@ -614,7 +626,9 @@ func (a *weeklyAverageFromTenDays) CheckFailed(ctx context.Context, jobName stri
 		workingPercentage = int(averageTestResult.WorkingPercentage)
 	}
 
-	requiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage]
+	strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage]
+	requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses)
+
 	if numberOfPasses < requiredNumberOfPasses {
 		summary := fmt.Sprintf("Failed: Passed %d times, failed %d times.  The historical pass rate is %d%%.  The required number of passes is %d.",
 			numberOfPasses,
@@ -625,14 +639,31 @@ func (a *weeklyAverageFromTenDays) CheckFailed(ctx context.Context, jobName stri
 		return testCaseFailed, summary, nil
 	}
 
-	return testCasePassed, fmt.Sprintf("Passed: Passed %d times, failed %d times.  The historical pass rate is %d%%.  The required number of passes is %d.",
+	if numberOfPasses < strictRequiredNumberOfPasses {
+		pityFactorMsg = fmt.Sprintf(" (%s)", pityFactorMsg)
+	} else {
+		pityFactorMsg = ""
+	}
+
+	return testCasePassed, fmt.Sprintf("Passed: Passed %d times, failed %d times.  The historical pass rate is %d%%.  The required number of passes is %d%s.",
 		numberOfPasses,
 		numberOfFailures,
 		workingPercentage,
 		requiredNumberOfPasses,
+		pityFactorMsg,
 	), nil
 }
 
+// pityFactor relaxes required success rate to always pass on 2 and fewer failures to reduce aggregation fails caused by
+// e.g. infrastructure noise. We can afford to relax because component readiness will find genuine regressions over
+// a larger sample size.
+func pityFactor(numberOfAttempts int, strictRequiredNumberOfPasses int) (int, string) {
+	const failurePityFactor = 2
+	maxRequiredNumberOfPasses := max(0, numberOfAttempts-failurePityFactor)
+	requiredNumberOfPasses := min(maxRequiredNumberOfPasses, strictRequiredNumberOfPasses)
+	return requiredNumberOfPasses, fmt.Sprintf("strict required number of passes is %d but %d failures are allowed as pity factor", strictRequiredNumberOfPasses, failurePityFactor)
+}
+
 var testsRequiringHistoryRewrite = make(map[testCoordinates]string)
 
 type testCoordinates struct {
diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
index 80dca30b1f8..dfc01133b4b 100644
--- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
+++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/stretchr/testify/assert"
 
+	"github.com/openshift/ci-tools/pkg/jobrunaggregator/jobrunaggregatorapi"
 	"github.com/openshift/ci-tools/pkg/jobrunaggregator/jobrunaggregatorlib"
 )
 
@@ -57,33 +58,33 @@ func TestCheckPercentileDisruption(t *testing.T) {
 			supportsFuzziness:    true,
 		},
 		{
-			// Required Passes for 95th percentile is 6
+			// Required Passes for 95th percentile is 7
 			// 5 Natural Passes
 			// graceSeconds = 2
 			// The Disruption value that == 7 gets flipped to pass
 			name:                 "Test 95th Percentile Fuzzy Pass",
-			disruptions:          []int{5, 5, 5, 6, 6, 7, 9, 9, 9, 9},
+			disruptions:          []int{5, 5, 5, 6, 6, 7, 7, 9, 9, 9},
 			thresholdPercentile:  95,
 			graceSeconds:         2,
 			historicalDisruption: 6,
 			status:               testCasePassed,
-			failedCount:          4,
-			successCount:         6,
+			failedCount:          3,
+			successCount:         7,
 			supportsFuzziness:    true,
 		},
 		{
-			// Required Passes for 95th percentile is 6
+			// Required Passes for 95th percentile is 7
 			// 4 Natural Passes
 			// graceSeconds = 1
 			// The Disruption values that == 7 get flipped to passes
 			name:                 "Test 95th Percentile Multi Fuzzy Pass",
-			disruptions:          []int{5, 5, 5, 6, 7, 7, 8, 8, 8, 86},
+			disruptions:          []int{5, 5, 5, 6, 7, 7, 7, 8, 8, 86},
 			thresholdPercentile:  95,
 			graceSeconds:         1,
 			historicalDisruption: 6,
 			status:               testCasePassed,
-			failedCount:          4,
-			successCount:         6,
+			failedCount:          3,
+			successCount:         7,
 			supportsFuzziness:    true,
 		},
 		{
@@ -204,16 +205,16 @@ func TestCheckPercentileDisruption(t *testing.T) {
 			supportsFuzziness:    false,
 		},
 		{
-			// Required Passes for 80th percentile is 4
-			// 4 Natural Passes
+			// Required Passes for 80th percentile is 5
+			// 5 Natural Passes
 			name:                 "Test 80th Percentile Pass",
-			disruptions:          []int{0, 0, 1, 1, 2, 2, 2, 2, 2, 2},
+			disruptions:          []int{0, 0, 1, 1, 1, 2, 2, 2, 2, 2},
 			thresholdPercentile:  80,
 			graceSeconds:         0,
 			historicalDisruption: 1,
 			status:               testCasePassed,
-			failedCount:          6,
-			successCount:         4,
+			failedCount:          5,
+			successCount:         5,
 			supportsFuzziness:    true,
 		},
 		{
@@ -290,3 +291,282 @@ func TestCheckPercentileDisruption(t *testing.T) {
 		})
 	}
 }
+
+func TestCheckFailedWithPityFactor(t *testing.T) {
+	tests := []struct {
+		name              string
+		passes            int
+		failures          int
+		skips             int
+		workingPercentage int
+		expectedStatus    testCaseStatus
+		description       string
+	}{
+		// Tests with 95% working percentage (high reliability test)
+		{
+			name:              "10 attempts, 95% working: 7 passes, 3 failures - passes strict",
+			passes:            7,
+			failures:          3,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=7, pity=min(10-2,7)=7. 7 passes meets requirement",
+		},
+		{
+			name:              "10 attempts, 95% working: 6 passes, 4 failures - fails",
+			passes:            6,
+			failures:          4,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=7, pity=min(10-2,7)=7. 6 passes fails to meet requirement of 7",
+		},
+		{
+			name:              "12 attempts, 95% working: 9 passes, 3 failures - should pass",
+			passes:            9,
+			failures:          3,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=9, pity=min(12-2,9)=9. 9 passes meets requirement",
+		},
+		{
+			name:              "12 attempts, 95% working: 8 passes, 4 failures - should fail",
+			passes:            8,
+			failures:          4,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=9, pity=min(12-2,9)=9. 8 passes fails to meet requirement",
+		},
+
+		// Tests with 80% working percentage (moderate reliability test)
+		{
+			name:              "10 attempts, 80% working: 5 passes, 5 failures - passes strict",
+			passes:            5,
+			failures:          5,
+			skips:             0,
+			workingPercentage: 80,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=5, pity=min(10-2,5)=5. 5 passes meets requirement",
+		},
+		{
+			name:              "10 attempts, 80% working: 4 passes, 6 failures - should fail",
+			passes:            4,
+			failures:          6,
+			skips:             0,
+			workingPercentage: 80,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=5, pity=min(10-2,5)=5. 4 passes fails to meet requirement",
+		},
+
+		// Tests with 70% working percentage (lower reliability test)
+		{
+			name:              "10 attempts, 70% working: 3 passes, 7 failures - passes strict",
+			passes:            3,
+			failures:          7,
+			skips:             0,
+			workingPercentage: 70,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=3, pity=min(10-2,3)=3. 3 passes meets requirement",
+		},
+		{
+			name:              "10 attempts, 70% working: 2 passes, 8 failures - should fail",
+			passes:            2,
+			failures:          8,
+			skips:             0,
+			workingPercentage: 70,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=3, pity=min(10-2,3)=3. 2 passes fails to meet requirement",
+		},
+
+		// Tests where pity factor doesn't relax (strict requirement is already at or below pity limit)
+		{
+			name:              "5 attempts, 95% working: 3 passes, 2 failures - passes strict (no relaxation)",
+			passes:            3,
+			failures:          2,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=3, pity=min(5-2,3)=3. No relaxation, 3 passes meets requirement",
+		},
+		{
+			name:              "5 attempts, 95% working: 1 pass, 4 failures - should fail",
+			passes:            1,
+			failures:          4,
+			skips:             0,
+			workingPercentage: 95,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=3, pity=min(5-2,3)=3. 1 pass fails requirement even with pity factor",
+		},
+
+		// More tests showing pity factor behavior
+		{
+			name:              "10 attempts, 90% working: 6 passes, 4 failures - passes strict (no relaxation)",
+			passes:            6,
+			failures:          4,
+			skips:             0,
+			workingPercentage: 90,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=6, pity=min(10-2,6)=6. No relaxation, 6 passes meets requirement",
+		},
+
+		// Tests with 100% working percentage (perfect reliability expectation)
+		{
+			name:              "10 attempts, 100% working: 10 passes, 0 failures - passes naturally",
+			passes:            10,
+			failures:          0,
+			skips:             0,
+			workingPercentage: 100,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=9, pity=min(10-2,9)=8. 10 passes exceeds strict requirement",
+		},
+		{
+			name:              "10 attempts, 100% working: 8 passes, 2 failures - passes with pity factor",
+			passes:            8,
+			failures:          2,
+			skips:             0,
+			workingPercentage: 100,
+			expectedStatus:    testCasePassed,
+			description:       "Strict=9, pity=min(10-2,9)=8. 8 passes meets pity requirement (relaxed from 9)",
+		},
+		{
+			name:              "10 attempts, 100% working: 7 passes, 3 failures - should fail",
+			passes:            7,
+			failures:          3,
+			skips:             0,
+			workingPercentage: 100,
+			expectedStatus:    testCaseFailed,
+			description:       "Strict=9, pity=min(10-2,9)=8. 7 passes fails to meet requirement of 8",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			aggregatedTestRuns := map[TestKey]jobrunaggregatorapi.AggregatedTestRunRow{
+				{TestCaseName: "test-case", CombinedTestSuiteName: "test-suite"}: {
+					WorkingPercentage: float64(test.workingPercentage),
+				},
+			}
+
+			baseline := &weeklyAverageFromTenDays{
+				aggregatedTestRunsByName: aggregatedTestRuns,
+			}
+			// Mark query as already done to avoid nil bigQueryClient access
+			baseline.queryTestRunsOnce.Do(func() {})
+
+			testCaseDetails := &jobrunaggregatorlib.TestCaseDetails{
+				Name:          "test-case",
+				TestSuiteName: "test-suite",
+				Passes:        make([]jobrunaggregatorlib.TestCasePass, test.passes),
+				Failures:      make([]jobrunaggregatorlib.TestCaseFailure, test.failures),
+				Skips:         make([]jobrunaggregatorlib.TestCaseSkip, test.skips),
+			}
+
+			// Create unique job run IDs for passes and failures
+			for i := 0; i < test.passes; i++ {
+				testCaseDetails.Passes[i] = jobrunaggregatorlib.TestCasePass{
+					JobRunID: fmt.Sprintf("pass-%d", i),
+				}
+			}
+			for i := 0; i < test.failures; i++ {
+				testCaseDetails.Failures[i] = jobrunaggregatorlib.TestCaseFailure{
+					JobRunID: fmt.Sprintf("fail-%d", i),
+				}
+			}
+
+			status, message, err := baseline.CheckFailed(nil, "test-job", []string{"suite"}, testCaseDetails)
+
+			assert.NoError(t, err, "Should not error for: %s", test.name)
+			assert.NotEmpty(t, message, "Should have a message for: %s", test.name)
+			assert.Equal(t, test.expectedStatus, status, "%s: %s", test.description, message)
+			t.Logf("Test: %s\nStatus: %s\nMessage: %s", test.name, status, message)
+		})
+	}
+}
+
+func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
+	weeklyAverage := &weeklyAverageFromTenDays{}
+
+	tests := []struct {
+		name                 string
+		disruptions          []int
+		thresholdPercentile  int // also used as workingPercentage in innerCheckPercentileDisruptionWithGrace
+		historicalDisruption float64
+		graceSeconds         int
+		expectedStatus       testCaseStatus
+		expectedMinPasses    int
+		description          string
+	}{
+		// Note: In innerCheckPercentileDisruptionWithGrace, workingPercentage = thresholdPercentile
+		// So a P95 test uses 95% as the working percentage for calculating required passes
+		// The code also has -1 adjustment when pity factor doesn't relax the requirement
+		{
+			name:                 "10 attempts, P95 (95% working): 8 passes, 2 failures - exceeds requirement",
+			disruptions:          []int{0, 0, 0, 1, 1, 1, 1, 1, 5, 5},
+			thresholdPercentile:  95,
+			historicalDisruption: 2.0,
+			graceSeconds:         0,
+			expectedStatus:       testCasePassed,
+			expectedMinPasses:    6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation)
+			description:          "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 8 passes exceeds requirement",
+		},
+		{
+			name:                 "10 attempts, P95 (95% working): 7 passes, 3 failures - meets requirement",
+			disruptions:          []int{0, 0, 0, 1, 1, 1, 1, 5, 5, 5},
+			thresholdPercentile:  95,
+			historicalDisruption: 2.0,
+			graceSeconds:         0,
+			expectedStatus:       testCasePassed,
+			expectedMinPasses:    6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation)
+			description:          "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 7 passes exceeds requirement",
+		},
+		{
+			name:                 "6 attempts, P80 (80% working): 4 passes, 2 failures - exceeds requirement",
+			disruptions:          []int{0, 0, 1, 1, 5, 5},
+			thresholdPercentile:  80,
+			historicalDisruption: 2.0,
+			graceSeconds:         0,
+			expectedStatus:       testCasePassed,
+			expectedMinPasses:    1, // strict=2, pity=min(6-2,2)=2, then 2-1=1 (no relaxation)
+			description:          "Strict=2, pity=min(6-2,2)=2, -1 adj=1. 4 passes exceeds requirement",
+		},
+		{
+			name:                 "12 attempts, P95 (95% working): 10 passes, 2 failures - exceeds requirement",
+			disruptions:          []int{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 5, 5},
+			thresholdPercentile:  95,
+			historicalDisruption: 2.0,
+			graceSeconds:         0,
+			expectedStatus:       testCasePassed,
+			expectedMinPasses:    8, // strict=9, pity=min(12-2,9)=9, then 9-1=8 (no relaxation)
+			description:          "Strict=9, pity=min(12-2,9)=9, -1 adj=8. 10 passes exceeds requirement",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			jobRunIDToAvailabilityResultForBackend := createJobRunIDToAvailabilityResultForBackend(test.disruptions)
+			historicalDisruptionStatistic := backendDisruptionStats{
+				percentileByIndex: make([]float64, 100),
+			}
+			historicalDisruptionStatistic.percentileByIndex[test.thresholdPercentile] = test.historicalDisruption
+
+			requiredPasses, failureJobRunIDs, successJobRunIDs, status, summary :=
+				weeklyAverage.innerCheckPercentileDisruptionWithGrace(
+					jobRunIDToAvailabilityResultForBackend,
+					test.historicalDisruption,
+					test.thresholdPercentile,
+					test.graceSeconds,
+				)
+
+			t.Logf("Test: %s\nRequired: %d, Passes: %d, Failures: %d\nStatus: %s\nSummary: %s",
+				test.name, requiredPasses, len(successJobRunIDs), len(failureJobRunIDs), status, summary)
+
+			assert.Equal(t, test.expectedStatus, status, "%s: %s", test.description, summary)
+			assert.Equal(t, test.expectedMinPasses, requiredPasses,
+				"Required passes should match expected for: %s", test.name)
+			assert.Equal(t, len(test.disruptions), len(successJobRunIDs)+len(failureJobRunIDs),
+				"Total attempts should equal successes + failures")
+		})
+	}
+}

From f180675792f2d5a8d7311db3c89764eee5a6086f Mon Sep 17 00:00:00 2001
From: Petr Muller <muller@redhat.com>
Date: Mon, 12 Jan 2026 17:52:18 +0100
Subject: [PATCH 2/2] TRT-2487: job-run-aggregator: remove relaxing in
 disruption aggregation

I do not _entirely_ understand why the relaxation for disruption tests
exists here but the TODO exists this may be worth tightening. The
`pityFactor` will relax the threshold in some cases (high attempts,
high pass rate) so the additional relaxation would only apply in the
remaining cases.
---
 .../jobrunaggregatoranalyzer/pass_fail.go       |  6 ------
 .../jobrunaggregatoranalyzer/pass_fail_test.go  | 17 ++++++++---------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
index c7c607b3eae..0afd31786ec 100644
--- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
+++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail.go
@@ -505,12 +505,6 @@ func (a *weeklyAverageFromTenDays) innerCheckPercentileDisruptionWithGrace(
 	strictRequiredNumberOfPasses := requiredPassesByPassPercentageByNumberOfAttempts[numberOfAttempts][workingPercentage]
 	requiredNumberOfPasses, pityFactorMsg := pityFactor(numberOfAttempts, strictRequiredNumberOfPasses)
 
-	if requiredNumberOfPasses == strictRequiredNumberOfPasses {
-		// TODO try to tighten this after we can keep the test in for about a week.
-		// We need to come back and revisit the possibility of removing this adjustment.
-		requiredNumberOfPasses = requiredNumberOfPasses - 1 // subtracting one because our current sample missed by one
-	}
-
 	if requiredNumberOfPasses <= 0 {
 		message := fmt.Sprintf("Current percentile is so low that we cannot latch, skipping (P%d=%.2fs successes=%v failures=%v)", thresholdPercentile, threshold, successRuns, failureRuns)
 		failureJobRunIDs = sets.StringKeySet(jobRunIDToAvailabilityResultForBackend).List()
diff --git a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
index dfc01133b4b..ba221e2efe7 100644
--- a/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
+++ b/pkg/jobrunaggregator/jobrunaggregatoranalyzer/pass_fail_test.go
@@ -500,7 +500,6 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
 	}{
 		// Note: In innerCheckPercentileDisruptionWithGrace, workingPercentage = thresholdPercentile
 		// So a P95 test uses 95% as the working percentage for calculating required passes
-		// The code also has -1 adjustment when pity factor doesn't relax the requirement
 		{
 			name:                 "10 attempts, P95 (95% working): 8 passes, 2 failures - exceeds requirement",
 			disruptions:          []int{0, 0, 0, 1, 1, 1, 1, 1, 5, 5},
@@ -508,8 +507,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
 			historicalDisruption: 2.0,
 			graceSeconds:         0,
 			expectedStatus:       testCasePassed,
-			expectedMinPasses:    6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation)
-			description:          "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 8 passes exceeds requirement",
+			expectedMinPasses:    7, // strict=7, pity=min(10-2,7)=7 (no relaxation)
+			description:          "Strict=7, pity=min(10-2,7)=7. 8 passes exceeds requirement",
 		},
 		{
 			name:                 "10 attempts, P95 (95% working): 7 passes, 3 failures - meets requirement",
@@ -518,8 +517,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
 			historicalDisruption: 2.0,
 			graceSeconds:         0,
 			expectedStatus:       testCasePassed,
-			expectedMinPasses:    6, // strict=7, pity=min(10-2,7)=7, then 7-1=6 (no relaxation)
-			description:          "Strict=7, pity=min(10-2,7)=7, -1 adj=6. 7 passes exceeds requirement",
+			expectedMinPasses:    7, // strict=7, pity=min(10-2,7)=7 (no relaxation)
+			description:          "Strict=7, pity=min(10-2,7)=7. 7 passes meets requirement exactly",
 		},
 		{
 			name:                 "6 attempts, P80 (80% working): 4 passes, 2 failures - exceeds requirement",
@@ -528,8 +527,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
 			historicalDisruption: 2.0,
 			graceSeconds:         0,
 			expectedStatus:       testCasePassed,
-			expectedMinPasses:    1, // strict=2, pity=min(6-2,2)=2, then 2-1=1 (no relaxation)
-			description:          "Strict=2, pity=min(6-2,2)=2, -1 adj=1. 4 passes exceeds requirement",
+			expectedMinPasses:    2, // strict=2, pity=min(6-2,2)=2 (no relaxation)
+			description:          "Strict=2, pity=min(6-2,2)=2. 4 passes exceeds requirement",
 		},
 		{
 			name:                 "12 attempts, P95 (95% working): 10 passes, 2 failures - exceeds requirement",
@@ -538,8 +537,8 @@ func TestInnerCheckPercentileDisruptionWithPityFactor(t *testing.T) {
 			historicalDisruption: 2.0,
 			graceSeconds:         0,
 			expectedStatus:       testCasePassed,
-			expectedMinPasses:    8, // strict=9, pity=min(12-2,9)=9, then 9-1=8 (no relaxation)
-			description:          "Strict=9, pity=min(12-2,9)=9, -1 adj=8. 10 passes exceeds requirement",
+			expectedMinPasses:    9, // strict=9, pity=min(12-2,9)=9 (no relaxation)
+			description:          "Strict=9, pity=min(12-2,9)=9. 10 passes exceeds requirement",
 		},
 	}