def _replace_inf_rolling_avgs(
    rolling_avg_data: list[dict], fallback: float = 0.0
) -> None:
    """Replace non-finite rolling averages in place with a finite value.

    A non-finite rolling average breaks the downstream softmax: when
    every miner has inf, the softmax computes ``inf - inf = nan`` and all
    weights become NaN.

    Every entry whose ``"rolling_avg"`` fails ``np.isfinite`` (+inf, -inf
    or NaN) is overwritten with the worst finite average found in
    ``rolling_avg_data``. Lower averages are better, so the worst one is
    the maximum. When no entry is finite there is nothing to borrow, so
    all entries are set to ``fallback`` instead; they then share one
    finite value rather than staying inf.

    Args:
        rolling_avg_data: Records carrying a ``"rolling_avg"`` key. The
            list is mutated in place; an empty list is left untouched.
        fallback: Value used only when no record has a finite average.

    Returns:
        None. The records are modified in place.
    """
    finite_avgs = [
        r["rolling_avg"]
        for r in rolling_avg_data
        if np.isfinite(r["rolling_avg"])
    ]
    # Largest finite average == worst finite score (lower is better).
    # With no finite value at all, fall back to a shared constant so the
    # all-inf case no longer slips through unchanged.
    replacement = max(finite_avgs) if finite_avgs else fallback
    for r in rolling_avg_data:
        if not np.isfinite(r["rolling_avg"]):
            r["rolling_avg"] = replacement
class TestSoftmaxWithInf(unittest.TestCase):
    """Pin down how compute_softmax behaves when scores contain inf."""

    def test_softmax_single_inf_gives_zero_weight(self):
        """One inf among finite scores ends up with exactly zero weight."""
        result = compute_softmax(np.array([50, 60, 70, np.inf]), -0.2)
        self.assertTrue(np.isfinite(result).all())
        self.assertAlmostEqual(result.sum(), 1.0, places=6)
        self.assertEqual(result[-1], 0.0)

    def test_softmax_all_inf_produces_nan(self):
        """An all-inf input yields NaN for every weight (the bug)."""
        all_inf = np.array([np.inf, np.inf, np.inf])
        result = compute_softmax(all_inf, -0.2)
        # Demonstrates the bug: every weight comes back as NaN.
        self.assertTrue(np.isnan(result).all())

    def test_softmax_all_finite_works(self):
        """Ordinary finite scores give finite weights that sum to one."""
        result = compute_softmax(np.array([50, 60, 70, 80]), -0.2)
        self.assertTrue(np.isfinite(result).all())
        self.assertAlmostEqual(result.sum(), 1.0, places=6)
        # beta < 0, so the lowest (best) score gets the largest weight.
        first_weight, last_weight = result[0], result[-1]
        self.assertGreater(first_weight, last_weight)