From e5a3fdbf46193f92d39721673fd4ca4d10c149ae Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 18:20:03 -0400
Subject: [PATCH 1/6] Fix optimizer score versions default featured flag

---
 MCP/tools/score/score_update_guidelines_test.py | 6 +++---
 MCP/tools/score/scores.py                       | 2 +-
 MCP/tools/score/scores_test.py                  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/MCP/tools/score/score_update_guidelines_test.py b/MCP/tools/score/score_update_guidelines_test.py
index 399333a14..fbb17093a 100644
--- a/MCP/tools/score/score_update_guidelines_test.py
+++ b/MCP/tools/score/score_update_guidelines_test.py
@@ -45,7 +45,7 @@ def test_guidelines_only_update_none_handling(self):
             'configuration': current_version_data['configuration'],  # Preserved existing code
             'guidelines': update_params['guidelines'],  # New guidelines
             'note': update_params['version_note'],
-            'isFeatured': "true",
+            'isFeatured': "false",
             'parentVersionId': 'version-current'
         }
         
@@ -241,7 +241,7 @@ def simulate_version_input_creation(score_id, code_content, guidelines, note, pa
                 'scoreId': score_id,
                 'configuration': (code_content or '').strip(),  # Fixed to handle None
                 'note': note or 'Updated via MCP score update tool',
-                'isFeatured': "true"
+                'isFeatured': "false"
             }
             
             # Add guidelines if provided (fixed logic)
@@ -357,7 +357,7 @@ def simulate_complete_workflow():
                     'scoreId': score_data['id'],
                     'configuration': (code or '').strip(),
                     'note': update_params['version_note'] or 'Updated via MCP',
-                    'isFeatured': "true"
+                    'isFeatured': "false"
                 }
                 
                 if guidelines:
diff --git a/MCP/tools/score/scores.py b/MCP/tools/score/scores.py
index b1851d1a4..876edd20b 100644
--- a/MCP/tools/score/scores.py
+++ b/MCP/tools/score/scores.py
@@ -2682,7 +2682,7 @@ async def _create_version_from_code_with_parent(
             'scoreId': score.id,
             'configuration': (code_content or '').strip(),
             'note': note or 'Updated via MCP score update tool',
-            'isFeatured': "true"  # Mark as featured by default
+            'isFeatured': "false"
         }
         
         # Add guidelines if provided
diff --git a/MCP/tools/score/scores_test.py b/MCP/tools/score/scores_test.py
index e485a0926..7339be77b 100644
--- a/MCP/tools/score/scores_test.py
+++ b/MCP/tools/score/scores_test.py
@@ -1670,14 +1670,14 @@ def test_create_version_with_parent_version_input(self):
             'configuration': code_content.strip(),
             'guidelines': guidelines.strip(),
             'note': note,
-            'isFeatured': "true",
+            'isFeatured': "false",
             'parentVersionId': parent_version_id  # Should set parent relationship
         }
         
         # Verify parent relationship is established
         assert expected_version_input['parentVersionId'] == parent_version_id
         assert expected_version_input['scoreId'] == score_id
-        assert expected_version_input['isFeatured'] == "true"
+        assert expected_version_input['isFeatured'] == "false"
     
     def test_create_version_with_parent_error_handling(self):
         """Test error handling in _create_version_from_code_with_parent"""

From eaa2da9dfb1baf51f0c62a265a9d4dc7304b26dc Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 18:29:26 -0400
Subject: [PATCH 2/6] Add score rubric consistency preflight

---
 MCP/tools/evaluation/evaluations.py           |  26 ++-
 dashboard/components/EvaluationTask.tsx       |  26 +++
 plexus/cli/evaluation/evaluations.py          |  86 +++++--
 plexus/cli/item/items.py                      |  61 ++++-
 ...est_feedback_alignment_optimizer_config.py |   8 +
 .../feedback_alignment_optimizer.yaml         |   1 +
 plexus/score_rubric_consistency.py            | 213 ++++++++++++++++++
 plexus/score_rubric_consistency_test.py       |  93 ++++++++
 8 files changed, 495 insertions(+), 19 deletions(-)
 create mode 100644 plexus/score_rubric_consistency.py
 create mode 100644 plexus/score_rubric_consistency_test.py

diff --git a/MCP/tools/evaluation/evaluations.py b/MCP/tools/evaluation/evaluations.py
index 4aef629f1..d8cf242c6 100644
--- a/MCP/tools/evaluation/evaluations.py
+++ b/MCP/tools/evaluation/evaluations.py
@@ -345,6 +345,7 @@ async def plexus_evaluation_run(
         use_score_associated_dataset: bool = False,
         batch: Optional[List[Dict[str, Any]]] = None,
         notes: Optional[str] = None,
+        score_rubric_consistency_check: bool = False,
     ) -> str:
         """
         Run an evaluation using the same code path as CLI.
@@ -391,6 +392,9 @@ async def plexus_evaluation_run(
                  the evaluation's parameters JSON under the "notes" key. Useful for recording
                  context like "Baseline: deterministic accuracy dataset" or
                  "Iteration 3: Added example for transfer-request edge case".
+        - score_rubric_consistency_check: Feedback evaluations only. When True, run a
+                 preflight check that compares the evaluated ScoreVersion code against its
+                 own rubric and store the resulting status and paragraph on Evaluation.parameters.
 
         Returns:
         - JSON string with evaluation results including evaluation_id, metrics, and dashboard URL.
@@ -434,19 +438,27 @@ def _apply_notes_to_evaluation(evaluation_id: str, notes_text: str) -> None:
                 batch_list = list(batch)
             logger.info(f"Batch evaluation: dispatching {len(batch_list)} evaluations in parallel")
             for i, item in enumerate(batch_list):
-                logger.info(f"  Batch item {i+1}: type={item.get('evaluation_type')}, score={item.get('score_name')}, wait={item.get('wait')}")
+                logger.info(
+                    f"  Batch item {i + 1}: type={item.get('evaluation_type')}, "
+                    f"score={item.get('score_name')}, wait={item.get('wait')}"
+                )
             tasks = [plexus_evaluation_run(**item) for item in batch_list]
             raw_results = await _asyncio.gather(*tasks, return_exceptions=True)
             output = []
             for i, r in enumerate(raw_results):
                 if isinstance(r, Exception):
-                    logger.error(f"Batch item {i+1} raised exception: {type(r).__name__}: {r}", exc_info=r)
+                    logger.error(
+                        f"Batch item {i + 1} raised exception: {type(r).__name__}: {r}",
+                        exc_info=r,
+                    )
                     output.append({"error": f"{type(r).__name__}: {r}"})
                 else:
                     try:
                         output.append(json.loads(r))
                     except Exception as parse_exc:
-                        logger.error(f"Batch item {i+1} result parse error: {parse_exc}, raw={str(r)[:500]}")
+                        logger.error(
+                            f"Batch item {i + 1} result parse error: {parse_exc}, raw={str(r)[:500]}"
+                        )
                         output.append({"error": "Could not parse result", "raw": str(r)})
             logger.info(f"Batch evaluation complete: {len(output)} results")
             # Post-batch notes application: apply notes to each eval sequentially
@@ -459,7 +471,9 @@ def _apply_notes_to_evaluation(evaluation_id: str, notes_text: str) -> None:
                     if eval_id:
                         _apply_notes_to_evaluation(eval_id, item_notes)
                     else:
-                        logger.warning(f"Batch item {i+1}: no eval_id in result, cannot apply notes")
+                        logger.warning(
+                            f"Batch item {i + 1}: no eval_id in result, cannot apply notes"
+                        )
             return json.dumps(output)
 
         if not scorecard_name:
@@ -576,6 +590,8 @@ def _spawn_feedback(
                     cmd += ["--max-category-summary-items", str(max_category_items)]
                     if runner_task_id:
                         cmd += ["--task-id", runner_task_id]
+                    if score_rubric_consistency_check:
+                        cmd += ["--score-rubric-consistency-check"]
                     subprocess.Popen(
                         cmd,
                         stdout=subprocess.DEVNULL,
@@ -699,6 +715,8 @@ def _spawn_feedback(
                         fb_args.extend(['--sample-seed', str(sample_seed)])
                     if notes:
                         fb_args.extend(['--notes', notes])
+                    if score_rubric_consistency_check:
+                        fb_args.append('--score-rubric-consistency-check')
                     # When a specific version is requested, yaml mode must be disabled.
                     effective_yaml = yaml and not resolved_version
                     if effective_yaml:
diff --git a/dashboard/components/EvaluationTask.tsx b/dashboard/components/EvaluationTask.tsx
index 1f0160ddc..2e942c29a 100644
--- a/dashboard/components/EvaluationTask.tsx
+++ b/dashboard/components/EvaluationTask.tsx
@@ -1067,6 +1067,15 @@ const DetailContent = React.memo(({
 
   const rootCauseTopics = rootCauseData?.topics ?? null
   const misclassificationAnalysis = rootCauseData?.misclassification_analysis ?? null
+  const scoreRubricConsistencyCheck = useMemo(() => {
+    try {
+      const params = parseJsonDeep(data.parameters) as Record<string, unknown> | null
+      const check = params?.score_rubric_consistency_check
+      return (check && typeof check === 'object') ? check as Record<string, unknown> : null
+    } catch {
+      return null
+    }
+  }, [data.parameters])
   const rcaCoverage = useMemo(() => {
     try {
       const params = parseJsonDeep(data.parameters) as Record<string, unknown> | null
@@ -1531,6 +1540,23 @@ const DetailContent = React.memo(({
                   </div>
                 )}
 
+                {scoreRubricConsistencyCheck && (
+                  <Alert className="mt-4">
+                    <AlertTriangle className="h-4 w-4" />
+                    <AlertTitle className="text-sm">
+                      Score/rubric consistency
+                      {typeof scoreRubricConsistencyCheck.status === 'string'
+                        ? `: ${scoreRubricConsistencyCheck.status}`
+                        : ''}
+                    </AlertTitle>
+                    <AlertDescription className="text-sm">
+                      {typeof scoreRubricConsistencyCheck.paragraph === 'string'
+                        ? scoreRubricConsistencyCheck.paragraph
+                        : 'No consistency summary was generated.'}
+                    </AlertDescription>
+                  </Alert>
+                )}
+
                 {/* Score-Configuration RCA */}
                 {(rootCauseData && (
                   (rootCauseTopics && rootCauseTopics.length > 0) ||
diff --git a/plexus/cli/evaluation/evaluations.py b/plexus/cli/evaluation/evaluations.py
index b05212a79..939b4663e 100644
--- a/plexus/cli/evaluation/evaluations.py
+++ b/plexus/cli/evaluation/evaluations.py
@@ -69,6 +69,10 @@ def truncate_dict_strings(d, max_length=100):
     run_feedback_evaluation_orchestrated,
 )
 from plexus.utils.feedback_selection import select_feedback_items
+from plexus.score_rubric_consistency import (
+    ScoreRubricConsistencyService,
+    merge_consistency_result_into_parameters,
+)
 
 from plexus.utils import truncate_dict_strings_inner
 
@@ -85,7 +89,7 @@ def log_scorecard_configurations(scorecard_instance, context=""):
         version = score_config.get('version', 'Not specified')
         champion_version = score_config.get('championVersionId', 'Not specified')
         
-        logging.info(f"Score #{i+1}: {score_name}")
+        logging.info(f"Score #{i + 1}: {score_name}")
         logging.info(f"  ID: {score_id}")
         logging.info(f"  Version field: {version}")
         logging.info(f"  Champion version: {champion_version}")
@@ -2756,9 +2760,9 @@ async def _run_accuracy():
                 scorecard_id_resolved = getattr(scorecard_instance, 'id', None)
                 logging.info(f"Resolved from attributes: name='{scorecard_name_resolved}', key='{scorecard_key_resolved}', id='{scorecard_id_resolved}' (type: {type(scorecard_id_resolved)})")
             else:
-                scorecard_name_resolved = scorecard # Fallback to initial identifier
-                scorecard_key_resolved = scorecard # Fallback to initial identifier
-                scorecard_id_resolved = scorecard # Fallback to initial identifier
+                scorecard_name_resolved = scorecard  # Fallback to initial identifier
+                scorecard_key_resolved = scorecard  # Fallback to initial identifier
+                scorecard_id_resolved = scorecard  # Fallback to initial identifier
                 logging.info(f"Using fallback: name='{scorecard_name_resolved}', key='{scorecard_key_resolved}', id='{scorecard_id_resolved}' (type: {type(scorecard_id_resolved)})")
             
             # Check if any cloud dataset options are provided
@@ -3205,9 +3209,9 @@ async def _run_accuracy():
                     raise
             
             # Display final results summary
-            logging.info(f"\n{'='*60}")
+            logging.info(f"\n{'=' * 60}")
             logging.info("EVALUATION RESULTS")
-            logging.info('='*60)
+            logging.info('=' * 60)
             logging.info(f"Sample Size:        {len(labeled_samples_data)}")
 
             # Safely extract metrics (handle None case)
@@ -3236,7 +3240,7 @@ async def _run_accuracy():
             if detailed_summary:
                 logging.info(detailed_summary)
             
-            logging.info('='*60)
+            logging.info('=' * 60)
             
             # Complete task lifecycle in tracker/API task record.
             if tracker:
@@ -3538,8 +3542,8 @@ def check_dict_serializability(d, path=""):
                 if isinstance(item, dict):
                     non_serializable_paths.extend(check_dict_serializability(item, item_path))
                 elif isinstance(item, list):
-                     # Handle nested lists if necessary, though less common in typical results
-                     pass # Or add recursive list check if needed
+                    # Handle nested lists if necessary, though less common in typical results
+                    pass  # Or add recursive list check if needed
                 elif not is_json_serializable(item):
                     logging.warning(f"Non-serializable list item found at path '{item_path}': type={type(item)}")
                     non_serializable_paths.append(item_path)
@@ -3600,8 +3604,8 @@ def score_text_wrapper(scorecard_instance, text, score_name, scorecard_name=None
                 score_result_name = None
                 if hasattr(score_result_obj, 'parameters') and hasattr(score_result_obj.parameters, 'name'):
                     score_result_name = score_result_obj.parameters.name
-                elif hasattr(score_result_obj, 'name'): # Fallback if name is directly on the object
-                     score_result_name = score_result_obj.name
+                elif hasattr(score_result_obj, 'name'):  # Fallback if name is directly on the object
+                    score_result_name = score_result_obj.name
 
                 if score_result_name == score_name:
                     # Ensure the returned object is a Score.Result instance or similar
@@ -3623,8 +3627,8 @@ def score_text_wrapper(scorecard_instance, text, score_name, scorecard_name=None
             #     return result[first_key]
             # else:
             #     return {"error": "Empty result dictionary", "value": "ERROR"}
-        elif hasattr(result, 'value'): # Handle case where a single Result object is returned directly
-             return result
+        elif hasattr(result, 'value'):  # Handle case where a single Result object is returned directly
+            return result
         else:
             # Handle unexpected result types
             logging.warning(f"Unexpected result type from score_entire_text: {type(result)}")
@@ -4136,6 +4140,12 @@ def last(account_key: str, type: Optional[str]):
 @click.option('--yaml', 'use_yaml', is_flag=True, help='Load scorecard from local YAML files instead of the API')
 @click.option('--task-id', default=None, type=str, help='Task ID for progress tracking')
 @click.option('--notes', default=None, type=str, help='Freeform notes explaining why this evaluation is being run. Stored in evaluation parameters.')
+@click.option(
+    '--score-rubric-consistency-check',
+    is_flag=True,
+    default=False,
+    help='Before feedback-backed predictions, compare the evaluated ScoreVersion code against its rubric and store the result.',
+)
 def feedback(
     scorecard: str,
     score: str,
@@ -4150,6 +4160,7 @@ def feedback(
     use_yaml: bool,
     task_id: Optional[str],
     notes: Optional[str] = None,
+    score_rubric_consistency_check: bool = False,
 ):
     """
     Evaluate feedback alignment by analyzing feedback items over a time period for a specific score.
@@ -4482,6 +4493,7 @@ def feedback(
                         "sample_seed": sample_seed,
                         "max_category_summary_items": max_category_summary_items,
                         "mode": "accuracy_with_feedback_dataset",
+                        "score_rubric_consistency_check_requested": bool(score_rubric_consistency_check),
                         **({"notes": notes} if notes else {}),
                         "metadata": {
                             **({"baseline": baseline} if baseline else {}),
@@ -4496,6 +4508,54 @@ def feedback(
                 
                 console.print(f"\nCreated evaluation record: {evaluation_id}")
                 console.print(f"Dashboard URL: https://app.plexusanalytics.com/evaluations/{evaluation_id}")
+
+                if score_rubric_consistency_check:
+                    console.print("\n[bold]Checking score code against rubric...[/bold]")
+                    try:
+                        consistency_result = ScoreRubricConsistencyService().generate_from_api(
+                            client=client,
+                            scorecard_identifier=scorecard,
+                            score_identifier=score_name_for_dataset,
+                            score_id=score_id,
+                            score_version_id=version,
+                        )
+                        merged_parameters = merge_consistency_result_into_parameters(
+                            evaluation_record.parameters,
+                            consistency_result,
+                        )
+                        evaluation_record.update(parameters=json.dumps(merged_parameters))
+                        console.print(
+                            f"[dim]Score/rubric consistency: {consistency_result.status}[/dim]"
+                        )
+                    except Exception as consistency_error:
+                        logging.warning(
+                            "Score/rubric consistency check failed for evaluation %s: %s",
+                            evaluation_id,
+                            consistency_error,
+                            exc_info=True,
+                        )
+                        merged_parameters = {}
+                        try:
+                            merged_parameters = (
+                                json.loads(evaluation_record.parameters)
+                                if isinstance(evaluation_record.parameters, str)
+                                and evaluation_record.parameters
+                                else (evaluation_record.parameters or {})
+                            )
+                        except Exception:
+                            merged_parameters = {}
+                        merged_parameters["score_rubric_consistency_check"] = {
+                            "scorecard_identifier": scorecard,
+                            "score_identifier": score_name_for_dataset,
+                            "score_version_id": version,
+                            "status": "unavailable",
+                            "paragraph": (
+                                "The score/rubric consistency check failed before predictions ran."
+                            ),
+                            "error": str(consistency_error),
+                            "checked_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+                        }
+                        evaluation_record.update(parameters=json.dumps(merged_parameters))
                 
                 # Run accuracy evaluation with the modified scorecard
                 console.print("\n[bold]Running accuracy evaluation with FeedbackItems dataset...[/bold]")
diff --git a/plexus/cli/item/items.py b/plexus/cli/item/items.py
index ad8ecccb3..714049e79 100644
--- a/plexus/cli/item/items.py
+++ b/plexus/cli/item/items.py
@@ -13,6 +13,11 @@
 from plexus.cli.shared.console import console
 from plexus.cli.report.utils import resolve_account_id_for_command
 import json
+from plexus.cli.shared.identifier_resolution import (
+    resolve_score_identifier,
+    resolve_scorecard_identifier,
+)
+from plexus.score_rubric_consistency import ScoreRubricConsistencyService
 
 def format_datetime(dt: Optional[datetime]) -> str:
     """Format datetime with proper handling of None values"""
@@ -471,6 +476,54 @@ def create(account: Optional[str], evaluation_id: Optional[str], text: Optional[
         import traceback
         print(traceback.format_exc())
 
+
+@items.command(name="contradictions")
+@click.option("--scorecard", "scorecard_identifier", required=True)
+@click.option("--score", "score_identifier", required=True)
+@click.option("--version", "score_version_id", required=True)
+@click.option("--item", "item_identifier", default=None, help="Optional item id or identifier for spot-check context.")
+@click.option("--format", "output_format", type=click.Choice(["markdown", "json"]), default="markdown")
+def contradictions(
+    scorecard_identifier: str,
+    score_identifier: str,
+    score_version_id: str,
+    item_identifier: Optional[str],
+    output_format: str,
+):
+    """Check whether one ScoreVersion's code is consistent with its rubric."""
+    client = create_client()
+    account_id = resolve_account_id_for_command(client, None)
+    scorecard_id = resolve_scorecard_identifier(client, scorecard_identifier)
+    if not scorecard_id:
+        raise click.ClickException(f"Could not resolve scorecard: {scorecard_identifier}")
+    score_id = resolve_score_identifier(client, scorecard_id, score_identifier)
+    if not score_id:
+        raise click.ClickException(
+            f"Could not resolve score '{score_identifier}' in scorecard '{scorecard_identifier}'"
+        )
+
+    item_text = ""
+    if item_identifier:
+        item = find_item_by_any_identifier(client, item_identifier, account_id)
+        if not item:
+            raise click.ClickException(f"Could not resolve item: {item_identifier}")
+        item_text = item.text or ""
+
+    result = ScoreRubricConsistencyService().generate_from_api(
+        client=client,
+        scorecard_identifier=scorecard_identifier,
+        score_identifier=score_identifier,
+        score_id=score_id,
+        score_version_id=score_version_id,
+        item_text=item_text,
+    )
+    payload = result.to_parameters_payload()
+    if output_format == "json":
+        console.print_json(json.dumps(payload))
+    else:
+        console.print(f"[bold]Status:[/bold] {result.status}")
+        console.print(result.paragraph)
+
 @items.command()
 @click.option('--account', help='Account key or ID (optional, uses default from environment if not provided)')
 @click.option('--evaluation-id', help='Filter by evaluation ID')
@@ -878,7 +931,10 @@ def upsert(account: Optional[str], json_file: Optional[str], data: Optional[str]
         batch_end = min(batch_start + batch_size, len(items_data))
         batch = items_data[batch_start:batch_end]
         
-        console.print(f"\n[bold]Processing batch {batch_start//batch_size + 1} ({batch_start + 1}-{batch_end} of {len(items_data)})[/bold]")
+        console.print(
+            f"\n[bold]Processing batch {batch_start // batch_size + 1} "
+            f"({batch_start + 1}-{batch_end} of {len(items_data)})[/bold]"
+        )
         
         for i, item_data in enumerate(batch, batch_start + 1):
             try:
@@ -975,6 +1031,7 @@ def item():
 item.add_command(list)
 item.add_command(last)
 item.add_command(info)
+item.add_command(contradictions)
 item.add_command(update)
 item.add_command(upsert)
-item.add_command(delete) 
\ No newline at end of file
+item.add_command(delete)
diff --git a/plexus/cli/procedure/test_feedback_alignment_optimizer_config.py b/plexus/cli/procedure/test_feedback_alignment_optimizer_config.py
index 1c530a6ec..c4435b4a9 100644
--- a/plexus/cli/procedure/test_feedback_alignment_optimizer_config.py
+++ b/plexus/cli/procedure/test_feedback_alignment_optimizer_config.py
@@ -321,6 +321,14 @@ def test_optimizer_yaml_runs_contradictions_directly_without_background_dispatch
     assert 'cache_key = "FeedbackContradictions (expanded): " .. scorecard_name .. " / " .. score_name' in code
 
 
+def test_optimizer_baseline_feedback_runs_score_rubric_consistency_check():
+    config = _load_optimizer_config()
+    code = config["code"]
+
+    assert "score_rubric_consistency_check = true" in code
+    assert 'evaluation_type    = "feedback"' in code
+
+
 def test_optimizer_yaml_treats_cycle_errors_as_terminal_and_does_not_extend_iteration_cap():
     config = _load_optimizer_config()
     code = config["code"]
diff --git a/plexus/procedures/feedback_alignment_optimizer.yaml b/plexus/procedures/feedback_alignment_optimizer.yaml
index 974e61ca6..3b5b03f19 100644
--- a/plexus/procedures/feedback_alignment_optimizer.yaml
+++ b/plexus/procedures/feedback_alignment_optimizer.yaml
@@ -3819,6 +3819,7 @@ code: |
         max_feedback_items = params.max_samples or 100,
         sampling_mode      = "newest",
         wait               = true,
+        score_rubric_consistency_check = true,
         notes              = "Baseline: recent feedback alignment dataset for " .. score_name,
       })
     end
diff --git a/plexus/score_rubric_consistency.py b/plexus/score_rubric_consistency.py
new file mode 100644
index 000000000..ff0266a27
--- /dev/null
+++ b/plexus/score_rubric_consistency.py
@@ -0,0 +1,213 @@
+"""Score-version rubric consistency checks.
+
+This module owns the lightweight preflight that asks whether the score code for a
+specific ScoreVersion appears consistent with that same version's rubric text.
+The result is designed to be persisted on Evaluation.parameters and displayed as
+operator context before RCA.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from typing import Any, Callable, Dict, Optional
+
+
+@dataclass(frozen=True)
+class ScoreRubricConsistencyRequest:
+    scorecard_identifier: str
+    score_identifier: str
+    score_version_id: str
+    rubric_text: str
+    score_code: str
+    item_text: str = ""
+
+
+@dataclass(frozen=True)
+class ScoreRubricConsistencyResult:
+    scorecard_identifier: str
+    score_identifier: str
+    score_version_id: str
+    status: str
+    paragraph: str
+    checked_at: str
+    model: str
+    diagnostics: Dict[str, Any]
+
+    def to_parameters_payload(self) -> Dict[str, Any]:
+        return asdict(self)
+
+
+class ScoreRubricConsistencyService:
+    """Generate a concise score-code vs rubric consistency assessment."""
+
+    DEFAULT_MODEL = "gpt-5-mini"
+    VALID_STATUSES = {"consistent", "potential_conflict", "inconclusive"}
+
+    def __init__(
+        self,
+        *,
+        invoke_model: Optional[Callable[[str, str], str]] = None,
+        model: str = DEFAULT_MODEL,
+    ):
+        self._invoke_model = invoke_model or self._invoke_openai
+        self._model = model
+
+    def generate(self, request: ScoreRubricConsistencyRequest) -> ScoreRubricConsistencyResult:
+        prompt = self._build_prompt(request)
+        raw_text = self._invoke_model(prompt, self._model)
+        try:
+            parsed = self._parse_response(raw_text)
+        except json.JSONDecodeError:
+            repair_prompt = (
+                f"{prompt}\n\nYour prior response was not valid JSON:\n"
+                f"{_truncate(raw_text or '(empty response)', 1000)}\n\n"
+                "Return ONLY valid JSON with exactly these keys: status, paragraph."
+            )
+            raw_text = self._invoke_model(repair_prompt, self._model)
+            parsed = self._parse_response(raw_text)
+        status = str(parsed.get("status") or "inconclusive").strip()
+        if status not in self.VALID_STATUSES:
+            status = "inconclusive"
+        paragraph = _compact_paragraph(str(parsed.get("paragraph") or ""))
+        if not paragraph:
+            paragraph = "The consistency check did not produce a usable assessment."
+            status = "inconclusive"
+        return ScoreRubricConsistencyResult(
+            scorecard_identifier=request.scorecard_identifier,
+            score_identifier=request.score_identifier,
+            score_version_id=request.score_version_id,
+            status=status,
+            paragraph=paragraph,
+            checked_at=datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+            model=self._model,
+            diagnostics={
+                "rubric_characters": len(request.rubric_text or ""),
+                "score_code_characters": len(request.score_code or ""),
+                "item_context_characters": len(request.item_text or ""),
+            },
+        )
+
+    def generate_from_api(
+        self,
+        *,
+        client: Any,
+        scorecard_identifier: str,
+        score_identifier: str,
+        score_id: str,
+        score_version_id: str,
+        item_text: str = "",
+    ) -> ScoreRubricConsistencyResult:
+        version = fetch_score_version_for_consistency(client, score_version_id)
+        return self.generate(
+            ScoreRubricConsistencyRequest(
+                scorecard_identifier=scorecard_identifier,
+                score_identifier=score_identifier,
+                score_version_id=score_version_id,
+                rubric_text=version.get("guidelines") or "",
+                score_code=version.get("configuration") or "",
+                item_text=item_text or "",
+            )
+        )
+
+    def _build_prompt(self, request: ScoreRubricConsistencyRequest) -> str:
+        item_section = ""
+        if request.item_text:
+            item_section = (
+                "\nOptional item context for a spot-check:\n"
+                f"{_truncate(request.item_text, 4000)}\n"
+            )
+        return (
+            "You are checking one Plexus ScoreVersion before evaluation.\n"
+            "Compare the score code/prompt against the rubric text stored on the same ScoreVersion.\n"
+            "Identify only meaningful policy mismatches that could affect evaluation results. "
+            "Do not critique style, formatting, implementation architecture, or missing tests.\n\n"
+            "Return ONLY JSON with exactly these keys:\n"
+            '  "status": one of "consistent", "potential_conflict", "inconclusive"\n'
+            '  "paragraph": one short paragraph, 2-4 sentences, no headings or bullets\n\n'
+            f"Scorecard: {request.scorecard_identifier}\n"
+            f"Score: {request.score_identifier}\n"
+            f"ScoreVersion: {request.score_version_id}\n\n"
+            f"Rubric text:\n{_truncate(request.rubric_text, 12000)}\n\n"
+            f"Score code/configuration:\n{_truncate(request.score_code, 16000)}\n"
+            f"{item_section}"
+        )
+
+    def _parse_response(self, text: str) -> Dict[str, Any]:
+        cleaned = (text or "").strip()
+        if "```" in cleaned:
+            match = re.search(r"```(?:json)?\s*([\s\S]*?)```", cleaned)
+            if match:
+                cleaned = match.group(1).strip()
+        obj_match = re.search(r"\{[\s\S]*\}", cleaned)
+        if obj_match:
+            cleaned = obj_match.group(0)
+        return json.loads(cleaned)
+
+    def _invoke_openai(self, prompt: str, model: str) -> str:
+        from dotenv import load_dotenv
+        from openai import OpenAI
+
+        load_dotenv(override=False)
+        client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
+        response = client.responses.create(
+            model=model,
+            reasoning={"effort": "low"},
+            input=[{"role": "user", "content": prompt}],
+            max_output_tokens=2000,
+        )
+        return (response.output_text or "").strip()
+
+
+def fetch_score_version_for_consistency(client: Any, score_version_id: str) -> Dict[str, Any]:
+    query = """
+    query GetScoreVersionForRubricConsistency($id: ID!) {
+        getScoreVersion(id: $id) {
+            id
+            configuration
+            guidelines
+            note
+            score {
+                id
+                name
+            }
+        }
+    }
+    """
+    result = client.execute(query, {"id": score_version_id})
+    version = (result or {}).get("getScoreVersion")
+    if not version:
+        raise ValueError(f"ScoreVersion not found: {score_version_id}")
+    return version
+
+
+def merge_consistency_result_into_parameters(
+    parameters: Any,
+    result: ScoreRubricConsistencyResult,
+) -> Dict[str, Any]:
+    if isinstance(parameters, str):
+        try:
+            merged = json.loads(parameters) if parameters else {}
+        except Exception:
+            merged = {}
+    elif isinstance(parameters, dict):
+        merged = dict(parameters)
+    else:
+        merged = {}
+    merged["score_rubric_consistency_check"] = result.to_parameters_payload()
+    return merged
+
+
+def _truncate(value: str, limit: int) -> str:
+    value = value or ""
+    if len(value) <= limit:
+        return value
+    return value[:limit] + "\n...[truncated]"
+
+
+def _compact_paragraph(value: str) -> str:
+    value = re.sub(r"\s+", " ", value or "").strip()
+    return value[:1200]
diff --git a/plexus/score_rubric_consistency_test.py b/plexus/score_rubric_consistency_test.py
new file mode 100644
index 000000000..e8c6301e9
--- /dev/null
+++ b/plexus/score_rubric_consistency_test.py
@@ -0,0 +1,93 @@
+import json
+
+from plexus.score_rubric_consistency import (
+    ScoreRubricConsistencyRequest,
+    ScoreRubricConsistencyService,
+    merge_consistency_result_into_parameters,
+)
+
+
+def test_score_rubric_consistency_service_returns_compact_payload():  # generate() with a stubbed model call
+    def invoke(prompt: str, model: str) -> str:  # stub: checks the prompt/model it is given, returns canned JSON
+        assert "Score code/configuration" in prompt
+        assert model == "test-model"
+        return json.dumps(
+            {
+                "status": "potential_conflict",
+                "paragraph": (
+                    "The rubric says two missing dosages should fail, but the prompt allows "
+                    "two missing current medications. This may make the score more permissive "
+                    "than the rubric during evaluation."
+                ),
+            }
+        )
+
+    result = ScoreRubricConsistencyService(
+        invoke_model=invoke,
+        model="test-model",
+    ).generate(
+        ScoreRubricConsistencyRequest(
+            scorecard_identifier="Scorecard",
+            score_identifier="Medication Review: Dosage",
+            score_version_id="version-1",
+            rubric_text="Fail when two or more current meds lack dosage.",
+            score_code="Pass when no more than two meds lack dosage.",
+        )
+    )
+
+    assert result.status == "potential_conflict"  # status string from the stub's JSON
+    assert result.score_version_id == "version-1"  # not in the stub's JSON; presumably copied from the request
+    assert "more permissive than the rubric" in result.paragraph
+    assert result.diagnostics["rubric_characters"] > 0
+
+
+def test_merge_consistency_result_into_parameters_preserves_existing_fields():  # merge keeps keys already in parameters
+    service = ScoreRubricConsistencyService(
+        invoke_model=lambda _prompt, _model: json.dumps(
+            {"status": "consistent", "paragraph": "The score and rubric match."}
+        )
+    )
+    result = service.generate(
+        ScoreRubricConsistencyRequest(
+            scorecard_identifier="Scorecard",
+            score_identifier="Score",
+            score_version_id="version-1",
+            rubric_text="Rubric",
+            score_code="Code",
+        )
+    )
+
+    merged = merge_consistency_result_into_parameters(
+        json.dumps({"days": 90}),  # parameters passed as a JSON string, not a dict
+        result,
+    )
+
+    assert merged["days"] == 90  # pre-existing key survives the merge
+    assert merged["score_rubric_consistency_check"]["status"] == "consistent"
+    assert merged["score_rubric_consistency_check"]["score_version_id"] == "version-1"
+
+
+def test_score_rubric_consistency_retries_invalid_json_once():  # an unparseable first reply triggers one retry
+    calls = []  # prompts received by the stub, in call order
+
+    def invoke(prompt: str, _model: str) -> str:
+        calls.append(prompt)
+        if len(calls) == 1:
+            return ""  # first reply is empty, i.e. not valid JSON
+        return json.dumps(
+            {"status": "consistent", "paragraph": "The score code follows the rubric."}
+        )
+
+    result = ScoreRubricConsistencyService(invoke_model=invoke).generate(
+        ScoreRubricConsistencyRequest(
+            scorecard_identifier="Scorecard",
+            score_identifier="Score",
+            score_version_id="version-1",
+            rubric_text="Rubric",
+            score_code="Code",
+        )
+    )
+
+    assert result.status == "consistent"
+    assert len(calls) == 2  # exactly one retry after the bad reply
+    assert "prior response was not valid JSON" in calls[1]  # retry prompt mentions the failure

From f501e80697fc9dda5963fe015a7ec166580f8878 Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 19:48:51 -0400
Subject: [PATCH 3/6] Move rubric consistency command to score CLI

---
 plexus/cli/item/items.py        | 53 -------------------
 plexus/cli/score/scores.py      | 63 ++++++++++++++++++++++
 plexus/cli/score/scores_test.py | 94 ++++++++++++++++++++++++++++++++-
 3 files changed, 155 insertions(+), 55 deletions(-)

diff --git a/plexus/cli/item/items.py b/plexus/cli/item/items.py
index 714049e79..8bde6902a 100644
--- a/plexus/cli/item/items.py
+++ b/plexus/cli/item/items.py
@@ -13,11 +13,6 @@
 from plexus.cli.shared.console import console
 from plexus.cli.report.utils import resolve_account_id_for_command
 import json
-from plexus.cli.shared.identifier_resolution import (
-    resolve_score_identifier,
-    resolve_scorecard_identifier,
-)
-from plexus.score_rubric_consistency import ScoreRubricConsistencyService
 
 def format_datetime(dt: Optional[datetime]) -> str:
     """Format datetime with proper handling of None values"""
@@ -477,53 +472,6 @@ def create(account: Optional[str], evaluation_id: Optional[str], text: Optional[
         print(traceback.format_exc())
 
 
-@items.command(name="contradictions")
-@click.option("--scorecard", "scorecard_identifier", required=True)
-@click.option("--score", "score_identifier", required=True)
-@click.option("--version", "score_version_id", required=True)
-@click.option("--item", "item_identifier", default=None, help="Optional item id or identifier for spot-check context.")
-@click.option("--format", "output_format", type=click.Choice(["markdown", "json"]), default="markdown")
-def contradictions(
-    scorecard_identifier: str,
-    score_identifier: str,
-    score_version_id: str,
-    item_identifier: Optional[str],
-    output_format: str,
-):
-    """Check whether one ScoreVersion's code is consistent with its rubric."""
-    client = create_client()
-    account_id = resolve_account_id_for_command(client, None)
-    scorecard_id = resolve_scorecard_identifier(client, scorecard_identifier)
-    if not scorecard_id:
-        raise click.ClickException(f"Could not resolve scorecard: {scorecard_identifier}")
-    score_id = resolve_score_identifier(client, scorecard_id, score_identifier)
-    if not score_id:
-        raise click.ClickException(
-            f"Could not resolve score '{score_identifier}' in scorecard '{scorecard_identifier}'"
-        )
-
-    item_text = ""
-    if item_identifier:
-        item = find_item_by_any_identifier(client, item_identifier, account_id)
-        if not item:
-            raise click.ClickException(f"Could not resolve item: {item_identifier}")
-        item_text = item.text or ""
-
-    result = ScoreRubricConsistencyService().generate_from_api(
-        client=client,
-        scorecard_identifier=scorecard_identifier,
-        score_identifier=score_identifier,
-        score_id=score_id,
-        score_version_id=score_version_id,
-        item_text=item_text,
-    )
-    payload = result.to_parameters_payload()
-    if output_format == "json":
-        console.print_json(json.dumps(payload))
-    else:
-        console.print(f"[bold]Status:[/bold] {result.status}")
-        console.print(result.paragraph)
-
 @items.command()
 @click.option('--account', help='Account key or ID (optional, uses default from environment if not provided)')
 @click.option('--evaluation-id', help='Filter by evaluation ID')
@@ -1031,7 +979,6 @@ def item():
 item.add_command(list)
 item.add_command(last)
 item.add_command(info)
-item.add_command(contradictions)
 item.add_command(update)
 item.add_command(upsert)
 item.add_command(delete)
diff --git a/plexus/cli/score/scores.py b/plexus/cli/score/scores.py
index 03e954620..8faecf50a 100644
--- a/plexus/cli/score/scores.py
+++ b/plexus/cli/score/scores.py
@@ -13,6 +13,7 @@
 from rich.table import Table
 from rich.panel import Panel
 from plexus.cli.shared.console import console
+from plexus.cli.report.utils import resolve_account_id_for_command
 from plexus.dashboard.api.client import PlexusDashboardClient
 from plexus.cli.shared.file_editor import FileEditor
 from typing import Optional
@@ -40,6 +41,7 @@
     clear_resolver_caches
 )
 from plexus.cli.shared.score_config_fetching import fetch_and_cache_single_score
+from plexus.score_rubric_consistency import ScoreRubricConsistencyService
 
 # Define the main command groups that will be exported
 @click.group()
@@ -2097,6 +2099,67 @@ def score_evaluations(scorecard: str, score: str, version_id: Optional[str], sor
 scores.add_command(score_evaluations)
 
 
+@score.command(name="contradictions")
+@click.option("--scorecard", "scorecard_identifier", required=True)
+@click.option("--score", "score_identifier", required=True)
+@click.option("--version", "score_version_id", required=True)
+@click.option(
+    "--item",
+    "item_identifier",
+    default=None,
+    help=(
+        "Optional item id or identifier to include as example context; "
+        "the check is still score-version-level."
+    ),
+)
+@click.option("--format", "output_format", type=click.Choice(["markdown", "json"]), default="markdown")
+def contradictions(
+    scorecard_identifier: str,
+    score_identifier: str,
+    score_version_id: str,
+    item_identifier: Optional[str],
+    output_format: str,
+):
+    """Check whether one ScoreVersion's code is consistent with its rubric."""
+    client = create_client()
+    scorecard_id = memoized_resolve_scorecard_identifier(client, scorecard_identifier)
+    if not scorecard_id:
+        raise click.ClickException(f"Could not resolve scorecard: {scorecard_identifier}")
+    score_id = memoized_resolve_score_identifier(client, scorecard_id, score_identifier)
+    if not score_id:
+        raise click.ClickException(
+            f"Could not resolve score '{score_identifier}' in scorecard '{scorecard_identifier}'"
+        )
+
+    item_text = ""
+    if item_identifier:
+        from plexus.cli.item.items import find_item_by_any_identifier
+        account_id = resolve_account_id_for_command(client, None)
+        item = find_item_by_any_identifier(client, item_identifier, account_id)
+        if not item:
+            raise click.ClickException(f"Could not resolve item: {item_identifier}")
+        item_text = item.text or ""
+
+    result = ScoreRubricConsistencyService().generate_from_api(
+        client=client,
+        scorecard_identifier=scorecard_identifier,
+        score_identifier=score_identifier,
+        score_id=score_id,
+        score_version_id=score_version_id,
+        item_text=item_text,
+    )
+    payload = result.to_parameters_payload()
+    if output_format == "json":
+        console.print_json(json.dumps(payload))
+    else:
+        console.print(f"[bold]Status:[/bold] {result.status}")
+        # Model-written text: print literally so "[...]" is not parsed as Rich markup.
+        console.print(result.paragraph, markup=False)
+
+
+scores.add_command(contradictions)
+
+
 @score.command(name="promotion-packet")
 @click.option('--scorecard', '-s', required=True, help='Scorecard identifier (name, key, or ID)')
 @click.option('--score', '-c', required=True, help='Score identifier (name, key, or ID)')
diff --git a/plexus/cli/score/scores_test.py b/plexus/cli/score/scores_test.py
index bf9d76ea5..b33c4f88c 100644
--- a/plexus/cli/score/scores_test.py
+++ b/plexus/cli/score/scores_test.py
@@ -1,6 +1,8 @@
 import pytest
+from types import SimpleNamespace
 from unittest.mock import Mock, patch
-from plexus.cli.score.scores import optimize
+from click.testing import CliRunner
+from plexus.cli.score.scores import optimize, scores
 from plexus.cli.shared.file_editor import FileEditor
 
 @pytest.fixture
@@ -202,4 +204,92 @@ def test_cli_create_missing_path(mock_file_editor):
     
     assert tool_result_content == "Error: Missing parameters or file not found (file_path missing)"
     assert file_edited is False
-    mock_file_editor.create.assert_called_once_with("", "New content\n") 
\ No newline at end of file
+    mock_file_editor.create.assert_called_once_with("", "New content\n")
+
+
+def test_score_contradictions_runs_score_rubric_consistency_check():  # `contradictions --format json` without --item
+    runner = CliRunner()
+    payload = {
+        "status": "potential_conflict",
+        "paragraph": "The prompt is more lenient than the rubric.",
+    }
+    result_obj = Mock()
+    result_obj.to_parameters_payload.return_value = payload
+
+    with patch("plexus.cli.score.scores.create_client", return_value=Mock()) as create_client, \
+         patch("plexus.cli.score.scores.memoized_resolve_scorecard_identifier", return_value="scorecard-1"), \
+         patch("plexus.cli.score.scores.memoized_resolve_score_identifier", return_value="score-1"), \
+         patch("plexus.cli.score.scores.ScoreRubricConsistencyService") as service_class:
+        service_class.return_value.generate_from_api.return_value = result_obj  # client, resolvers and service are all mocks
+
+        result = runner.invoke(
+            scores,
+            [
+                "contradictions",
+                "--scorecard",
+                "Scorecard",
+                "--score",
+                "Score",
+                "--version",
+                "version-1",
+                "--format",
+                "json",
+            ],
+        )
+
+    assert result.exit_code == 0
+    assert "potential_conflict" in result.output  # payload value shows up in the JSON output
+    service_class.return_value.generate_from_api.assert_called_once_with(
+        client=create_client.return_value,
+        scorecard_identifier="Scorecard",
+        score_identifier="Score",
+        score_id="score-1",
+        score_version_id="version-1",
+        item_text="",  # no --item given, so the item context is empty
+    )
+
+
+def test_score_contradictions_can_include_optional_item_context():  # --item text is forwarded as item_text
+    runner = CliRunner()
+    result_obj = Mock()
+    result_obj.to_parameters_payload.return_value = {
+        "status": "consistent",
+        "paragraph": "The prompt follows the rubric.",
+    }
+
+    with patch("plexus.cli.score.scores.create_client", return_value=Mock()), \
+         patch("plexus.cli.score.scores.memoized_resolve_scorecard_identifier", return_value="scorecard-1"), \
+         patch("plexus.cli.score.scores.memoized_resolve_score_identifier", return_value="score-1"), \
+         patch("plexus.cli.score.scores.resolve_account_id_for_command", return_value="account-1"), \
+         patch("plexus.cli.item.items.find_item_by_any_identifier", return_value=SimpleNamespace(text="item text")), \
+         patch("plexus.cli.score.scores.ScoreRubricConsistencyService") as service_class:
+        service_class.return_value.generate_from_api.return_value = result_obj
+
+        result = runner.invoke(
+            scores,
+            [
+                "contradictions",
+                "--scorecard",
+                "Scorecard",
+                "--score",
+                "Score",
+                "--version",
+                "version-1",
+                "--item",
+                "item-1",
+            ],
+        )
+
+    assert result.exit_code == 0
+    assert "Status:" in result.output  # default (markdown) format prints a status line
+    service_class.return_value.generate_from_api.assert_called_once()
+    assert service_class.return_value.generate_from_api.call_args.kwargs["item_text"] == "item text"  # from the patched item lookup
+
+
+def test_item_contradictions_is_not_registered():  # the `item` group must no longer expose `contradictions`
+    from plexus.cli.item.items import item
+
+    result = CliRunner().invoke(item, ["contradictions"])
+
+    assert result.exit_code != 0  # click rejects the unknown subcommand
+    assert "No such command 'contradictions'" in result.output

From 34ac8561027aa579b747dd9e03ae18dddf9af1d2 Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 19:57:59 -0400
Subject: [PATCH 4/6] Fix evaluation RCA item filtering UI

---
 dashboard/components/EvaluationTask.tsx       | 32 ++++++++++-----
 .../components/EvaluationTaskScoreResults.tsx | 40 +++++++++++++++++--
 dashboard/components/ui/task-status.tsx       |  7 +++-
 3 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/dashboard/components/EvaluationTask.tsx b/dashboard/components/EvaluationTask.tsx
index 2e942c29a..7fc487052 100644
--- a/dashboard/components/EvaluationTask.tsx
+++ b/dashboard/components/EvaluationTask.tsx
@@ -942,9 +942,12 @@ const DetailContent = React.memo(({
   }
 
   const selectFirstFilteredScoreResult = (itemIds: string[]) => {
-    const firstItemId = itemIds.find(Boolean)
+    const normalizedItemIds = itemIds
+      .map(id => String(id).trim())
+      .filter(Boolean)
+    const firstItemId = normalizedItemIds.find(Boolean)
     if (!firstItemId) return
-    const matching = parsedScoreResults.find(result => result.itemId === firstItemId)
+    const matching = parsedScoreResults.find(result => String(result.itemId ?? '').trim() === firstItemId)
     if (matching) {
       onSelectScoreResult?.(matching.id)
     }
@@ -976,25 +979,35 @@ const DetailContent = React.memo(({
     )
 
     const itemIds: string[] = []
+    const fallbackFeedbackItemIds: string[] = []
     let missingCount = 0
 
     filteredClassifications.forEach(classification => {
-      if (!classification.item_id) {
+      const normalizedItemId = classification.item_id ? String(classification.item_id).trim() : null
+      const normalizedFeedbackItemId = classification.feedback_item_id ? String(classification.feedback_item_id).trim() : null
+
+      if (!normalizedItemId && !normalizedFeedbackItemId) {
         missingCount += 1
         return
       }
 
-      itemIds.push(classification.item_id)
+      if (normalizedItemId) {
+        itemIds.push(normalizedItemId)
+      } else if (normalizedFeedbackItemId) {
+        fallbackFeedbackItemIds.push(normalizedFeedbackItemId)
+      }
     })
 
+    const selectedIds = itemIds.length > 0 ? itemIds : fallbackFeedbackItemIds
+
     setSelectedTopicItemIds(null)
     setSelectedTopicLabel(null)
     setSelectedCategoryKey(categoryKey)
     setSelectedCategoryLabel(categoryLabel)
-    setSelectedCategoryItemIds(Array.from(new Set(itemIds)))
+    setSelectedCategoryItemIds(Array.from(new Set(selectedIds)))
     setCategoryMissingItemIdCount(missingCount)
     setSelectedPredictedActual({ predicted: null, actual: null })
-    selectFirstFilteredScoreResult(itemIds)
+    selectFirstFilteredScoreResult(selectedIds)
   }
 
   const clearCategoryFilter = () => {
@@ -1685,12 +1698,13 @@ const DetailContent = React.memo(({
                                 const summary = misclassificationCategoryBreakdown.categorySummaries?.[row.key]
                                 const summaryText = summary?.category_summary_text
                                 const patterns = Array.isArray(summary?.top_patterns) ? summary?.top_patterns : []
-                                const itemCount = summary?.item_count ?? 0
                                 const categoryClassifications = (misclassificationCategoryBreakdown.itemClassifications ?? [])
                                   .filter(classification => classification.primary_category === row.key)
+                                const itemCount = summary?.item_count ?? categoryClassifications.length ?? 0
                                 const itemsWithMissingId = categoryClassifications
                                   .filter(classification => !classification.item_id)
                                   .length
+                                if (itemCount <= 0) return null
                                 return (
                                   <div key={`category-summary-${row.key}`} className="rounded-md bg-muted/40 p-2 space-y-1.5">
                                     <div className="flex items-center justify-between gap-2 mb-1">
@@ -1701,7 +1715,7 @@ const DetailContent = React.memo(({
                                       <span className="text-xs text-muted-foreground shrink-0">{itemCount} item(s)</span>
                                     </div>
                                     <div className="text-xs text-foreground">
-                                      {summaryText || 'No items in this category for this run.'}
+                                      {summaryText || 'Summary unavailable for this category.'}
                                     </div>
                                     {patterns.length > 0 && (
                                       <div className="mt-1 text-xs text-muted-foreground">
@@ -2735,7 +2749,6 @@ ${categoryLines}${mechanicalLines}
           </div>
           {variant !== 'detail' && evaluationNotes && (
             <div className="mt-1">
-              <div className="mb-1 text-xs font-medium text-foreground">Note</div>
               <div className="prose prose-sm max-w-none text-muted-foreground prose-p:text-muted-foreground prose-strong:text-foreground prose-headings:text-muted-foreground prose-li:text-muted-foreground prose-code:text-foreground prose-pre:text-foreground prose-pre:bg-muted">
               <ReactMarkdown remarkPlugins={[remarkGfm, remarkBreaks]} components={{
                 p: ({children}) => <p className="mb-1 last:mb-0 text-sm">{children}</p>,
@@ -2912,6 +2925,7 @@ ${categoryLines}${mechanicalLines}
             )}
             {evaluationNotes && (
               <div className="mt-1">
+                <div className="mb-1 text-xs font-medium text-foreground">Note</div>
                 <div className="prose prose-sm max-w-none text-muted-foreground prose-p:text-muted-foreground prose-strong:text-foreground prose-headings:text-muted-foreground prose-li:text-muted-foreground prose-code:text-foreground prose-pre:text-foreground prose-pre:bg-muted">
                 <ReactMarkdown remarkPlugins={[remarkGfm, remarkBreaks]} components={{
                   p: ({children}) => <p className="mb-1 last:mb-0 text-sm">{children}</p>,
diff --git a/dashboard/components/EvaluationTaskScoreResults.tsx b/dashboard/components/EvaluationTaskScoreResults.tsx
index 0885b6add..48f644d9f 100644
--- a/dashboard/components/EvaluationTaskScoreResults.tsx
+++ b/dashboard/components/EvaluationTaskScoreResults.tsx
@@ -41,6 +41,33 @@ export function EvaluationTaskScoreResults({
   navigationControls,
   isLoading = false
 }: EvaluationTaskScoreResultsProps) {
+  const toNormalized = (value: unknown): string | null => {
+    if (value === null || value === undefined) return null
+    const normalized = String(value).trim()
+    return normalized.length > 0 ? normalized : null
+  }
+
+  const getResultFilterKeys = (result: ScoreResultData): string[] => {
+    const keys = new Set<string>()
+    const itemId = toNormalized(result.itemId)
+    if (itemId) keys.add(itemId)
+
+    const metadataItemId = toNormalized((result as any)?.metadata?.item_id)
+    if (metadataItemId) keys.add(metadataItemId)
+
+    const feedbackItemId = toNormalized((result as any)?.feedbackItem?.id)
+    if (feedbackItemId) keys.add(feedbackItemId)
+
+    if (Array.isArray(result.itemIdentifiers)) {
+      result.itemIdentifiers.forEach((identifier: any) => {
+        const value = toNormalized(identifier?.value)
+        if (value) keys.add(value)
+      })
+    }
+
+    return Array.from(keys)
+  }
+
   console.log('EvaluationTaskScoreResults render:', {
     resultCount: results.length,
     firstResult: results[0],
@@ -104,6 +131,10 @@ export function EvaluationTaskScoreResults({
       }
     });
 
+    const normalizedSelectedItemIds = selectedItemIds
+      ? new Set(selectedItemIds.map(toNormalized).filter((id): id is string => id !== null))
+      : null
+
     const filtered = results.filter(result => {
       if (filters.showCorrect !== null && result.metadata.correct !== filters.showCorrect) {
         return false
@@ -117,9 +148,10 @@ export function EvaluationTaskScoreResults({
         return false
       }
 
-      if (selectedItemIds && selectedItemIds.length > 0 &&
-          !selectedItemIds.includes(result.itemId ?? '')) {
-        return false
+      if (normalizedSelectedItemIds && normalizedSelectedItemIds.size > 0) {
+        const resultKeys = getResultFilterKeys(result)
+        const hasMatch = resultKeys.some(key => normalizedSelectedItemIds.has(key))
+        if (!hasMatch) return false
       }
 
       return true
@@ -264,4 +296,4 @@ export function EvaluationTaskScoreResults({
       </div>
     </div>
   )
-} 
\ No newline at end of file
+}
diff --git a/dashboard/components/ui/task-status.tsx b/dashboard/components/ui/task-status.tsx
index 0a5686dbd..bb99251b5 100644
--- a/dashboard/components/ui/task-status.tsx
+++ b/dashboard/components/ui/task-status.tsx
@@ -385,7 +385,10 @@ export const TaskStatus = React.memo(({
     return (
       <div className="[&>*+*]:mt-2">
         <StyleTag />
-        <div className="rounded-lg bg-muted/30 px-1 py-1 space-y-1 -mx-1">
+        <div className={cn(
+          "rounded-lg px-1 py-1 space-y-1 -mx-1",
+          isSelected ? "bg-foreground/10" : "bg-muted/70"
+        )}>
           {command && commandDisplay !== 'hide' && (
             <div 
               className={cn(
@@ -483,7 +486,7 @@ export const TaskStatus = React.memo(({
       <StyleTag />
       <div className={cn(
         "rounded-lg px-1 py-1 space-y-1 -mx-1 px-2",
-        variant === 'detail' ? "bg-gauge-background" : "bg-muted/30"
+        variant === 'detail' || isSelected ? "bg-foreground/10" : "bg-muted/70"
       )}>
         {command && commandDisplay !== 'hide' && (
           <div 

From 3e22ce0eb111d054805bcc50993a2d35f1944a52 Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 19:59:48 -0400
Subject: [PATCH 5/6] Expand evaluation category filter linkage

---
 dashboard/components/EvaluationTask.tsx       | 78 +++++++++++++++----
 .../components/EvaluationTaskScoreResults.tsx | 33 ++------
 .../EvaluationTask.category-filter.test.tsx   | 73 +++++++++++++++++
 3 files changed, 141 insertions(+), 43 deletions(-)

diff --git a/dashboard/components/EvaluationTask.tsx b/dashboard/components/EvaluationTask.tsx
index 7fc487052..6fa03b974 100644
--- a/dashboard/components/EvaluationTask.tsx
+++ b/dashboard/components/EvaluationTask.tsx
@@ -45,6 +45,35 @@ const parseJsonDeep = (value: unknown): unknown => {
   return current
 }
 
+const toNormalizedId = (value: unknown): string | null => { // stringify and trim; null when nothing usable remains
+  if (value === null || value === undefined) return null
+  const normalized = String(value).trim()
+  return normalized.length > 0 ? normalized : null // whitespace-only ids count as missing
+}
+
+const getScoreResultFilterKeys = (result: ScoreResultData): string[] => { // all distinct normalized ids found on one result
+  const keys = new Set<string>() // Set removes ids that repeat across fields
+  const directId = toNormalizedId(result.id)
+  if (directId) keys.add(directId)
+  const itemId = toNormalizedId(result.itemId)
+  if (itemId) keys.add(itemId)
+  const metadataItemId = toNormalizedId((result as any)?.metadata?.item_id) // cast: field is read outside the declared type
+  if (metadataItemId) keys.add(metadataItemId)
+  const feedbackItemId = toNormalizedId((result as any)?.feedbackItem?.id)
+  if (feedbackItemId) keys.add(feedbackItemId)
+  const metadataFeedbackItemId = toNormalizedId((result as any)?.metadata?.feedback_item_id)
+  if (metadataFeedbackItemId) keys.add(metadataFeedbackItemId)
+
+  if (Array.isArray(result.itemIdentifiers)) {
+    result.itemIdentifiers.forEach((identifier: any) => {
+      const value = toNormalizedId(identifier?.value) // only each identifier's `value` is used as a key
+      if (value) keys.add(value)
+    })
+  }
+
+  return Array.from(keys)
+}
+
 export interface EvaluationMetric {
   name: string
   value: number
@@ -217,6 +246,7 @@ type MisclassificationCategorySummary = {
   category_summary_text?: string
   top_patterns?: Array<{ pattern?: string; count?: number }>
   representative_evidence?: Array<{
+    score_result_id?: string
     feedback_item_id?: string
     item_id?: string
     source?: string
@@ -269,6 +299,7 @@ type MisclassificationAnalysis = {
   item_classifications_all?: Array<{
     topic_id?: number | string
     topic_label?: string
+    score_result_id?: string
     feedback_item_id?: string
     item_id?: string
     timestamp?: string
@@ -941,13 +972,16 @@ const DetailContent = React.memo(({
     onSelectScoreResult?.(result.id)
   }
 
-  const selectFirstFilteredScoreResult = (itemIds: string[]) => {
-    const normalizedItemIds = itemIds
-      .map(id => String(id).trim())
-      .filter(Boolean)
-    const firstItemId = normalizedItemIds.find(Boolean)
-    if (!firstItemId) return
-    const matching = parsedScoreResults.find(result => String(result.itemId ?? '').trim() === firstItemId)
+  const selectFirstFilteredScoreResult = (filterIds: string[]) => {
+    const normalizedFilterIds = new Set(
+      filterIds
+        .map(id => toNormalizedId(id))
+        .filter((id): id is string => id !== null)
+    )
+    if (normalizedFilterIds.size === 0) return
+    const matching = parsedScoreResults.find(result =>
+      getScoreResultFilterKeys(result).some(key => normalizedFilterIds.has(key))
+    )
     if (matching) {
       onSelectScoreResult?.(matching.id)
     }
@@ -978,27 +1012,37 @@ const DetailContent = React.memo(({
       classification => classification.primary_category === categoryKey
     )
 
+    const scoreResultIds: string[] = []
     const itemIds: string[] = []
     const fallbackFeedbackItemIds: string[] = []
     let missingCount = 0
 
     filteredClassifications.forEach(classification => {
-      const normalizedItemId = classification.item_id ? String(classification.item_id).trim() : null
-      const normalizedFeedbackItemId = classification.feedback_item_id ? String(classification.feedback_item_id).trim() : null
+      const normalizedScoreResultId = toNormalizedId(classification.score_result_id)
+      const normalizedItemId = toNormalizedId(classification.item_id)
+      const normalizedFeedbackItemId = toNormalizedId(classification.feedback_item_id)
 
-      if (!normalizedItemId && !normalizedFeedbackItemId) {
+      if (!normalizedScoreResultId && !normalizedItemId && !normalizedFeedbackItemId) {
         missingCount += 1
         return
       }
 
+      if (normalizedScoreResultId) {
+        scoreResultIds.push(normalizedScoreResultId)
+      }
       if (normalizedItemId) {
         itemIds.push(normalizedItemId)
-      } else if (normalizedFeedbackItemId) {
+      }
+      if (normalizedFeedbackItemId) {
         fallbackFeedbackItemIds.push(normalizedFeedbackItemId)
       }
     })
 
-    const selectedIds = itemIds.length > 0 ? itemIds : fallbackFeedbackItemIds
+    const selectedIds = scoreResultIds.length > 0
+      ? scoreResultIds
+      : itemIds.length > 0
+        ? itemIds
+        : fallbackFeedbackItemIds
 
     setSelectedTopicItemIds(null)
     setSelectedTopicLabel(null)
@@ -1702,7 +1746,11 @@ const DetailContent = React.memo(({
                                   .filter(classification => classification.primary_category === row.key)
                                 const itemCount = summary?.item_count ?? categoryClassifications.length ?? 0
                                 const itemsWithMissingId = categoryClassifications
-                                  .filter(classification => !classification.item_id)
+                                  .filter(classification => (
+                                    !toNormalizedId(classification.item_id)
+                                    && !toNormalizedId(classification.feedback_item_id)
+                                    && !toNormalizedId(classification.score_result_id)
+                                  ))
                                   .length
                                 if (itemCount <= 0) return null
                                 return (
@@ -1741,14 +1789,14 @@ const DetailContent = React.memo(({
                                         </Button>
                                         {selectedCategoryKey === row.key && categoryMissingItemIdCount > 0 && (
                                           <span className="text-[11px] text-muted-foreground">
-                                            {categoryMissingItemIdCount} item(s) missing item_id not shown
+                                            {categoryMissingItemIdCount} item(s) missing linkage ids not shown
                                           </span>
                                         )}
                                       </div>
                                     )}
                                     {itemsWithMissingId > 0 && selectedCategoryKey !== row.key && (
                                       <div className="text-[11px] text-muted-foreground">
-                                        {itemsWithMissingId} item(s) in this category are missing item_id and cannot appear in score results.
+                                        {itemsWithMissingId} item(s) in this category are missing linkage ids and cannot appear in score results.
                                       </div>
                                     )}
                                   </div>
diff --git a/dashboard/components/EvaluationTaskScoreResults.tsx b/dashboard/components/EvaluationTaskScoreResults.tsx
index 48f644d9f..d21af5e5a 100644
--- a/dashboard/components/EvaluationTaskScoreResults.tsx
+++ b/dashboard/components/EvaluationTaskScoreResults.tsx
@@ -49,6 +49,8 @@ export function EvaluationTaskScoreResults({
 
   const getResultFilterKeys = (result: ScoreResultData): string[] => {
     const keys = new Set<string>()
+    const resultId = toNormalized(result.id)
+    if (resultId) keys.add(resultId)
     const itemId = toNormalized(result.itemId)
     if (itemId) keys.add(itemId)
 
@@ -57,6 +59,8 @@ export function EvaluationTaskScoreResults({
 
     const feedbackItemId = toNormalized((result as any)?.feedbackItem?.id)
     if (feedbackItemId) keys.add(feedbackItemId)
+    const metadataFeedbackItemId = toNormalized((result as any)?.metadata?.feedback_item_id)
+    if (metadataFeedbackItemId) keys.add(metadataFeedbackItemId)
 
     if (Array.isArray(result.itemIdentifiers)) {
       result.itemIdentifiers.forEach((identifier: any) => {
@@ -68,17 +72,6 @@ export function EvaluationTaskScoreResults({
     return Array.from(keys)
   }
 
-  console.log('EvaluationTaskScoreResults render:', {
-    resultCount: results.length,
-    firstResult: results[0],
-    lastResult: results[results.length - 1],
-    accuracy,
-    selectedPredictedValue,
-    selectedActualValue,
-    hasSelectedResult: !!selectedScoreResult,
-    selectedScoreResultId: selectedScoreResult?.id
-  });
-
   const [filters, setFilters] = useState<FilterState>({
     showCorrect: null,
     predictedValue: null,
@@ -122,15 +115,6 @@ export function EvaluationTaskScoreResults({
   }, [results])
 
   const filteredResults = useMemo(() => {
-    console.log('Filtering score results:', {
-      totalResults: results.length,
-      filters: {
-        showCorrect: filters.showCorrect,
-        predictedValue: filters.predictedValue,
-        actualValue: filters.actualValue
-      }
-    });
-
     const normalizedSelectedItemIds = selectedItemIds
       ? new Set(selectedItemIds.map(toNormalized).filter((id): id is string => id !== null))
       : null
@@ -148,7 +132,7 @@ export function EvaluationTaskScoreResults({
         return false
       }
 
-      if (normalizedSelectedItemIds && normalizedSelectedItemIds.size > 0) {
+      if (normalizedSelectedItemIds) {
         const resultKeys = getResultFilterKeys(result)
         const hasMatch = resultKeys.some(key => normalizedSelectedItemIds.has(key))
         if (!hasMatch) return false
@@ -157,13 +141,6 @@ export function EvaluationTaskScoreResults({
       return true
     });
 
-    console.log('Filtered results:', {
-      inputCount: results.length,
-      filteredCount: filtered.length,
-      firstFiltered: filtered[0],
-      lastFiltered: filtered[filtered.length - 1]
-    });
-
     return filtered;
   }, [results, filters, selectedItemIds]);
 
diff --git a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
index a3c17aa53..d7d69cb73 100644
--- a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
+++ b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
@@ -128,6 +128,61 @@ const makeTask = () => {
   } as any
 }
 
+const makeTaskWithScoreResultIdOnly = () => { // Fixture: base task whose single classification is linked only by score_result_id.
+  const task = makeTask()
+  task.data.parameters = JSON.stringify({ // replaces task.data.parameters with a serialized root-cause payload
+    root_cause: {
+      misclassification_analysis: {
+        category_totals: {
+          information_gap: 1,
+        },
+        item_classifications_all: [
+          {
+            score_result_id: 'sr-1', // the only linkage id on this row; item_id and feedback_item_id are omitted
+            primary_category: 'information_gap',
+            confidence: 'medium',
+            rationale_full: 'Matched only by score result id.',
+          },
+        ],
+        category_summaries: {
+          information_gap: {
+            category_summary_text: 'Score result id only linkage.',
+            item_count: 1,
+          },
+        },
+      },
+    },
+  })
+  return task
+}
+
+const makeTaskWithMissingCategoryLinkage = () => { // Fixture: base task whose single classification carries no linkage ids at all.
+  const task = makeTask()
+  task.data.parameters = JSON.stringify({ // replaces task.data.parameters with a serialized root-cause payload
+    root_cause: {
+      misclassification_analysis: {
+        category_totals: {
+          information_gap: 1,
+        },
+        item_classifications_all: [
+          { // deliberately has no score_result_id, item_id, or feedback_item_id
+            primary_category: 'information_gap',
+            confidence: 'medium',
+            rationale_full: 'No linkage ids on this row.',
+          },
+        ],
+        category_summaries: {
+          information_gap: {
+            category_summary_text: 'No linkage ids available.',
+            item_count: 1,
+          },
+        },
+      },
+    },
+  })
+  return task
+}
+
 describe('EvaluationTask category summary drill-down', () => {
   test('applies category filter and auto-selects first matching score result', async () => {
     const onSelectScoreResult = jest.fn()
@@ -147,6 +202,24 @@ describe('EvaluationTask category summary drill-down', () => {
     })
   })
 
+  test('filters by score_result_id linkage when item_id is unavailable', async () => { // classification row has score_result_id only
+    const onSelectScoreResult = jest.fn()
+    render(<EvaluationTask variant="detail" task={makeTaskWithScoreResultIdOnly()} onSelectScoreResult={onSelectScoreResult} />)
+
+    fireEvent.click(screen.getByRole('button', { name: /View items \(1\)/i })) // open the category drill-down
+
+    expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('["sr-1"]') // rendered filter ids should contain just the score result id
+    expect(onSelectScoreResult).toHaveBeenCalledWith('sr-1') // and that result is the one selected
+  })
+
+  test('applies empty category filter when linkage ids are missing', async () => { // classification row has no linkage ids
+    render(<EvaluationTask variant="detail" task={makeTaskWithMissingCategoryLinkage()} />)
+
+    fireEvent.click(screen.getByRole('button', { name: /View items \(1\)/i })) // open the category drill-down
+
+    expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('[]') // an empty id list is rendered for the filter
+  })
+
   test('renders score version and procedure related-resource cards in detail view', async () => {
     const { container } = render(<EvaluationTask variant="detail" task={makeTask()} />)
 

From 15dfe3ee1200b8159f927999fbfbdc8409eb6e75 Mon Sep 17 00:00:00 2001
From: Ryan Alyn Porter <rap@endymion.com>
Date: Wed, 29 Apr 2026 20:11:50 -0400
Subject: [PATCH 6/6] Fix evaluation category View items filtering

---
 dashboard/components/EvaluationTask.tsx       | 57 +++++++++++--------
 .../EvaluationTask.category-filter.test.tsx   |  2 +-
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/dashboard/components/EvaluationTask.tsx b/dashboard/components/EvaluationTask.tsx
index 4db10e74c..af96ea203 100644
--- a/dashboard/components/EvaluationTask.tsx
+++ b/dashboard/components/EvaluationTask.tsx
@@ -74,6 +74,19 @@ const getScoreResultFilterKeys = (result: ScoreResultData): string[] => {
   return Array.from(keys)
 }
 
+// Returns the classification's linkage ids (score_result_id, item_id, feedback_item_id, in that order)
+// after passing each through toNormalizedId; entries that normalize to null are dropped.
+const collectClassificationLinkageIds = (classification: {
+  score_result_id?: string
+  item_id?: string
+  feedback_item_id?: string
+}): string[] =>
+  [
+    toNormalizedId(classification.score_result_id),
+    toNormalizedId(classification.item_id),
+    toNormalizedId(classification.feedback_item_id),
+  ].filter((id): id is string => id !== null)
+
 export interface EvaluationMetric {
   name: string
   value: number
@@ -1012,46 +1025,42 @@ const DetailContent = React.memo(({
       classification => classification.primary_category === categoryKey
     )
 
-    const scoreResultIds: string[] = []
-    const itemIds: string[] = []
-    const fallbackFeedbackItemIds: string[] = []
+    const linkageIds: string[] = []
     let missingCount = 0
 
     filteredClassifications.forEach(classification => {
-      const normalizedScoreResultId = toNormalizedId(classification.score_result_id)
-      const normalizedItemId = toNormalizedId(classification.item_id)
-      const normalizedFeedbackItemId = toNormalizedId(classification.feedback_item_id)
-
-      if (!normalizedScoreResultId && !normalizedItemId && !normalizedFeedbackItemId) {
+      const classificationIds = collectClassificationLinkageIds(classification)
+      if (classificationIds.length === 0) {
         missingCount += 1
         return
       }
+      linkageIds.push(...classificationIds)
+    })
 
-      if (normalizedScoreResultId) {
-        scoreResultIds.push(normalizedScoreResultId)
-      }
-      if (normalizedItemId) {
-        itemIds.push(normalizedItemId)
-      }
-      if (normalizedFeedbackItemId) {
-        fallbackFeedbackItemIds.push(normalizedFeedbackItemId)
-      }
+    const summaryEvidence = misclassificationCategoryBreakdown.categorySummaries?.[categoryKey]?.representative_evidence ?? []
+    summaryEvidence.forEach(evidence => {
+      linkageIds.push(
+        ...[
+          toNormalizedId(evidence.score_result_id),
+          toNormalizedId(evidence.item_id),
+          toNormalizedId(evidence.feedback_item_id),
+        ].filter((id): id is string => id !== null)
+      )
     })
 
-    const selectedIds = scoreResultIds.length > 0
-      ? scoreResultIds
-      : itemIds.length > 0
-        ? itemIds
-        : fallbackFeedbackItemIds
+    const normalizedLinkageIds = new Set(linkageIds.map(id => toNormalizedId(id)).filter((id): id is string => id !== null))
+    const selectedScoreResultIds = parsedScoreResults
+      .filter(result => getScoreResultFilterKeys(result).some(key => normalizedLinkageIds.has(key)))
+      .map(result => String(result.id).trim())
 
     setSelectedTopicItemIds(null)
     setSelectedTopicLabel(null)
     setSelectedCategoryKey(categoryKey)
     setSelectedCategoryLabel(categoryLabel)
-    setSelectedCategoryItemIds(Array.from(new Set(selectedIds)))
+    setSelectedCategoryItemIds(Array.from(new Set(selectedScoreResultIds)))
     setCategoryMissingItemIdCount(missingCount)
     setSelectedPredictedActual({ predicted: null, actual: null })
-    selectFirstFilteredScoreResult(selectedIds)
+    selectFirstFilteredScoreResult(selectedScoreResultIds)
   }
 
   const clearCategoryFilter = () => {
diff --git a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
index d7d69cb73..c4963a9e6 100644
--- a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
+++ b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx
@@ -191,7 +191,7 @@ describe('EvaluationTask category summary drill-down', () => {
     fireEvent.click(screen.getByRole('button', { name: /View items \(1\)/i }))
 
     expect(screen.getByText('Filtered by category: Information gap')).toBeInTheDocument()
-    expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('["item-1"]')
+    expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('["sr-1"]')
     expect(onSelectScoreResult).toHaveBeenCalledWith('sr-1')
 
     fireEvent.click(screen.getByRole('button', { name: /Clear category filter/i }))