diff --git a/dashboard/components/EvaluationTask.tsx b/dashboard/components/EvaluationTask.tsx index af96ea203..d6cabaa7f 100644 --- a/dashboard/components/EvaluationTask.tsx +++ b/dashboard/components/EvaluationTask.tsx @@ -110,6 +110,8 @@ interface ScoreResult { correct: boolean human_explanation: string | null text: string | null + feedback_item_id?: string | null + item_id?: string | null } trace: any | null itemId: string | null @@ -119,7 +121,14 @@ interface ScoreResult { url?: string }> | null feedbackItem: { + id?: string | null editCommentValue: string | null + initialAnswerValue?: string | null + initialCommentValue?: string | null + finalAnswerValue?: string | null + editorName?: string | null + editedAt?: string | null + createdAt?: string | null } | null } @@ -836,6 +845,8 @@ function parseScoreResult(result: any): ParsedScoreResult { const correct = Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct); const humanExplanation = scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? null; const text = scoreResult?.metadata?.text ?? parsedMetadata.text ?? null; + const feedbackItemId = scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? null; + const metadataItemId = scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? null; const itemId = result.itemId || parsedMetadata.item_id?.toString() || null; // Parse feedbackItem data @@ -843,6 +854,7 @@ function parseScoreResult(result: any): ParsedScoreResult { (sr: any) => sr.type === 'prediction' ) || null; const feedbackItem = result.feedbackItem ? { + id: result.feedbackItem.id || null, editCommentValue: result.feedbackItem.editCommentValue || null, initialAnswerValue: result.feedbackItem.initialAnswerValue || originalScoreResult?.value || null, initialCommentValue: result.feedbackItem.initialCommentValue || originalScoreResult?.explanation || null, @@ -863,7 +875,9 @@ function parseScoreResult(result: any): ParsedScoreResult { human_label: humanLabel, correct, human_explanation: humanExplanation, - text + text, + feedback_item_id: feedbackItemId, + item_id: metadataItemId }, trace, itemId, @@ -901,6 +915,7 @@ const DetailContent = React.memo(({ const [containerWidth, setContainerWidth] = useState(0) const containerRef = useRef(null) + const scoreResultsPanelRef = useRef(null) const [selectedPredictedActual, setSelectedPredictedActual] = useState<{ predicted: string | null actual: string | null @@ -1014,6 +1029,11 @@ const DetailContent = React.memo(({ setSelectedPredictedActual({ predicted: null, actual: null }) selectFirstFilteredScoreResult(itemIds) } + requestAnimationFrame(() => { + if (scoreResultsPanelRef.current && typeof scoreResultsPanelRef.current.scrollIntoView === 'function') { + scoreResultsPanelRef.current.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }) } const handleCategoryFilter = ( @@ -1052,6 +1072,12 @@ const DetailContent = React.memo(({ const selectedScoreResultIds = parsedScoreResults .filter(result => getScoreResultFilterKeys(result).some(key => normalizedLinkageIds.has(key))) .map(result => String(result.id).trim()) + if (filteredClassifications.length > 0 && selectedScoreResultIds.length === 0) { + toast({ + title: 'No linked score results', + description: 'This category has no score-result linkage in the current payload.', + }) + } setSelectedTopicItemIds(null) setSelectedTopicLabel(null) @@ -1061,6 +1087,11 @@ const DetailContent = React.memo(({ setCategoryMissingItemIdCount(missingCount) setSelectedPredictedActual({ predicted: null, actual: null }) selectFirstFilteredScoreResult(selectedScoreResultIds) + requestAnimationFrame(() => { + if (scoreResultsPanelRef.current && typeof scoreResultsPanelRef.current.scrollIntoView === 'function') { + scoreResultsPanelRef.current.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }) } const clearCategoryFilter = () => { @@ -1955,7 +1986,7 @@ const DetailContent = React.memo(({ {/* Show score results panel during loading or when results exist, hidden only in narrow detail mode */} {(!showScoreResultInNarrowView) && (isResultsLoading || showResultsList) && ( -
+
{activeFilterChipLabel && (
diff --git a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx index c4963a9e6..95469b63f 100644 --- a/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx +++ b/dashboard/components/__tests__/EvaluationTask.category-filter.test.tsx @@ -183,7 +183,91 @@ const makeTaskWithMissingCategoryLinkage = () => { return task } +const makeTaskWithFeedbackItemLinkedCategories = () => { + const scoreConfigurationFeedbackIds = Array.from({ length: 12 }, (_, i) => `fb-sc-${i + 1}`) + const informationGapFeedbackIds = Array.from({ length: 3 }, (_, i) => `fb-ig-${i + 1}`) + + const scoreResults = [ + ...scoreConfigurationFeedbackIds.map((feedbackId, i) => ({ + id: `sr-sc-${i + 1}`, + value: 'No', + confidence: 0.8, + explanation: `Score config result ${i + 1}`, + metadata: { + human_label: 'Yes', + correct: false, + human_explanation: null, + text: `score config ${i + 1}`, + feedback_item_id: feedbackId, + }, + trace: null, + itemId: null, + itemIdentifiers: [], + feedbackItem: { id: feedbackId, editCommentValue: null }, + })), + ...informationGapFeedbackIds.map((feedbackId, i) => ({ + id: `sr-ig-${i + 1}`, + value: 'No', + confidence: 0.7, + explanation: `Information gap result ${i + 1}`, + metadata: { + human_label: 'Yes', + correct: false, + human_explanation: null, + text: `information gap ${i + 1}`, + feedback_item_id: feedbackId, + }, + trace: null, + itemId: null, + itemIdentifiers: [], + feedbackItem: { id: feedbackId, editCommentValue: null }, + })), + ] + + const task = makeTask() + task.data.scoreResults = scoreResults + task.data.totalItems = scoreResults.length + task.data.processedItems = scoreResults.length + task.data.parameters = JSON.stringify({ + root_cause: { + misclassification_analysis: { + category_totals: { + score_configuration_problem: 12, + information_gap: 3, + }, + item_classifications_all: [ + ...scoreConfigurationFeedbackIds.map(feedbackId => ({ + feedback_item_id: feedbackId, + primary_category: 'score_configuration_problem', + confidence: 'high', + rationale_full: `Config issue for ${feedbackId}.`, + })), + ...informationGapFeedbackIds.map(feedbackId => ({ + feedback_item_id: feedbackId, + primary_category: 'information_gap', + confidence: 'medium', + rationale_full: `Info gap for ${feedbackId}.`, + })), + ], + category_summaries: { + score_configuration_problem: { + category_summary_text: 'Prompt/config causes most errors.', + item_count: 12, + }, + information_gap: { + category_summary_text: 'Missing context causes a smaller set of errors.', + item_count: 3, + }, + }, + }, + }, + }) + return task +} + describe('EvaluationTask category summary drill-down', () => { + const readSelectedItemIds = () => JSON.parse(screen.getByTestId('selected-item-ids').textContent || 'null') + test('applies category filter and auto-selects first matching score result', async () => { const onSelectScoreResult = jest.fn() render() @@ -220,6 +304,34 @@ describe('EvaluationTask category summary drill-down', () => { expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('[]') }) + test('filters full 12-item category when linkage is primarily feedback_item_id', async () => { + render() + + fireEvent.click(screen.getByRole('button', { name: /View items \(12\)/i })) + + const selected = readSelectedItemIds() + expect(Array.isArray(selected)).toBe(true) + expect(selected).toHaveLength(12) + expect(selected).toEqual(expect.arrayContaining(['sr-sc-1', 'sr-sc-12'])) + expect(selected).not.toEqual(expect.arrayContaining(['sr-ig-1'])) + }) + + test('keeps category filters isolated between 12-item and 3-item categories', async () => { + render() + + fireEvent.click(screen.getByRole('button', { name: /View items \(12\)/i })) + const firstSelection = readSelectedItemIds() + + fireEvent.click(screen.getByRole('button', { name: /View items \(3\)/i })) + const secondSelection = readSelectedItemIds() + + expect(firstSelection).toHaveLength(12) + expect(secondSelection).toHaveLength(3) + expect(firstSelection).not.toEqual(secondSelection) + expect(secondSelection).toEqual(expect.arrayContaining(['sr-ig-1', 'sr-ig-3'])) + expect(secondSelection).not.toEqual(expect.arrayContaining(['sr-sc-1'])) + }) + test('renders score version and procedure related-resource cards in detail view', async () => { const { container } = render() diff --git a/dashboard/components/evaluations-dashboard.tsx b/dashboard/components/evaluations-dashboard.tsx index f3db7beb0..4cb9561b7 100644 --- a/dashboard/components/evaluations-dashboard.tsx +++ b/dashboard/components/evaluations-dashboard.tsx @@ -631,7 +631,9 @@ export default function EvaluationsDashboard({ human_label: scoreResult?.metadata?.human_label ?? parsedMetadata.human_label ?? (typeof item.metadata === 'object' ? (item.metadata as any).human_label : null) ?? null, correct: Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct ?? (typeof item.metadata === 'object' ? (item.metadata as any).correct : null)), human_explanation: scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? (typeof item.metadata === 'object' ? (item.metadata as any).human_explanation : null) ?? null, - text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? (typeof item.metadata === 'object' ? (item.metadata as any).text : null) ?? null + text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? (typeof item.metadata === 'object' ? (item.metadata as any).text : null) ?? null, + feedback_item_id: scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? (typeof item.metadata === 'object' ? (item.metadata as any).feedback_item_id : null) ?? null, + item_id: scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? (typeof item.metadata === 'object' ? (item.metadata as any).item_id : null) ?? null }, itemId: item.itemId ?? parsedMetadata.item_id?.toString() ?? null, createdAt: item.createdAt || new Date().toISOString(), diff --git a/dashboard/utils/data-operations.test.ts b/dashboard/utils/data-operations.test.ts index 9704f45a2..1ada44d17 100644 --- a/dashboard/utils/data-operations.test.ts +++ b/dashboard/utils/data-operations.test.ts @@ -199,7 +199,9 @@ describe('transformEvaluation', () => { human_label: 'correct_answer', correct: true, human_explanation: 'This is the right answer', - text: 'Sample input text' + text: 'Sample input text', + feedback_item_id: null, + item_id: null }); }); @@ -448,7 +450,9 @@ describe('transformEvaluation', () => { human_label: 'correct', correct: true, human_explanation: null, - text: null + text: null, + feedback_item_id: null, + item_id: null }); // Verify itemIdentifiers are correctly extracted and preserved diff --git a/dashboard/utils/data-operations.ts b/dashboard/utils/data-operations.ts index 6bb0b0eba..3f4a88635 100644 --- a/dashboard/utils/data-operations.ts +++ b/dashboard/utils/data-operations.ts @@ -68,6 +68,7 @@ export type ProcessedEvaluation = Omit | null; + feedbackItem?: any | null; createdAt: string; }>; }; @@ -1037,7 +1038,9 @@ export function transformEvaluation(evaluation: BaseEvaluation): ProcessedEvalua human_label: scoreResult?.metadata?.human_label ?? parsedMetadata.human_label ?? null, correct: Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct), human_explanation: scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? null, - text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? null + text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? null, + feedback_item_id: scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? null, + item_id: scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? null }; return {