Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions dashboard/components/EvaluationTask.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ interface ScoreResult {
correct: boolean
human_explanation: string | null
text: string | null
feedback_item_id?: string | null
item_id?: string | null
}
trace: any | null
itemId: string | null
Expand All @@ -119,7 +121,14 @@ interface ScoreResult {
url?: string
}> | null
feedbackItem: {
id?: string | null
editCommentValue: string | null
initialAnswerValue?: string | null
initialCommentValue?: string | null
finalAnswerValue?: string | null
editorName?: string | null
editedAt?: string | null
createdAt?: string | null
} | null
}

Expand Down Expand Up @@ -836,13 +845,16 @@ function parseScoreResult(result: any): ParsedScoreResult {
const correct = Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct);
const humanExplanation = scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? null;
const text = scoreResult?.metadata?.text ?? parsedMetadata.text ?? null;
const feedbackItemId = scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? null;
const metadataItemId = scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? null;
const itemId = result.itemId || parsedMetadata.item_id?.toString() || null;

// Parse feedbackItem data
const originalScoreResult = result.feedbackItem?.scoreResults?.items?.find(
(sr: any) => sr.type === 'prediction'
) || null;
const feedbackItem = result.feedbackItem ? {
id: result.feedbackItem.id || null,
editCommentValue: result.feedbackItem.editCommentValue || null,
initialAnswerValue: result.feedbackItem.initialAnswerValue || originalScoreResult?.value || null,
initialCommentValue: result.feedbackItem.initialCommentValue || originalScoreResult?.explanation || null,
Expand All @@ -863,7 +875,9 @@ function parseScoreResult(result: any): ParsedScoreResult {
human_label: humanLabel,
correct,
human_explanation: humanExplanation,
text
text,
feedback_item_id: feedbackItemId,
item_id: metadataItemId
},
trace,
itemId,
Expand Down Expand Up @@ -901,6 +915,7 @@ const DetailContent = React.memo(({

const [containerWidth, setContainerWidth] = useState(0)
const containerRef = useRef<HTMLDivElement>(null)
const scoreResultsPanelRef = useRef<HTMLDivElement>(null)
const [selectedPredictedActual, setSelectedPredictedActual] = useState<{
predicted: string | null
actual: string | null
Expand Down Expand Up @@ -1014,6 +1029,11 @@ const DetailContent = React.memo(({
setSelectedPredictedActual({ predicted: null, actual: null })
selectFirstFilteredScoreResult(itemIds)
}
requestAnimationFrame(() => {
if (scoreResultsPanelRef.current && typeof scoreResultsPanelRef.current.scrollIntoView === 'function') {
scoreResultsPanelRef.current.scrollIntoView({ behavior: 'smooth', block: 'start' })
}
})
}

const handleCategoryFilter = (
Expand Down Expand Up @@ -1052,6 +1072,12 @@ const DetailContent = React.memo(({
const selectedScoreResultIds = parsedScoreResults
.filter(result => getScoreResultFilterKeys(result).some(key => normalizedLinkageIds.has(key)))
.map(result => String(result.id).trim())
if (filteredClassifications.length > 0 && selectedScoreResultIds.length === 0) {
toast({
title: 'No linked score results',
description: 'This category has no score-result linkage in the current payload.',
})
}

setSelectedTopicItemIds(null)
setSelectedTopicLabel(null)
Expand All @@ -1061,6 +1087,11 @@ const DetailContent = React.memo(({
setCategoryMissingItemIdCount(missingCount)
setSelectedPredictedActual({ predicted: null, actual: null })
selectFirstFilteredScoreResult(selectedScoreResultIds)
requestAnimationFrame(() => {
if (scoreResultsPanelRef.current && typeof scoreResultsPanelRef.current.scrollIntoView === 'function') {
scoreResultsPanelRef.current.scrollIntoView({ behavior: 'smooth', block: 'start' })
}
})
}

const clearCategoryFilter = () => {
Expand Down Expand Up @@ -1955,7 +1986,7 @@ const DetailContent = React.memo(({

{/* Show score results panel during loading or when results exist, hidden only in narrow detail mode */}
{(!showScoreResultInNarrowView) && (isResultsLoading || showResultsList) && (
<div className={`w-full ${showAsColumns ? 'h-full' : 'h-[500px] mt-6'} flex flex-col overflow-hidden`}>
<div ref={scoreResultsPanelRef} className={`w-full ${showAsColumns ? 'h-full' : 'h-[500px] mt-6'} flex flex-col overflow-hidden`}>
{activeFilterChipLabel && (
<div className="mb-2">
<span className="inline-flex items-center rounded-full bg-muted px-2 py-0.5 text-xs text-foreground">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,91 @@ const makeTaskWithMissingCategoryLinkage = () => {
return task
}

/**
 * Builds a task fixture with two root-cause categories whose item
 * classifications reference score results by `feedback_item_id`:
 * 12 "score_configuration_problem" items and 3 "information_gap" items.
 * Every score result has `itemId: null` and empty `itemIdentifiers`, so the
 * feedback item id is the only linkage the fixture provides.
 */
const makeTaskWithFeedbackItemLinkedCategories = () => {
  // Produces `${prefix}-1` … `${prefix}-${count}`.
  const numberedIds = (prefix: string, count: number) =>
    Array.from({ length: count }, (_, index) => `${prefix}-${index + 1}`)

  const configFeedbackIds = numberedIds('fb-sc', 12)
  const gapFeedbackIds = numberedIds('fb-ig', 3)

  // One incorrect prediction ('No' vs. human label 'Yes') tied to a feedback item.
  const toScoreResult = (
    feedbackId: string,
    ordinal: number,
    variant: { idPrefix: string; confidence: number; explanationLabel: string; textLabel: string }
  ) => ({
    id: `${variant.idPrefix}-${ordinal}`,
    value: 'No',
    confidence: variant.confidence,
    explanation: `${variant.explanationLabel} ${ordinal}`,
    metadata: {
      human_label: 'Yes',
      correct: false,
      human_explanation: null,
      text: `${variant.textLabel} ${ordinal}`,
      feedback_item_id: feedbackId,
    },
    trace: null,
    itemId: null,
    itemIdentifiers: [],
    feedbackItem: { id: feedbackId, editCommentValue: null },
  })

  // One entry of `item_classifications_all` for the given feedback item.
  const toClassification = (
    feedbackId: string,
    category: string,
    confidence: string,
    rationaleLead: string
  ) => ({
    feedback_item_id: feedbackId,
    primary_category: category,
    confidence,
    rationale_full: `${rationaleLead} for ${feedbackId}.`,
  })

  const configResults = configFeedbackIds.map((feedbackId, index) =>
    toScoreResult(feedbackId, index + 1, {
      idPrefix: 'sr-sc',
      confidence: 0.8,
      explanationLabel: 'Score config result',
      textLabel: 'score config',
    })
  )
  const gapResults = gapFeedbackIds.map((feedbackId, index) =>
    toScoreResult(feedbackId, index + 1, {
      idPrefix: 'sr-ig',
      confidence: 0.7,
      explanationLabel: 'Information gap result',
      textLabel: 'information gap',
    })
  )
  const scoreResults = [...configResults, ...gapResults]

  const configClassifications = configFeedbackIds.map(feedbackId =>
    toClassification(feedbackId, 'score_configuration_problem', 'high', 'Config issue')
  )
  const gapClassifications = gapFeedbackIds.map(feedbackId =>
    toClassification(feedbackId, 'information_gap', 'medium', 'Info gap')
  )

  // Key order mirrors the serialized payload: totals, per-item classifications, summaries.
  const parameters = {
    root_cause: {
      misclassification_analysis: {
        category_totals: {
          score_configuration_problem: 12,
          information_gap: 3,
        },
        item_classifications_all: [...configClassifications, ...gapClassifications],
        category_summaries: {
          score_configuration_problem: {
            category_summary_text: 'Prompt/config causes most errors.',
            item_count: 12,
          },
          information_gap: {
            category_summary_text: 'Missing context causes a smaller set of errors.',
            item_count: 3,
          },
        },
      },
    },
  }

  const task = makeTask()
  task.data.scoreResults = scoreResults
  task.data.totalItems = scoreResults.length
  task.data.processedItems = scoreResults.length
  task.data.parameters = JSON.stringify(parameters)
  return task
}

describe('EvaluationTask category summary drill-down', () => {
// Parses the JSON rendered in the 'selected-item-ids' test element; empty or missing text parses as null.
const readSelectedItemIds = () => {
  const renderedText = screen.getByTestId('selected-item-ids').textContent
  return JSON.parse(renderedText || 'null')
}

test('applies category filter and auto-selects first matching score result', async () => {
const onSelectScoreResult = jest.fn()
render(<EvaluationTask variant="detail" task={makeTask()} onSelectScoreResult={onSelectScoreResult} />)
Expand Down Expand Up @@ -220,6 +304,34 @@ describe('EvaluationTask category summary drill-down', () => {
expect(screen.getByTestId('selected-item-ids')).toHaveTextContent('[]')
})

// The fixture's score results carry no itemId, so the 12-item category can only
// be resolved through feedback_item_id; the selection must hold exactly those 12.
test('filters full 12-item category when linkage is primarily feedback_item_id', async () => {
  const task = makeTaskWithFeedbackItemLinkedCategories()
  render(<EvaluationTask variant="detail" task={task} />)

  const viewTwelveItemsButton = screen.getByRole('button', { name: /View items \(12\)/i })
  fireEvent.click(viewTwelveItemsButton)

  const selectedIds = readSelectedItemIds()
  expect(Array.isArray(selectedIds)).toBe(true)
  expect(selectedIds).toHaveLength(12)
  expect(selectedIds).toEqual(expect.arrayContaining(['sr-sc-1', 'sr-sc-12']))
  // Results belonging to the other category must not leak into this selection.
  expect(selectedIds).not.toEqual(expect.arrayContaining(['sr-ig-1']))
})

// Switching from the 12-item category to the 3-item category must replace the
// selection rather than merge it with the previous one.
test('keeps category filters isolated between 12-item and 3-item categories', async () => {
  const task = makeTaskWithFeedbackItemLinkedCategories()
  render(<EvaluationTask variant="detail" task={task} />)

  // Capture the selection after each click, before the next click changes it.
  const viewTwelveItemsButton = screen.getByRole('button', { name: /View items \(12\)/i })
  fireEvent.click(viewTwelveItemsButton)
  const twelveItemSelection = readSelectedItemIds()

  const viewThreeItemsButton = screen.getByRole('button', { name: /View items \(3\)/i })
  fireEvent.click(viewThreeItemsButton)
  const threeItemSelection = readSelectedItemIds()

  expect(twelveItemSelection).toHaveLength(12)
  expect(threeItemSelection).toHaveLength(3)
  expect(twelveItemSelection).not.toEqual(threeItemSelection)
  expect(threeItemSelection).toEqual(expect.arrayContaining(['sr-ig-1', 'sr-ig-3']))
  expect(threeItemSelection).not.toEqual(expect.arrayContaining(['sr-sc-1']))
})

test('renders score version and procedure related-resource cards in detail view', async () => {
const { container } = render(<EvaluationTask variant="detail" task={makeTask()} />)

Expand Down
4 changes: 3 additions & 1 deletion dashboard/components/evaluations-dashboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,9 @@ export default function EvaluationsDashboard({
human_label: scoreResult?.metadata?.human_label ?? parsedMetadata.human_label ?? (typeof item.metadata === 'object' ? (item.metadata as any).human_label : null) ?? null,
correct: Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct ?? (typeof item.metadata === 'object' ? (item.metadata as any).correct : null)),
human_explanation: scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? (typeof item.metadata === 'object' ? (item.metadata as any).human_explanation : null) ?? null,
text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? (typeof item.metadata === 'object' ? (item.metadata as any).text : null) ?? null
text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? (typeof item.metadata === 'object' ? (item.metadata as any).text : null) ?? null,
feedback_item_id: scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? (typeof item.metadata === 'object' ? (item.metadata as any).feedback_item_id : null) ?? null,
item_id: scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? (typeof item.metadata === 'object' ? (item.metadata as any).item_id : null) ?? null
},
itemId: item.itemId ?? parsedMetadata.item_id?.toString() ?? null,
createdAt: item.createdAt || new Date().toISOString(),
Expand Down
8 changes: 6 additions & 2 deletions dashboard/utils/data-operations.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,9 @@ describe('transformEvaluation', () => {
human_label: 'correct_answer',
correct: true,
human_explanation: 'This is the right answer',
text: 'Sample input text'
text: 'Sample input text',
feedback_item_id: null,
item_id: null
});
});

Expand Down Expand Up @@ -448,7 +450,9 @@ describe('transformEvaluation', () => {
human_label: 'correct',
correct: true,
human_explanation: null,
text: null
text: null,
feedback_item_id: null,
item_id: null
});

// Verify itemIdentifiers are correctly extracted and preserved
Expand Down
5 changes: 4 additions & 1 deletion dashboard/utils/data-operations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export type ProcessedEvaluation = Omit<BaseEvaluation, 'task' | 'scorecard' | 's
value: string;
url?: string;
}> | null;
feedbackItem?: any | null;
createdAt: string;
}>;
};
Expand Down Expand Up @@ -1037,7 +1038,9 @@ export function transformEvaluation(evaluation: BaseEvaluation): ProcessedEvalua
human_label: scoreResult?.metadata?.human_label ?? parsedMetadata.human_label ?? null,
correct: Boolean(scoreResult?.metadata?.correct ?? parsedMetadata.correct),
human_explanation: scoreResult?.metadata?.human_explanation ?? parsedMetadata.human_explanation ?? null,
text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? null
text: scoreResult?.metadata?.text ?? parsedMetadata.text ?? null,
feedback_item_id: scoreResult?.metadata?.feedback_item_id ?? parsedMetadata.feedback_item_id ?? null,
item_id: scoreResult?.metadata?.item_id ?? parsedMetadata.item_id ?? null
};

return {
Expand Down
Loading