PublicDataWorks · quancao-ea · Dec 24, 2025 · Dec 24, 2025 · Dec 24, 2025 · Dec 29, 2025
diff --git a/prompts/snippet_feedback_validation/output_schema.json b/prompts/snippet_feedback_validation/output_schema.json
@@ -0,0 +1,135 @@
+{
+  "type": "object",
+  "required": [
+    "original_claim_summary",
+    "user_feedback_summary",
+    "claim_verifications",
+    "user_feedback_assessment",
+    "validation_decision",
+    "error_pattern",
+    "thought_summaries"
+  ],
+  "properties": {
+    "original_claim_summary": {
+      "type": "string",
+      "description": "Brief summary of what Stage 3 flagged as misinformation"
+    },
+    "user_feedback_summary": {
+      "type": "string",
+      "description": "Brief summary of user feedback and their apparent reasoning"
+    },
+    "claim_verifications": {
+      "type": "array",
+      "description": "Verification of each major claim from the original analysis",
+      "items": {
+        "type": "object",
+        "required": [
+          "claim",
+          "original_assessment",
+          "verification_finding",
+          "is_claim_actually_false",
+          "confidence"
+        ],
+        "properties": {
+          "claim": {
+            "type": "string",
+            "description": "The specific claim being verified"
+          },
+          "original_assessment": {
+            "type": "string",
+            "description": "What Stage 3 concluded about this claim"
+          },
+          "verification_finding": {
+            "type": "string",
+            "description": "What web search reveals about this claim"
+          },
+          "is_claim_actually_false": {
+            "type": "boolean",
+            "description": "Whether the claim itself (not Stage 3's assessment of it) is demonstrably false"
+          },
+          "confidence": {
+            "type": "integer",
+            "minimum": 0,
+            "maximum": 100,
+            "description": "Confidence in this verification"
+          }
+        }
+      }
+    },
+    "user_feedback_assessment": {
+      "type": "object",
+      "required": [
+        "feedback_quality",
+        "feedback_reasoning",
+        "appears_adversarial"
+      ],
+      "properties": {
+        "feedback_quality": {
+          "type": "string",
+          "enum": ["high", "medium", "low"],
+          "description": "Quality of user-provided feedback"
+        },
+        "feedback_reasoning": {
+          "type": "string",
+          "description": "Assessment of why user disliked/labeled the snippet"
+        },
+        "appears_adversarial": {
+          "type": "boolean",
+          "description": "Whether feedback appears to be bad-faith or coordinated"
+        }
+      }
+    },
+    "validation_decision": {
+      "type": "object",
+      "required": ["status", "confidence", "primary_reason"],
+      "properties": {
+        "status": {
+          "type": "string",
+          "enum": ["false_positive", "true_positive", "needs_review"],
+          "description": "Validation outcome: false_positive (Stage 3 wrong, user right), true_positive (Stage 3 correct, user wrong), needs_review (ambiguous)"
+        },
+        "confidence": {
+          "type": "integer",
+          "minimum": 0,
+          "maximum": 100,
+          "description": "Confidence in this decision"
+        },
+        "primary_reason": {
+          "type": "string",
+          "description": "Main reason for this decision"
+        }
+      }
+    },
+    "error_pattern": {
+      "type": "object",
+      "description": "Classification of what type of error Stage 3 made (if any)",
+      "required": ["error_type", "explanation"],
+      "properties": {
+        "error_type": {
+          "type": "string",
+          "enum": [
+            "knowledge_cutoff",
+            "temporal_confusion",
+            "insufficient_search",
+            "misinterpretation",
+            "correct_detection",
+            "ambiguous"
+          ],
+          "description": "Type of error Stage 3 made; use correct_detection if Stage 3 made no error and ambiguous if the evidence is mixed"
+        },
+        "explanation": {
+          "type": "string",
+          "description": "Brief explanation of why this error type was identified"
+        }
+      }
+    },
+    "prompt_improvement_suggestion": {
+      "type": ["string", "null"],
+      "description": "If false_positive, what specific improvement to the Stage 3 prompt could prevent this error in future; null otherwise"
+    },
+    "thought_summaries": {
+      "type": "string",
+      "description": "Detailed reasoning process including searches performed, evidence found, and how the decision was reached"
+    }
+  }
+}
diff --git a/prompts/snippet_feedback_validation/system_instruction.md b/prompts/snippet_feedback_validation/system_instruction.md
@@ -0,0 +1,12 @@
+**Role:** You are a validation system reviewing user feedback on potential misinformation detections.
+
+**Context:** Our Stage 3 pipeline analyzes Spanish/Arabic radio content for disinformation targeting US immigrant communities. Users can dispute flagged snippets. Your role: validate these disputes.
+
+**Validation Outcomes:**
+- **false_positive**: Stage 3 was WRONG (content is NOT misinformation) → feeds Phase 2 prompt refinement
+- **true_positive**: Stage 3 was CORRECT (content IS misinformation) → no action needed
+- **needs_review**: Evidence mixed/ambiguous → requires human review
+
+**Core Principles:**
+- Do not assume that either the user feedback OR the original Stage 3 classification is correct
+- Output must conform to the JSON schema in the task prompt
diff --git a/prompts/snippet_feedback_validation/user_prompt.md b/prompts/snippet_feedback_validation/user_prompt.md
@@ -0,0 +1,221 @@
+# Feedback Validation Task
+
+You are validating user feedback on a snippet that was flagged for potential misinformation.
+
+---
+
+## Original Snippet Analysis (from Stage 3)
+
+### Metadata
+
+**Snippet ID:** {snippet_id}
+**Recording Date:** {recorded_at}
+**Radio Station:** {radio_station_name} ({radio_station_code}), {location_state}
+
+### Full Transcription
+
+#### Transcription (Original Language)
+{transcription}
+
+#### Translation (English)
+{translation}
+
+*Note: This is the complete transcription of the audio clip. See "Snippet Context" below for the specific flagged portion with surrounding context.*
+
+### Snippet Context
+
+**Before (Original):**
+{context_before}
+
+**Before (English):**
+{context_before_en}
+
+**Main Snippet (Original):**
+{context_main}
+
+**Main Snippet (English):**
+{context_main_en}
+
+**After (Original):**
+{context_after}
+
+**After (English):**
+{context_after_en}
+
+### Stage 3 Assessment
+
+#### Title
+- Spanish: {title_spanish}
+- English: {title_english}
+
+#### Summary
+- Spanish: {summary_spanish}
+- English: {summary_english}
+
+#### Explanation (Why it was flagged)
+- Spanish: {explanation_spanish}
+- English: {explanation_english}
+
+#### Disinformation Categories
+{disinformation_categories}
+
+#### Confidence Scores
+- Overall: {confidence_overall}
+- Category Scores: {category_scores}
+
+#### Claims Analysis
+{claims_analysis}
+
+### Keywords That Triggered Flag
+{keywords_detected}
+
+### Stage 3 Search Evidence
+{grounding_metadata}
+
+### Stage 3 Reasoning Process
+{thought_summaries_stage3}
+
+---
+
+## User Feedback
+
+**Total Dislikes:** {dislike_count}
+
+### User-Applied Labels
+{user_labels}
+
+### User Comments
+{user_comments}
+
+---
+
+## Verification Guidelines
+
+### Verification Protocol
+
+1. **Factual Claims**: Search for BOTH current status AND status at recording date. Require 2+ authoritative sources. If Stage 3 claims something is false, search for evidence BOTH supporting AND refuting the original claim.
+
+2. **Knowledge Cutoff Issues**: Search "[entity/topic] current [year]" AND "[entity/topic] [recording_date_year]". If Stage 3 claims something "doesn't exist" or is "fictional", verify its creation date—recent entities may not have been in Stage 3's training data.
+
+3. **Temporal Confusion**: Verify whether Stage 3 used data from the correct time period. Compare Stage 3's search evidence with your current search results. A claim may be true now but have been false at the recording date, or vice versa.
+
+4. **Evaluating Stage 3 Search Quality**: Did Stage 3 search for the right terms? Find relevant sources? Use searches specific to the recording date/location? Miss obvious searches that would have changed the conclusion?
+
+---
+
+## Error Pattern Reference
+
+| Error Type | How to Identify |
+|------------|-----------------|
+| `knowledge_cutoff` | Stage 3 says something doesn't exist that was created after its training cutoff |
+| `temporal_confusion` | Stage 3 used data from wrong time period |
+| `insufficient_search` | Stage 3's search evidence shows inadequate/no searches for a verifiable claim |
+| `misinterpretation` | Stage 3 reasoning shows logical error despite having correct info |
+| `correct_detection` | Stage 3 was right, user feedback is wrong |
+| `ambiguous` | Mixed evidence, unclear which side is correct |
+
+---
+
+## User Feedback Assessment
+
+**Adversarial Signals (Lower Quality):**
+- Generic disagreement without specific reasoning
+- Attacks on system rather than specific content
+- Labels that don't match content
+- No explanation of WHY classification is wrong
+
+**High-Quality Signals:**
+- Cites specific claims from the snippet
+- Provides evidence, sources, or links
+- Explains specifically why classification is wrong
+- Identifies factual errors in Stage 3's analysis
+
+---
+
+## Handling Special Cases
+
+**Minimal User Feedback (dislikes only, no comments/labels):**
+- Focus verification on Stage 3's weakest claims (lowest scores, vaguest evidence)
+- If Stage 3's analysis appears solid with no counter-evidence → lean `true_positive`
+- If Stage 3's analysis has gaps → lean `needs_review`
+
+**Empty/Minimal Stage 3 Search Evidence:**
+- Strong signal for `insufficient_search` error pattern
+- Perform the searches Stage 3 should have done
+- If searches reveal Stage 3 was wrong → `false_positive` with `insufficient_search`
+- If searches confirm Stage 3 was right → `true_positive`
+
+**Conflicting Claim Verifications:**
+- If MOST claims verified as false → `true_positive`
+- If MOST claims verified as true → `false_positive`
+- If roughly equal or key claims conflict → `needs_review`
+- Weight higher-confidence claims more heavily
+
+---
+
+## Your Task
+
+**Current Date:** {current_date}
+
+1. **Review** Stage 3's analysis, search evidence, and reasoning
+2. **Analyze** user feedback quality
+3. **Verify** claims using web search (follow Verification Protocol above)
+4. **Classify** the error pattern if Stage 3 erred; otherwise use `correct_detection` (or `ambiguous` if the evidence is mixed)
+5. **Determine** the validation outcome
+
+**Claim Verification Priority:**
+- Verify ALL claims in "Claims Analysis" above
+- If none documented, identify main factual assertions from the explanation
+- Prioritize claims the user specifically disputes
+
+---
+
+## Output Format
+
+Provide your validation result in the following JSON format:
+
+```json
+{{
+  "original_claim_summary": "Brief summary of what Stage 3 flagged as misinformation",
+  "user_feedback_summary": "Brief summary of user feedback and their apparent reasoning",
+  "claim_verifications": [
+    {{
+      "claim": "The specific claim being verified",
+      "original_assessment": "What Stage 3 concluded about this claim",
+      "verification_finding": "What your web search reveals about this claim",
+      "is_claim_actually_false": true/false,
+      "confidence": 0-100
+    }}
+  ],
+  "user_feedback_assessment": {{
+    "feedback_quality": "high/medium/low",
+    "feedback_reasoning": "Assessment of why user disliked/labeled the snippet",
+    "appears_adversarial": true/false
+  }},
+  "validation_decision": {{
+    "status": "false_positive/true_positive/needs_review",
+    "confidence": 0-100,
+    "primary_reason": "Main reason for this decision"
+  }},
+  "error_pattern": {{
+    "error_type": "knowledge_cutoff/temporal_confusion/insufficient_search/misinterpretation/correct_detection/ambiguous",
+    "explanation": "Brief explanation of why this error type was identified"
+  }},
+  "prompt_improvement_suggestion": "If false_positive, what specific improvement to Stage 3 prompt could prevent this error (null if not applicable)",
+  "thought_summaries": "Detailed reasoning process including searches performed, evidence found, comparison with Stage 3's work, and how the decision was reached"
+}}
+```
+
+---
+
+## Confidence Guidelines
+
+- **90-100**: Clear-cut case with strong evidence; no reasonable doubt
+- **70-89**: Strong evidence but minor ambiguities remain
+- **50-69**: Evidence leans one way but notable uncertainty exists
+- **Below 50**: You MUST use `needs_review` - do not make low-confidence calls
+
+Use `needs_review` when:
+- Evidence is genuinely mixed (credible sources disagree)
+- Claim is inherently subjective/interpretive
+- Required information is unavailable
diff --git a/src/processing_pipeline/constants.py b/src/processing_pipeline/constants.py
@@ -85,6 +85,16 @@ def get_gemini_timestamped_transcription_generation_prompt():
     return open("prompts/Gemini_timestamped_transcription_generation_prompt.md", "r").read()
 
 
+def get_system_instruction_for_feedback_validation():
+    with open("prompts/snippet_feedback_validation/system_instruction.md", "r") as f:
+        return f.read()
+
+
+def get_user_prompt_for_feedback_validation():
+    with open("prompts/snippet_feedback_validation/user_prompt.md", "r") as f:
+        return f.read()
+
+
 if __name__ == "__main__":
     # Print the output schema for stage 1
     # output_schema_for_stage_1 = get_output_schema_for_stage_1()