Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified backend/__pycache__/gemini.cpython-313.pyc
Binary file not shown.
62 changes: 12 additions & 50 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
from pymongo import MongoClient
from pydantic import BaseModel
from typing import List, Dict, Optional
import json
import os

# Import external modules (assume these are available in your project)
from policy_scraper import fetch_main_page, extract_prop_blocks, fetch_prop_details
from gemini import (
simplify_description,
simplify_paragraph,
people_affected,
personalize_proposition,
)

app = FastAPI()
Expand Down Expand Up @@ -302,10 +303,9 @@ class Proposition(BaseModel):
simplified_description: Optional[str] = None
simplified_paragraph: Optional[str] = None
affected_people: Optional[str] = None
personalization_summary: Optional[str] = None


@app.get("/scrape-propositions", response_model=List[Proposition])
@app.get("/scrape-propositions", response_model=List[Proposition]) # main scraping function
def get_scraped_propositions():
try:
soup = fetch_main_page() # Scraper function
Expand All @@ -319,8 +319,10 @@ def get_scraped_propositions():
raise HTTPException(status_code=500, detail=f"Error scraping propositions: {str(e)}")


@app.get("/simplify-propositions", response_model=List[Proposition])
@app.get("/simplify-propositions", response_model=List[Proposition]) # helps simplify the
def get_simplified_propositions():
global propositions_cache
propositions_cache = propositions_cache
if not propositions_cache:
raise HTTPException(
status_code=404,
Expand All @@ -341,60 +343,20 @@ def get_simplified_propositions():
prop["simplified_paragraph"] = simple_para
prop["affected_people"] = people_aff
simplified_props.append(prop)
save_propositions_to_file(simplified_props)
return simplified_props


@app.get("/personalized-feed", response_model=List[Proposition])
def get_personalized_feed():
    """Return cached propositions personalized for a hardcoded demo user.

    Raises a 404 if nothing has been scraped into the cache yet.
    Propositions whose personalization contains "Not aligned" are dropped.
    """
    # Simulated signup data; a real implementation would load this per user.
    demo_profile = {
        "first_name": "Alex",
        "last_name": "Doe",
        "date_of_birth": "1990-01-15",
        "email": "alex@example.com",
        "gender": "Non-Binary",
        "county": "Los Angeles",
        "income_bracket": "50k-75k",
        "education_level": "Bachelor's Degree",
        "occupation": "Software Developer",
        "family_size": 10,
        "race_ethnicity": "Hispanic",
        "policy_preferences": {
            "Climate change": "Right",
            "Universal healthcare": "Right",
            "Prison reform": "Right",
            "Abortion": "Right",
            "Education": "Right",
            "Immigration": "Right",
            "Military spending": "Right"
        }
    }

    if not propositions_cache:
        raise HTTPException(
            status_code=404,
            detail="No propositions available. Please scrape them first.",
        )

    feed = []
    for entry in propositions_cache:
        text = entry.get("details", "")
        if not text:
            continue  # nothing to personalize against
        summary = personalize_proposition(demo_profile, text)
        # Filter out propositions the model marks as misaligned.
        if "Not aligned" in summary:
            continue
        entry["personalization_summary"] = summary
        feed.append(entry)
    return feed

def save_propositions_to_file(data, filename="propositions_data.json"):
    """Serialize *data* as pretty-printed UTF-8 JSON in the current working directory."""
    target = os.path.join(os.getcwd(), filename)
    with open(target, "w", encoding="utf-8") as out:
        out.write(json.dumps(data, ensure_ascii=False, indent=4))


@app.get("/prop-api-root")
def prop_api_root():
    """Health/landing endpoint for the proposition API."""
    return dict(message="Welcome to the Proposition API!")


# ----------- Main Section -----------

if __name__ == "__main__":
    # Local import so uvicorn is only required when running this module directly.
    import uvicorn
    # NOTE(review): 0.0.0.0 binds all interfaces — the dev server is reachable
    # beyond localhost; confirm this is intended before deploying.
    uvicorn.run(app, host="0.0.0.0", port=8000)
110 changes: 1 addition & 109 deletions backend/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,112 +45,4 @@ def people_affected(text: str) -> str:
f"{text}"
)
resp = model.generate_content(prompt)
return resp.text.strip()

def get_top_propositions(user_profile: dict, propositions: list) -> list:
    """
    Get the top 3 propositions that resonate most with the user based on their profile.

    Args:
        user_profile (dict): The user's profile data.
        propositions (list): Proposition dicts with keys like 'number', 'title'
            and 'details'.

    Returns:
        list: The top 3 evaluated propositions, each annotated with
        'proposition_number' and 'proposition_title'.
    """
    results = []

    for proposition in propositions:
        # Delegate the alignment judgement to the Gemini-backed helper.
        proposition_text = proposition.get("details", "")
        result = personalize_proposition(user_profile, proposition_text)
        result["proposition_number"] = proposition.get("number", "Unknown")
        result["proposition_title"] = proposition.get("title", "Untitled")
        results.append(result)

    # Highest alignment first. personalize_proposition parses model-produced
    # JSON, which may legitimately lack an 'alignment' key — use .get so a
    # malformed response sorts last instead of raising KeyError.
    alignment_priority = {"Highly aligned": 3, "Moderately aligned": 2, "Not aligned": 1}
    results.sort(key=lambda r: alignment_priority.get(r.get("alignment"), 0), reverse=True)

    # Return the top 3 propositions
    return results[:3]

# Personalization summary (AI-based alignment)
# Enhanced personalization function in gemini.py
def personalize_proposition(user_profile: dict, proposition_text: str) -> dict:
    """
    Ask the Gemini model how well a proposition aligns with a specific user.

    Args:
        user_profile (dict): Profile fields; 'policy_preferences' is a nested
            dict of topic -> stance and is flattened specially.
        proposition_text (str): Full text of the proposition.

    Returns:
        dict: Parsed JSON with 'alignment', 'reason' and 'impact' fields, or a
        fallback dict when the model response is not valid JSON.
    """
    import json  # hoisted out of the try block; importing should never be "handled"

    # Flatten the profile into a single prompt-friendly string.
    user_info_parts = []
    for key, value in user_profile.items():
        if key == "policy_preferences":
            prefs = " | ".join([f"{k}: {v}" for k, v in value.items()])
            user_info_parts.append(f"Policy Preferences: {prefs}")
        else:
            user_info_parts.append(f"{key}: {value}")
    user_info_str = " ; ".join(user_info_parts)

    prompt = (
        "Using the user profile and proposition below, create a personalized response with the following format:\n"
        "1. Alignment: One of 'Highly aligned', 'Moderately aligned', or 'Not aligned'\n"
        "2. Reason: A short sentence (≈15 words) explaining the alignment reasoning\n"
        "3. Impact: One sentence on how this would directly impact this specific user\n"
        "4. Format as a JSON with these fields. No markdown or additional text.\n\n"
        f"User Profile: {user_info_str}\n\n"
        f"Proposition Text: {proposition_text}"
    )

    resp = model.generate_content(prompt)
    response_text = resp.text.strip()

    # Parse the JSON response. The previous bare `except:` swallowed every
    # exception (including KeyboardInterrupt); only parse failures should
    # trigger the fallback.
    try:
        return json.loads(response_text)
    except (json.JSONDecodeError, ValueError):
        # Fallback if JSON parsing fails
        return {
            "alignment": "Unknown",
            "reason": "Could not determine alignment",
            "impact": "Impact analysis unavailable"
        }

if __name__ == "__main__":
    # Debug driver: rank propositions for a hardcoded sample user and write
    # the top 3 to disk for the frontend.
    # NOTE(review): relies on `json` being imported at module top level —
    # confirm; also the absolute macOS paths below make this runnable only on
    # one developer's machine.
    # Load propositions from props.json
    props_file_path = "/Users/avnigandhi/Documents/VoteSmartWeb/backend/props.json"
    with open(props_file_path, "r") as file:
        propositions = json.load(file)

    # Example user profile (hardcoded for now)
    sample_user_profile = {
        "first_name": "Alex",
        "last_name": "Doe",
        "date_of_birth": "1990-01-15",
        "email": "alex@example.com",
        "gender": "Non-Binary",
        "county": "Los Angeles",
        "income_bracket": "50k-75k",
        "education_level": "Bachelor's Degree",
        "occupation": "Software Developer",
        "family_size": 1,
        "race_ethnicity": "Hispanic",
        "policy_preferences": {
            "Climate change": "Left",
            "Universal healthcare": "Left",
            "Prison reform": "Neutral",
            "Abortion": "Right",
            "Education": "Left",
            "Immigration": "Left",
            "Military spending": "Right"
        }
    }

    # Get the top 3 propositions
    top_propositions = get_top_propositions(sample_user_profile, propositions)

    # Print the results (for debugging purposes)
    print("\nTop 3 Propositions:")
    for prop in top_propositions:
        print(f"Proposition {prop['proposition_number']} - {prop['proposition_title']}")

    # Output the top 3 propositions for the frontend
    output_file_path = "/Users/avnigandhi/Documents/VoteSmartWeb/backend/top_propositions.json"
    with open(output_file_path, "w") as output_file:
        json.dump(top_propositions, output_file, indent=4)
return resp.text.strip()
148 changes: 59 additions & 89 deletions backend/personalized_props.py
Original file line number Diff line number Diff line change
@@ -1,107 +1,77 @@
# main.py (or wherever you put your “run” logic)
import os
import json
import google.generativeai as genai
import re
from dotenv import load_dotenv
import google.generativeai as genai

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
raise ValueError("GEMINI_API_KEY not found")
raise ValueError("GEMINI_API_KEY not found in environment variables.")

genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

# 1. Proposition data from your config.json
with open("config.json", "r", encoding="utf-8") as f:
PROPOSITIONS = json.load(f)

# 2. Sample user profile
user_profile = {
"first_name": "Alex",
"last_name": "Doe",
"date_of_birth": "1990-01-15",
"email": "alex@example.com",
"gender": "Non-Binary",
"county": "Los Angeles",
"income_bracket": "50k-75k",
"education_level": "Bachelor's Degree",
"occupation": "Software Developer",
"family_size": 10,
"race_ethnicity": "Hispanic",
"policy_preferences": {
"Climate change": "Right",
"Universal healthcare": "Right",
"Prison reform": "Right",
"Abortion": "Right",
"Education": "Right",
"Immigration": "Right",
"Military spending": "Right"
}
}
def load_json(filepath: str):
    """Read and deserialize the UTF-8 JSON file at *filepath*."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return json.loads(handle.read())

def extract_user_interests(user_profile: dict) -> list[str]:
"""
Extract a list of policy topics from user_profile['policy_preferences'],
ignoring 'Neutral' stances.
"""
prefs = user_profile.get("policy_preferences", {})
# Just get the keys if stance != 'Neutral'
user_interests = [topic for topic, stance in prefs.items() if stance.lower() != "neutral"]
return user_interests
def clean_json(raw_text):
    """Strip Markdown code fences (``` or ```json) from a model response.

    Gemini sometimes wraps its JSON output in fenced code blocks despite being
    told not to; this removes every fence and trims surrounding whitespace.

    Args:
        raw_text (str): Raw model response text.

    Returns:
        str: The response with all code fences removed, stripped.
    """
    # The pattern consumes every ``` occurrence (with or without the json
    # tag), so the old follow-up .replace("```", "") was dead code.
    return re.sub(r"```(?:json)?", "", raw_text).strip()

def pick_top_3_propositions(user_interests: list[str]) -> list[dict]:
"""
Example GPT prompt that ranks propositions by how well they match the user_interests.
Returns a list of 3 dicts: [{number, title, summary, score}, ...].
"""
# Create a minimal version of your proposition data for the prompt
mini_proposals = [
{
"number": prop["number"],
"title": prop["title"],
"summary": prop["simplified_description"],
"paragraph": prop["simplified_paragraph"],
"affected people": prop["affected_people"]
}
for prop in PROPOSITIONS
]
def rank_props(user_profile:dict, props_data:dict, top_n: int = 3) -> list:
ranked = []
for prop in props_data:
prompt = (
"You are an AI that evaluates how relevant a political proposition is for a specific user.\n"
"Given the user's profile and one proposition, analyze the alignment and impact.\n\n"
"Return only a single JSON object with the following fields:\n"
"- proposition_title\n"
"- alignment (one of: 'Highly aligned', 'Moderately aligned', 'Not aligned')\n"
"- reason (a short sentence explaining the alignment in ~15 words)\n"
"- impact (one sentence on how this proposition might affect this user)\n"
"- relevance_score (a float between 0 and 1, with two decimal places for precision)\n\n"
"Return a valid JSON object only. Do NOT use triple backticks, markdown formatting, or any extra text.\n\n"
f"User Profile:\n{json.dumps(user_profile, indent=2)}\n\n"
f"Proposition:\n"
f"Title: {prop.get('title')}\n"
f"Details: {prop.get('details')}\n"
f"Affected People: {prop.get('affected_people')}\n"
)
try:
response = model.generate_content(prompt)
raw_output = response.text.strip()
cleaned_output = clean_json(raw_output)
parsed = json.loads(cleaned_output)
ranked.append(parsed)
except Exception as e:
print(f"Error processing proposition '{prop.get('title')}': {e}")

# Build a prompt that asks GPT to rank them
prompt_text = (
f"The user cares most about: {', '.join(user_interests)}.\n\n"
"Here are some ballot propositions:\n"
f"{json.dumps(mini_proposals, indent=2)}\n\n"
"Please assign an alignment score (1=low, 5=high). "
"Then return the TOP 3 in JSON, sorted by score descending, in this format:\n"
"[\n"
" { \"number\": \"...\", \"title\": \"...\", \"summary\": \"...\", \"score\": 5 },\n"
" ...\n"
"]"
)
# now sort once, and take top_n
ranked.sort(key=lambda x: x.get("relevance_score", 0), reverse=True)
return ranked[:top_n]

resp = genai.generate_text(
model="gemini-1.5-pro-latest", # or whichever you have
prompt=prompt_text,
temperature=0.0,
max_output_tokens=512
)

raw_output = resp.candidates[0].output
try:
top3 = json.loads(raw_output)
except:
top3 = []
return top3
if __name__ == '__main__':
USER_JSON_PATH = 'test_user_profile.json'
PROPS_JSON_PATH = 'propositions_data.json'
OUTPUT_PATH = 'top3_props.json'

def main():
# 1) Convert user profile to a list of interests
interests_list = extract_user_interests(user_profile)
user_profile = load_json(USER_JSON_PATH)
props_data = load_json(PROPS_JSON_PATH)

# 2) Get the top 3 propositions
top_3 = pick_top_3_propositions(interests_list)
top3_props = rank_props(user_profile, props_data, top_n=3)

# 3) Print or return them
print(json.dumps(top_3, indent=2))
# Print to console
print("Top 3 Personalized Propositions:\n")
for i, prop in enumerate(top3_props, 1):
print(f"#{i}:")
print(json.dumps(prop, indent=2))
print("\n" + "-"*40 + "\n")

if __name__ == "__main__":
main()
# **Write to JSON file**
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
json.dump(top3_props, f, ensure_ascii=False, indent=2)
print(f"Saved top 3 propositions to {OUTPUT_PATH}")
Loading