Esashiero · google-labs-jules · Jan 17, 2026
diff --git a/app.py b/app.py
@@ -646,6 +646,7 @@ def _process_batch_gen(self, batch):
                 self.is_running = False
 
     def run_tournament(self, variations, subreddits, sort_by, time_filter, post_type):
+        yield f"<<<TOURNAMENT_START>>>{json.dumps(variations)}"
         yield "🏆 Starting Query Tournament (5 competing variations):"
         for i, var in enumerate(variations, 1):
             yield f"   {i}. [{var['type']}] \"{var['query']}\""
@@ -663,7 +664,9 @@ def run_tournament(self, variations, subreddits, sort_by, time_filter, post_type
                     stats = future.result()
                     # New weighted score
                     v_score = (stats['high_quality'] * 10) + (stats['avg_score'] * 0.5)
-                    performance[var['query']] = {**stats, "v_score": v_score, "type": var['type']}
+                    v_data = {**stats, "v_score": v_score, "type": var['type'], "query": var['query']}
+                    performance[var['query']] = v_data
+                    yield f"<<<TOURNAMENT_RESULT>>>{json.dumps(v_data)}"
                     yield f"   📊 Variant '{var['type']}': {stats['high_quality']} matches found (avg {stats['avg_score']:.1f}) | Score: {v_score:.1f}"
                 except Exception as e:
                     logger.error(f"Tournament error: {e}")
@@ -724,6 +727,13 @@ def _test_variation(self, var, subreddits, sort_by, time_filter, post_type):
 
     def execute(self, query, subreddits, sort_by="new", time_filter="all", post_type="any", limit=100):
         # 0. Show Configuration
+        pass_info = {
+            "query": query,
+            "sort": sort_by,
+            "time": time_filter,
+            "limit": limit
+        }
+        yield f"<<<SEARCH_PASS>>>{json.dumps(pass_info)}"
         yield f"\n🔍 Search Pass Configuration:"
         yield f"   Query: {query}"
         yield f"   Sort: {sort_by} | Time: {time_filter} | Limit: {limit}/sub"
@@ -906,6 +916,7 @@ def search(
             core_constraints = self.llm.extract_core_characteristics(criteria)
 
             if core_constraints and "core_constraints" in core_constraints:
+                yield f"<<<CORE_CONSTRAINTS>>>{json.dumps(core_constraints['core_constraints'])}"
                 themes = [c["theme"] for c in core_constraints["core_constraints"]]
                 yield f"   🔒 Identified Mandatory constraints: {', '.join(themes)}"
 
@@ -1011,6 +1022,7 @@ def quota_met():
         self._log_search_metrics(overall_query, criteria or keywords, subreddits, len(final_posts), duration)
 
         # Show Detailed Stats
+        yield f"<<<SEARCH_STATS>>>{json.dumps(pipeline.stats)}"
         yield "\n📊 Search Statistics:"
         yield f"   - Posts fetched from Reddit: {pipeline.stats['fetched']}"
         yield f"   - Posts analyzed by LLM: {pipeline.stats['analyzed']}"
@@ -1233,6 +1245,84 @@ def api_generate_query():
         return jsonify({"error": str(e)}), 500
 
 
+@app.route("/api/curate/init", methods=["POST"])
+def api_curate_init():
+    """Prepare the data a curation session starts from.
+
+    Expects a JSON object body with a non-empty ``description`` and an
+    optional ``provider`` (defaults to ``"mistral"``).
+
+    Returns a JSON object with ``core_constraints``, ``variations`` and
+    ``vocabulary_context``, or a 400 JSON error when the description is
+    missing.
+    """
+    # silent=True yields None instead of raising on a missing or malformed
+    # JSON body, so every bad request gets the same JSON 400 below. The
+    # isinstance check covers valid JSON that is not an object (e.g. a list),
+    # which has no .get().
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or not data.get("description"):
+        return jsonify({"error": "Description required"}), 400
+
+    desc = data["description"]
+    provider = data.get("provider", "mistral")
+    llm = LLMFilter(provider)
+
+    # 1. Extract Core Characteristics
+    core = llm.extract_core_characteristics(desc)
+
+    # 2. Vocabulary Context (Learning)
+    # Tagged results and favorites are merged into one pool; on a key clash
+    # the favorites entry wins because it is unpacked last.
+    vocabulary_context = None
+    tagged_db = load_tagged_results()
+    favorites_db = load_favorites()
+    training_data = {**tagged_db, **favorites_db}
+    if training_data:
+        similar = find_similar_posts(desc, training_data, top_k=8)
+        if similar:
+            vocabulary_context = analyze_vocabulary(similar)
+
+    # 3. Generate Variations
+    variations = llm.generate_query_variations(
+        desc,
+        num_variations=5,
+        core_constraints=core,
+        vocabulary_context=vocabulary_context
+    )
+
+    return jsonify({
+        # `core` may be empty/None when extraction yields nothing.
+        "core_constraints": core.get("core_constraints", []) if core else [],
+        "variations": variations,
+        "vocabulary_context": vocabulary_context
+    })
+
+
+@app.route("/api/results", methods=["GET"])
+def list_results():
+    """List the saved result files in ``results/``, newest first.
+
+    Each entry carries ``id`` (file name without the ``.json`` suffix),
+    ``query``, ``timestamp`` and ``count``; files in the metadata format
+    (a dict with a ``results`` key) also carry ``criteria``. Files that
+    cannot be read or parsed are skipped.
+    """
+    os.makedirs("results", exist_ok=True)
+    suffix = ".json"
+    results = []
+    for name in os.listdir("results"):
+        if not name.endswith(suffix):
+            continue
+        path = os.path.join("results", name)
+        # Strip only the trailing extension so the id maps back to the same
+        # file; str.replace would remove every ".json" inside the name.
+        result_id = name[:-len(suffix)]
+        try:
+            with open(path, "r") as jf:
+                data = json.load(jf)
+            if isinstance(data, dict) and "results" in data:
+                # New format with metadata
+                entry = {
+                    "id": result_id,
+                    "query": data.get("query", ""),
+                    "criteria": data.get("criteria", ""),
+                    "timestamp": data.get("timestamp", 0),
+                    "count": len(data.get("results", []))
+                }
+            else:
+                # Old format (just list of posts)
+                entry = {
+                    "id": result_id,
+                    "query": name,
+                    "timestamp": os.path.getmtime(path),
+                    "count": len(data)
+                }
+        except (OSError, ValueError, TypeError) as exc:
+            # OSError: unreadable file; ValueError: invalid JSON or encoding;
+            # TypeError: payload without a length. One bad file must not
+            # break the whole listing, but it should not vanish silently.
+            logger.warning("Skipping unreadable result file %s: %s", name, exc)
+            continue
+        results.append(entry)
+
+    def sort_key(entry):
+        # A stored timestamp may be null or a string; comparing those with
+        # numbers would raise, so anything non-numeric sorts as 0.
+        ts = entry["timestamp"]
+        return ts if isinstance(ts, (int, float)) else 0
+
+    return jsonify(sorted(results, key=sort_key, reverse=True))
+
+
+@app.route("/api/results/<result_id>", methods=["GET"])
+def get_result(result_id):
+    """Return the saved result file ``results/<result_id>.json`` as JSON.
+
+    Responds with a 404 JSON error when the id is not a plain file name or
+    the file does not exist, and a 500 JSON error when the file cannot be
+    read or parsed.
+    """
+    # result_id comes straight from the URL. Accept only a bare file name so
+    # separators or drive prefixes cannot point outside the results folder.
+    if result_id in ("", ".", "..") or os.path.basename(result_id) != result_id:
+        return jsonify({"error": "Not found"}), 404
+    path = os.path.join("results", f"{result_id}.json")
+    if not os.path.isfile(path):
+        return jsonify({"error": "Not found"}), 404
+    try:
+        with open(path, "r") as f:
+            return jsonify(json.load(f))
+    except (OSError, ValueError) as e:
+        # ValueError covers invalid JSON and undecodable bytes.
+        return jsonify({"error": str(e)}), 500
+
+
 @app.route("/api/blacklist", methods=["GET"])
 def get_blacklist():
     if os.path.exists(BLACKLIST_FILE):
@@ -1397,29 +1487,39 @@ def send(msg):
             try:
                 msg = next(it)
                 if msg.startswith("<<<POST_SCORED>>>"):
-                    try:
-                        data = json.loads(msg.replace("<<<POST_SCORED>>>", ""))
-                        clean_msg = f"   ⭐ [{data.get('score', 0):.1f}] r/{data.get('sub')}: {data.get('title', '')[:40]}..."
-                        yield send(clean_msg)
-                    except: pass
-                elif msg.startswith("<<<"): 
-                    pass # hide other protocol messages
+                    yield send(msg)
                 else:
                     yield send(msg)
             except StopIteration as e:
                 final_posts = e.value
                 break
 
         if final_posts:
-             # Save to results/ folder if running from web too
+            # Save to results/ folder
             os.makedirs("results", exist_ok=True)
-            # Use timestamp + query/criteria for filename
             q_safe = "".join(c for c in (args.get("criteria") or args.get("keywords") or "results") if c.isalnum() or c in (' ', '_'))[:50].strip().replace(" ", "_")
-            filename = f"results/{time.strftime('%Y%m%d_%H%M%S')}_{q_safe}.html"
-            _generate_html_report(final_posts, q_safe, filename)
-            yield send(f"💾 Saved report: {filename}")
+            ts = time.strftime('%Y%m%d_%H%M%S')
+
+            # Save JSON
+            result_payload = {
+                "query": args.get("keywords"),
+                "criteria": args.get("criteria"),
+                "timestamp": time.time(),
+                "config": dict(args),
+                "results": final_posts
+            }
+            json_path = f"results/{ts}_{q_safe}.json"
+            with open(json_path, "w") as f:
+                json.dump(result_payload, f, indent=2)
+
+            # Save HTML (optional but kept for legacy)
+            html_path = f"results/{ts}_{q_safe}.html"
+            _generate_html_report(final_posts, args.get("criteria") or args.get("keywords"), html_path)
+
+            yield send(f"💾 Saved JSON: {json_path}")
+            yield send(f"💾 Saved HTML: {html_path}")
 
-        # Send HTML formatted results/JSON for the frontend to render the list
+        # Send final data
         yield send(f"<<<APPROVED_POSTS>>>{json.dumps(final_posts)}")
 
     return Response(generate(), mimetype="text/event-stream")

diff --git a/config/blacklist.json b/config/blacklist.json
@@ -12606,5 +12606,37 @@
     "keywords": "assault",
     "criteria": "",
     "timestamp": 1768270781.3960922
+  },
+  "1l3l86b": {
+    "id": "1l3l86b",
+    "title": "Done",
+    "url": "https://www.reddit.com/r/TrueOffMyChest/comments/1l3l86b/done/",
+    "keywords": "(astronaut OR cosmonaut OR spacefarer) AND (theory OR hypothesis) AND (Reddit OR subreddit OR \"online forum\")",
+    "criteria": "reddit astronaut theory",
+    "timestamp": 1768607734.552211
+  },
+  "g1lphq": {
+    "id": "g1lphq",
+    "title": "Problems With r/NoSleep",
+    "url": "https://www.reddit.com/r/TrueOffMyChest/comments/g1lphq/problems_with_rnosleep/",
+    "keywords": "(astronaut OR cosmonaut OR spacefarer) AND (theory OR hypothesis) AND (Reddit OR subreddit OR \"online forum\")",
+    "criteria": "reddit astronaut theory",
+    "timestamp": 1768607734.552216
+  },
+  "3wqdhm": {
+    "id": "3wqdhm",
+    "title": "Me [28 M] with my GF [F] of 11 months duration, and I'm suddenly questioning our relationship after something she said",
+    "url": "https://www.reddit.com/r/relationships/comments/3wqdhm/me_28_m_with_my_gf_f_of_11_months_duration_and_im/",
+    "keywords": "(space traveler OR cosmonaut OR \"space explorer\") AND (speculation OR conjecture OR \"educated guess\") AND (Reddit OR r/ OR \"internet forum\")",
+    "criteria": "reddit astronaut theory",
+    "timestamp": 1768607907.662214
+  },
+  "3qvj6w": {
+    "id": "3qvj6w",
+    "title": null,
+    "url": null,
+    "keywords": "(space traveler OR cosmonaut OR \"space explorer\") AND (speculation OR conjecture OR \"educated guess\") AND (Reddit OR r/ OR \"internet forum\")",
+    "criteria": "reddit astronaut theory",
+    "timestamp": 1768607907.662227
   }
 }