From 284aa5f4d12ff3f705e37bed9cc7a2afd5cd36f9 Mon Sep 17 00:00:00 2001
From: liquidpurple <67183976+liquidpurple@users.noreply.github.com>
Date: Sat, 21 Mar 2026 14:40:31 -0700
Subject: [PATCH] Fix empty Links/Issues/Visualization after loading saved
 crawl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

link_manager and issue_detector are None on fresh Crawler instances —
they're only initialized during start_crawl(). When
load_crawl_into_session() runs, the existing `if crawler.link_manager:`
guards silently skip data injection, so loading any saved crawl shows
URLs but no links, issues, or visualization edges.

This fix initializes LinkManager and IssueDetector (if they don't
already exist) before injecting data, and populates the
discovered/visited URL sets so stats display correctly.
---
 main.py | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/main.py b/main.py
index bb283ab..168681b 100644
--- a/main.py
+++ b/main.py
@@ -1067,19 +1067,36 @@ def load_crawl_into_session(crawl_id):
     crawler.base_url = crawl['base_url']
     crawler.base_domain = crawl['base_domain']
 
+
+
+    # Initialize link manager if needed (it's None until a crawl starts)
+    if not crawler.link_manager:
+        from src.core.link_manager import LinkManager
+        crawler.link_manager = LinkManager(crawl['base_domain'])
+
     # Load links into link manager
-    if crawler.link_manager:
-        crawler.link_manager.all_links = links
-        # Rebuild links_set
-        crawler.link_manager.links_set.clear()
-        for link in links:
-            link_key = f"{link['source_url']}|{link['target_url']}"
-            crawler.link_manager.links_set.add(link_key)
+    crawler.link_manager.all_links = links
+    # Rebuild links_set
+    crawler.link_manager.links_set.clear()
+    for link in links:
+        link_key = f"{link['source_url']}|{link['target_url']}"
+        crawler.link_manager.links_set.add(link_key)
+
+    # Populate discovered/visited URL sets so get_stats() returns correct counts
+    crawler.link_manager.all_discovered_urls = set(u['url'] for u in urls)
+    crawler.link_manager.visited_urls = set(u['url'] for u in urls)
+
+    # Initialize issue detector if needed (it's None until a crawl starts)
+    if not crawler.issue_detector:
+        from src.core.issue_detector import IssueDetector
+        crawler.issue_detector = IssueDetector()
 
     # Load issues into issue detector
-    if crawler.issue_detector:
-        crawler.issue_detector.detected_issues = issues
+    crawler.issue_detector.detected_issues = issues
+
+
+    # Set Flask session flag for force full refresh
     session['force_full_refresh'] = True
@@ -1437,4 +1454,4 @@ def open_browser():
     serve(app, host='0.0.0.0', port=5000, threads=8)
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()