-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmain.py
More file actions
executable file
·94 lines (78 loc) · 2.99 KB
/
main.py
File metadata and controls
executable file
·94 lines (78 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Main entry point for the Threads Data Analysis Tool
This script orchestrates the entire data pipeline:
- Data scraping from Threads.net
- Data analysis and processing
- Visualization generation
- Report creation
"""
import os
import sys
import asyncio
import argparse
# Ensure we're running from the correct directory: all controllers/utils use
# paths relative to this script, so pin the CWD to the script's own directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(script_dir)
# Import utility functions (project-local; resolved relative to script_dir)
from utils.helpers import (
check_requirements,
load_config,
setup_environment,
display_ascii_art
)
# Import controllers — one per pipeline stage (scrape/analyze/visualize/report)
from controllers.scrape_controller import scrape_data
from controllers.analysis_controller import analyze_data
from controllers.visualization_controller import visualize_all
from controllers.report_controller import generate_report
async def main():
    """
    Main entry point for the application.

    Handles:
    1. Command line argument parsing
    2. Initial setup and configuration (requirements check, config load,
       environment setup)
    3. Execution of the requested operation(s); 'all' runs every stage in
       sequence: scrape -> analyze -> visualize -> report

    Command line options:
    - scrape: Collect data from Threads.net
    - analyze: Process and analyze collected data
    - visualize: Generate visualization of analysis results
    - report: Create PDF report of findings
    - all: Execute all above operations in sequence
    """
    parser = argparse.ArgumentParser(description='Threads Data Analysis Tool')
    parser.add_argument('command', choices=['scrape', 'analyze', 'visualize', 'report', 'all'],
                        help='Command to execute: scrape, analyze, visualize, report, or all')
    args = parser.parse_args()

    # Initial setup
    check_requirements()
    config = load_config()
    setup_environment(config)

    # Tracks visualization output between pipeline stages. Stays None when
    # 'report' is run on its own; the report generator must accept None.
    visualization_paths = None

    # Execute requested command (membership test instead of repeated '==' chains)
    if args.command in ('scrape', 'all'):
        display_ascii_art('scrape')
        print("Starting data scraping...")
        scrape_data(config)

    if args.command in ('analyze', 'all'):
        display_ascii_art('analyze')
        print("Starting data analysis...")
        # Return value was previously bound to an unused local; awaited here
        # purely for its side effects (presumably persisting analysis output
        # for the later stages — TODO confirm against analyze_data).
        await analyze_data(config)

    if args.command in ('visualize', 'all'):
        display_ascii_art('visualize')
        print("Generating network visualization...")
        print("This may take a few minutes for large datasets...")
        visualization_paths = visualize_all(config)

    if args.command in ('report', 'all'):
        display_ascii_art('report')
        print("Generating pdf report...")
        # Pass visualization paths to the report generator if available
        report_path = generate_report(config, visualization_paths)
        if report_path:
            print(f"Report generated at: {report_path}")
# Script entry point: create an event loop and run the async pipeline to completion.
if __name__ == "__main__":
asyncio.run(main())