From 6ee47192494e141451ab91d8693447b8875da9cc Mon Sep 17 00:00:00 2001 From: squid-protocol Date: Mon, 27 Apr 2026 08:27:15 -0400 Subject: [PATCH] feat: migrate Museum of Code to HTML and setup MkDocs CI/CD --- .github/workflows/deploy-docs.yml | 37 + docs/wiki/01-06-the-structural-rag-graph.md | 94 ++ docs/wiki/04-00-security_landscape.md | 98 ++ .../generate-llm-architecture-briefs.md | 139 +- .../museum-of-code/alphafold_teardown.html | 1395 +++++++++++++++++ docs/wiki/museum-of-code/index.md | 2 +- .../museum-of-code/teardown-of-alphafold.md | 91 -- gitgalaxy/README.md | 50 +- gitgalaxy/tools/ai_guardrails/README.md | 26 +- gitgalaxy/tools/cobol_to_cobol/README.md | 32 +- gitgalaxy/tools/cobol_to_java/README.md | 30 +- gitgalaxy/tools/compliance/README.md | 24 +- gitgalaxy/tools/network_auditing/README.md | 13 +- .../tools/supply_chain_security/README.md | 129 +- .../tools/terabyte_log_scanning/README.md | 16 +- mkdocs.yml | 47 +- 16 files changed, 1924 insertions(+), 299 deletions(-) create mode 100644 .github/workflows/deploy-docs.yml create mode 100644 docs/wiki/01-06-the-structural-rag-graph.md create mode 100644 docs/wiki/04-00-security_landscape.md create mode 100755 docs/wiki/museum-of-code/alphafold_teardown.html delete mode 100644 docs/wiki/museum-of-code/teardown-of-alphafold.md diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 00000000..c9e1a241 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,37 @@ +name: Deploy Museum of Code Docs + +on: + push: + branches: + - main # Triggers the action when you push to the main branch + workflow_dispatch: # Allows you to manually trigger the build from the GitHub UI + +# Grants the action permission to push the built site to the gh-pages branch +permissions: + contents: write + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required for git 
info/history (like last updated timestamps) + + - name: Configure Git Credentials + run: | + git config user.name github-actions[bot] + git config user.email 41898282+github-actions[bot]@users.noreply.github.com + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: 3.x + + - name: Install Dependencies + # Installs Material theme and the PyMdown extensions required by your mkdocs.yml + run: pip install mkdocs-material pymdown-extensions + + - name: Build and Deploy Docs + run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/docs/wiki/01-06-the-structural-rag-graph.md b/docs/wiki/01-06-the-structural-rag-graph.md new file mode 100644 index 00000000..9d9207bc --- /dev/null +++ b/docs/wiki/01-06-the-structural-rag-graph.md @@ -0,0 +1,94 @@ +# The Structural RAG Graph (Mapping the Magnitude) + +> **The Flaw in Modern RAG** +> +> Standard Retrieval-Augmented Generation (RAG) for codebases is fundamentally blind. When you feed a repository into a standard AI tool, the embedding engine slices the text arbitrarilyโ€”usually by token count (e.g., every 500 tokens). It rips functions in half, separates decorators from their classes, and destroys the architectural context. +> +> The GalaxyScope engine rejects arbitrary token slicing. By leveraging the AST-free **blAST Engine**, we chunk the codebase biologically. We parse the repository at the exact boundaries of its structural logic, extracting over 50 unique mathematical metrics per function, and rolling them up into a massive, highly relational Knowledge Graph. +> +> We don't just feed text to an AI; we give it a multi-dimensional, queryable map of the physical universe across 50+ programming languages. + +## The Tree-sitter Trap: Why We Abandoned ASTs + +The industry standard relies on Large Language Models (LLMs) or Abstract Syntax Trees (ASTs) like Tree-sitter. We use neither. + +ASTs were built to feed compilers, not to generate macroscopic architectural graphs. 
People are using a microscope to look at the stars. It is the wrong tool for the job. + +By using our AST-free physics engine, we trace data flow and build full function call graphs equally well, while exposing the massive blind spots of traditional ASTs. + +**What an AST physically cannot give you:** +* **Repo-scale perspective.** ASTs get lost in the weeds. We see the galaxy. +* **Simultaneous polyglot analysis.** ASTs require 50 different parsers. We map 50 languages in a single pass. +* **Compilation-free execution.** ASTs break if a dependency is missing. We don't care if it compiles. +* **Hyper-velocity.** ASTs take hours to build trees. We map 100,000 LOC/sec. +* **Human intent.** ASTs throw comments in the trash. We analyze "ghost mass" to capture developer context. +* **Graceful degradation.** ASTs panic on syntax errors. We map broken and legacy code flawlessly. +* **Ecosystem awareness.** ASTs ignore YAML, JSON, and configs. We map the entire infrastructure. + +**The GitGalaxy Advantage:** We don't build rigid syntax trees. We hunt universal structural patterns. Maximum speed. Zero compilation. Full architectural reality. + +## 1. Structural Chunking (The Satellite) + +Instead of slicing by lines of code, the engine uses the `func_start` optical sensors to identify the exact boundaries of executable logic. Every method, function, and subroutine becomes a discrete "Satellite." + +For every single Satellite, the engine extracts a **50+ Dimensional Vector** of pure architectural DNA based on our [Taxonomical Equivalence Map](./03-03-claim-3-taxonomy-map.md). + +For a single function, the database records: +* **The Raw Geometry:** Big-O Branching complexity, argument counts, total lines of code. +* **The Physics:** `io` (Disk/Network boundaries), `flux` (State mutation), `concurrency` (Threads/Async). +* **The Risks:** `danger` (Destructive OS commands), `bailout_hits` (Panics/Exits), `safety_neg` (Bypassed types). 
+* **The Telemetry:** `print_hits` vs `telemetry` (Amateur prints vs Professional logging). + +## 2. The Holographic Hierarchy (The Roll-Up) + +Because the data is captured deterministically at the lowest level of executable logic, the GalaxyScope naturally rolls this telemetry upward. The exact same 50-dimensional physics apply at every magnitude of the architecture. + +1. **Satellites (Functions / Methods):** The smallest unit of executable logic. We know exactly which function contains a hardcoded secret or a nested loop. +2. **Entities (Classes / Structs / Interfaces):** Satellites are rolled up into their parent Entities. We know which Class is generating the most state mutation (`flux`) or carrying the heaviest cognitive load. +3. **Stars (Files):** Entities are rolled up into Files. We cross-reference the aggregated physics against the [Dependency Radar](./02-01-pipeline-overview.md) to calculate the file's exact Blast Radius, PageRank, and network centrality. +4. **Constellations (Folders / Modules):** Stars are rolled up into directories. We can mathematically prove which neighborhood of the repository is decaying into technical debt, or which module is acting as a monolithic choke point. +5. **The Galaxy (The Repository):** Constellations are rolled up to provide the ultimate global metrics. A single snapshot of the entire systemic health, ecosystem dominance, and ML-inferred security posture. + +*From 1 RAG paradigm, you get 5 scales of architectural resolution.* + +## 3. The Queryable Knowledge Graph + +This magnitude of data is useless if it is trapped in abstract embeddings or heavy graph databases (like Neo4j) that AI agents struggle to query. + +As the pipeline concludes, the [SQLite Record Keeper](./02-21-record-keeper.md) serializes this massive web of relationships into a highly normalized, portable relational database. + +This transforms the codebase into a strict **Code Knowledge Graph**. 
+ +Because LLMs are trained on billions of lines of SQL, they natively understand how to navigate this structure. An Autonomous AI Agent doesn't need to guess where a vulnerability is hiding in a 10,000-file repository. It can simply query the database: + +```sql +-- "Find all highly-centralized files handling network I/O +-- that contain destructive logic bombs and lack test coverage." + +SELECT s.file_name, s.pagerank, s.risk_danger +FROM stars s +INNER JOIN dna_hits d ON s.star_id = d.star_id +WHERE s.pagerank > 1.5 + AND d.io > 0 + AND d.danger > 0 + AND d.test = 0; +``` + +## 4. The Ultimate Context Window + +By vectorizing the entire architecture into a deterministic database of exact regex hit counts, physical mass, and risk exposures, GitGalaxy provides a profound capability: **Omniscience without Compilation.** + +Whether you are scanning a 50-year-old COBOL banking monolith, a modern Rust microservice, or a scattered TypeScript monorepo, the engine standardizes the output. Agents, security teams, and architects receive the exact same multi-dimensional, queryable blueprint of reality. + +

+ +--- + +### 🌌 Powered by the blAST Engine + +This documentation is part of the [GitGalaxy Ecosystem](https://github.com/squid-protocol/gitgalaxy), an AST-free, compilation-free heuristic knowledge graph engine. + +* 📖 **[Previous: How to Read the Galaxy](./01-05-how-to-read-the-galaxy.md)** +* 📖 **[Next: Pipeline Overview](./02-01-pipeline-overview.md)** +* 🪐 **[Explore the GitHub Repository](https://github.com/squid-protocol/gitgalaxy)** for code, tools, and updates. +* 🔭 **[Visualize your own repository at GitGalaxy.io](https://gitgalaxy.io/)** using our interactive 3D WebGPU dashboard. \ No newline at end of file diff --git a/docs/wiki/04-00-security_landscape.md b/docs/wiki/04-00-security_landscape.md new file mode 100644 index 00000000..305a557f --- /dev/null +++ b/docs/wiki/04-00-security_landscape.md @@ -0,0 +1,98 @@ +# The Competitive Landscape (Defying the Status Quo) + +> **Challenging the Industry Standard** +> +> The DevSecOps industry is dominated by massive platforms that rely on slow compilation, rigid Abstract Syntax Trees (ASTs), and blind trust in dependency manifests. They hunt for past examples of vulnerabilities, creating massive computational bottlenecks in modern CI/CD pipelines. +> +> GitGalaxy was engineered to break this paradigm. Utilizing our custom engine, we bypass compilation entirely, executing zero-trust physical audits across 50+ languages simultaneously. By defining threats through minimal keyword permutation combinations rather than waiting for external CVE databases, we calculate risk exposures, build full function call graphs for reachability, and intercept zero-days at over 100,000 lines of code per second. +> +> Here is exactly how our architecture stands against the industry titans. + +## The Status Quo vs. 
The GitGalaxy Engine + +### **Black Duck (Synopsys)** +* **The Status Quo:** Black Duck practically invented the Software Composition Analysis (SCA) market and remains the gold standard for open-source license compliance. It provides unparalleled visibility into the open-source components nested within complex enterprise applications. +* **How We Beat Them:** Black Duck relies on signature matching to find past examples of known vulnerabilities. We do not wait for past examples. We define threats by searching for the minimal keyword permutation combinations that expose zero-days. We scan all the physical files, not just manifests. Black Duck operates as a black box with high false positives. We offer lower false positive rates by allowing full customizability of all keyword regex hits with about a dozen unique whitelists and blacklists to minimize alert fatigue. + +### **Checkmarx** +* **The Status Quo:** Checkmarx is an absolute powerhouse in enterprise SAST. They are renowned for their rigorous data-flow and control-flow analysis, which is highly trusted by enterprises to catch complex vulnerabilities like SQL injection and cross-site scripting. +* **How We Beat Them:** Checkmarx requires slow compilation to map execution paths. We are 100% compilation-free. We build full function call graphs for reachability just like they do, but without compiling. Our faster speed scanning allows for true, frictionless CI/CD integration. They operate as a rigid black box. We deliver drastically lower false positive rates via full customizability of all keyword regex hits and a dozen unique whitelist/blacklist filters. We handle 50 languages natively; they handle a fraction. + +### **CodeQL (GitHub Advanced Security)** +* **The Status Quo:** CodeQL is a brilliant semantic analysis engine that treats code as a database. It allows security researchers to deeply query logical flaws and complex data-flow paths to find bespoke, project-specific vulnerabilities. 
+* **How We Beat Them:** CodeQL requires a full database build and successful compilation before it can run. We are entirely compilation-free. We build full function call graphs for reachability without needing a compiled database. Our faster speed scanning allows for immediate CI/CD integration. We scan all files across 50 languages in a single pass because keywords are universally expressed in every language. + +### **Dependabot (GitHub Native)** +* **The Status Quo:** Dependabot is an amazing, frictionless automation tool built directly into GitHub that excels at keeping your dependency manifests up-to-date and alerting you to known vulnerabilities. It is the baseline standard for repository hygiene. +* **How We Beat Them:** Dependabot only reads manifests and only checks for past examples of vulnerabilities. We scan all the physical files. We don't rely on historical CVE databases. We hunt for the minimal keyword permutation combinations that define a threat type. We natively handle 50 languages because keywords are universally expressed in every language. + +### **Endor Labs** +* **The Status Quo:** Endor Labs is a top-tier innovator in dependency lifecycle management. Their semantic reachability analysis and call graphs are fantastic for proving whether a vulnerable library is actually executed, saving security teams from massive alert fatigue. +* **How We Beat Them:** Endor Labs requires full build environments or deep AST generation to build their call graphs. We are completely compilation-free. We build full function call graphs for reachability instantly without compiling. Our faster speed scanning allows for immediate CI/CD integration. We scan all files across 50 languages natively. + +### **govulncheck (The Go Ecosystem Scanner)** +* **The Status Quo:** Designed specifically for the Go ecosystem, `govulncheck` is a brilliant, highly accurate tool that uses call graph analysis to prove if a vulnerable Go module is actually executed by the application. 
+* **How We Beat Them:** `govulncheck` requires compilation and only handles a single language. We handle 50 languages natively because keywords are universally expressed in every language. We build full function call graphs for cross-language reachability without compiling. Our faster speed scanning provides immediate CI/CD feedback. + +### **npm audit (Native Ecosystem Scanners)** +* **The Status Quo:** As the ubiquitous tool for Node.js developers, `npm audit` is unmatched for providing immediate, native feedback on known dependency vulnerabilities directly from the GitHub Advisory Database. +* **How We Beat Them:** `npm audit` only handles one ecosystem and relies entirely on manifests to find past examples of vulnerabilities. We handle 50 languages natively. We scan all the actual files. We define threats using broader definitions, hunting for minimal keyword permutation combinations instead of waiting for a public database to be updated. + +### **Phylum** +* **The Status Quo:** Phylum is a fantastic pioneer in software supply chain security. They are highly respected for their ecosystem-specific sandboxing, author reputation analytics, and deep analysis of installation scripts to block malicious actors. +* **How We Beat Them:** Phylum focuses on ecosystem-specific sandboxing. We handle 50 languages natively because keywords are universally expressed. We scan all the physical files without the heavy overhead of behavior emulation. Our faster speed scanning allows for seamless CI/CD integration. We provide full customizability of all rules with a dozen whitelists and blacklists to minimize alert fatigue. + +### **Semgrep (Semantic Grep)** +* **The Status Quo:** Semgrep is an exceptional, lightweight SAST tool beloved by developers for replacing clunky regex with smart, semantic pattern matching. It is fast, customizable, and allows security teams to write rules without needing to compile the code. 
+* **How We Beat Them:** Semgrep builds semantic trees to understand logic. We bypass trees entirely. Our faster speed scanning allows for instantaneous CI/CD integration. We scan all files across 50 languages natively. We define threats using minimal keyword permutation combinations. We build full function call graphs for reachability, tracking data across massive codebases instantly. + +### **Snyk** +* **The Status Quo:** Snyk is a massive, developer-first juggernaut in the DevSecOps space. They excel at identifying vulnerable open-source dependencies, container misconfigurations, and standard SAST flaws using their proprietary vulnerability database. +* **How We Beat Them:** Snyk relies on checking manifests against cloud databases of past examples. We scan all the physical files. We use broader definitions of threats, searching for the minimal keyword permutation combinations that define a zero-day. Snyk acts as a black box. We guarantee lower false positive rates by allowing full customizability of all keyword regex hits with about a dozen unique whitelists and blacklists. + +### **Socket.dev** +* **The Status Quo:** Socket is an incredibly innovative supply chain security tool that proactively detects malicious behavior in open-source packages (like unexpected network calls) rather than just looking at published CVEs. +* **How We Beat Them:** Socket analyzes package behavior via external cloud APIs for specific ecosystems. We handle 50 languages locally because keywords are universally expressed in every language. We scan all the files. We define threats using minimal keyword permutation combinations. We offer full customizability with a dozen whitelists and blacklists to drive down false positive rates. + +### **SonarQube** +* **The Status Quo:** SonarQube is the undeniable gold standard for mature Static Application Security Testing (SAST) and code quality. 
Their deep Abstract Syntax Tree (AST) generation provides thorough insights for compiled languages and technical debt. +* **How We Beat Them:** SonarQube requires successful builds and compilation. We are entirely compilation-free. We build full function call graphs for reachability without compiling. Our faster speed scanning allows for frictionless CI/CD integration. They are notorious for high false positive rates. We drastically lower false positive rates by allowing full customizability of all keyword regex hits alongside a dozen unique whitelists and blacklists. + +### **Trivy (Aqua Security)** +* **The Status Quo:** Trivy is an industry-standard, incredibly reliable scanner for containers and repositories. It is lightning-fast at parsing manifest files and cross-referencing them against known CVE databases for baseline compliance. +* **How We Beat Them:** Trivy parses manifests and checks against past vulnerability examples. We scan all the physical files. We identify threats using broader definitions based on minimal keyword permutation combinations. We handle 50 languages natively because keywords are universally expressed in every language. + +### **Veracode** +* **The Status Quo:** Veracode is an absolute titan in enterprise application security, offering a comprehensive suite of SAST, DAST, and SCA tools trusted by global corporations. Their ability to scan compiled binaries makes them a staple in mature DevSecOps compliance programs. +* **How We Beat Them:** Veracode requires code compilation, packaging, and cloud analysis. We are completely compilation-free. We build full function call graphs for reachability without compiling. Our faster speed scanning allows for immediate CI/CD integration. They operate as a high-false-positive black box. We ensure lower false positive rates by allowing full customizability of all keyword regex hits with about a dozen unique whitelist and blacklist controls. 
+ +--- + +## The Architecture of Disruption (How We Do It) + +Talk is cheap in the cybersecurity industry. Everyone claims to be faster and more accurate. Here is exactly how the GitGalaxy physics engine physically achieves these capabilities. + +### 1. Zero Compilation. Zero Delays. +Standard enterprise scanners are paralyzed until a codebase successfully builds. We do not care if the code compiles. By utilizing our proprietary paradigm, we bypass rigid logic trees and parse the raw structural reality of the text itself. +๐Ÿ“– **[Read the Proof: The Paradigm (No Compilation)](./01-03-the-blast-paradigm.md)** + +### 2. Speed, Scope, and Minimal Permutations +How do we scan 50 languages natively without relying on 50 different language parsers? Because malicious intent and architectural structures are universally expressed. We hunt the DNAโ€”the minimal keyword permutation combinationsโ€”not the syntax. +๐Ÿ“– **[Read the Proof: Speed, Scope & Search Strategies](./03-01-claim-1-search-strategies.md)** + +### 3. The Physical Pipeline +To process 100,000+ lines of code per second and map complex reachability, you need a fundamentally different data pipeline. From dropping inert binaries to mapping function call graphs, here is the exact sequence of events that powers the engine. +๐Ÿ“– **[Read the Proof: Pipeline Overview (How It Works)](./02-01-pipeline-overview.md)** + +

+ +--- + +### ๐ŸŒŒ Powered by the Engine + +This documentation is part of the [GitGalaxy Ecosystem](https://github.com/squid-protocol/gitgalaxy), an AST-free, compilation-free heuristic knowledge graph engine. + +* ๐Ÿ“– **[Previous: Future Outlooks](./03-08-future-outlooks.md)** +* ๐Ÿ“– **[Next: Full API Network Map](./04-01-full-api-network-map.md)** +* ๐Ÿช **[Explore the GitHub Repository](https://github.com/squid-protocol/gitgalaxy)** for code, tools, and updates. +* ๐Ÿ”ญ **[Visualize your own repository at GitGalaxy.io](https://gitgalaxy.io/)** using our interactive 3D WebGPU dashboard. \ No newline at end of file diff --git a/docs/wiki/cookbook/generate-llm-architecture-briefs.md b/docs/wiki/cookbook/generate-llm-architecture-briefs.md index f835a47e..e1bc94e4 100644 --- a/docs/wiki/cookbook/generate-llm-architecture-briefs.md +++ b/docs/wiki/cookbook/generate-llm-architecture-briefs.md @@ -1,35 +1,58 @@ -# How to Generate LLM-Optimized Architecture Briefs +# Recipe: Generate LLM-Optimized Architecture Briefs -Onboarding a new developerโ€”or an AI coding agentโ€”onto a massive, monolithic codebase takes weeks. If you simply dump raw source code into an LLM (like Claude or GPT-4), it will quickly blow out the context window and hallucinate architectural relationships because it lacks global graph visibility. +Onboarding a new developerโ€”or an AI coding agent (like SWE-agent or Claude)โ€”onto a massive, monolithic codebase takes weeks. -GitGalaxy solves this using the **LLM Recorder**. It condenses the entire repository's physical constraints, dependency graphs, historical Git churn, and structural risk into a single, highly-optimized Markdown brief (`_llm.md`). +If you simply dump raw source code into standard RAG (Retrieval-Augmented Generation), the embedding engine slices the text arbitrarily by token count. It rips functions in half, destroys architectural context, and guarantees the LLM will hallucinate structural relationships. 
-This brief acts as a "Rosetta Stone," allowing any standard LLM to instantly understand the ecosystem with the exact same mathematical context as a Principal Systems Architect. +GitGalaxy solves this using the **LLM Recorder**. It condenses the entire repository's physical constraints, dependency graphs, historical Git churn, and structural risk into a single, highly-optimized Markdown brief (`_llm.md`). This brief acts as a "Rosetta Stone," allowing any standard LLM to instantly understand the ecosystem with the exact same mathematical context as a Principal Systems Architect. -## The AI Translation Layer +## The Tree-sitter Trap (Why Standard AI Fails) -The LLM Recorder bridges the gap between the raw mathematical output of the GitGalaxy engines (PageRank, Shannon Entropy, Big-O depths) and natural language reasoning. +The industry standard for feeding code to AI relies on Abstract Syntax Trees (ASTs) like Tree-sitter. ASTs were built to feed compilers, not to generate macroscopic architectural graphs. -### 1. Execute the Scan -You can run the full GalaxyScope pipeline, or pass the `--llm-only` flag to exclusively generate the AI artifacts without rendering the 3D WebGPU payload. +**What an AST physically cannot give an AI:** +* **Repo-scale perspective:** ASTs get lost in the weeds of syntax. +* **Simultaneous polyglot analysis:** ASTs require 50 different parsers. We map 50 languages in a single pass. +* **Compilation-free execution:** ASTs break if a dependency is missing. We don't care if the code compiles. +* **Human intent:** ASTs throw comments in the trash. We analyze "ghost mass" to capture developer context and documentation risk. +* **Graceful degradation:** ASTs panic on syntax errors. We map broken and legacy code flawlessly. + +## The Solution: The Structural RAG Graph + +GitGalaxy rejects arbitrary token slicing and rigid ASTs. By leveraging our AST-free **blAST Engine**, we chunk the codebase biologically. 
+ +We parse the repository at the exact boundaries of its structural logic, extracting over 50 unique mathematical metrics per function, and rolling them up into a **5-Scale Holographic Hierarchy**: + +1. **Satellites (Functions/Methods):** The exact geometry, I/O boundaries, and state mutations of every logic block. +2. **Entities (Classes/Structs):** Satellites roll up into parent entities to measure cognitive load density. +3. **Stars (Files):** Entities roll up into files to calculate exact Blast Radius and PageRank via our dependency radar. +4. **Constellations (Folders):** Stars roll up to identify neighborhood-level technical debt. +5. **The Galaxy (Repository):** A single snapshot of systemic health and ML-inferred security posture. + +--- + +## 1. Execute the Scan + +To generate the LLM Architecture Brief, run the GalaxyScope orchestrator and pass the `--llm-only` flag. This instructs the chassis to bypass the heavy SQLite and WebGPU serialization steps, routing the data exclusively to the AI translation layer. ```bash galaxyscope /path/to/target_repository --llm-only ``` -### 2. Feed the Brief to your AI -The engine outputs a `_galaxy_llm.md` file. Upload this file directly into ChatGPT, Claude, or your local autonomous agent framework (like SWE-agent). +**Output:** The engine will generate a timestamped `_galaxy_llm.md` file. + +## 2. Feed the Brief to your AI -The brief strictly categorizes the codebase into actionable intelligence: +Upload the generated Markdown file directly into ChatGPT, Claude, or your local autonomous agent framework. The brief strictly categorizes the codebase into actionable intelligence: * **The 13-Point Risk Physics:** Summarizes the Min/Max/Mean of every risk vector (Cognitive Load, State Flux, Tech Debt) across the entire repository. * **Architectural Choke Points:** Identifies "God Nodes" (highest 'Imported By' / Blast Radius) and "Orchestrators" (highest outbound imports / fragility). 
-* **The Hotspot Matrix:** Cross-references historical Git volatility (Churn) against high Cognitive Load to pinpoint the exact files causing the most developer friction. +* **The Hotspot Matrix:** Cross-references historical Git volatility (Churn) against high Risk to pinpoint the exact files causing the most developer friction. * **Systemic Network Bottlenecks:** Uses N-Dimensional physics to flag catastrophic intersections, such as the **"House of Cards"** (files that are deeply embedded in the graph *and* possess extreme Error/Exception exposure). * **Key Person Dependencies:** Flags massive, load-bearing files written almost entirely by a single developer (High Silo Risk / Bus Factor). -### 3. The Enforced System Prompt -To prevent the LLM from outputting sensationalized, useless jargon, GitGalaxy automatically injects a strict System Prompt at the bottom of the brief. +### The Enforced System Prompt +To prevent the LLM from outputting sensationalized, useless jargon, GitGalaxy automatically injects a strict System Prompt at the top of the brief, forcing the AI to evaluate risk density rather than subjective "code quality." ```markdown ## AI SYSTEM INSTRUCTIONS (OUTPUT FORMAT) @@ -41,6 +64,90 @@ To prevent the LLM from outputting sensationalized, useless jargon, GitGalaxy au > 5. Recommended Next Steps (Refactoring for Stability) ``` -By providing the AI with mathematically proven network topology rather than raw text, you guarantee deterministic, actionable refactoring advice. +--- + +## 3. Real-World Payload Example + +Below is an actual, unedited architecture brief generated by GitGalaxy for the open-source SAP `abap-cleaner` repository. + +If you paste this exact text into an LLM like Claude 3.5 Sonnet or GPT-4o, it will instantly possess a complete, mathematical understanding of the 118,000-line Java architecture without ever needing to read the source code. + +
+๐Ÿ‘๏ธ Expand to view the raw `abap-cleaner_galaxy_llm.md` output + +```markdown +# ARCHITECTURAL_BRIEF: abap-cleaner +> INSTRUCTION: Deterministic Syntactic Physics. Base architectural insights on Mass, DNA, and Risk overlays. + +## 0. FORENSIC TRACEABILITY +| Metadata | Value | +|---|---| +| **Engine** | `GitGalaxy Scope v6.2.0 (Delta Mode)` | +| **Target Path** | `/srv/storage_16tb/projects/gitgalaxy/data/abap-cleaner` | +| **Scan Duration** | `2.38s` | +| **Git Branch** | `main` | + +## 0.5 AI THREAT AUDIT STATUS +> **โœ… SECURE_NO_MALWARE_DETECTED** +> XGBoost Structural DNA model found no malicious artifacts. + +## 1. SYSTEM ROLE & PHILOSOPHY +> Code is art. Logic is art. Systems engineering is art. +> You are analyzing software architecture through the lens of GitGalaxy... +> [System Prompts Omitted for Brevity] + +## 3. MACRO STATE +| Metric | Value | +|---|---| +| Total Artifacts | 731 | +| Visible Matter (Scanned) | 550 | +| Dark Matter (Non-scanned) | 362 | +| Total LOC | 118063 | +| % Scanned of codebase | 75.2% | +| Dominant Lang | JAVA | + +## 7. ARCHITECTURAL CHOKE POINTS & DEPENDENCIES +### Top I/O Latency Risks +- `com.sap.adt.abapcleaner/src/com/sap/adt/abapcleaner/base/FileSystem.java` (Hits: 36) +- `com.sap.adt.abapcleaner/src/com/sap/adt/abapcleaner/rulehelpers/DdlAnalyzer.java` (Hits: 15) + +### Top 5 Orchestrators (Highest 'Imports' / Fragility Index) +1. **AbapCleanerHandlerBase.java** โ€” 47 outbound dependencies +2. **RuleTestBase.java** โ€” 40 outbound dependencies +3. **FrmProfiles.java** โ€” 36 outbound dependencies + +## 8.5 ALGORITHMIC & DATABASE BOTTLENECKS +### Highest Time Complexity (Big-O) +- `compareTo` (@ `CompareDoc.java`) -> **O(2^N) [Recursive]** +- `executeOn` (@ `RuleForLogicalExpressions.java`) -> **O(2^N) [Recursive]** + +## 11. CUMULATIVE RISK HITLIST (Top Highest Risk Files) +### 1. 
`com.sap.adt.abapcleaner/src/com/sap/adt/abapcleaner/parser/Token.java` (JAVA) -> Cumulative Risk: **588.31** +- **Archetype:** `file_cluster_3` (Distance: 14.588 IQR) +- **Mass:** 3845.88 | **LOC:** 3949 | **CtrlFlow:** 64.3% | **Silo Risk:** 100.0% +- **Primary Risk Drivers:** Spec Match (100.0%), Documentation (100.0%), Safety Score (95.27%), Tech Debt (92.93%) +- **Heaviest Functions:** `determineMemoryAccessType` (Impact: 483.6), `getLastTokenOfLogicalExpression` (Impact: 336.4) + +## 13.5 STRATEGIC REFACTORING TARGETS (Volatility & Silos) +### ๐Ÿ”ฅ The Hotspot Matrix (High Volatility + High Risk) +These files are messy, complex, and modified frequently. They are the primary source of developer friction. +- `Command.java` -> Churn: **73.11%** | Cog Load: 20.60% | Debt: 86.26% +- `Token.java` -> Churn: **58.4%** | Cog Load: 30.97% | Debt: 92.93% + +### ๐Ÿ‘ค Key Person Dependencies (High Impact + Siloed Knowledge) +These are massive, load-bearing files written almost entirely by a single developer. They represent severe 'Bus Factor' risk. +- `Token.java` -> **Jรถrg-Michael Grassau** (100.0% isolated ownership) +- `Command.java` -> **Jรถrg-Michael Grassau** (100.0% isolated ownership) +``` +
+ +

+ +--- + +### ๐ŸŒŒ Powered by the blAST Engine + +This documentation is part of the [GitGalaxy Ecosystem](https://github.com/squid-protocol/gitgalaxy), an AST-free, compilation-free heuristic knowledge graph engine. -> **Read the full technical specification:** [LLM Recorder](../02-14-llm-recorder.md) \ No newline at end of file +* ๐Ÿ“– **[Deep Dive: The LLM Recorder Engine](../02-14-llm-recorder.md)** +* ๐Ÿช **[Explore the GitHub Repository](https://github.com/squid-protocol/gitgalaxy)** for code, tools, and updates. \ No newline at end of file diff --git a/docs/wiki/museum-of-code/alphafold_teardown.html b/docs/wiki/museum-of-code/alphafold_teardown.html new file mode 100755 index 00000000..381e4f9a --- /dev/null +++ b/docs/wiki/museum-of-code/alphafold_teardown.html @@ -0,0 +1,1395 @@ + + + + + + Museum of Code: alphafold_2018 + + + + + + +
+
Museum of Code
+

alphafold_2018

+

The Atomic Scale Origami Algorithm that Changed Humanity.

+
+ +
+

+ + Historical Significance +

+ +
+
+
Status
+
The Atomic Scale Origami Algorithm
+
+
+
Why it matters
+
It brought Protein Shape into Focus
+
+
+
Architects
+
John Jumper and the DeepMind Team
+
+
+ +
+

For 50 years, the 'Folding Problem' was the holy grail of biology. We knew the ingredients of life but we couldn't predict their shape.

And in biology, shape is function—it dictates how drugs work, how diseases spread, and how life survives.

AlphaFold changed the rules of the game. For decades, the time between a gene’s sequence and a protein’s shape was a lifetime of lab work; AlphaFold collapsed that time to seconds.

It’s like humming a simple melody into a computer and then having it instantly transformed into an award-winning top-of-the-charts song, in any genre, again and again and again.

But instead of music, it produces the shapes of proteins, the very machines that keep us alive and go awry in different diseases. We can finally predict the machines that control our health and life on this planet.

+
+
+ + + +
+ +
+

+ + Architectural Synthesis +

+ +
+

1. Information Flow & Purpose (The Executive Summary)
+ This is not a traditional software application; it is a highly specialized, brute-force mathematical pipeline. Data flows from massive pre-compiled weight tensors (the 13 binary .pb and .h5 "Dark Matter" files) directly into tightly encapsulated Python scripts. With an Encapsulation Ratio of 1.0 and a mere 1,756 lines of executable code driving the entire system, the architecture relies on intense computational density rather than sprawling object-oriented abstraction.

+ +

2. Notable Structures & Topology
+ The dependency graph is startlingly flat. A network topology with an Average Path Length of 0.0, 0 Articulation Points, and 0.0% Cyclic Loop Density indicates that these files do not form a deep, interconnected web. Instead, they act as highly isolated utility scripts processing data in sequence. However, this flat structure incurs a massive Architectural Drift (Z-Score: 4.66). The system heavily deviates from standard Python conventions, sacrificing modularity for immediate, linear execution.

+ +

3. Security & Vulnerabilities
+ From a zero-trust perspective, the ecosystem is perfectly sterile—0 Shadow APIs, 0 Typosquatting hits, and 0 Supply Chain Anomalies. However, operational safety is severely compromised by a 40.9% Verification Risk and only 1 active Test Suite. This is the definitive hallmark of "Academic Research Code": it was built rapidly to prove a thesis for a publication, not test-driven for enterprise production. It relies entirely on the mathematical brilliance of its authors rather than programmatic guardrails.

+ +

4. Outliers & Extremes
+ The structural extremities reveal the friction of deployment. contacts_network.py acts as a "Blind Bottleneck"—a God Node calculating spatial distances at an agonizing O(N^6) time complexity, yet crippled by a 100% Documentation Risk. Simultaneously, the deployment pipeline itself (run_eval.sh) collapses under 100% Cognitive Load and 75% Tech Debt. The team was clearly focused on the neural network, treating the operational shell as a brittle afterthought, further evidenced by a chaotic 51.5% "Civil War" formatting clash (Tabs vs. Spaces) across the codebase.

+ +

5. Recommended Next Steps (Refactoring for Stability)
+

    +
  • Decouple the God Node: Fracture contacts_network.py into distinct, documented modules to lower the cognitive load and isolate the hazardous O(N^6) spatial logic.
  • +
  • Establish Verification Guardrails: Introduce unit test coverage to the core contacts.py orchestrators to reduce the 41% Verification Risk before attempting to scale the algorithm.
  • +
  • Standardize the Deployment Shell: Rewrite the brittle run_eval.sh script into a formalized Python orchestration tool to eliminate the extreme Tech Debt and cognitive load at the execution boundary.
  • +

+
+ +
+ +
+
+ +
+
+

Global System Scorecard

+ +
+
+

Ecosystem Composition

+
+
1
Test Suites
+
5
Doc/Prose
+
1
Build/Make
+
0
Config/JSON
+
+
+ +
+

Network Health

+
+
Cluster 3
Global Archetype
+
4.66
Z-Score Drift
+
0.0%
Cyclic Loop Density
+
+
+ +
+

Zero-Trust Audit

+
+
0
Typosquat Hits
+
0
Binary Anomalies
+
0
Blacklist/Unknown Pkgs
+
+
+
+ +
+
+

Architecture & Scale

+
+
33
Total Files
+
1756
Coding LOC
+
6
Total Classes
+
1.0
Encapsulation Ratio
+
+
+ +
+

Extended Topology

+
+
0.0
Avg Path Length
+
0
Articulation Points
+
0.0
Assortativity
+
0.0
Modularity
+
+
+ +
+

Extended Security

+
+
0
Shadow APIs
+
+
+
+ +
+

Global Risk Exposures (Averages)

+
+
7.57
Cog Load
+
0.0
Deep Churn
+
5.73
Error/Safety Risk
+
21.6
Tech Debt
+
21.84
Doc Risk
+
40.93
Verification Risk
+ +
0.0
Stability (Heat)
+
0.0
Graveyard
+
1.4
API Exposure
+
0.38
Concurrency
+
5.98
State Flux
+
51.52
Civil War (Tabs/Spaces)
+
+
+
+
+ +
+
+ + +
+
+ +
+
+ +
+
+

What to look for:

+

Sort by Fragility to find orchestrators that pull the system together—these are highly coupled and break easily if external APIs change. Sort by Popularity to find the load-bearing pillars; if these fail, the ecosystem collapses. A healthy system balances mass across many nodes; a fragile one consolidates it into a single 'God Node'.

+
+
+ + + + + + + + + + + +
File NameEcosystem Role ↕Structural Mass ↕Fragility ↕Popularity ↕
+
+
+
+ +
+ +
+
+

What to look for:

+

This matrix exposes the multi-dimensional technical debt of the architecture. Sort by Cumulative Risk to prioritize your refactoring efforts. Look for the deadly trio: High Cog Load, High State Flux, and High Test Risk. A file with low 'Cog Load' but extreme 'State Flux' is easy to read but mutates data dangerously.

+
+
+ + + + + + + + + + + + + + + + + +
File NameCumulative Risk ↕Cognitive Load ↕Tech Debt ↕State Flux ↕Test Risk ↕Safety Risk ↕Concurrency ↕API Exposure ↕Graveyard ↕Churn ↕
+
+
+
+
+ +
+
+ + +
+
+

What to look for:

+

This is the raw heuristic telemetry driving the physics engine. Use this to manually verify the automated risk scores and investigate specifically why a file was flagged. Scan for severe outliers in structural signatures—from high Struct Branch density to dangerous State Bailout Hits.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
File Name โ†•Mass โ†•Max Big-O โ†•Cumulative Risk โ†•File Archetype โ†•LOC โ†•Cog Raw โ†•Ownership Entropy โ†•Silo Risk โ†•Raw Churn Freq โ†•Pagerank Score โ†•Closeness Score โ†•Producer Ratio โ†•Avg Func Loc โ†•Avg Func Complexity โ†•Max Func Complexity โ†•Avg Func Args โ†•Func Complexity Gini โ†•Func Internal Density โ†•Dependency Density โ†•Encapsulation Ratio โ†•Ai Threat Confidence โ†•Func Z Max โ†•Func Z Mean โ†•Func Z Median โ†•Pct Z Above 5 โ†•Pct Z Above 15 โ†•Repo Z Score โ†•Is Malware โ†•Has Credentials โ†•Binary Anomaly โ†•Glassworm Flag โ†•Token Mass โ†•Financial Read Cost โ†•Agentic Black Hole โ†•Requires Hitl โ†•Appsec Rce Funnel โ†•Appsec God Mode โ†•Appsec Exfiltration โ†•Hallucination Zone โ†•Silent Mutation Risk โ†•Struct Branch โ†•Struct Linear โ†•Struct Args โ†•Struct Func Start โ†•Struct Class Start โ†•Def Safety โ†•State Safety Neg โ†•State Danger โ†•Arch Io โ†•Arch Api โ†•State Flux โ†•State Graveyard โ†•Def Doc โ†•Def Test โ†•Arch Concurrency โ†•Arch Ui Framework โ†•Struct Closures โ†•Arch Globals โ†•Struct Decorators โ†•Struct Generics โ†•Struct Comprehensions โ†•Arch Scientific โ†•State Heat Triggers โ†•Arch Import โ†•Def Ownership โ†•State Planned Debt โ†•State Fragile Debt โ†•Def Spec Exposure โ†•Arch Ssr Boundaries โ†•Arch Events โ†•Arch Dependency Injection โ†•Struct Macros โ†•State Pointers โ†•State Memory Alloc โ†•Arch Inline Asm โ†•Def Telemetry โ†•State Print Hits โ†•State Cast Hits โ†•State Bailout Hits โ†•State Halt Hits โ†•Bitwise Hits โ†•Def Sync Locks โ†•Def Freeze Hits โ†•Def Cleanup โ†•Def Encapsulation โ†•Def Listeners โ†•Def Test Skip โ†•Struct Tabs โ†•Struct Spaces โ†•Arch Hardware โ†•Arch Crypto โ†•Def Auth โ†•Arch Ipc โ†•Arch Feature Flags โ†•Arch Serialization โ†•Arch Regex โ†•Arch Time โ†•Llm Api โ†•Llm Orchestrator โ†•Llm Vector Store โ†•Llm Local Compute โ†•Ai Tools โ†•Ai Memory โ†•Ai Logic Loop โ†•Ml Traditional โ†•Dl Frameworks โ†•Lazy Evaluation โ†•Vectorized Math โ†•Struct Var Decl โ†•Struct Camel Case 
โ†•Struct Snake Case โ†•Struct Pascal Case โ†•Struct Upper Case โ†•Struct Short Vars โ†•Struct Long Vars โ†•State Slop Duplicates โ†•State Slop Orphans โ†•Threat Obfuscated โ†•Threat Bypasses โ†•Threat Network Hooks โ†•Threat Eval Exec โ†•Threat Env Mutation โ†•Sec Graveyard โ†•Threat Crypto Math โ†•Threat Stego Imports โ†•Threat Homoglyphs โ†•Threat Private Info โ†•Threat Extension Mismatch โ†•Threat Entropy โ†•Threat Tainted Injection โ†•Prompt Injection โ†•Agentic Rce โ†•
+
+
+
+
+ +
+
+ + +
+
+

What to look for:

+

Here we isolate the architecture down to the atomic level. Sort by Big-O to find recursive or highly nested algorithms that threaten performance (O(N^6) or worse). Sort by Impact Mass to locate massive, monolithic functions that violate the Single Responsibility Principle and need to be fractured into smaller, testable units.

+
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Function Name โ†•Parent File โ†•Impact Mass โ†•Big-O โ†•Recursive? โ†•Function Archetype โ†•LOC โ†•Args โ†•Outbound Calls โ†•Function Drift โ†•Token Mass โ†•Keyword Density โ†•Struct Branch โ†•Struct Linear โ†•Struct Args โ†•Struct Func Start โ†•Struct Class Start โ†•Def Safety โ†•State Safety Neg โ†•State Danger โ†•Arch Io โ†•Arch Api โ†•State Flux โ†•State Graveyard โ†•Def Doc โ†•Def Test โ†•Arch Concurrency โ†•Arch Ui Framework โ†•Struct Closures โ†•Arch Globals โ†•Struct Decorators โ†•Struct Generics โ†•Struct Comprehensions โ†•Arch Scientific โ†•State Heat Triggers โ†•Arch Import โ†•Def Ownership โ†•State Planned Debt โ†•State Fragile Debt โ†•Def Spec Exposure โ†•Civil War โ†•Arch Ssr Boundaries โ†•Arch Events โ†•Arch Dependency Injection โ†•Struct Macros โ†•State Pointers โ†•State Memory Alloc โ†•Arch Inline Asm โ†•Def Telemetry โ†•State Print Hits โ†•State Cast Hits โ†•State Bailout Hits โ†•State Halt Hits โ†•Bitwise Hits โ†•Def Sync Locks โ†•Def Freeze Hits โ†•Def Cleanup โ†•Def Encapsulation โ†•Def Listeners โ†•Def Test Skip โ†•Struct Tabs โ†•Struct Spaces โ†•Arch Hardware โ†•Arch Crypto โ†•Def Auth โ†•Arch Ipc โ†•Arch Feature Flags โ†•Arch Serialization โ†•Arch Regex โ†•Arch Time โ†•Llm Api โ†•Llm Orchestrator โ†•Llm Vector Store โ†•Llm Local Compute โ†•Ai Tools โ†•Ai Memory โ†•Ai Logic Loop โ†•Ml Traditional โ†•Dl Frameworks โ†•Lazy Evaluation โ†•Vectorized Math โ†•Struct Var Decl โ†•Struct Camel Case โ†•Struct Snake Case โ†•Struct Pascal Case โ†•Struct Upper Case โ†•Struct Short Vars โ†•Struct Long Vars โ†•State Slop Duplicates โ†•State Slop Orphans โ†•Threat Obfuscated โ†•Threat Bypasses โ†•Threat Network Hooks โ†•Threat Eval Exec โ†•Threat Env Mutation โ†•Sec Graveyard โ†•Threat Crypto Math โ†•Threat Stego Imports โ†•Threat Homoglyphs โ†•Threat Private Info โ†•Threat Extension Mismatch โ†•Threat Entropy โ†•Threat Tainted Injection โ†•Prompt Injection โ†•Agentic Rce โ†•
+
+
+
+
+
+ +
+
+

+ 🌌 Powered by the blAST Engine +

+

This structural teardown was generated using GitGalaxy, an AST-free, compilation-free heuristic knowledge graph engine.

+ + +
+
+ + + + diff --git a/docs/wiki/museum-of-code/index.md b/docs/wiki/museum-of-code/index.md index 41ad3fae..da270a51 100644 --- a/docs/wiki/museum-of-code/index.md +++ b/docs/wiki/museum-of-code/index.md @@ -6,7 +6,7 @@ Using the **GitGalaxy blAST engine**, we have stripped away the abstractions of ### The Exhibits -* ๐Ÿงฌ **[AlphaFold (2018)](teardown-of-alphafold.md)** - The 5,000-line Python breakthrough that solved protein folding. +* ๐Ÿงฌ **AlphaFold (2018)** - The 5,000-line Python breakthrough that solved protein folding. * ๐Ÿ“ฑ **[Android](teardown-of-android.md)** - The world's most ubiquitous mobile OS. * ๐Ÿš€ **[Apollo 11](teardown-of-apollo-11.md)** - The AGC assembly code that took humanity to the moon. * ๐Ÿช™ **[Bitcoin v0.1.0](teardown-of-bitcoin.md)** - The original, highly coupled C++ prototype of decentralized finance. diff --git a/docs/wiki/museum-of-code/teardown-of-alphafold.md b/docs/wiki/museum-of-code/teardown-of-alphafold.md deleted file mode 100644 index 306f38a6..00000000 --- a/docs/wiki/museum-of-code/teardown-of-alphafold.md +++ /dev/null @@ -1,91 +0,0 @@ -# The Architecture of AlphaFold (2018): A Structural Physics Teardown of the Protein Folding Pioneer - -**Executive Summary:** We performed a deep **static code analysis** on the original open-source release of DeepMind's AlphaFold (2018). By mapping its structural physics, we uncover the concentrated **software architecture**, dense tensor orchestration, and specialized "God Nodes" that solved one of the grandest challenges in biology. This teardown exposes the raw **code smells**, tight Python coupling, and single-point silos of a remarkably compact 5,000-line repository that forever changed computational science. - -### Welcome to the Museum of Code - -In 2018, Google DeepMind entered the 13th Critical Assessment of Structure Prediction (CASP13) and stunned the scientific community. 
Their submission, AlphaFold, utilized deep residual neural networks to predict the 3D structures of proteins from amino acid sequences with unprecedented accuracy. This repository represents the first iteration of that breakthroughโ€”a pivotal artifact in the history of computational biology and machine learning. - -But what does a Nobel-prize-winning scientific breakthrough look like under the hood? We ran the `alphafold_2018` repository through the **GitGalaxy blAST engine**โ€”an AST-free structural physics scannerโ€”to strip away the academic papers and visualize its raw code complexity, coupling, and fragility. Here is the physical reality of the original AlphaFold architecture. - -> [!NOTE] -> *Insert WebGL/Video rotation of the galaxy here* - -### The 3D Cartography: Macro State - -Mapping AlphaFold reveals a shockingly compact repository. For a system that revolutionized structural biology, the active execution logic relies on a highly concentrated, Python-exclusive codebase. - -| Macro State Metric | Value | Architectural Interpretation | -| :--- | :--- | :--- | -| **Total LOC** | **5,202** | Extraordinarily compact. The core scientific breakthrough was achieved with fewer lines of code than a standard web app framework. | -| **Language Profile** | **98.6% Python** | Pure Python orchestration, relying entirely on underlying C/C++ tensor libraries (like TensorFlow) for the heavy lifting. | -| **Network Modularity** | **0.1345** | Low modularity. The biological models, network architecture, and gradient descent loops are tightly intertwined. | -| **Cyclic Density** | **0.0%** | Zero dependency loops. A flawless directed acyclic graph ensures strict, predictable data pipelines. | -| **Articulation Pts** | **4** | High systemic resilience. Only four files act as critical structural bridges. 
| - -### The "House of Cards": Architectural Choke Points - -In software architecture, we identify structural health by separating **Structural Pillars** (the foundational files everything relies on) from **Fragile Orchestrators** (the complex controllers pulling everything together). - -Here is how AlphaFold 2018 distributes its architectural weight: - -**Top 5 Structural Pillars (Highest Inbound Blast Radius):** -These files act as core load-bearing infrastructure. Changes here carry a high risk of cascading breaks across the entire ecosystem. -* **`contacts.py`** โ€” **22 inbound connections** (The biological domain anchor). -* **`features.py`** โ€” **19 inbound connections** (The data parsing anchor). -* **`train_eval.py`** โ€” **10 inbound connections** -* **`network.py`** โ€” **9 inbound connections** -* **`mmcif.py`** โ€” **7 inbound connections** (The structural file parser). - -**Top 5 Orchestrators (Highest Outbound Coupling):** -These files pull in massive amounts of external dependencies to coordinate the neural network and physics simulations. -* **`replica_exchange.py`** โ€” **24 outbound dependencies** -* **`train_eval.py`** โ€” **18 outbound dependencies** -* **`test_train_eval.py`** โ€” **17 outbound dependencies** -* **`score_def.py`** โ€” **16 outbound dependencies** -* **`contact_resnet.py`** โ€” **13 outbound dependencies** - -*Architectural Insight:* The architecture is highly logical. The base biology and data formats (`contacts.py`, `features.py`) serve as the rigid foundations, while the physics-based simulated annealing (`replica_exchange.py`) and training loops (`train_eval.py`) act as the massive, highly-coupled orchestrators driving the actual execution. - -### Technical Debt & The "God Nodes" - -Research codebases typically prioritize mathematical correctness and iteration speed over enterprise-grade maintainability, leading to concentrated logic nodes. 
- -**The Heaviest Functions (Impact Score):** -* **`ReplicaExchange`** (in `replica_exchange.py`): Impact Score **321.4**. This is the core orchestrator for simulated annealing and gradient descent, carrying immense algorithmic weight. -* **`run_network`** (in `network.py`): Impact Score **151.7**. The "God Node" that initializes and executes the deep residual network. -* **`get_features`** (in `features.py`): Impact Score **131.7**. The primary data ingestion pipeline. - -**Cumulative Risk Outliers:** -The highest multi-dimensional technical debt in the system resides in the training and simulation pipelines: -* **`train_eval.py`**: Cumulative Risk **496.16**. The highest risk file in the repository, managing high cognitive load and state flux, though it operates with 0.0% silo risk. -* **`replica_exchange.py`**: Cumulative Risk **437.28**. Plagued by 28.7% State Flux Exposure, mutating gradients and tensors rapidly in an extremely tight optimization loop. - -**The Key Person Risk (Silos):** -In cutting-edge research teams, specialized scientific knowledge often creates severe "Bus Factor" risks. GitGalaxy tracks isolated ownership to quantify this. In AlphaFold 2018, the most critical execution nodes were maintained by single researchers: -* **`replica_exchange.py`** (Mass: 279.78) -> **Andrew W. Senior** (100.0% isolated ownership) -* **`network.py`** (Mass: 104.7) -> **John Jumper** (100.0% isolated ownership) -* **`plot_utils.py`** (Mass: 55.44) -> **Richard Evans** (100.0% isolated ownership) - -*Note: John Jumper would later go on to co-lead AlphaFold 2 and win the Nobel Prize in Chemistry, making this 100% ownership tag an incredible historical artifact of a genius working on a core neural network topology.* - -### The Security Perimeter (Zero-Trust & X-Ray) - -Applying zero-trust security lenses to an academic machine learning repository reveals the realities of data ingestion. - -* **Autonomous AI Threats & Malware:** **0 detected**. 
The codebase is mathematically secure against malicious structural DNA. -* **Supply Chain Firewall:** **0 Blacklisted / 1 Unknown Dependency**. An exceptionally tight perimeter. -* **Binary Anomalies (X-Ray):** **2 hits**. Expected anomalies associated with embedded tensor test payloads or compressed structural data. -* **Weaponizable Surface Exposures:** The engine flagged `io.py` with **100.0% Injection Surface**. This is an architectural reality of bioinformatics: parsing massive, complex string files (like `.mmcif` or `.pdb` protein datasets) natively creates deserialization and injection vulnerabilities if the data isn't trusted. However, because this is an offline research tool operating on known genomic databases, the operational risk is minimal. - -### Conclusion - -AlphaFold (2018) is a breathtaking example of focused, hyper-specialized scientific engineering. With just over 5,000 lines of Python, the DeepMind team orchestrated a solution that altered the trajectory of structural biology. It survives its low modularity (0.1345) and extreme Key Person silos by maintaining absolute cyclic discipline (0.0%). While `train_eval.py` and `replica_exchange.py` carry heavy cognitive load and high state flux, they are the necessary engines of discovery. This repository proves that world-changing architecture doesn't require millions of lines of codeโ€”it requires the right algorithms, tightly orchestrated. - ---- -### See Your Own Code in 3D -This architectural teardown was generated using **GitGalaxy**, an AST-free structural physics engine that treats codebases like gravitational networks. - -* ๐ŸŒŒ **Explore the 3D WebGPU Galaxy:** Upload your own repo's JSON payload securely in your browser at [gitgalaxy.io](https://gitgalaxy.io/). -* โš™๏ธ **View the Source:** GitGalaxy is open-source. Check out the blAST engine at [github.com/squid-protocol/gitgalaxy](https://github.com/squid-protocol/gitgalaxy). 
-* ๐Ÿš€ **Automate your Security:** Deploy the GitGalaxy Supply Chain Firewall and X-Ray Inspector directly into your CI/CD pipeline using our [GitHub Actions](#). \ No newline at end of file diff --git a/gitgalaxy/README.md b/gitgalaxy/README.md index 11e3eee0..78ee780a 100644 --- a/gitgalaxy/README.md +++ b/gitgalaxy/README.md @@ -1,29 +1,36 @@ -# GitGalaxy: Internal Architecture & Source Code +# GitGalaxy: The Core Engine & GalaxyScope Orchestrator -Welcome to the internal source code for the **GitGalaxy blAST Engine**. +[![Architecture](https://img.shields.io/badge/Architecture-blAST_Engine-00BFFF.svg)](#) +[![Velocity](https://img.shields.io/badge/Velocity-40k%2B_LOC%2Fsec-00C957.svg)](#) +[![CLI](https://img.shields.io/badge/Interface-GalaxyScope_CLI-8A2BE2.svg)](#) -This directory contains the core physics, routing, and mathematical heuristics that power the system. If you are a developer looking to contribute or understand the pipeline, here is the architectural map: +Welcome to the internal source code for the **GitGalaxy Core Engine**. -### ๐Ÿ—บ๏ธ The Developer Map -* **`/core/`**: The optical routing layer. Contains `aperture.py` and `prism.py`, which break down source code into structural signals and separate executable logic from ghost mass (comments). -* **`/physics/`**: The heuristics engine. Contains `signal_processor.py` and `neural_auditor.py`, which apply GitGalaxy mathematics to score O(N) complexity, blast radius, and state flux. -* **`/recorders/`**: The export layer. Translates the internal state maps into SQLite databases, AI-agent JSON tickets, and WebGPU data payloads. -* **`/security/`**: The zero-trust validation layer for detecting embedded malware and logic bombs. -* **`/tools/`**: The enterprise "Spokes". Contains specific automation controllers for CI/CD pipelines, including Supply Chain Firewalls, PII Leak Hunters, and GitHub Actions integrations. 
+This directory contains the central orchestratorโ€”**GalaxyScope**โ€”alongside the core physics, optical routing, and mathematical heuristics that power the entire system. If you are a developer looking to contribute, understand the pipeline, or run the primary CLI, here is your architectural map. + +### ๐Ÿ—บ๏ธ The Developer Map (How the Pipeline Flows) + +When you trigger the `galaxyscope` command, the data flows through these five physical directories: + +* **`/core/` (The Frontline):** The optical routing layer. Contains the [Aperture Filter](https://squid-protocol.github.io/gitgalaxy/02-03-aperture-filter/) and [The Prism](https://squid-protocol.github.io/gitgalaxy/02-07-the-prism/), which break down source code into structural signals, separating executable logic from ghost mass (comments) and inert binaries. +* **`/physics/` (The Math):** The heuristics engine. Contains the [Signal Processor](https://squid-protocol.github.io/gitgalaxy/02-09-signal-processing/) and [Neural Auditor](https://squid-protocol.github.io/gitgalaxy/02-19-neural-auditor/), which apply GitGalaxy mathematics to score O(N) complexity, topological blast radius, and state flux without using ASTs. +* **`/recorders/` (The Exporters):** The translation layer. Converts the internal state maps into highly relational [SQLite Databases](https://squid-protocol.github.io/gitgalaxy/02-21-record-keeper/), AI-agent JSON tickets, and the final 3D WebGPU payload. +* **`/security/` (The Sentinel):** The zero-trust validation layer. Contains the [Security Lens](https://squid-protocol.github.io/gitgalaxy/02-06-security-lens/) responsible for intercepting embedded malware, hardcoded secrets, and logic bombs on the fly. +* **`/tools/` (The Spokes):** The enterprise automation layer. 
Contains specialized controllers for CI/CD pipelinesโ€”like the [Supply Chain Firewall](https://squid-protocol.github.io/gitgalaxy/04-03-supply-chain-firewall/) and [PII Leak Hunter](https://squid-protocol.github.io/gitgalaxy/04-06-pii-leak-hunter/)โ€”that consume the core engine's telemetry. --- ### โšก Performance Showcase: NVDA (NonVisual Desktop Access) -To demonstrate the engine's capability on complex, cross-language system architecture, we unleashed GalaxyScope on **NVDA**, the open-source Windows screen reader. +To demonstrate the GalaxyScope orchestrator's capability on complex, cross-language system architecture, we unleashed it on **NVDA**, the open-source Windows screen reader. -Because NVDA relies heavily on bridging Python application logic with low-level C++ system hooks, it requires advanced dependency mapping. The blAST engine successfully parsed the mixed-language architecture, analyzing **236,754 lines of code** in just **5.59 seconds** (a velocity of 42,357 LOC/sec). +Because NVDA relies heavily on bridging Python application logic with low-level C++ system hooks, it requires advanced polyglot dependency mapping. The blAST engine successfully parsed the mixed-language architecture, analyzing **236,754 lines of code** in just **5.59 seconds** (a velocity of 42,357 LOC/sec). -Crucially, during the import resolution phase, the Air-Gapped Dependency Radar successfully intercepted a structural naming collision (`fstream` vs `sstream`), proving the real-time typosquatting defenses are fully operational. +Crucially, during the import resolution phase, the Air-Gapped Dependency Radar successfully intercepted a structural naming collision (`fstream` vs `sstream`), proving the real-time typosquatting defenses are fully operational without relying on cloud APIs. -> **Note on False Positives:** Because `fstream` and `sstream` are both standard C++ libraries, this specific flag is a false positive. 
To prevent the engine from halting on trusted internal libraries, contributors can whitelist them by adding them to the `approved_imports.json` registry. +> **Note on False Positives:** Because `fstream` and `sstream` are both standard C++ libraries, this specific flag is a false positive. To prevent the engine from halting on trusted internal libraries, contributors can whitelist them by adding them to the global `approved_imports.json` registry (see [GitGalaxy Config](https://squid-protocol.github.io/gitgalaxy/06-01-gitgalaxy-config/)). -![NVDA Processing Demo](../docs/wiki/assets/nvda_processing.gif) +![NVDA Processing Demo](../../docs/wiki/assets/nvda_processing.gif) ```text [INFO] PASS_1.5: Running Air-Gapped Typosquatting & Dependency Confusion Radar... @@ -36,7 +43,7 @@ Crucially, during the import resolution phase, the Air-Gapped Dependency Radar s --- -### ๐Ÿ› ๏ธ Local Development & Testing +### ๐Ÿ› ๏ธ Local Development & GalaxyScope Execution If you are modifying the internal physics or optical routing, it is highly recommended to install the package in editable mode so your CLI commands instantly reflect your local code changes. @@ -45,18 +52,21 @@ From the **root directory** of the repository, run: pip install -e . ``` -Once installed, you can trigger the main orchestrator (`galaxyscope.py`) globally from your terminal: +Once installed, you can trigger the main orchestrator globally from your terminal. This command runs the full [Data Pipeline](https://squid-protocol.github.io/gitgalaxy/02-01-pipeline-overview/) and outputs the final artifact. 
```bash galaxyscope /path/to/test/repo --debug ``` -Before submitting a Pull Request, ensure your changes do not skew the baseline risk equations by running the test suite: +Before submitting a Pull Request, ensure your changes do not skew the core baseline risk equations by running the test suite: ```bash python3 -m unittest discover tests/ ``` --- -### ๐ŸŒŒ Deep Dive into the Physics -If you want to understand the exact equations inside the `/physics/` module, read the full methodology in the Wiki: -* ๐Ÿ“– **[GitGalaxy Signal Processing & Equations](https://squid-protocol.github.io/gitgalaxy/)** +### ๐ŸŒŒ Deep Dive into the Pipeline Architecture +To fully understand how GalaxyScope processes data, maps files, and applies risk exposures, explore the official documentation: + +* ๐Ÿ“– **[GalaxyScope CLI Reference](https://squid-protocol.github.io/gitgalaxy/01-02-galaxyscope-cli-reference/)** (Flags, outputs, and behaviors) +* ๐Ÿ“– **[The Data Pipeline Overview](https://squid-protocol.github.io/gitgalaxy/02-01-pipeline-overview/)** (Step-by-step breakdown of the runtime) +* ๐Ÿ“– **[Risk Exposures & Methodology](https://squid-protocol.github.io/gitgalaxy/08-01-methodology/)** (The math behind the heuristics) * ๐Ÿช **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/ai_guardrails/README.md b/gitgalaxy/tools/ai_guardrails/README.md index d537e0d0..1723ede4 100644 --- a/gitgalaxy/tools/ai_guardrails/README.md +++ b/gitgalaxy/tools/ai_guardrails/README.md @@ -8,38 +8,38 @@ Welcome to the **GitGalaxy AI Guardrails Suite**. The adoption of Generative AI has created two massive security blind spots for modern enterprise teams. First, developers are building AI features that grant LLMs dangerous levels of system access (The AppSec Threat). 
Second, developers are utilizing autonomous coding agents that can silently introduce architectural degradation into complex codebases (The DevSec Threat). -Legacy security scanners cannot fix this. They look for traditional SQL injection, not Prompt Injection. They rely on slow compilation cycles that fail to keep pace with AI development. +Legacy security scanners ([like SonarQube or Checkmarx](https://squid-protocol.github.io/gitgalaxy/04-00-security_landscape/)) cannot fix this. They look for traditional SQL injection, not Prompt Injection. They rely on slow compilation cycles that fail to keep pace with AI development, leaving you completely blind to Agentic logic loops and context shredders. -GitGalaxy maps the architectural reality of your code in seconds. We use AST-free mathematical heuristics to generate deep, contextual reports, allowing you to block dangerous AI behavior before it hits production. +GitGalaxy maps the architectural reality of your code in seconds. We use AST-free mathematical heuristics to generate deep, contextual reports, allowing you to block dangerous AI behavior before it ever hits production. --- ### ๐Ÿ›ก๏ธ Side 1: The AI AppSec Sensor (`AIAppSecSensor`) *Protects your application from the AI features you build.* -Standard AST scanners frequently miss "Weaponized AI Architectures." This sensor maps the physical call-path distance between an LLM API execution and your critical system functions. +Standard AST scanners frequently miss "Weaponized AI Architectures." This sensor acts as a physical boundary, mapping the physical call-path distance between an LLM API execution and your critical system functions. -* **The RCE Funnel:** Detects LLMs wired directly to OS commands or shell executions. Prevents Prompt-Injection-to-RCE attacks. -* **The "God-Mode" Agent:** Flags autonomous tools with raw, unfiltered database access. Blocks autonomous data corruption. 
-* **The Exfiltration Vector:** Identifies LLMs accessing network sockets and cryptographic secrets. Stops SSRF and key exfiltration vulnerabilities. +* **The RCE Funnel:** Detects LLMs wired directly to OS commands or shell executions. This allows you to aggressively [block Prompt-Injection-to-RCE attacks](https://squid-protocol.github.io/gitgalaxy/cookbook/prevent-agentic-rce/) in your CI/CD pipeline. +* **The "God-Mode" Agent:** Flags autonomous tools with raw, unfiltered database access. Blocks autonomous data corruption before it can wipe a production table. +* **The Exfiltration Vector:** Identifies LLMs accessing network sockets and cryptographic secrets, stopping SSRF and key exfiltration vulnerabilities cold. --- ### 🤖 Side 2: The Dev Agent Firewall (`DevAgentFirewall`) *Protects your codebase from the autonomous AI tools you use.* -Not all legacy code is safe for an AI coding assistant (like Cursor, Copilot, or Claude) to modify. This firewall evaluates the structural complexity, cognitive load, and entropy of a file to determine if an AI agent will succeed, hallucinate, or silently destroy your system logic. +Not all legacy code is safe for an AI coding assistant (like Cursor, Copilot, or Claude) to modify. This firewall evaluates the structural complexity, cognitive load, and entropy of a file to determine if an AI agent will succeed, hallucinate, or silently destroy your system logic. By running this sensor, you can safely [sandbox autonomous agents](https://squid-protocol.github.io/gitgalaxy/cookbook/sandbox-autonomous-agents/) to only work on verified, low-complexity files. * **Context Window Shredders:** Identifies massive files with extreme algorithmic complexity. Prevents AI context collapse and logic truncation. -* **The Hallucination Zone:** Highlights heavy metaprogramming with zero documentation. Prevents AI method hallucination and fabricated syntax. 
+* **The Hallucination Zone:** Highlights heavy metaprogramming with zero documentation, preventing AI method hallucination and fabricated syntax. * **Silent Mutation Risk:** Flags logic with a high blast radius and zero test coverage. Blocks unverifiable AI modifications. -* **HITL Mandate:** Detects severe technical debt. Forces a Human-In-The-Loop (HITL) code review requirement for PRs generated by AI agents. +* **HITL Mandate:** Detects severe technical debt. Forces a strict Human-In-The-Loop (HITL) code review requirement for PRs generated by AI agents. --- ### 🚀 Quickstart: CI/CD & Pipeline Integration -Currently, the AI Guardrails operate as deep-inspection middleware. Instead of running as standalone standalone commands, these sensors inject themselves into the primary `galaxyscope` analysis pipeline to evaluate project telemetry in real-time. +Currently, the AI Guardrails operate as deep-inspection middleware. Instead of running as standalone commands, these sensors seamlessly inject themselves into the primary `galaxyscope` analysis pipeline to evaluate project telemetry in real-time. #### 1. Local CLI Execution Run a standard scan using the global PyPI package. The guardrails will automatically evaluate the ecosystem and report critical Agentic vulnerabilities. @@ -75,7 +75,7 @@ jobs: ### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is driven by our custom mathematical heuristics engine, capable of mapping multi-dimensional relationships at extreme velocity without requiring rigid ASTs. 
Read the official documentation to see the structural methodologies powering these guardrails: -* 📖 **[AI AppSec Sensor Architecture](../../../docs/wiki/02-17-ai-appsec-sensor.md)** -* 📖 **[Dev Agent Firewall Mechanics](../../../docs/wiki/02-18-dev-agent-firewall.md)** -* 📖 **[Logic Bomb & Injection Surface Risk Equations](../../../docs/wiki/08-20-logic-bomb-exposure.md)** +* 📖 **[AI AppSec Sensor Architecture](https://squid-protocol.github.io/gitgalaxy/02-17-ai-appsec-sensor/)** +* 📖 **[Dev Agent Firewall Mechanics](https://squid-protocol.github.io/gitgalaxy/02-18-dev-agent-firewall/)** +* 📖 **[Logic Bomb & Injection Surface Risk Equations](https://squid-protocol.github.io/gitgalaxy/08-20-logic-bomb-exposure/)** * 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/cobol_to_cobol/README.md b/gitgalaxy/tools/cobol_to_cobol/README.md index 4fa80bef..a108a4f1 100644 --- a/gitgalaxy/tools/cobol_to_cobol/README.md +++ b/gitgalaxy/tools/cobol_to_cobol/README.md @@ -4,16 +4,16 @@ [![Architecture](https://img.shields.io/badge/Architecture-Deterministic_Regex-00BFFF.svg)](#) [![State Manager](https://img.shields.io/badge/State-Hybrid_RAM%2FSQLite-8A2BE2.svg)](#) -Welcome to the **GitGalaxy Mainframe Modernization Suite**. This is a deterministic, high-speed static analysis suite designed to safely slice, sanitize, and modernize monolithic legacy systems. +Welcome to the **GitGalaxy Mainframe Modernization Suite**. This is a deterministic, high-speed static analysis suite designed to safely slice, sanitize, and [map monolithic legacy systems](https://squid-protocol.github.io/gitgalaxy/cookbook/map-cobol-monoliths/). **Mainframe Proven:** The outputs of these architectural tools natively compile against raw MVS 3.8j operating systems (1974 Hercules Mainframe), while simultaneously scaffolding modern cloud environments. 
### ๐Ÿ”„ The Modernization Pipeline -You point the Migration Controller at a massive, undocumented COBOL repository. It translates a chaotic folder of `.cbl` files into a deterministic execution pipeline: +You point the [Legacy Refraction Controller](https://squid-protocol.github.io/gitgalaxy/05-01-legacy-refraction-controller/) at a massive, undocumented COBOL repository. It translates a chaotic folder of `.cbl` files into a deterministic execution pipeline: * **The Assessment:** Dynamically scales between high-speed RAM and SQLite3. -* **Dead Code Extraction:** Uses structural heuristics to mathematically map and extract orphaned memory and dead code bloat. *(AST-Free)* +* **Dead Code Extraction:** Uses structural heuristics to mathematically map and [extract orphaned memory and dead code bloat](https://squid-protocol.github.io/gitgalaxy/cookbook/identifying-dead-code-in-cobol/). *(AST-Free)* * **Dependency Mapping:** Maps data lineage to deflect dead dependencies. * **Asset Generation:** Generates pristine PostgreSQL schemas, JSON APIs, and compile-ready JCLs. @@ -24,29 +24,29 @@ You point the Migration Controller at a massive, undocumented COBOL repository. This suite is built on a modular Hub-and-Spoke architecture. Every Python script acts as an independent CLI tool or is orchestrated centrally. #### 1. Pre-Processors & Sensors -* **Lexical Patcher (`cobol_lexical_patcher.py`):** Safely neutralizes legacy compiler traps. -* **System Limits Reporter (`cobol_system_limits_reporter.py`):** Flags non-deterministic routing logic and system constraint breaches. +* **[Lexical Patcher](https://squid-protocol.github.io/gitgalaxy/05-13-lexical-patcher/) (`cobol_lexical_patcher.py`):** Safely neutralizes legacy compiler traps. +* **[System Limits Reporter](https://squid-protocol.github.io/gitgalaxy/05-17-system-limits-reporter/) (`cobol_system_limits_reporter.py`):** Flags non-deterministic routing logic and system constraint breaches.
![System Limits Reporter](../../../docs/wiki/assets/system_limits_reporter.gif) #### 2. Extractors & Slicers -* **Graveyard Finder (`cobol_graveyard_finder.py`):** Expands copybooks to calculate dead code bloat. +* **[Graveyard Reaper](https://squid-protocol.github.io/gitgalaxy/05-10-graveyard-reaper/) (`cobol_graveyard_finder.py`):** Expands copybooks to calculate dead code bloat.
![Graveyard Reaper](../../../docs/wiki/assets/graveyard_reaper.gif) -* **DAG Architect (`cobol_dag_architect.py`):** Maps data lineage to mathematically calculate zero-trust execution topology. +* **[DAG Architect](https://squid-protocol.github.io/gitgalaxy/05-08-dag-architect/) (`cobol_dag_architect.py`):** Maps data lineage to [mathematically calculate zero-trust execution topology](https://squid-protocol.github.io/gitgalaxy/cookbook/creating-dag-from-cobol-files/).
![DAG Architect](../../../docs/wiki/assets/dag_architect.gif) -* **Microservice Slicer (`cobol_microservice_slicer.py`):** Executes 3-pass recursive variable taint-tracking. +* **[Microservice Slicer](https://squid-protocol.github.io/gitgalaxy/05-14-microservice-slicer/) (`cobol_microservice_slicer.py`):** Executes 3-pass recursive variable taint-tracking for safe [business logic extraction](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-microservice-slicing/).
![Microservice Slicer](../../../docs/wiki/assets/microservice_slicer.gif) -* **ETL Unpacker (`cobol_etl_unpacker.py`):** Translates binary EBCDIC and Packed Decimal to CSVs. +* **[ETL Unpacker](https://squid-protocol.github.io/gitgalaxy/05-09-etl-unpacker/) (`cobol_etl_unpacker.py`):** Translates binary EBCDIC and Packed Decimal to CSVs to [unpack hidden ETL flows](https://squid-protocol.github.io/gitgalaxy/cookbook/unpacking-etl-from-cbl-files/). #### 3. Cloud & Mainframe Forges -* **Compiler Forge (`cobol_compiler_forge.py`):** Flattens copybooks and generates era-aware build JCLs. +* **[Compiler Forge](https://squid-protocol.github.io/gitgalaxy/05-07-mainframe-compiler-forge/) (`cobol_compiler_forge.py`):** Flattens copybooks and generates era-aware build JCLs.
![Compiler Forge](../../../docs/wiki/assets/compiler_forge.gif) -* **Cloud Schema Forge (`cobol_schema_forge.py`):** Translates `PIC` clauses to strict PostgreSQL DDLs. +* **[Cloud Schema Forge](https://squid-protocol.github.io/gitgalaxy/05-15-cloud-schema-forge/) (`cobol_schema_forge.py`):** Translates `PIC` clauses to [strict PostgreSQL DDL schemas](https://squid-protocol.github.io/gitgalaxy/cookbook/creating-schema-from-cobol-files/).
![Cloud Schema Forge](../../../docs/wiki/assets/cloud_schema_forge.gif) -* **Zero-Trust JCL Forge (`cobol_jcl_forge.py`):** Extracts `SELECT` mappings to auto-generate strict, least-privilege JCL emulators. +* **[Zero-Trust JCL Forge](https://squid-protocol.github.io/gitgalaxy/05-12-zero-trust-jcl-forge/) (`cobol_jcl_forge.py`):** Extracts `SELECT` mappings to [auto-generate strict, least-privilege JCL emulators](https://squid-protocol.github.io/gitgalaxy/cookbook/creating-jcl-from-cobol-files/).
![Zero-Trust JCL Forge](../../../docs/wiki/assets/jcl_forge_demo.gif) #### 4. The AI Remediation Boundary -* **Anomaly Task Forge (`cobol_agent_task_forge.py`):** Isolates structural anomalies into bounded JSON job tickets for LLM remediation. +* **[Anomaly Task Forge](https://squid-protocol.github.io/gitgalaxy/05-16-anomaly-agent-task-forge/) (`cobol_agent_task_forge.py`):** Isolates structural anomalies into bounded JSON job tickets for LLM remediation. --- @@ -166,7 +166,7 @@ The controller generates a timestamped `_gitgalaxy_clean` directory containing: ### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is driven by our custom mathematical heuristics engine, capable of mapping multi-dimensional relationships at extreme velocity without requiring rigid ASTs. Read the official documentation to explore the architecture of the modernization controllers: -* 📖 **[The Legacy Modernization Controller](../../../docs/wiki/05-01-legacy-refraction-controller.md)** -* 📖 **[Dead Code Extraction Mathematics](../../../docs/wiki/05-10-graveyard-reaper.md)** -* 📖 **[Zero-Trust JCL Forge Mechanics](../../../docs/wiki/05-12-zero-trust-jcl-forge.md)** +* 📖 **[The Legacy Refraction Controller](https://squid-protocol.github.io/gitgalaxy/05-01-legacy-refraction-controller/)** +* 📖 **[Dead Code Extraction Mathematics](https://squid-protocol.github.io/gitgalaxy/05-10-graveyard-reaper/)** +* 📖 **[Zero-Trust JCL Forge Mechanics](https://squid-protocol.github.io/gitgalaxy/05-12-zero-trust-jcl-forge/)** * 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/cobol_to_java/README.md b/gitgalaxy/tools/cobol_to_java/README.md index a708800e..4828dcbb 100644 --- a/gitgalaxy/tools/cobol_to_java/README.md +++ b/gitgalaxy/tools/cobol_to_java/README.md @@ -4,15 +4,15 @@ 
[![Architecture](https://img.shields.io/badge/Architecture-Spring_Boot_3.2-00BFFF.svg)](#) [![Automation](https://img.shields.io/badge/Automation-100%25_Compilable_Shells-8A2BE2.svg)](#) -Most legacy modernization efforts fail because they feed raw, monolithic COBOL directly into an LLM. This leads to hallucinations, memory leaks, and broken architectures. +Most legacy modernization efforts fail because they feed raw, monolithic COBOL directly into an LLM. As seen across the [DevSecOps Competitive Landscape](https://squid-protocol.github.io/gitgalaxy/04-00-security_landscape/), relying purely on AI or generic ASTs leads to hallucinations, memory leaks, and broken architectures. -GitGalaxy flips the paradigm. We use deterministic mathematical parsing to build a structurally perfect, 100% compiling Java Spring Boot architecture *first*, and only use AI for the final isolated logic. +GitGalaxy flips the paradigm. We use the deterministic, mathematical [blAST Engine](https://squid-protocol.github.io/gitgalaxy/01-03-the-blast-paradigm/) to build a structurally perfect, 100% compiling Java Spring Boot architecture *first*, and only use AI for the final isolated logic. This pipeline has been stress-tested across a randomized corpus of 27 distinct legacy COBOL repositories (including complex IBM CICS applications), generating structurally sound, Maven-compilable Spring Boot systems without human intervention. ### ๐Ÿงช The Ultimate CI/CD Stress Test -To prove the viability of this deterministic approach, the Java Forge was subjected to an automated batch test across 27 diverse repositories. +To prove the viability of this deterministic approach, the Java Forge was subjected to an [Automated Batch Test](https://squid-protocol.github.io/gitgalaxy/05-06-batch-test-harness/) across 27 diverse repositories.
![Java Forge & Batch Test](../../../docs/wiki/assets/java_forge_and_batch_test.gif) @@ -33,40 +33,40 @@ We do not claim to magically translate entire enterprise systems with zero human * **What We Automate:** Exact memory mapping, JPA entities, REST controllers, and complete Maven build systems. * **What We Delegate:** Highly specific, isolated internal business logic. -* **How We Scale:** We generate strict JSON job tickets containing isolated logic slices. +* **How We Scale:** We generate [strict JSON Autonomous Agent Tickets](https://squid-protocol.github.io/gitgalaxy/05-05-autonomous-agent-tickets/) containing isolated logic slices. * **Zero Hallucinations:** AI agents are restricted to filling in the pre-wired methods, preventing architectural hallucinations. --- ### 🏗️ How It Works: Deterministic Scaffolding -The `cobol_to_java_controller.py` ingests the JSON Intermediate Representation (IR) generated by the Mainframe Modernization Suite. It then orchestrates a suite of specialized architectural forges. +The `cobol_to_java_controller.py` ingests the JSON Intermediate Representation (IR) generated by the Mainframe Modernization Suite. It then orchestrates a suite of specialized architectural forges to seamlessly [Scaffold Java Spring Boot](https://squid-protocol.github.io/gitgalaxy/cookbook/scaffold-spring-boot/). -#### 1. Entity & Memory Mapping Forge -Translates legacy schema boundaries into strict Spring Boot `@Entity` classes. +#### 1. [Entity & Memory Mapping Forge](https://squid-protocol.github.io/gitgalaxy/05-03-entity-and-memory-mapping/) +Translates legacy schema boundaries into strict Spring Boot `@Entity` classes. Read the [Entity Forge Cookbook Recipe](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-to-java-spring-boot-entity-forge/). * **Precision Mapping:** Translates complex `PIC` clauses to `BigDecimal`. * **Array Resolution:** Resolves `OCCURS` arrays into `List` collections. 
* **Memory Overlays:** Maps `REDEFINES` memory overlays as transient aliases. -#### 2. The API Contract & Service Forge -Translates the DAG lineage intent into modern REST Controllers and auto-wires the `@Service` layer. +#### 2. [The API Contract & Service Forge](https://squid-protocol.github.io/gitgalaxy/05-04-api-and-service-contracts/) +Translates the DAG lineage intent into modern REST Controllers and auto-wires the `@Service` layer. View the [REST API](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-to-java-REST-API-generation/) and [Service Forge](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-to-java-spring-boot-service-forge/) recipes. * **Paradigm Detection:** Detects batch vs. transactional data paradigms. * **Controller Generation:** Builds specific REST controller endpoints. * **Mock Service Shield:** Generates mock services for missing external dependencies. -#### 3. EBCDIC Decoder Forge +#### 3. [EBCDIC Decoder Forge (Data Serialization)](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-to-java-mainframe-data-serialization-forge/) Automatically generates the utility classes necessary to read raw mainframe byte streams. * **Legacy Unpacking:** Safely unpacks Packed Decimal (`COMP-3`) data. * **Boundary Validation:** Validates hex-boundaries to prevent runtime crashes. * **Format Translation:** Decodes raw EBCDIC strings to standard UTF-8. -#### 4. The Build System Forge +#### 4. [The Build System Forge](https://squid-protocol.github.io/gitgalaxy/cookbook/cobol-to-java-automated-spring-boot-build-system/) Generates the configuration files required for instant compilation. * **Dependency Management:** Generates production-ready Maven `pom.xml`. * **Environment Config:** Configures Spring Boot `application.yml`. * **Instant Verification:** Ensures instant out-of-the-box compilation. -#### 5. The AI Boundary (Agent Task Forge) +#### 5. 
[The AI Boundary (Anomaly Agent Task Forge)](https://squid-protocol.github.io/gitgalaxy/05-16-anomaly-agent-task-forge/) Packages the remaining logic into strict JSON tickets for LLMs or human engineers. * **Logic Extraction:** Extracts isolated business rules from the monolith. * **Ticket Generation:** Packages strict JSON tickets with required inputs/outputs. @@ -101,7 +101,7 @@ Don't just take our word for it. We have published the raw, unedited artifacts g ### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is driven by our custom mathematical heuristics engine, capable of mapping multi-dimensional relationships at extreme velocity. Dive into the official wiki to understand the exact translation mechanics and memory-mapping heuristics: -* 📖 **[Spring Boot Scaffolding Logic](../../../docs/wiki/05-02-spring-boot-scaffolding.md)** -* 📖 **[Entity & Memory Mapping Rules](../../../docs/wiki/05-03-entity-and-memory-mapping.md)** -* 📖 **[API & Service Contract Generation](../../../docs/wiki/05-04-api-and-service-contracts.md)** +* 📖 **[Spring Boot Scaffolding Logic](https://squid-protocol.github.io/gitgalaxy/05-02-spring-boot-scaffolding/)** +* 📖 **[Entity & Memory Mapping Rules](https://squid-protocol.github.io/gitgalaxy/05-03-entity-and-memory-mapping/)** +* 📖 **[API & Service Contract Generation](https://squid-protocol.github.io/gitgalaxy/05-04-api-and-service-contracts/)** * 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/compliance/README.md b/gitgalaxy/tools/compliance/README.md index 3529b114..208fc974 100644 --- a/gitgalaxy/tools/compliance/README.md +++ b/gitgalaxy/tools/compliance/README.md @@ -6,7 +6,7 @@ Welcome to the **GitGalaxy Compliance & SBOM Suite**. 
-The industry standard for generating a Software Bill of Materials (SBOM) is fundamentally flawed. Standard tools open your `package.json`, `composer.json`, or `requirements.txt`, read the list of dependencies, and blindly export them to a PDF. **They trust the manifest.** +The industry standard for generating a [Software Bill of Materials (SBOM)](https://squid-protocol.github.io/gitgalaxy/04-02-sbom-generator/) is fundamentally flawed. Standard tools open your `package.json`, `composer.json`, or `requirements.txt`, read the list of dependencies, and blindly export them to a PDF. **They trust the manifest.** But manifests lie. A supply chain attack doesn't announce itself. A package might claim to be a simple text-formatting utility, but its physical files contain high-entropy encrypted payloads, obfuscated malware, or mismatched languages. @@ -14,24 +14,24 @@ GitGalaxy takes a **Zero-Trust** approach. We don't just read the manifest; we p ### 🧠 The Zero-Trust Strategy: Trust Nothing, Verify Everything -When you run our Universal SBOM Generator, it leverages the full weight of the GitGalaxy static analysis engine to audit your dependencies: +When you run our [Universal SBOM Generator](https://squid-protocol.github.io/gitgalaxy/04-02-sbom-generator/), it leverages the full weight of the GitGalaxy static analysis engine to audit your dependencies: #### 1. The Universal Manifest Slicer It automatically detects your ecosystem (**NPM, PyPI, Composer, Cargo, Go Modules, Maven, and RubyGems**), slices the manifest, and cross-references the declared dependencies against what actually exists on your hard drive. If a dependency is claimed but missing, it is flagged as `UNVERIFIED_MISSING_ON_DISK`. #### 2. Deep File Inspection & Structural Verification For every package found on disk, we open the core source files and run them through our **Structural Profiler** to confirm the file's true identity. 
-* **Identity Spoofing:** If an attacker hides a malicious bash script by naming it `index.js`, the profiler cross-references the extension against the internal file shebangs and structural markers. It triggers an **Identity Crisis** and flags the package as `SPOOF_DETECTED`. -* **Entropy Auditing:** We calculate the Shannon Entropy of the raw code. If the structural density exceeds standard human programming bounds (e.g., an entropy score > 4.8), we flag it for containing encrypted or packed payloads. +* **Identity Spoofing:** If an attacker hides a malicious bash script by naming it `index.js`, the profiler cross-references the extension against the internal file shebangs and structural markers. It triggers an [Identity Crisis](https://squid-protocol.github.io/gitgalaxy/02-05-language-lens/) and flags the package as `SPOOF_DETECTED`. +* **Entropy Auditing:** We calculate the [Shannon Entropy](https://squid-protocol.github.io/gitgalaxy/04-05-binary-anomaly-detector/) of the raw code. If the structural density exceeds standard human programming bounds (e.g., an entropy score > 4.8), we flag it for containing encrypted or packed payloads. ### 🛡️ The Full GitGalaxy Defense Pipeline Our compliance auditing isn't just a simple script; it is backed by a multi-tiered, battle-tested heuristic pipeline: -* **Pre-Process Analyzers (Binary Detection):** Acts as the frontline perimeter. It detects embedded hex arrays, opaque binary debris, and machine-generated monoliths before they can overwhelm the system. -* **Metadata & Evasion Sensors:** Scans your metadata (`.gitattributes`, `Makefile`). Crucially, it hunts for evasion tactics—like an attacker using `.gitignore` to secretly force-include a malicious `.so` binary while hiding it from standard directory scans. 
-* **Language Verification Engine:** Bypasses LLM hallucinations by using 60+ strict keyword regex profiles to definitively lock in a file's language family based on structural evidence, not just its extension. -* **Statistical Outlier Detection:** Applies Z-Score math across the codebase. If a file claims to be a specific language but its structural logic density is a mathematical outlier compared to the rest of the ecosystem, it drops into **Quarantine**. We catch malware trying to disguise itself as inert data dumps. +* **[Pre-Process Analyzers (Binary Detection)](https://squid-protocol.github.io/gitgalaxy/02-03-aperture-filter/):** Acts as the frontline perimeter. It detects embedded hex arrays, opaque binary debris, and machine-generated monoliths before they can overwhelm the system. +* **[Metadata & Evasion Sensors](https://squid-protocol.github.io/gitgalaxy/02-06-security-lens/):** Scans your metadata (`.gitattributes`, `Makefile`). Crucially, it hunts for evasion tacticsโ€”like an attacker using `.gitignore` to secretly force-include a malicious `.so` binary while hiding it from standard directory scans. +* **[Language Verification Engine](https://squid-protocol.github.io/gitgalaxy/02-05-language-lens/):** Bypasses LLM hallucinations by using 60+ strict keyword regex profiles to definitively lock in a file's language family based on structural evidence, not just its extension. +* **[Statistical Outlier Detection](https://squid-protocol.github.io/gitgalaxy/02-09-signal-processing/):** Applies Z-Score math across the codebase. If a file claims to be a specific language but its structural logic density is a mathematical outlier compared to the rest of the ecosystem, it drops into **Quarantine**. We catch malware trying to disguise itself as inert data dumps. --- @@ -77,7 +77,7 @@ zero-trust-sbom /path/to/your/project ``` #### 2. GitHub Actions CI/CD Integration -Automate your compliance by generating and saving a mathematically verified SBOM on every release. 
Create `.github/workflows/generate-sbom.yml`: +Automate your compliance by generating and saving a mathematically verified SBOM on every release (see our [Cookbook Recipe](https://squid-protocol.github.io/gitgalaxy/cookbook/generate-zero-trust-sbom/)). Create `.github/workflows/generate-sbom.yml`: ```yaml name: Generate Zero-Trust SBOM @@ -109,7 +109,7 @@ jobs: --- ### 🌌 Powered by the blAST Engine -This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is powered by the **blAST Engine**, an AST-free, mathematical heuristics engine capable of mapping repositories at 100,000 LOC/sec. +This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is powered by the **[blAST Engine](https://squid-protocol.github.io/gitgalaxy/01-03-the-blast-paradigm/)**, an AST-free, mathematical heuristics engine capable of mapping repositories at 100,000 LOC/sec. -* 📖 **[Read the Official Wiki](https://squid-protocol.github.io/gitgalaxy/)** for deep dives into the engine's static analysis methodologies, architecture blueprints, and the Taxonomical Equivalence Map. -* 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** to explore other enterprise tools like Supply Chain Firewalls and Terabyte Log Scanners. \ No newline at end of file +* 📖 **[Read the Official Wiki](https://squid-protocol.github.io/gitgalaxy/)** for deep dives into the engine's static analysis methodologies, architecture blueprints, and the **[Taxonomical Equivalence Map](https://squid-protocol.github.io/gitgalaxy/03-03-claim-3-taxonomy-map/)**. +* 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** to explore other enterprise tools like **[Supply Chain Firewalls](https://squid-protocol.github.io/gitgalaxy/04-03-supply-chain-firewall/)** and **[Terabyte Log Scanners](https://squid-protocol.github.io/gitgalaxy/04-07-terabyte-log-scanner/)**. 
\ No newline at end of file diff --git a/gitgalaxy/tools/network_auditing/README.md b/gitgalaxy/tools/network_auditing/README.md index 84e199a0..e7b2362c 100644 --- a/gitgalaxy/tools/network_auditing/README.md +++ b/gitgalaxy/tools/network_auditing/README.md @@ -8,7 +8,7 @@ Welcome to the **GitGalaxy API Security & Attack Surface Mapping Suite**. Security documentation is often strictly theoretical, whereas compiled source code represents physical reality. Attackers do not exploit the APIs you have documented; they hunt for the forgotten, undocumented endpoints left exposed in your codebase. -Standard DevSecOps scanners rely on approved Swagger or OpenAPI files to dictate what should be tested. GitGalaxy provides a deterministic source of truth. By scanning the raw codebase at high velocity, we reveal the exact routing logic that is actively exposed to the network, regardless of what the documentation claims. +Standard DevSecOps scanners ([like Checkmarx, SonarQube, or Semgrep](https://squid-protocol.github.io/gitgalaxy/04-00-security_landscape/)) rely on approved Swagger or OpenAPI files to dictate what should be tested. GitGalaxy provides a deterministic source of truth. By scanning the raw codebase at high velocity, we reveal the exact routing logic that is actively exposed to the network, regardless of what the documentation claims. ### 🔍 Core Methodology: OpenAPI Drift Detection @@ -16,8 +16,8 @@ We utilize AST-free stoichiometric signatures—calculated metrics derived direc * **Map Physical Reality:** Scans raw text for actual execution routes without needing a compiler or build environment. * **Extract Theoretical Truth:** Parses official Swagger or OpenAPI specifications. -* **Mathematical Resolution:** Applies strict set theory to expose critical security gaps and API drift. -* **Identify Shadow APIs (Critical Risk):** Exposes undocumented, active endpoints that evade standard WAFs and security audits. 
+* **Mathematical Resolution:** Applies strict set theory to expose critical security gaps and [API drift](https://squid-protocol.github.io/gitgalaxy/04-01-full-api-network-map/). +* **Identify Shadow APIs (Critical Risk):** Exposes undocumented, active endpoints that evade standard WAFs and security audits, allowing you to seamlessly [hunt Shadow APIs in CI/CD pipelines](https://squid-protocol.github.io/gitgalaxy/cookbook/hunt-shadow-apis/). * **Identify Ghost/Zombie APIs (Audit Bloat):** Highlights documented but non-existent or deprecated endpoints. ### 🧠 Smart Auto-Discovery & Monorepo Support @@ -112,7 +112,8 @@ Outputs a deterministic terminal dashboard optimized for CI/CD pipeline integrat ### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is driven by our custom mathematical heuristics engine, capable of mapping multi-dimensional relationships at extreme velocity. Read the official documentation to see how we deterministically map API routes: -* 📖 **[Full API Network Map Architecture](../../../docs/wiki/04-01-full-api-network-map.md)** -* 📖 **[The Network Risk Sensor Mechanics](../../../docs/wiki/02-16-network-risk-sensor.md)** -* 📖 **[API Exposure Risk Equations](../../../docs/wiki/08-14-api-exposure.md)** +* 📖 **[The blAST Paradigm (ASTs vs LLMs)](https://squid-protocol.github.io/gitgalaxy/01-03-the-blast-paradigm/)** +* 📖 **[Full API Network Map Architecture](https://squid-protocol.github.io/gitgalaxy/04-01-full-api-network-map/)** +* 📖 **[The Network Risk Sensor Mechanics](https://squid-protocol.github.io/gitgalaxy/02-16-network-risk-sensor/)** +* 📖 **[API Exposure Risk Equations](https://squid-protocol.github.io/gitgalaxy/08-14-api-exposure/)** * 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/supply_chain_security/README.md 
b/gitgalaxy/tools/supply_chain_security/README.md index 43ed64c2..b0ccd9e4 100644 --- a/gitgalaxy/tools/supply_chain_security/README.md +++ b/gitgalaxy/tools/supply_chain_security/README.md @@ -6,19 +6,19 @@ Welcome to the **GitGalaxy Supply Chain Security Suite**. -Standard security scanners have a massive blind spot: they read your `package.json` or `requirements.txt` and check those names against CVE databases. They never look inside the actual downloaded files. +Standard security scanners ([like Snyk, Dependabot, or Trivy](https://squid-protocol.github.io/gitgalaxy/04-00-security_landscape/)) have a massive blind spot: they read your `package.json` or `requirements.txt` and check those names against CVE databases. They act as manifest readers, never looking inside the actual downloaded files. -Modern attackers (like the **XZ-Utils** or **Glassworm** campaigns) exploit this. They don't announce themselves in a manifest. +Modern attackers (like the **XZ-Utils** or **Glassworm** campaigns) exploit this. They don't announce their malware in a manifest. -GitGalaxy operates differently. We scan the physical internals of every dependency file at extreme velocities (100k+ LOC/sec) before it enters your system. +GitGalaxy operates differently. Powered by the [blAST Engine](https://squid-protocol.github.io/gitgalaxy/01-03-the-blast-paradigm/), we bypass compilation and rigid ASTs entirely. We scan the physical internals of every dependency file at extreme velocities (100k+ LOC/sec) before it enters your system, identifying threats via minimal keyword permutations rather than waiting for a CVE to be published. ### 🛡️ What We Stop -We provide highly effective defense against structural threats: -* **Hidden Executables:** Steganography and XZ-Utils attack patterns. 
+We provide highly effective, zero-trust defense against structural threats: +* **Hidden Executables:** Steganography and [XZ-Utils attack patterns](https://squid-protocol.github.io/gitgalaxy/04-05-binary-anomaly-detector/). * **Malicious Typosquatting:** Unicode homoglyphs tricking developer imports. -* **Encrypted Payloads:** Sub-atomic XOR decryption loops. +* **Encrypted Payloads:** Sub-atomic XOR decryption loops hiding inside utility files. * **Hostile I/O:** Shadow imports establishing covert outbound connections. -* **Anomalous Logic:** Network sockets hidden inside declarative CSS/JSON. +* **API Drift:** Network sockets hidden inside undocumented [Shadow APIs](https://squid-protocol.github.io/gitgalaxy/04-01-full-api-network-map/). --- @@ -26,22 +26,27 @@ We provide highly effective defense against structural threats: Wired directly into your Git Pre-Commit hooks or CI/CD pipelines, these sentinels act as a physical firewall to fail poisoned builds early. -#### 1. The Supply Chain Firewall (`supply-chain-firewall`) +#### 1. [The Supply Chain Firewall](https://squid-protocol.github.io/gitgalaxy/04-03-supply-chain-firewall/) (`supply-chain-firewall`) Scans massive `node_modules` or `venv` directories in seconds. -* **Zero-Trust Verification:** Checks every physical `import` against allowlists. -* **Behavioral Heuristics:** Scans for tainted data injection routines. - -#### 2. X-Ray Inspector (`xray-inspector`) -Designed to triage binary files and encrypted malware. -* **Magic Byte Validation:** Catches executable scripts disguised as images. -* **Entropy Math:** Flags high-entropy encrypted text payloads. +* **Zero-Trust Verification:** Checks every physical `import` against strict allowlists. +* **Behavioral Heuristics:** Scans for tainted data injection routines and parasitic logic. + +#### 2. [Zero-Trust SBOM Generator](https://squid-protocol.github.io/gitgalaxy/04-02-sbom-generator/) (`sbom-generator`) +Standard SBOMs blindly trust manifests. 
Ours doesn't. +* **Physical Audits:** Extracts and micro-scans files from every downloaded dependency. +* **CycloneDX 1.4:** Generates compliant manifests injected with physical threat telemetry. + +#### 3. [X-Ray Inspector](https://squid-protocol.github.io/gitgalaxy/04-05-binary-anomaly-detector/) (`xray-inspector`) +Designed to fast-triage binary files and encrypted malware without cloud processing. +* **Magic Byte Validation:** Catches executable scripts disguised as harmless `.png` images. +* **Entropy Math:** Flags high-entropy encrypted text payloads (Shannon Entropy > 4.8). * **Parasitic Headers:** Detects executable logic inside static data blobs. -#### 3. Vault Sentinel (`vault-sentinel`) -A hyper-speed pre-commit hook strictly for secret detection. -* **Tier 0 Path Blocking:** Instantly blocks sensitive file path commits. -* **Deep Content Scanning:** Hunts for hardcoded cloud cryptographic keys. -* **Graveyard Detection:** Finds abandoned passwords in commented code. +#### 4. [Vault Sentinel](https://squid-protocol.github.io/gitgalaxy/04-04-vault-sentinel/) (`vault-sentinel`) +A hyper-speed pre-commit hook strictly for localized secret detection. +* **Tier 0 Path Blocking:** Instantly blocks sensitive file path commits (e.g., `.pem`, `id_rsa`). +* **Deep Content Scanning:** Hunts for hardcoded cloud cryptographic keys and SaaS tokens. +* **Graveyard Detection:** Finds abandoned passwords sitting in [commented-out dead code](https://squid-protocol.github.io/gitgalaxy/08-13-graveyard-detector/). --- @@ -50,7 +55,7 @@ A hyper-speed pre-commit hook strictly for secret detection. #### Showcase A: Vault Sentinel (Secret Detection) To prove this engine operates fast enough to be a synchronous pre-commit hook without frustrating developers, we unleashed the **Vault Sentinel** on the massive **tRPC** TypeScript monorepo. 
-The engine evaluated 871 files and performed deep-content cryptographic scans on 695 of them in **0.53 seconds** (processing over 1,300 files per second). It successfully intercepted 7 exposed environment/config files and caught a hardcoded API key before the commit could execute. +The engine evaluated 871 files and performed deep-content cryptographic scans on 695 of them in **0.53 seconds** (processing over 1,300 files per second). It successfully intercepted 7 exposed environment files and caught a hardcoded API key before the commit could execute. ![Vault Sentinel Demo](../../../docs/wiki/assets/vault_sentinel_scan.gif) @@ -62,15 +67,6 @@ The engine ripped through the repository at **2,825 files per second**. By readi ![X-Ray Inspector Demo](../../../docs/wiki/assets/xray_inspector_scan.gif) ```text -🔎 Scanning 95 files for structural anomalies: - - Magic Byte Mismatches (e.g., hidden executables disguised as images) - - Parasitic Execution Headers (e.g., executable logic buried in data blobs) - - High-Entropy Encrypted Payloads (e.g., packed malware or sub-atomic XOR loops) -☢️ [ANOMALY DETECTED] examples/fmtstr/printf.mipsel - -> Embedded execution header found: b'\x7fELF' -☢️ [ANOMALY DETECTED] examples/fmtstr/printf.mips64el - -> Embedded execution header found: b'\x7fELF' - =========================================================================== ☢️ X-RAY INSPECTOR: MISSION REPORT =========================================================================== @@ -80,26 +76,18 @@ The engine ripped through the repository at **2,825 files per second**. By readi Scan Velocity : 2,825 files/sec --------------------------------------------------------------------------- Active Anomalies : 13 - File Denylist Blocks : 0 - File Allowlist Bypasses: 0 --------------------------------------------------------------------------- ❌ TRIAGE ALERT: 13 structural anomalies detected. Blocking commit/PR.
``` #### Showcase C: Supply Chain Firewall (Infrastructure-as-Code Audit) -To prove the firewall can handle diverse ecosystems without throwing false positives, we ran it against the **Terraform** repository. +To prove the firewall can handle diverse polyglot ecosystems without throwing false positives, we ran it against the **Terraform** repository. The engine parsed 1,834 files at a velocity of **436 files per second**. It successfully verified the integrity of the dependency tree, identified 54 unknown packages for audit, and cleared the build without tripping any false alarms on standard Go/HCL syntax. ![Supply Chain Firewall Demo](../../../docs/wiki/assets/terraform_firewall_scan.gif) ```text -🔎 Scanning 1,834 files for supply chain risks: - - Zero-Trust Package Verification - - Unicode Homoglyphs & Typo-squatting - - Steganography & Shadow Imports - - Tainted I/O & Malicious Execution - =========================================================================== 🧱 SUPPLY CHAIN FIREWALL: MISSION REPORT =========================================================================== @@ -112,8 +100,6 @@ The engine parsed 1,834 files at a velocity of **436 files per second**. It succ Unknown Packages : 54 --------------------------------------------------------------------------- Active Threats : 0 - File Denylist Blocks : 0 - File Allowlist Bypasses: 0 --------------------------------------------------------------------------- ✅ BUILD PASSED: Dependency supply chain is clean. ``` @@ -122,16 +108,36 @@ The engine parsed 1,834 files at a velocity of **436 files per second**. It succ ### 🚀 Quickstart: CI/CD & Pre-Commit Integration -If you have installed GitGalaxy globally via PyPI (`pip install gitgalaxy`), you can execute these Sentinels directly from the terminal or wire them into your automation pipelines. +GitGalaxy is designed for frictionless adoption.
You can install it globally via PyPI (`pip install gitgalaxy`) or run it natively in GitHub Actions without installing anything. + +#### 1. Global GitHub Marketplace Action (Recommended) +You can drop GitGalaxy into any repository immediately using our official [GitHub Marketplace Action](https://github.com/marketplace/actions/gitgalaxy-scanner). + +Add this to your `.github/workflows/security.yml` file: + +```yaml +name: GitGalaxy Zero-Trust Audit +on: [pull_request] + +jobs: + gitgalaxy-scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run GitGalaxy Supply Chain Firewall + uses: squid-protocol/gitgalaxy@v2.0.7 + with: + tool: 'supply-chain-firewall' +``` -#### 1. Local CLI Execution +#### 2. Local CLI Execution ```bash supply-chain-firewall ./node_modules/ xray-inspector ./src/ vault-sentinel . ``` -#### 2. Local Pre-Commit Hook Integration +#### 3. Local Pre-Commit Hook Integration To run the Vault Sentinel automatically before every commit, add this to your `.pre-commit-config.yaml` file: ```yaml @@ -146,35 +152,12 @@ repos: pass_filenames: true ``` -#### 3. GitHub Actions CI/CD Integration -To run the Supply Chain Firewall on every Pull Request, create a `.github/workflows/security.yml` file: - -```yaml -name: GitGalaxy Security Audit - -on: - pull_request: - branches: [ "main" ] - -jobs: - gitgalaxy-scan: - runs-on: ubuntu-latest - steps: - - name: Checkout Repository - uses: actions/checkout@v4 - - - name: Run Supply Chain Firewall - uses: squid-protocol/gitgalaxy@main - with: - tool: 'supply-chain-firewall' - target: '.' -``` - --- -### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) -This tool is a specialized spoke in the larger GitGalaxy ecosystem. It is driven by our custom mathematical heuristics engine, capable of mapping multi-dimensional relationships at extreme velocity.
Explore the official wiki to see the sub-atomic heuristics used to catch obfuscated malware: +### ๐ŸŒŒ Explore the GitGalaxy Wiki +This toolsuite is just one spoke in the larger GitGalaxy ecosystem. Explore the official documentation to see the math and methodology behind our AST-free engine: -* ๐Ÿ“– **[Supply Chain Firewall Architecture](../../../docs/wiki/04-03-supply-chain-firewall.md)** -* ๐Ÿ“– **[Binary Anomaly & Entropy Mathematics](../../../docs/wiki/04-05-binary-anomaly-detector.md)** -* ๐Ÿ“– **[Hardcoded Secrets Exposure Equations](../../../docs/wiki/08-23-hardcoded-secrets-exposure.md)** +* ๐Ÿ“– **[The Competitive Landscape (How We Beat the Status Quo)](https://squid-protocol.github.io/gitgalaxy/04-00-security_landscape/)** +* ๐Ÿ“– **[Supply Chain Firewall Architecture](https://squid-protocol.github.io/gitgalaxy/04-03-supply-chain-firewall/)** +* ๐Ÿ“– **[Binary Anomaly & Entropy Mathematics](https://squid-protocol.github.io/gitgalaxy/04-05-binary-anomaly-detector/)** +* ๐Ÿ“– **[Hardcoded Secrets Exposure Equations](https://squid-protocol.github.io/gitgalaxy/08-23-hardcoded-secrets-exposure/)** * ๐Ÿช **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/gitgalaxy/tools/terabyte_log_scanning/README.md b/gitgalaxy/tools/terabyte_log_scanning/README.md index 2ca04d0c..a174678e 100644 --- a/gitgalaxy/tools/terabyte_log_scanning/README.md +++ b/gitgalaxy/tools/terabyte_log_scanning/README.md @@ -10,22 +10,22 @@ During an active incident response or catastrophic data breach, standard tools f This suite provides a tactical, pipeline-ready solution: **ultra-high-velocity, unindexed binary streaming.** Running at over 2 GB per minute on standard hardware, our custom stream-processing engine reads data continuously without ever loading the massive file into RAM. 
This makes it perfect for active breach triage, or as an automated CI/CD pipeline job to sanitize server logs before they are permanently archived. -### 1. The PII Data Leak Hunter (`pii-leak-hunter`) +### 1. [The PII Data Leak Hunter](https://squid-protocol.github.io/gitgalaxy/04-06-pii-leak-hunter/) (`pii-leak-hunter`) A specialized incident response tool designed to find hemorrhaging Personally Identifiable Information (Credit Cards, SSNs, AWS API Keys) inside massive, raw data dumps. * **Binary-Level Regex Evaluation:** Compiles structural patterns to raw bytes for extreme CPU efficiency. -* **Automated Data Masking:** Redacts toxic payloads before writing to evidence logs. +* **Automated Data Masking:** Redacts toxic payloads before writing to safe evidence logs. * **Exfiltration Histograms:** Generates terminal ASCII charts to pinpoint exact breach minutes. -* **Pipeline Sanitization:** Runs automatically in CI/CD to block PII log archiving. +* **Pipeline Sanitization:** Runs automatically in CI/CD to block PII log archiving via our [Hunting PII Leaks Recipe](https://squid-protocol.github.io/gitgalaxy/cookbook/hunt-pii-leaks/). -### 2. The Terabyte Log Scanner (`terabyte-log-scanner`) +### 2. [The Terabyte Log Scanner](https://squid-protocol.github.io/gitgalaxy/04-07-terabyte-log-scanner/) (`terabyte-log-scanner`) A runtime execution tracer that connects static codebase architecture to physical runtime reality. It parses massive mainframe SMF logs or distributed traces to prove what code is actually executing. * **Intermediate Representation (IR) Ingestion:** Ingests static repository maps to hunt known compiled programs in the logs. * **Execution Verification:** Proves exact runtime execution frequencies in production environments. -* **Zero-Hit Dead Code:** Mathematically proves if compiled legacy code is truly abandoned. 
+* **Zero-Hit Dead Code:** Mathematically [proves if compiled legacy code is truly abandoned](https://squid-protocol.github.io/gitgalaxy/cookbook/prove-dead-code-logs/). * **Dynamic Telemetry:** Outputs sidecar JSON for 3D WebGPU traffic heatmaps. --- @@ -110,7 +110,7 @@ jobs: ### 🌌 Powered by the blAST Engine (Bypassing LLMs and ASTs) This tool is a modular enterprise integration within the broader GitGalaxy architecture. It is driven by our custom mathematical heuristics engine, capable of processing multi-dimensional data at extreme velocity without requiring rigid ASTs or cloud APIs. Read the official documentation to see the structural methodologies powering this high-speed log analysis: -* 📖 **[PII Leak Hunter Architecture](../../../docs/wiki/04-06-pii-leak-hunter.md)** -* 📖 **[Terabyte Log Scanner Mechanics](../../../docs/wiki/04-07-terabyte-log-scanner.md)** -* 📖 **[Time-Series Execution Histograms](../../../docs/wiki/08-25-execution-histograms.md)** +* 📖 **[The blAST Paradigm (ASTs vs LLMs)](https://squid-protocol.github.io/gitgalaxy/01-03-the-blast-paradigm/)** +* 📖 **[PII Leak Hunter Architecture](https://squid-protocol.github.io/gitgalaxy/04-06-pii-leak-hunter/)** +* 📖 **[Terabyte Log Scanner Mechanics](https://squid-protocol.github.io/gitgalaxy/04-07-terabyte-log-scanner/)** * 🪐 **[Return to the Main GitGalaxy Hub](https://github.com/squid-protocol/gitgalaxy)** \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 440b0ba9..983c77eb 100755 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,10 +19,10 @@ theme: extra: social: - icon: "fontawesome/solid/globe" - link: "[https://gitgalaxy.io/](https://gitgalaxy.io/)" + link: "https://gitgalaxy.io/" name: "GitGalaxy Visualizer" - icon: "fontawesome/brands/github" - link: "[https://github.com/squid-protocol/gitgalaxy](https://github.com/squid-protocol/gitgalaxy)" + link: "https://github.com/squid-protocol/gitgalaxy" name: "GitHub Source Code" docs_dir: "docs/wiki" @@ -40,10
+40,10 @@ plugins: extra_javascript: - "javascripts/mathjax.js" - - "[https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.js](https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.js)" + - "https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.2/es5/tex-mml-chtml.js" nav: - - '🌌 Open GitGalaxy Visualizer': "[https://gitgalaxy.io/](https://gitgalaxy.io/)" + - '🌌 Open GitGalaxy Visualizer': "https://gitgalaxy.io/" - 'Home': "index.md" - 'Enterprise Cookbook (Use Cases)': - 'Enable 24/7 Delta Monitoring': "cookbook/continuous-delta-monitoring.md" @@ -61,35 +61,25 @@ nav: - 'Detect Architectural Drift & Trojans': "cookbook/detect-architectural-drift.md" - 'Map Legacy COBOL Monoliths': "cookbook/map-cobol-monoliths.md" - 'Scaffold Java Spring Boot': "cookbook/scaffold-spring-boot.md" + - 'Cross-Cultural M&A Due Diligence': "cookbook/Cross-Cultural-M&A-Technical-Due-Diligence.md" + - 'COBOL Microservice Slicing': "cookbook/cobol-microservice-slicing.md" + - 'COBOL to Java Build System': "cookbook/cobol-to-java-automated-spring-boot-build-system.md" + - 'COBOL to Java Serialization': "cookbook/cobol-to-java-mainframe-data-serialization-forge.md" + - 'COBOL to Java REST API': "cookbook/cobol-to-java-REST-API-generation.md" + - 'COBOL Entity Forge': "cookbook/cobol-to-java-spring-boot-entity-forge.md" + - 'COBOL Service Forge': "cookbook/cobol-to-java-spring-boot-service-forge.md" + - 'Create DAG from COBOL': "cookbook/creating-dag-from-cobol-files.md" + - 'Create JCL from COBOL': "cookbook/creating-jcl-from-cobol-files.md" + - 'Create Schema from COBOL': "cookbook/creating-schema-from-cobol-files.md" + - 'Identify Dead COBOL Code': "cookbook/identifying-dead-code-in-cobol.md" + - 'Unpack ETL from CBL': "cookbook/unpacking-etl-from-cbl-files.md" - '🏛️ Museum of Code': - 'Welcome to the Museum': 'museum-of-code/index.md' - - 'AlphaFold (2018)': 'museum-of-code/teardown-of-alphafold.md' - - 'Android':
'museum-of-code/teardown-of-android.md' - - 'Apollo 11': 'museum-of-code/teardown-of-apollo-11.md' - - 'Bitcoin v0.1.0': 'museum-of-code/teardown-of-bitcoin.md' - - 'BLAST': 'museum-of-code/teardown-of-blast.md' - - 'DOOM': 'museum-of-code/teardown-of-doom.md' - - 'Fineract': 'museum-of-code/teardown-of-fineract.md' - - 'FreeBSD': 'museum-of-code/teardown-of-freebsd.md' - - 'GnuPG': 'museum-of-code/teardown-of-gnupg.md' - - 'Kubernetes': 'museum-of-code/teardown-of-kubernetes.md' - - 'Linux': 'museum-of-code/teardown-of-linux.md' - - 'MediaWiki': 'museum-of-code/teardown-of-mediawiki.md' - - 'NVDA': 'museum-of-code/teardown-of-nvda.md' - - 'OpenCV': 'museum-of-code/teardown-of-opencv.md' - - 'Pandas': 'museum-of-code/teardown-of-pandas.md' - - 'Ruby on Rails': 'museum-of-code/teardown-of-rails.md' - - 'ROOT': 'museum-of-code/teardown-of-root.md' - - 'SQLite': 'museum-of-code/teardown-of-sqlite.md' - - 'TensorFlow': 'museum-of-code/teardown-of-tensorflow.md' - - 'WordPress': 'museum-of-code/teardown-of-wordpress.md' - - 'WRF-Fortran': 'museum-of-code/teardown-of-wrf-fortran.md' - - '1. Foundation & Architecture': + - '1. Foundation & Architecture': - 'Project Overview': "01-project-overview.md" - 'GalaxyScope CLI Reference': "01-02-galaxyscope-cli-reference.md" - 'The blAST Paradigm (ASTs vs LLMs)': "01-03-the-blast-paradigm.md" - - 'Installation & Airgap Setup': "01-04-airgap-installation.md" - - 'How to Read the Galaxy (Visuals)': "01-05-how-to-read-the-galaxy.md" + - 'The Structural RAG Graph': "01-06-the-structural-rag-graph.md" - '2. Data Pipeline': - 'Pipeline Overview': "02-01-pipeline-overview.md" - 'Optical Orchestration': "02-02-optical-orchestration.md" @@ -122,6 +112,7 @@ nav: - 'Claim 7 (Comparing DOOM Ports)': "03-07-claim-7-doom-comparisons.md" - 'Future Outlooks': "03-08-future-outlooks.md" - '4. 
Security Tools & Spokes': + - 'Competitive Landscape': "04-00-security_landscape.md" - 'Full API Network Map': "04-01-full-api-network-map.md" - 'SBOM Generator': "04-02-sbom-generator.md" - 'Supply Chain Firewall': "04-03-supply-chain-firewall.md"