diff --git a/.agent-os/instructions/core/pr-merge-runbook.md b/.agent-os/instructions/core/pr-merge-runbook.md
new file mode 100644
index 00000000..aaccbeef
--- /dev/null
+++ b/.agent-os/instructions/core/pr-merge-runbook.md
@@ -0,0 +1,84 @@
+# PR Merge Runbook — Truth Kernel Branches
+
+Purpose: deterministically open and merge our three current branches using either the GitHub UI or the repo’s helper script. This file is the canonical reference for future threads.
+
+## Branches
+1. **PR 1** (audit fix) → `fix/audit-hyphenated-identifiers` → `main`
+2. **PR 2** (node test runner) → `test/node-runner` → `main`
+3. **PR 3** (docs) → `docs/truthlens-readme-status` → `main` (after rebasing on merged PR1+PR2)
+
+---
+
+## Option A — GitHub UI (simplest)
+1. Open each “Compare & pull request” link for the three branches.
+2. Titles/Bodies:
+   - PR 1 — **Title:** `fix(audit): ignore code blocks; tighten pronoun detection`
+     **Body:** Refines simulation detection to skip fenced/inline code and avoid hyphen/underscore-bound identifiers. Adds unit tests.
+   - PR 2 — **Title:** `test: switch to Node built-in runner; convert audit tests`
+     **Body:** Replaces Mocha semantics with node:test. Updates package.json test script. Ensures zero-deps test execution in CI.
+   - PR 3 — **Title:** `docs: TruthLens runtime + audit; compliance quick checks; Codex workflow`
+     **Body:** README adds TruthLens + CI gate overview; STATUS adds compliance quick checks; TruthLens policy page gains canonical pointers.
+3. Labels:
+   - PR 1: `truth-kernel`, `audit`
+   - PR 2: `tests`, `truth-kernel`
+   - PR 3: `docs`, `truth-kernel`
+4. Merge order:
+   - Merge PR 1 → Merge PR 2 → Rebase `docs/truthlens-readme-status` on `main`, then open & merge PR 3.
+
+---
+
+## Option B — Helper script (no gh CLI)
+**Prereq:** create a GitHub Personal Access Token (repo scope). Export it in your shell:
+```bash
+export GITHUB_TOKEN=<your-token>
+```
+Run these commands from the repo root (Ubuntu).
+
+### 1) PR 1 — audit fix
+```bash
+node tools/gh-pr-open-and-merge.cjs \
+  --repo mrhpython/Soulfield \
+  --base main \
+  --head fix/audit-hyphenated-identifiers \
+  --title "fix(audit): ignore code blocks; tighten pronoun detection" \
+  --body "Refines simulation detection to skip fenced/inline code and avoid hyphen/underscore-bound identifiers. Adds unit tests." \
+  --labels "truth-kernel,audit"
+```
+### 2) PR 2 — node test runner
+```bash
+node tools/gh-pr-open-and-merge.cjs \
+  --repo mrhpython/Soulfield \
+  --base main \
+  --head test/node-runner \
+  --title "test: switch to Node built-in runner; convert audit tests" \
+  --body "Replaces Mocha semantics with node:test. Updates package.json test script. Ensures zero-deps test execution in CI." \
+  --labels "tests,truth-kernel"
+```
+### 3) Rebase docs branch after PR 1 & 2 merge
+```bash
+git switch docs/truthlens-readme-status
+git fetch origin
+git rebase origin/main
+npm ci && npm test && node backend/scripts/audit-truth.cjs
+```
+### 4) PR 3 — docs
+```bash
+node tools/gh-pr-open-and-merge.cjs \
+  --repo mrhpython/Soulfield \
+  --base main \
+  --head docs/truthlens-readme-status \
+  --title "docs: TruthLens runtime + audit; compliance quick checks; Codex workflow" \
+  --body "README adds TruthLens + CI gate overview; STATUS adds compliance quick checks; TruthLens policy page gains canonical pointers." \
+  --labels "docs,truth-kernel"
+```
+
+---
+
+## Notes
+- TruthLens is the governing policy; all outputs are wrapped at runtime and documented as OS intent.
+- Non-simulation contract applies to all agent outputs.
+- CI gate runs the Truth audit + tests on every push/PR.
+
+## Troubleshooting
+- If the helper reports “CI failed”, click into the PR checks to view logs. Fix locally, push to the branch, rerun the helper.
+- If `git rebase` reports conflicts, resolve locally, `git add -A && git rebase --continue`, then rerun the PR 3 helper command.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e224cb71..d9532c54 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,9 +2,8 @@ name: CI
 on:
   push:
     branches: [ main, feat/**, fix/**, docs/**, chore/** ]
   pull_request:
-    branches: [ main ]
 
 jobs:
   node:
@@ -19,8 +18,23 @@
           cache: 'npm'
       - name: Install deps
        run: npm ci
+      - name: API health (offline)
+        run: |
+          DEV_NO_API=1 node backend/index.cjs & echo $! > api.pid
+          for i in {1..20}; do
+            if curl -fsS http://127.0.0.1:8790/health >/dev/null; then
+              curl -fsS http://127.0.0.1:8790/health | tee health.json; break
+            fi
+            sleep 0.3
+          done
+          kill $(cat api.pid)
+      - name: Run ESLint (npx ESLint v8)
+        run: npx -y eslint@8 .
+        continue-on-error: true
+      - name: Lint (if present)
+        run: npm run -s lint --if-present
       - name: Run tests (node:test)
         run: npm test
 
   python:
     runs-on: ubuntu-latest
@@ -31,5 +45,19 @@
         uses: actions/setup-python@v5
         with:
           python-version: '3.11'
+      - name: Install deps (if requirements.txt)
+        run: |
+          if [ -f requirements.txt ]; then
+            python -m pip install --upgrade pip
+            pip install -r requirements.txt
+          fi
+      - name: Run pytest if tests are present
+        run: |
+          if ls -1 tests test_*.py 2>/dev/null | grep -q .; then
+            pip install pytest
+            PYTHONPATH=. pytest -q
+          else
+            echo "No python tests found; skipping"
+          fi
       - name: Sanity
         run: python --version
diff --git a/README.md b/README.md
index 4e121572..4fd3443c 100644
--- a/README.md
+++ b/README.md
@@ -1,127 +1,157 @@
 # Soulfield OS
-> The AI Business OS That Doesn’t Hallucinate
-> Orchestrates research → strategy → execution → analytics, all gated by TruthLens.
-
----
-
-## ✨ What It Is
-Soulfield OS is a backend-first orchestration system where:
-- Aiden (Claude) turns research into specs.
-- InfraNodus identifies content/market gaps.
-- Bright Data Scraper fetches compliant marketplace/search data (allowlist enforced).
-- Jina reranks and semantically searches across workspace docs.
-- TruthLens checkpoints every step, blocking simulation, hallucinations, or invalid metrics.
-
-Outputs: trusted, auditable business blueprints.
-
----
-
-## 🚀 Quickstart
-
+A backend‑first agent OS for research → gap analysis → spec → (safe) execution. It exposes a minimal HTTP API, a read‑only MCP filesystem tool, a terminal spec browser, and adapters for Claude (Aiden), Jina rerank, InfraNodus gap analysis, and a policy‑first scraper.
+
+## Features
+- API server: `/health`, `/chat` with “!” command router and `@agent` council routing.
+- Agents: Aiden (Claude), Jina (semantic rerank), InfraNodus (gap analysis), Scraper (allowlist only).
+- Memory: File memory by default; optional Pinecone with OpenAI embeddings.
+- Compliance: TruthLens shim wraps outputs; scraper domain allowlist; read‑only MCP filesystem server.
+- TUI: Terminal UI to browse/edit specs in `.agent-os/specs/*/spec.md`.
+- Safe execution: `!coder-apply` executes only whitelisted commands from approved spec sections.
+- Research enhancements: Bright Data templates (e.g., `google/serp`, `ebay/search`) and a discovery adapter (multi‑engine + BD SERP) to broaden coverage while staying policy‑first.
+
+## Quickstart
+- Requirements: Node.js 18+ (20+ recommended). Create `.env` (see below). Do not commit real keys.
+- Install: `npm install`
+- Start API: `npm start`
+  - Health: `curl -s http://127.0.0.1:8790/health`
+  - Help: `curl -s -X POST http://127.0.0.1:8790/chat -H 'content-type: application/json' -d '{"prompt":"!help"}'`
+  - Aiden: `curl -s -X POST http://127.0.0.1:8790/chat -H 'content-type: application/json' -d '{"prompt":"@aiden: Summarize the workspace goals"}'`
+- MCP (read‑only FS): `npm run start:mcp`
+  - Tools: GET `http://127.0.0.1:8791/mcp/tools`
+  - Call: POST `http://127.0.0.1:8791/mcp/call` `{ "name":"list_dir", "args": {"path":"workspace"} }`
+- Spec TUI: `npm run start:tui`
+- Spec apply tests: `npm run test:apply` (dry) or `npm run test:apply:run` (execute whitelisted commands)
+
+## Environment
+Set in `.env`:
+- Claude: `CLAUDE_API_KEY`, `AIDEN_MODEL`
+- Jina: `JINA_API_KEY`, `JINA_MODEL`
+- Pinecone (optional): `USE_PINECONE=1`, `PINECONE_API_KEY`, `PINECONE_INDEX`
+- OpenAI Embeddings (if Pinecone): `OPENAI_API_KEY`
+- InfraNodus: `INFRANODUS_API_KEY`, `INFRANODUS_API_BASE`
+- Bright Data (optional): `BRIGHTDATA_TOKEN`, `BD_BASE_URL=https://api.brightdata.com`, `BD_TIMEOUT_MS=30000`
+
+## Commands (via /chat)
+- `!help` – menu
+- `!note #tags` / `!recall #tag` – capture/recall
+- `!plan-add active|future "Name" #tags` / `!plan-list` – roadmap
+- `!golden "desc" #tags` / `!golden-list [#tag] [N]` – golden ideas
+- `!session-note "text" #tags` / `!session-list [#tag] [N]` – session timeline
+- `!learn #tags` / `!knowledge-list [#tag] [N]` – knowledge
+- `!learn-file [#tags]` – ingest file
+- `!coder-apply [--spec <id>] [#apply]` – safe execute from latest/given spec
+
+CLI (local)
+- `tools/sf specify` – generate spec from newest research
+- `tools/sf specify:infra [exports/<file>.json]` – generate spec from InfraNodus export
+- `tools/sf dry` / `tools/sf apply --apply` – safe preview/execute Run blocks
+- `tools/sf search "<query>"` (planned) – discovery via multiple engines + Bright Data SERP → `workspace/data/search/*.jsonl`
+
+## Paths and Data
+- API: `backend/index.cjs`
+- Jobs: `backend/jobs.js`
+- Council: `backend/council.js`
+- Memory (file): `memory.js` → `data/memory.json`
+- Memory (Pinecone): `backend/services/memory/memory-pinecone.cjs`
+- Scraper: `backend/services/scraper/index.cjs` → `workspace/data/scrapes/*.jsonl`
+- Specs: `backend/.agent-os/specs/<spec-id>/spec.md`
+- Apply logs: `backend/.agent-os/runs/*.log`
+- MCP: `backend/mcp-server.cjs`
+- TUI: `backend/tui.js`
+
+## Security & Compliance
+- TruthLens wraps outputs (see `truthLens.js` / `backend/truthlens.cjs`).
+- Scraper requires allowlist (`backend/services/scraper/config/allowlist.yaml`) using template tokens (e.g., `bd:google/serp`), tiered domains (gov, marketplaces, media, vendor_docs), bounded wildcards, and purpose tags.
+- MCP is read-only and jailed to project root.
+- `!coder-apply` whitelists basic commands only (`echo`, `ls`, `cat`, `head`).
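+
+For a quick policy check, the allowlist loader can be exercised directly from Node. A minimal sketch (the `domains` Set, `mode`, and `rules.purpose_tag_required` fields come from `loadAllowlist()` in `backend/services/scraper/index.cjs`; the subdomain test mirrors its internal host matching):
+
+```js
+// sketch: is this host covered by the scraper allowlist?
+const { loadAllowlist } = require('./backend/services/scraper/index.cjs');
+
+const cfg = loadAllowlist(); // parses config/allowlist.yaml, or falls back to a flat domain list
+const host = 'www.ebay.co.uk';
+const allowed = [...cfg.domains].some(d => host === d || host.endsWith('.' + d));
+
+console.log({ host, allowed, mode: cfg.mode, purposeRequired: cfg.rules.purpose_tag_required });
+```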
+
+## CI / Truth Kernel (non-bypassable)
+- Workflow: `.github/workflows/truth-kernel.yml` runs on pushes and PRs.
+- Environment: Ubuntu, Node.js 20 (matches local stack).
+- Steps:
+  1. `npm ci`
+  2. `node backend/scripts/audit-truth.cjs`
+  3. `npm test` ← runs Node’s built-in test runner (`node:test`) against `backend/tests/*.test.cjs`
+- If any step fails, the merge is blocked. This ensures only TruthLens-compliant, non-simulation, and tested changes land on `main`.
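+
+The gate is also easy to reproduce as a unit test. A minimal sketch (not one of the shipped tests), reusing the `spawnSync` pattern from `backend/tests/pr-merge-runbook.test.cjs`:
+
+```js
+'use strict';
+const test = require('node:test');
+const assert = require('assert');
+const { spawnSync } = require('node:child_process');
+
+// sketch: the CI gate in miniature; the audit script must exit 0
+test('truth audit passes on the current tree', () => {
+  const res = spawnSync(process.execPath, ['backend/scripts/audit-truth.cjs'], { encoding: 'utf8' });
+  assert.strictEqual(res.status, 0, res.stderr || res.stdout);
+});
+```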
+
+## Using Codex (file-based prompt)
+- Canonical runbook lives at `.agent-os/instructions/core/codex-runbook.md`.
+- Invoke Codex with a file-based system prompt flag (example):
+  ```bash
+  codex run --prompt-file .agent-os/instructions/core/codex-runbook.md
+  ```
+- Aurea outputs PLAN → DIFFS → COMMANDS → TESTS → VERIFICATION → ROLLBACK; Codex applies the diffs and pushes per the runbook.
+
+## Known Issues / Notes
+- Rotate any real API keys in `.env` before sharing.
+- InfraNodus client stubs queue requests offline; integrate the real API when keys are set.
+- Pinecone path is optional; with `USE_PINECONE!=1`, the system uses file memory.
+- Bright Data integration is template‑based; do not enable all categories by default.
+
+## Soulfield OS — Docs Zone
+
+## TruthLens (runtime + repo gate)
+All assistant and agent outputs are wrapped by TruthLens at runtime, enforcing structural clarity and non-simulation. The canonical shim lives at `truthLens.js`; the backend module is `backend/truthlens.cjs`.
+
+Repo hard-gate: CI (`.github/workflows/truth-kernel.yml`) blocks merges unless the Truth audit and tests pass. The audit script validates that generated artifacts either:
+- contain `meta.lens.passed` with `"structure"` when JSON, or
+- contain no simulation phrases in prose (code blocks are ignored).
+
+Run locally:
 ```bash
-git clone https://github.com/mrhpython/Soulfield
-cd Soulfield
 npm ci
+node backend/scripts/audit-truth.cjs
+npm test
+```
+
+Policy source of truth: `workspace/knowledge/TruthLens.md`.
 
-# Research loop
-sf research "eco-friendly digital planners UK"
-sf specify
-sf index
-sf dry
-sf apply --apply
+## Codex-first workflow
+We prefer plans → diffs → commands → tests → verification → rollback. Codex applies patches and pushes. See runbook:
+```
+.agent-os/instructions/core/codex-runbook.md
 ```
-
----
-
-## 🛡️ Compliance
-- Default jurisdiction: UK, currency GBP.
-- Online-first businesses only.
-- Scraping only from allowlisted templates/domains (`backend/services/scraper/config/allowlist.yaml`).
-- No login-wall, PII, or policy-violating sources.
-- TruthLens audit enforced in CI: `node backend/scripts/audit-truth.cjs`.
-
----
-
-## 📚 Docs
-- [Design Intent](workspace/docs/Soulfield%20OS%20Design%20Intent.md)
-- [STATUS.md](workspace/docs/STATUS.md)
-- [TruthLens Policy](workspace/knowledge/TruthLens.md)
-- [TruthLens Vision](workspace/knowledge/TruthLens-Vision.md)
-
----
-
-## 🛠️ Development
-- Node.js ≥20
-- CLI helper: `sf` (research/specify/apply/log)
-- CI: TruthKernel audit + Node/Python checks
-- Codex workflow: PLAN → DIFFS → COMMANDS → TESTS → VERIFICATION → ROLLBACK
-
-### Embeddings (Provider‑agnostic)
-- All text embeddings go through `backend/services/embedding.cjs`.
-- Default provider is local CPU using `@xenova/transformers` (MiniLM‑L6‑v2, 384‑dim).
-- Optional provider: OpenAI (`EMBED_PROVIDER=openai`, `EMBED_MODEL=text-embedding-3-small`).
-- Logs first use: `[embedding] provider=<provider> dim=<n>`.
-
-Pinecone adapter (`backend/services/memory/memory-pinecone.cjs`):
-- Uses `embedText()`; pads/truncates vectors to match the index dimension.
-- Helpers:
-  - `embedAndUpsert({ id, text, metadata })`
-  - `upsertRaw({ id, text, metadata })` (uses tiny epsilon vector to avoid all‑zero constraint)
-
-Env keys (see `.env.example`):
-- `EMBED_PROVIDER=local`
-- `EMBED_MODEL=text-embedding-3-small` (only for OpenAI)
-- `HEARTBEAT_EMBED_EVERY=4`
-
-### Context Spine (Time + Calendar)
-- File: `contextSpine.js`
-- Heartbeat: every 15m; embeds one of every N (env `HEARTBEAT_EMBED_EVERY`), others stored raw.
-- Calendar: fetch next 24h from Google Calendar (if `GCAL_*` set); events always embedded.
-- Start with API: set `CONTEXT_SPINE=1` then `node backend/index.cjs`.
-- One‑off test: `node contextSpine.js --test`.
-
-CLI helpers:
-- `sf context:query [--kind heartbeat|calendar|both] [--top N] [--mode similarity|tags] [--raw]`
-- `sf context:test` (one‑off heartbeat+calendar sync)
-- `sf context:backfill --kind heartbeat --limit 100` (embed raw heartbeats)
-
-Health
-- `GET /health/context` → `{ heartbeat_age_sec, calendar_last_sync_iso, calendar_enabled, pinecone_ok }`
-
-Retention & Pruning
-- Env: `CONTEXT_RETENTION_DAYS=14`
-- Spine runs a daily prune: deletes heartbeats older than N days and past calendar events; future events are kept.
-
-Observability
-- Hourly counters logged by the spine:
-  - heartbeats written / embedded
-  - calendar events synced
-  - Pinecone upserts / failures
-
-Service (systemd)
+## Quick links
+- Runtime lens: `truthLens.js`, `backend/truthlens.cjs`
+- Audit script: `backend/scripts/audit-truth.cjs`
+- Tests: `backend/tests/*.test.cjs` (Node’s built-in `node:test`)
+- Ops status & commands: `workspace/docs/STATUS.md`
+
+## License
+See repository terms or your organization policy. No license header added by default.
+### Local testing (Ubuntu)
+- Run all unit tests: `npm test`
+- Run the Truth audit only: `node backend/scripts/audit-truth.cjs`
+
+### GitHub PRs without gh CLI
+- Helper script: `tools/gh-pr-open-and-merge.cjs` uses the GitHub REST API with `GITHUB_TOKEN`.
+- Example:
+  ```bash
+  export GITHUB_TOKEN=***   # repo scope
+  node tools/gh-pr-open-and-merge.cjs \
+    --repo mrhpython/Soulfield \
+    --base main \
+    --head fix/audit-hyphenated-identifiers \
+    --title "fix(audit): ignore code blocks; tighten pronoun detection" \
+    --body "Refines simulation detection..." \
+    --labels "truth-kernel,audit"
+  ```
+### Batch merge (all three PRs)
+- Canonical runbook: `.agent-os/instructions/core/pr-merge-runbook.md`
+- One-shot (non-interactive). Requires `GITHUB_TOKEN` unless `DRY=1`.
+
+dry-run (no network):
+
 ```
-[Unit]
-Description=Soulfield API + Context Spine
-After=network.target
-
-[Service]
-WorkingDirectory=/home/<user>/soulfield
-Environment=CONTEXT_SPINE=1
-Environment=DEV_NO_API=0
-ExecStart=/usr/bin/node backend/index.cjs
-Restart=always
-RestartSec=5
-User=<user>
-
-[Install]
-WantedBy=multi-user.target
+DRY=1 bash tools/merge-three-prs.sh
 ```
-Enable: `sudo systemctl daemon-reload && sudo systemctl enable --now soulfield`
-
----
+real run:
-
-## 📌 One-Liner
-Soulfield OS = orchestration hub.
-TruthLens = trust filter.
-Together → reliable AI business operations.
+```
+export GITHUB_TOKEN=<your-token>
+bash tools/merge-three-prs.sh
+```
diff --git a/backend/tests/gh-pr-helper.test.cjs b/backend/tests/gh-pr-helper.test.cjs
new file mode 100644
index 00000000..85f9ad2d
--- /dev/null
+++ b/backend/tests/gh-pr-helper.test.cjs
@@ -0,0 +1,18 @@
+'use strict';
+const test = require('node:test');
+const assert = require('assert');
+
+test('PR helper CLI args parse (dry run sanity)', async () => {
+  const { spawnSync } = require('node:child_process');
+  const res = spawnSync(process.execPath, [
+    'tools/gh-pr-open-and-merge.cjs',
+    '--repo','owner/repo',
+    '--base','main',
+    '--head','feature/x',
+    '--title','feat: demo',
+    '--labels','truth-kernel,tests',
+    '--dry'
+  ], { encoding: 'utf8' });
+  assert.equal(res.status, 0, res.stderr || res.stdout);
+});
+
diff --git a/backend/tests/pr-merge-runbook.test.cjs b/backend/tests/pr-merge-runbook.test.cjs
new file mode 100644
index 00000000..1ec5d5b7
--- /dev/null
+++ b/backend/tests/pr-merge-runbook.test.cjs
@@ -0,0 +1,15 @@
+'use strict';
+const test = require('node:test');
+const assert = require('assert');
+const { spawnSync } = require('node:child_process');
+const fs = require('node:fs');
+
+test('merge-three-prs.sh exists and supports DRY mode', () => {
+  assert.ok(fs.existsSync('tools/merge-three-prs.sh'), 'tools/merge-three-prs.sh missing');
+  const res = spawnSync('bash', ['-lc', 'DRY=1 bash tools/merge-three-prs.sh'], { encoding: 'utf8' });
+  assert.strictEqual(res.status, 0, `script exited ${res.status}: ${res.stderr || res.stdout}`);
+  assert.match(res.stdout, /PR 1: audit fix/);
+  assert.match(res.stdout, /PR 2: node test runner/);
+  assert.match(res.stdout, /PR 3: docs/);
+});
+
diff --git a/package.json b/package.json
index d68b812b..1ce29096 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
   "author": "",
   "license": "ISC",
   "dependencies": {
+    "yaml": "^2.5.1",
     "@pinecone-database/pinecone": "^6.1.2",
     "@xenova/transformers": "^2.17.2",
     "axios": "^1.12.2",
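The new `yaml` dependency backs the scraper's allowlist parser; `backend/services/scraper/index.cjs` degrades to a flat domain list when the module is absent. A minimal sketch of that optional-require pattern (the `parseAllowlist` helper is illustrative, not a real export):

```js
'use strict';
// Optional dependency: parse structured YAML when available, fall back to a flat list otherwise.
let YAML; try { YAML = require('yaml'); } catch { YAML = null; }

// illustrative helper: mirrors the loader's two modes ("structured" vs "flat")
function parseAllowlist(text) {
  if (YAML && text.includes(':')) return { mode: 'structured', cfg: YAML.parse(text) || {} };
  const domains = text.split(/\r?\n/).map(l => l.trim()).filter(l => l && !l.startsWith('#'));
  return { mode: 'flat', cfg: { domains } };
}
```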
diff --git a/projects/Claude-Code-Usage-Monitor b/projects/Claude-Code-Usage-Monitor
new file mode 160000
index 00000000..06f0fe11
--- /dev/null
+++ b/projects/Claude-Code-Usage-Monitor
@@ -0,0 +1 @@
+Subproject commit 06f0fe11e694b8619f63f8b0db10dbdc5e7e5a44
diff --git a/projects/bytebot b/projects/bytebot
new file mode 160000
index 00000000..3d37894c
--- /dev/null
+++ b/projects/bytebot
@@ -0,0 +1 @@
+Subproject commit 3d37894ce07ef8d8b40adc7fd309ad96c2a71313
diff --git a/projects/opcode b/projects/opcode
new file mode 160000
index 00000000..d9859ac5
--- /dev/null
+++ b/projects/opcode
@@ -0,0 +1 @@
+Subproject commit d9859ac522821ddef4756abda455aa9b07be7454
diff --git a/tools/merge-three-prs.sh b/tools/merge-three-prs.sh
index 93502e82..70ce317e 100755
--- a/tools/merge-three-prs.sh
+++ b/tools/merge-three-prs.sh
@@ -1,6 +1,19 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
+# Merge plan:
+#   1) PR1: fix/audit-hyphenated-identifiers
+#   2) PR2: test/node-runner
+#   3) Rebase docs/truthlens-readme-status on main
+#   4) PR3: docs/truthlens-readme-status
+#
+# Requirements:
+#   - Node 20+, repo root
+#   - tools/gh-pr-open-and-merge.cjs present
+#   - GITHUB_TOKEN exported (unless DRY=1)
+#
+# DRY mode: set DRY=1 to avoid network calls (passes --dry to helper)
+
 REPO="mrhpython/Soulfield"
 BASE="main"
 DRY_FLAG="${DRY:+--dry}"
diff --git a/tools/sf b/tools/sf
index 3c9b6c9d..79279563 100755
--- a/tools/sf
+++ b/tools/sf
@@ -4,6 +4,6 @@ set -euo pipefail
 ROOT="$(cd "$(dirname "$0")/.." && pwd)"
 
 # --- helpers ---------------------------------------------------------------
-research() { /home/michael/.nvm/versions/node/v20.19.5/bin/node "$ROOT/tools/research.cjs" "$@" | tee /dev/stderr; }
-specify() { /home/michael/.nvm/versions/node/v20.19.5/bin/node "$ROOT/tools/spec-from-research.cjs" ${1:-} | tee /dev/stderr; }
-index_() { /home/michael/.nvm/versions/node/v20.19.5/bin/node "$ROOT/tools/index-knowledge.cjs" | tee /dev/stderr; }
+research() { node "$ROOT/tools/research.cjs" "$@" | tee /dev/stderr; }
+specify() { node "$ROOT/tools/spec-from-research.cjs" ${1:-} | tee /dev/stderr; }
+index_() { node "$ROOT/tools/index-knowledge.cjs" | tee /dev/stderr; }
@@ -22,11 +22,11 @@
     return 2
   fi
   # Call Bright Data SERP via scraper service (policy-gated)
-  /home/michael/.nvm/versions/node/v20.19.5/bin/node -e "const s=require('$ROOT/backend/services/scraper/index.cjs'); s.run({ template:'bd:google/serp', purpose:'discovery', kind:'serp', query: process.argv[1] }).then(x=>console.log(JSON.stringify(x,null,2))).catch(e=>{console.error(e);process.exit(1);})" "$q"
+  node -e "const s=require('$ROOT/backend/services/scraper/index.cjs'); s.run({ template:'bd:google/serp', purpose:'discovery', kind:'serp', query: process.argv[1] }).then(x=>console.log(JSON.stringify(x,null,2))).catch(e=>{console.error(e);process.exit(1);})" "$q"
 }
 
-apply_() { /home/michael/.nvm/versions/node/v20.19.5/bin/node "$ROOT/backend/tests/test-apply.js" "$@" 2>/dev/null || echo "apply runner failed"; }
-dry_() { /home/michael/.nvm/versions/node/v20.19.5/bin/node "$ROOT/backend/tests/test-apply.js" --dry "$@" 2>/dev/null || echo "dry runner failed"; }
+apply_() { node "$ROOT/backend/tests/test-apply.js" "$@" 2>/dev/null || echo "apply runner failed"; }
+dry_() { node "$ROOT/backend/tests/test-apply.js" --dry "$@" 2>/dev/null || echo "dry runner failed"; }
 log_() {
   # Canonical logs live under backend/.agent-os/runs
   tail -n 200 "$ROOT/backend/.agent-os/runs/"*-run.log 2>/dev/null || \
@@ -41,11 +41,11 @@
 Usage:
   sf specify [research-id]   # create spec.md from research
   sf index                   # build workspace/data/index.json
   sf search "<query>"        # discovery via Bright Data SERP
   sf context:query [--kind heartbeat|calendar|both] [--top N] [--mode similarity|tags] [--raw]
   sf context:test            # run a one-off heartbeat+calendar sync
   sf context:backfill --kind heartbeat --limit 100
   sf context:next            # prints next event timing
   sf context:summary --window today
   sf apply|dry|log           # existing helpers (if you use them)
   sf bd:endpoint
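For reference, the policy-gated SERP call that `sf search` wires up is just the scraper service's `run` alias; a standalone sketch of the same call (run from the repo root; the query is an example):

```js
'use strict';
// sketch: the Bright Data SERP discovery call behind `sf search`
const s = require('./backend/services/scraper/index.cjs');

s.run({ template: 'bd:google/serp', purpose: 'discovery', kind: 'serp', query: 'eco-friendly digital planners UK' })
  .then(x => console.log(JSON.stringify(x, null, 2)))
  .catch(e => { console.error(e); process.exit(1); });
```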