Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions scripts/tt-curated/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# common.sh — shared environment and helpers for the tt-curated gbrain scripts.
# Sourced (not executed) by every sibling script; defines paths, exports the
# sandboxed HOME/PATH, and provides run_gbrain / ensure_interactive_env.
set -euo pipefail

# Sandbox HOME so bun/gbrain caches and dotfiles stay out of the real home dir.
export HOME=/home/tt/workspace/tools/gbrain-sandbox-home
# Prefer the locally-installed bun over anything else on PATH.
export PATH=/home/tt/workspace/tools/gbrain-local-bun/bin:$PATH
# Embedding dimensionality used by the curated DB (matches the -768 dir below).
export GBRAIN_EMBED_DIMENSIONS="768"

GBRAIN_ROOT="/home/tt/workspace/tools/gbrain"
# Full command line (binary + args); callers word-split it intentionally.
GBRAIN_CLI="/home/tt/workspace/tools/gbrain-local-bun/bin/bun run src/cli.ts"
CURATED_STAGE="/home/tt/workspace/tools/gbrain-curated-stage"
CURATED_DB_DIR="/home/tt/workspace/tools/gbrain-curated-db-768"
CURATED_DB_PATH="$CURATED_DB_DIR/brain.pglite"
CURATED_LOG_DIR="/home/tt/workspace/tools/gbrain-curated-logs"

mkdir -p "$CURATED_LOG_DIR"

# Run the gbrain CLI from its repo root (bun resolves src/cli.ts relative to cwd).
# NOTE: deliberately leaves the caller cd'ed into $GBRAIN_ROOT; all sibling
# scripts use absolute paths, so this is safe for them.
run_gbrain() {
  # Guard the cd: when this function is called inside `if`/`||` (set -e is
  # suppressed there), an unguarded failing cd would silently run the CLI
  # from whatever directory the caller happened to be in.
  cd "$GBRAIN_ROOT" || return 1
  # shellcheck disable=SC2086  # GBRAIN_CLI is "binary + args" and must word-split
  $GBRAIN_CLI "$@"
}

# Warn (without failing) when the embedding endpoint variables are absent from
# this shell — a common symptom of running the scripts non-interactively so
# the ~/.bashrc exports never loaded.
ensure_interactive_env() {
  local have_env=1
  [[ -n "${OPENAI_BASE_URL:-}" ]] || have_env=0
  [[ -n "${GBRAIN_EMBED_MODEL:-}" ]] || have_env=0
  if (( ! have_env )); then
    echo "warning: OPENAI_BASE_URL or GBRAIN_EMBED_MODEL not set in current shell" >&2
    echo "hint: run these scripts via: bash -ic '<script>' so ~/.bashrc exports load" >&2
  fi
}
86 changes: 86 additions & 0 deletions scripts/tt-curated/cron-embed-health-regression.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env bash
# cron-embed-health-regression.sh — nightly wrapper: runs the embed/health
# pipeline, then inspects the regression snapshot, alerting Telegram on failure.
set -euo pipefail

SCRIPT_DIR="/home/tt/workspace/tools/gbrain/scripts/tt-curated"
LOG_DIR="/home/tt/workspace/tools/gbrain-curated-logs"
# Timestamped per-run log plus a fixed-name JSON snapshot written by the
# regression step (query-regression-last.json).
STAMP="$(date '+%Y%m%d-%H%M%S')"
LOG_FILE="$LOG_DIR/cron-embed-health-regression-$STAMP.log"
JSON_FILE="$LOG_DIR/query-regression-last.json"
# Telegram alert plumbing: bot token lives in the openclaw config.
OPENCLAW_CONFIG="/home/tt/.openclaw/openclaw.json"
CHAT_ID="477144117"

mkdir -p "$LOG_DIR"

# Send a plain-text failure alert to Telegram.
# Reads the bot token from $OPENCLAW_CONFIG and posts to chat $CHAT_ID.
# Callers invoke this best-effort (`|| true`) so a delivery failure never
# masks the original error path.
send_failure_alert() {
  local body="$1"
  # Quoted 'PY' heredoc: the Python source reaches python3 verbatim on stdin;
  # the config path, chat id and message text travel via argv rather than
  # being interpolated into the code.
  /usr/bin/python3 - <<'PY' "$OPENCLAW_CONFIG" "$CHAT_ID" "$body"
import json, pathlib, sys, urllib.parse, urllib.request
config_path = pathlib.Path(sys.argv[1])
chat_id = sys.argv[2]
body = sys.argv[3]
config = json.loads(config_path.read_text())
token = config["channels"]["telegram"]["botToken"]
url = f"https://api.telegram.org/bot{token}/sendMessage"
data = urllib.parse.urlencode({"chat_id": chat_id, "text": body}).encode()
req = urllib.request.Request(url, data=data, method="POST")
with urllib.request.urlopen(req, timeout=20) as resp:
    resp.read()
PY
}

# Run the nightly pipeline with all output captured to the log; on any failure,
# alert Telegram with the log tail and exit non-zero.
{
  echo "[$(date '+%F %T')] start curated embed/health/regression"
  "$SCRIPT_DIR/run-embed-health.sh"
  echo "[$(date '+%F %T')] done"
} >"$LOG_FILE" 2>&1 || {
  tail_text="$(tail -n 40 "$LOG_FILE")"
  # Build the message with printf so \n becomes a real newline. Inside plain
  # double quotes, "\n" is a literal backslash-n and showed up verbatim in the
  # Telegram message.
  printf -v message 'gbrain nightly FAIL\nlog: %s\n\n%s' "$LOG_FILE" "$tail_text"
  send_failure_alert "$message" || true
  echo "FAILED: $LOG_FILE"
  exit 1
}

# Even when the pipeline script exits 0, inspect the regression snapshot and
# alert if the suite itself reported failures or severe retrieval drift.
if [[ -f "$JSON_FILE" ]]; then
  status="$(/usr/bin/python3 - <<'PY' "$JSON_FILE"
import json, sys
obj = json.load(open(sys.argv[1]))
print('ok' if obj.get('ok') else 'fail')
PY
)"
  if [[ "$status" != "ok" ]]; then
    # Up to 10 failing case ids, comma separated.
    summary="$(/usr/bin/python3 - <<'PY' "$JSON_FILE"
import json, sys
obj = json.load(open(sys.argv[1]))
failed = [r['id'] for r in obj.get('results', []) if not r.get('ok')]
print(', '.join(failed[:10]))
PY
)"
    # Up to 5 severe drift cases, formatted as id[priority] r+rank s-score.
    drift="$(/usr/bin/python3 - <<'PY' "$JSON_FILE"
import json, sys
obj = json.load(open(sys.argv[1]))
parts = []
for item in obj.get('severe_drift_cases', [])[:5]:
    priority = item.get('priority', 'high')
    seg = f"{item['id']}[{priority}]"
    if item.get('rank_shift') is not None:
        seg += f" r+{item['rank_shift']}"
    if item.get('score_drop') is not None:
        seg += f" s-{item['score_drop']:.3f}"
    parts.append(seg)
print('; '.join(parts))
PY
)"
    # $'\n' produces a real newline; a bare "\n" inside double quotes is a
    # literal backslash-n and rendered verbatim in the Telegram alert.
    message="gbrain regression FAIL"$'\n'"log: $LOG_FILE"
    if [[ -n "$summary" ]]; then
      message+=$'\n'"failed: $summary"
    fi
    if [[ -n "$drift" ]]; then
      message+=$'\n'"drift: $drift"
    fi
    send_failure_alert "$message" || true
    echo "REGRESSION_FAILED: $LOG_FILE"
    exit 1
  fi
fi

echo "OK: $LOG_FILE"
60 changes: 60 additions & 0 deletions scripts/tt-curated/cron-refresh-import-extract.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# cron-refresh-import-extract.sh — cron wrapper: refresh the curated stage,
# import/extract into the DB, run the smoke check, alert Telegram on failure.
set -euo pipefail

SCRIPT_DIR="/home/tt/workspace/tools/gbrain/scripts/tt-curated"
LOG_DIR="/home/tt/workspace/tools/gbrain-curated-logs"
# Timestamped per-run log file.
STAMP="$(date '+%Y%m%d-%H%M%S')"
LOG_FILE="$LOG_DIR/cron-refresh-import-extract-$STAMP.log"
# Telegram alert plumbing: bot token lives in the openclaw config.
OPENCLAW_CONFIG="/home/tt/.openclaw/openclaw.json"
CHAT_ID="477144117"
# Fixed-name JSON snapshot written by refresh-smoke-check.sh.
SMOKE_JSON="$LOG_DIR/refresh-smoke-last.json"

mkdir -p "$LOG_DIR"

# Post a plain-text failure alert to Telegram (best-effort; callers add || true).
# Token comes from $OPENCLAW_CONFIG, destination chat from $CHAT_ID.
send_failure_alert() {
  local body="$1"
  # The 'PY' heredoc is quoted, so the Python code is passed verbatim; all
  # runtime values arrive through argv instead of shell interpolation.
  /usr/bin/python3 - <<'PY' "$OPENCLAW_CONFIG" "$CHAT_ID" "$body"
import json, pathlib, sys, urllib.parse, urllib.request
config_path = pathlib.Path(sys.argv[1])
chat_id = sys.argv[2]
body = sys.argv[3]
config = json.loads(config_path.read_text())
token = config["channels"]["telegram"]["botToken"]
url = f"https://api.telegram.org/bot{token}/sendMessage"
data = urllib.parse.urlencode({"chat_id": chat_id, "text": body}).encode()
req = urllib.request.Request(url, data=data, method="POST")
with urllib.request.urlopen(req, timeout=20) as resp:
    resp.read()
PY
}

# Run refresh/import/extract then the smoke check, all output captured to the
# log; on any failure, alert Telegram with smoke-failure ids and the log tail.
{
  echo "[$(date '+%F %T')] start curated refresh/import/extract"
  "$SCRIPT_DIR/run-refresh-import-extract.sh"
  echo "[$(date '+%F %T')] smoke check"
  "$SCRIPT_DIR/refresh-smoke-check.sh"
  echo "[$(date '+%F %T')] done"
} >"$LOG_FILE" 2>&1 || {
  tail_text="$(tail -n 40 "$LOG_FILE")"
  # Failing smoke-case ids (up to 10), or empty when no snapshot exists yet.
  smoke_fail="$(/usr/bin/python3 - <<'PY' "$SMOKE_JSON"
import json, pathlib, sys
path = pathlib.Path(sys.argv[1])
if not path.exists():
    print('')
    raise SystemExit(0)
obj = json.loads(path.read_text())
failed = [r['id'] for r in obj.get('results', []) if not r.get('ok')]
print(', '.join(failed[:10]))
PY
)"
  # $'\n' produces real newlines; a bare "\n" inside double quotes is a
  # literal backslash-n and rendered verbatim in the Telegram alert.
  message="gbrain refresh FAIL"$'\n'"log: $LOG_FILE"
  if [[ -n "$smoke_fail" ]]; then
    message+=$'\n'"smoke: $smoke_fail"
  fi
  message+=$'\n\n'"$tail_text"
  send_failure_alert "$message" || true
  echo "FAILED: $LOG_FILE"
  exit 1
}

echo "OK: $LOG_FILE"
44 changes: 44 additions & 0 deletions scripts/tt-curated/embed-safe.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# embed-safe.sh — run `gbrain embed --stale` with retries, verifying progress
# via the CLI's own feature recommendations rather than the exit code alone.
set -euo pipefail
source /home/tt/workspace/tools/gbrain/scripts/tt-curated/common.sh
ensure_interactive_env

# Refuse to run against a missing DB — embedding into nothing would just fail
# confusingly later.
if [[ ! -e "$CURATED_DB_PATH" ]]; then
  echo "curated DB missing: $CURATED_DB_PATH" >&2
  echo "run init-db.sh and import-extract.sh first" >&2
  exit 1
fi

# Tunable via environment: number of embed attempts and the pause between them.
ATTEMPTS="${ATTEMPTS:-3}"
SLEEP_SECONDS="${SLEEP_SECONDS:-90}"

# Retry loop: `embed --stale` can fail transiently (rate limits, endpoint
# hiccups), so try up to $ATTEMPTS times with a pause between attempts.
for attempt in $(seq 1 "$ATTEMPTS"); do
  ts=$(date '+%Y-%m-%d %H:%M:%S')
  log_file="$CURATED_LOG_DIR/embed-safe-$(date '+%Y%m%d-%H%M%S')-attempt${attempt}.log"
  echo "[$ts] embed attempt $attempt/$ATTEMPTS" | tee "$log_file"

  # A non-zero exit here is logged but not fatal: success is judged below by
  # whether the "missing-embeddings" recommendation has disappeared.
  if run_gbrain embed --stale 2>&1 | tee -a "$log_file"; then
    echo "embed command completed on attempt $attempt" | tee -a "$log_file"
  else
    echo "embed command exited non-zero on attempt $attempt" | tee -a "$log_file"
  fi

  # Snapshot stats and feature recommendations into the attempt log.
  stats_json=$(run_gbrain stats 2>/dev/null || true)
  echo "$stats_json" >> "$log_file"

  features_json=$(run_gbrain features --json 2>/dev/null || true)
  echo "$features_json" >> "$log_file"

  # Fix: an empty features_json means the `features` query itself failed,
  # which previously looked like success (grep finds nothing in nothing).
  # Require real output before declaring the embeddings complete.
  if [[ -n "$features_json" ]] && ! grep -q 'missing-embeddings' <<<"$features_json"; then
    echo "no missing-embeddings recommendation remains; stopping" | tee -a "$log_file"
    exit 0
  fi

  if [[ "$attempt" -lt "$ATTEMPTS" ]]; then
    echo "missing embeddings remain; sleeping ${SLEEP_SECONDS}s before retry" | tee -a "$log_file"
    sleep "$SLEEP_SECONDS"
  fi
done

# Deliberately exit 0: leftover stale embeddings are a soft condition that the
# nightly health check surfaces; don't fail the surrounding pipeline here.
echo "embed-safe finished with remaining missing embeddings" >&2
exit 0
126 changes: 126 additions & 0 deletions scripts/tt-curated/gbrain-curated
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env bash
# gbrain-curated — user-facing CLI wrapping the tt-curated maintenance scripts
# (status snapshots, smoke/regression runs, refresh and nightly pipelines).
set -euo pipefail

SCRIPT_DIR="/home/tt/workspace/tools/gbrain/scripts/tt-curated"
LOG_DIR="/home/tt/workspace/tools/gbrain-curated-logs"

# Print the CLI help text to stdout (quoted heredoc: no expansion).
usage() {
  cat <<'EOF'
gbrain-curated — TT curated gbrain helper

Usage:
gbrain-curated help
gbrain-curated status
gbrain-curated smoke
gbrain-curated regression
gbrain-curated baseline
gbrain-curated weekly
gbrain-curated refresh
gbrain-curated nightly
gbrain-curated logs [smoke|regression|weekly|refresh|nightly]

Commands:
status Show compact snapshot from latest smoke/regression/weekly files
smoke Run lightweight retrieval smoke check now
regression Run full regression + drift check now
baseline Re-capture regression baseline intentionally
weekly Generate weekly local review snapshot now
refresh Run refresh/import/extract + smoke now
nightly Run embed/health/regression nightly pipeline now
logs Print latest log/result path for a category
EOF
}

# Print the most recently modified file in $LOG_DIR matching a glob pattern,
# or an empty line when nothing matches.
latest_file() {
  local pattern="$1"
  # python3 explicitly — modern distros ship no bare "python" binary, and the
  # cron siblings in this directory already invoke python3.
  python3 - <<'PY' "$LOG_DIR" "$pattern"
from pathlib import Path
import sys
log_dir = Path(sys.argv[1])
pattern = sys.argv[2]
files = sorted(log_dir.glob(pattern), key=lambda p: p.stat().st_mtime, reverse=True)
print(files[0] if files else "")
PY
}

# Print a compact status snapshot assembled from the latest JSON result files
# in $LOG_DIR (smoke, regression, weekly). Missing files show "no snapshot".
show_status() {
  # python3 explicitly — modern distros ship no bare "python" binary, and the
  # cron siblings in this directory already invoke python3.
  python3 - <<'PY' "$LOG_DIR"
import json, sys
from pathlib import Path
log_dir = Path(sys.argv[1])

def load(name):
    path = log_dir / name
    if not path.exists():
        return None
    return json.loads(path.read_text())

smoke = load('refresh-smoke-last.json')
reg = load('query-regression-last.json')
weekly = load('weekly-baseline-review.json')

print('gbrain curated status')
if smoke:
    print(f"- smoke: {'ok' if smoke.get('ok') else 'fail'} ({smoke.get('passed')}/{smoke.get('total')})")
else:
    print('- smoke: no snapshot')
if reg:
    print(f"- regression: {'ok' if reg.get('ok') else 'fail'} ({reg.get('passed')}/{reg.get('total')}) drift={reg.get('severe_drift_count')}")
else:
    print('- regression: no snapshot')
if weekly:
    r = weekly.get('regression', {})
    s = weekly.get('smoke', {})
    print(f"- weekly: regression={r.get('passed')}/{r.get('total')} smoke={s.get('passed')}/{s.get('total')}")
else:
    print('- weekly: no snapshot')
PY
}

# Print the most relevant log/result path for a category (default: nightly).
# Timestamped logs resolve via latest_file; fixed-name JSON snapshots are
# printed directly.
show_log() {
  local category="${1:-nightly}"
  case "$category" in
    smoke)
      latest_file 'refresh-smoke-*.log'
      ;;
    regression)
      printf '%s\n' "$LOG_DIR/query-regression-last.json"
      ;;
    weekly)
      printf '%s\n' "$LOG_DIR/weekly-baseline-review.json"
      ;;
    refresh)
      latest_file 'cron-refresh-import-extract-*.log'
      ;;
    nightly)
      latest_file 'cron-embed-health-regression-*.log'
      ;;
    *)
      echo "unknown log kind: $category" >&2
      exit 1
      ;;
  esac
}

# Dispatch on the first argument. The heavy commands run through `bash -ic`
# so ~/.bashrc exports (OPENAI_BASE_URL, GBRAIN_EMBED_MODEL, ...) are loaded
# into the child shell — see ensure_interactive_env in common.sh.
cmd="${1:-help}"
case "$cmd" in
  help|-h|--help)
    usage
    ;;
  status)
    show_status
    ;;
  smoke)
    bash -ic "$SCRIPT_DIR/refresh-smoke-check.sh"
    ;;
  regression)
    bash -ic "$SCRIPT_DIR/query-regression.py"
    ;;
  baseline)
    # Intentional, operator-driven baseline recapture.
    bash -ic "$SCRIPT_DIR/query-regression.py --write-baseline"
    ;;
  weekly)
    bash -ic "$SCRIPT_DIR/weekly-baseline-review.py"
    ;;
  refresh)
    bash -ic "$SCRIPT_DIR/cron-refresh-import-extract.sh"
    ;;
  nightly)
    bash -ic "$SCRIPT_DIR/cron-embed-health-regression.sh"
    ;;
  logs)
    # Optional second arg selects the category; defaults to nightly.
    show_log "${2:-nightly}"
    ;;
  *)
    echo "unknown command: $cmd" >&2
    usage >&2
    exit 1
    ;;
esac
7 changes: 7 additions & 0 deletions scripts/tt-curated/health.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# health.sh — quick health dump of the curated gbrain DB:
# doctor diagnostics, feature recommendations, and stats, in that order.
# Any failing command aborts the script (set -e).
set -euo pipefail
source /home/tt/workspace/tools/gbrain/scripts/tt-curated/common.sh

run_gbrain doctor --json
run_gbrain features --json
run_gbrain stats
16 changes: 16 additions & 0 deletions scripts/tt-curated/import-extract.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# import-extract.sh — load the curated staging directory into the gbrain DB
# (without embedding), then run extraction over it. Initializes the DB on
# first use.
set -euo pipefail
source /home/tt/workspace/tools/gbrain/scripts/tt-curated/common.sh

# The staging directory is produced by refresh-stage.sh; bail out early if it
# has never been created.
[[ -d "$CURATED_STAGE" ]] || {
  echo "curated stage missing: $CURATED_STAGE" >&2
  echo "run refresh-stage.sh first" >&2
  exit 1
}

# Bootstrap the DB on first run.
[[ -e "$CURATED_DB_PATH" ]] || /home/tt/workspace/tools/gbrain/scripts/tt-curated/init-db.sh

run_gbrain import "$CURATED_STAGE" --no-embed
run_gbrain extract all --dir "$CURATED_STAGE"
7 changes: 7 additions & 0 deletions scripts/tt-curated/init-db.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# init-db.sh — DESTRUCTIVE: wipe and re-create the curated gbrain DB.
set -euo pipefail
source /home/tt/workspace/tools/gbrain/scripts/tt-curated/common.sh

# ${VAR:?} guard + "--": abort loudly instead of expanding to a bare
# `rm -rf` of the wrong path if the variable is ever unset or empty
# (e.g. after a common.sh refactor).
rm -rf -- "${CURATED_DB_DIR:?}"
mkdir -p "$CURATED_DB_DIR"
run_gbrain init --pglite --path "$CURATED_DB_PATH" --json
Loading