Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c622306
chore: reorganize repo for orchestrator pipeline
melroyanthony Mar 14, 2026
7ae26cd
feat(stage-0): initialize project structure
melroyanthony Mar 14, 2026
a658f6d
feat(stage-1): analyze requirements with RICE/MoSCoW
melroyanthony Mar 14, 2026
fc9f304
feat(stage-2): define architecture and API contracts
melroyanthony Mar 14, 2026
371d98c
chore(stage-2.5): create issues and feature branch
melroyanthony Mar 14, 2026
ca19b84
feat(stage-3): implement Track A (Rust) and Track B (Python agent)
melroyanthony Mar 14, 2026
a2b8eb0
feat(stage-4): add tests and validate E2E
melroyanthony Mar 14, 2026
1698f61
feat(stage-4): add tests and validate E2E flow
melroyanthony Mar 14, 2026
40523b0
chore: add Rust target/ to .gitignore and remove build artifacts
melroyanthony Mar 14, 2026
21b5887
feat(stage-5): finalize with docs, CI/CD, and changelog
melroyanthony Mar 14, 2026
45f59b8
chore: remove .claude/ from tracking and add to .gitignore
melroyanthony Mar 14, 2026
942b198
fix: remove || true and stderr suppression in CI E2E steps
melroyanthony Mar 14, 2026
9a28f3f
fix(stage-3): add snake traversal, edition 2021, working set fix
melroyanthony Mar 14, 2026
a64a263
fix: address PR review comments
melroyanthony Mar 14, 2026
a553fdc
refactor: cleanup stale files and fix solution/README alignment
melroyanthony Mar 14, 2026
0f00f5e
fix: resolve Issue #8, K_full scaling, and review comments
melroyanthony Mar 14, 2026
7fa4c90
fix: address 11 new review comments
melroyanthony Mar 14, 2026
9690c76
docs: align all planning docs with Rust implementation
melroyanthony Mar 14, 2026
aba1304
fix: address 9 new review comments
melroyanthony Mar 14, 2026
a932d51
chore: remove CLAUDE.md from tracking and fix stale doc reference
melroyanthony Mar 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
name: CI

# Run on pushes and PRs targeting main or any feature branch.
on:
  push:
    branches:
      - main
      - "feat/*"
  pull_request:
    branches:
      - main
      - "feat/*"

defaults:
  run:
    # All paths below are relative to the repo root.
    shell: bash

# ---------------------------------------------------------------------------
# Jobs
# ---------------------------------------------------------------------------

jobs:

  # -------------------------------------------------------------------------
  # Track A: Rust
  # -------------------------------------------------------------------------
  rust:
    name: "Track A — Rust (build + test)"
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install Rust stable
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: stable

      # Cargo.lock is gitignored, so the manifest is the only stable input
      # available for the cache key; restore-keys allows partial reuse.
      - name: Cache Cargo registry + build artifacts
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            solution/backend/rust/target
          key: ${{ runner.os }}-cargo-${{ hashFiles('solution/backend/rust/Cargo.toml') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Build release binary
        working-directory: solution/backend/rust
        run: cargo build --release

      # No hard-coded test count in the step name: a count goes stale as soon
      # as tests are added or removed.
      - name: Run unit tests
        working-directory: solution/backend/rust
        run: cargo test -- --nocapture

# -------------------------------------------------------------------------
# Track B: Python Agent
# -------------------------------------------------------------------------
python:
name: "Track B — Python agent (lint + smoke test)"
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"

- name: Set up Python via uv
working-directory: solution/agent
run: uv python install 3.12

- name: Create virtual environment
working-directory: solution/agent
run: uv venv

- name: Install dependencies
working-directory: solution/agent
run: uv pip install -r requirements.txt

- name: Verify evaluator imports
working-directory: solution/agent
run: |
uv run python -c "
from evaluator import (
parse_problem, compute_subgraph_latency, check_oom,
evaluate, solution_to_dict, topological_sort,
)
print('evaluator imports OK')
"

- name: Verify scheduler imports
working-directory: solution/agent
run: |
uv run python -c "
from scheduler import build_baseline, optimize
print('scheduler imports OK')
"

- name: Run agent (baseline mode) against benchmark 1
working-directory: solution/agent
env:
GOOGLE_API_KEY: dummy
run: |
uv run python agent.py \
../../problem/benchmarks/mlsys-2026-1.json \
/tmp/track-b-ci-1.json
uv run python -c "
import json, sys
with open('/tmp/track-b-ci-1.json') as f:
s = json.load(f)
required = ('subgraphs', 'granularities', 'tensors_to_retain', 'subgraph_latencies')
for k in required:
assert k in s, f'Missing key: {k}'
assert len(s['subgraphs']) > 0, 'No subgraphs'
assert all(lat >= 0 for lat in s['subgraph_latencies']), 'Negative latency'
total = sum(s['subgraph_latencies'])
print(f'Track B CI smoke test passed. subgraphs={len(s[\"subgraphs\"])} total_latency={total:.2f}')
"

# -------------------------------------------------------------------------
# E2E: Build Rust binary, run all 5 benchmarks through both tracks
# -------------------------------------------------------------------------
e2e:
name: "E2E — both tracks, all 5 benchmarks"
runs-on: ubuntu-latest
needs:
- rust
- python

steps:
- name: Checkout
uses: actions/checkout@v4

# -- Track A setup --

- name: Install Rust stable
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable

- name: Cache Cargo artifacts
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
solution/backend/rust/target
key: ${{ runner.os }}-cargo-e2e-${{ hashFiles('solution/backend/rust/Cargo.toml') }}
restore-keys: |
${{ runner.os }}-cargo-

- name: Build Track A release binary
working-directory: solution/backend/rust
run: cargo build --release

# -- Track B setup --

- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"

- name: Set up Python via uv
working-directory: solution/agent
run: uv python install 3.12

- name: Create virtual environment and install deps
working-directory: solution/agent
run: |
uv venv
uv pip install -r requirements.txt

# -- Run E2E against all 5 benchmarks --

- name: Track A — run all 5 benchmarks
run: |
RUST_BIN="solution/backend/rust/target/release/mlsys"
BENCH_DIR="problem/benchmarks"
TMP_DIR="/tmp/e2e-track-a-$$"
mkdir -p "$TMP_DIR"

PASS=0; FAIL=0

for b in 1 5 9 13 17; do
OUT="$TMP_DIR/out-$b.json"
"$RUST_BIN" "$BENCH_DIR/mlsys-2026-$b.json" "$OUT"

python3 - <<'PYEOF' "$OUT" "$b"
import json, sys

out_file = sys.argv[1]
bench_id = sys.argv[2]

with open(out_file) as f:
s = json.load(f)

required = ('subgraphs', 'granularities', 'tensors_to_retain', 'subgraph_latencies')
for k in required:
assert k in s, f"Benchmark {bench_id}: missing key {k}"

assert len(s['subgraphs']) > 0, f"Benchmark {bench_id}: no subgraphs"

for i, lat in enumerate(s['subgraph_latencies']):
assert lat >= 0, f"Benchmark {bench_id}: negative latency at index {i}"

for i, g in enumerate(s['granularities']):
assert all(x > 0 for x in g), f"Benchmark {bench_id}: invalid granularity at {i}"

op_counts = {}
for sg in s['subgraphs']:
for op in sg:
op_counts[op] = op_counts.get(op, 0) + 1
dupes = {k: v for k, v in op_counts.items() if v > 1}
assert not dupes, f"Benchmark {bench_id}: duplicate ops {dupes}"

total = sum(s['subgraph_latencies'])
print(f"Track A benchmark {bench_id}: OK — subgraphs={len(s['subgraphs'])} total_latency={total:.2f}")
PYEOF

done

- name: Track B — run all 5 benchmarks (baseline mode)
working-directory: solution/agent
env:
GOOGLE_API_KEY: dummy
run: |
BENCH_DIR="../../problem/benchmarks"
TMP_DIR="/tmp/e2e-track-b-$$"
mkdir -p "$TMP_DIR"

for b in 1 5 9 13 17; do
OUT="$TMP_DIR/out-$b.json"
uv run python agent.py "$BENCH_DIR/mlsys-2026-$b.json" "$OUT"

python3 - <<'PYEOF' "$OUT" "$b"
import json, sys

out_file = sys.argv[1]
bench_id = sys.argv[2]

with open(out_file) as f:
s = json.load(f)

required = ('subgraphs', 'granularities', 'tensors_to_retain', 'subgraph_latencies')
for k in required:
assert k in s, f"Benchmark {bench_id}: missing key {k}"

assert len(s['subgraphs']) > 0, f"Benchmark {bench_id}: no subgraphs"

for i, lat in enumerate(s['subgraph_latencies']):
assert lat >= 0, f"Benchmark {bench_id}: negative latency at index {i}"

for i, g in enumerate(s['granularities']):
assert all(x > 0 for x in g), f"Benchmark {bench_id}: invalid granularity at {i}"

op_counts = {}
for sg in s['subgraphs']:
for op in sg:
op_counts[op] = op_counts.get(op, 0) + 1
dupes = {k: v for k, v in op_counts.items() if v > 1}
assert not dupes, f"Benchmark {bench_id}: duplicate ops {dupes}"

total = sum(s['subgraph_latencies'])
print(f"Track B benchmark {bench_id}: OK — subgraphs={len(s['subgraphs'])} total_latency={total:.2f}")
PYEOF

done
86 changes: 86 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Python
__pycache__/
*.py[cod]
*$py.class
.venv/
venv/
.env
.env.*
!.env.example
*.egg-info/
.pytest_cache/
.mypy_cache/
.ruff_cache/
*.pyo
*.pyd
.Python
pip-log.txt

# Node
node_modules/
.next/
out/
dist/
build/
.npm/
.npmrc
*.tsbuildinfo
.turbo/
.vercel/
.cache/

# Package managers
package-lock.json
yarn.lock
pnpm-lock.yaml
uv.lock

# IDE
.idea/
.vscode/
*.swp
*.swo
*.sublime-*
.project
.settings/

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
Thumbs.db
ehthumbs.db

# Docker
*.log
docker-compose.override.yml

# Coverage
htmlcov/
.coverage
.coverage.*
coverage/
*.cover
*.lcov

# Testing
# (.pytest_cache/ is already listed under "Python" above)
.tox/
.nox/
.hypothesis/

# Rust
target/
Cargo.lock

# Claude Code config
.claude/
CLAUDE.md

# Secrets (never commit)
*.pem
*.key
secrets/
credentials.json
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading
Loading