reaatech · reaatech · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -9,21 +9,27 @@ on:
 jobs:
   evaluate:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
     steps:
       - uses: actions/checkout@v4
 
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+
       - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
           node-version: '22'
-          cache: 'npm'
-          cache-dependency-path: package-lock.json
+          cache: 'pnpm'
 
       - name: Install dependencies
-        run: npm ci --legacy-peer-deps
+        run: pnpm install --frozen-lockfile
 
       - name: Build
-        run: npm run build
+        run: pnpm build
 
       - name: Download baseline results
         if: github.event_name == 'pull_request'
@@ -40,23 +46,23 @@ jobs:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
         run: |
           mkdir -p results
-          npx agent-eval-harness eval \
+          node dist/cli.js eval \
             trajectories/examples/*.jsonl \
             --config trajectories/examples/config.yaml \
             --output results/
 
       - name: Run regression gates
         if: github.event_name == 'pull_request' && hashFiles('baseline/') != ''
         run: |
-          npx agent-eval-harness compare \
+          node dist/cli.js compare \
             baseline/results.json \
             results/results.json \
             --format markdown \
             --output results/comparison.md
 
       - name: Check gates
         run: |
-          npx agent-eval-harness gate \
+          node dist/cli.js gate \
             results/results.json \
             --preset standard \
             --exit-code

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -4,75 +4,102 @@ on:
   push:
     tags:
       - 'v*'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: false
+
+env:
+  NODE_VERSION: 22
 
 jobs:
   release:
+    name: Release
     runs-on: ubuntu-latest
     permissions:
       contents: write
       packages: write
+      id-token: write
     steps:
-      - uses: actions/checkout@v4
-
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
+
       - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: '22'
-          cache: 'npm'
-          cache-dependency-path: package-lock.json
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'pnpm'
           registry-url: 'https://registry.npmjs.org'
-      
+
       - name: Install dependencies
-        run: npm ci
-      
+        run: pnpm install --frozen-lockfile
+
       - name: Run tests
-        run: npm test
-      
+        run: pnpm test
+
       - name: Build
-        run: npm run build
-      
+        run: pnpm build
+
       - name: Publish to npm
-        run: npm publish --access public
+        run: pnpm publish --access public --no-git-checks
         env:
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
-
+          NPM_CONFIG_PROVENANCE: 'true'
+
+      - name: Mirror to GitHub Packages
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cat > .npmrc <<EOF
+          @reaatech:registry=https://npm.pkg.github.com
+          //npm.pkg.github.com/:_authToken=${NODE_AUTH_TOKEN}
+          EOF
+          pnpm publish --registry=https://npm.pkg.github.com --no-git-checks
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
-      
+
       - name: Login to Docker Hub
         uses: docker/login-action@v3
         with:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PASSWORD }}
-      
+
       - name: Build and push Docker image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           context: .
           push: true
           tags: |
             ${{ github.repository }}:${{ github.ref_name }}
             ${{ github.repository }}:latest
-          cache-from: type=registry,ref=${{ github.repository }}:buildcache
-          cache-to: type=inline
-      
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
       - name: Create GitHub Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         with:
           generate_release_notes: true
           files: |
             dist/*.js
           body: |
             ## Changes
             See the [CHANGELOG](https://github.com/${{ github.repository }}/blob/main/CHANGELOG.md) for details.
-            
+
             ## Installation
-            
+
             ### npm
             ```bash
-            npm install agent-eval-harness
+            npm install @reaatech/agent-eval-harness
             ```
-            
+
             ### Docker
             ```bash
             docker pull ${{ github.repository }}:${{ github.ref_name }}
+            ```
diff --git a/.lintstagedrc.json b/.lintstagedrc.json
@@ -1,13 +1,4 @@
 {
-  "src/**/*.{ts,js}": [
-    "eslint --fix",
-    "prettier --write"
-  ],
-  "tests/**/*.{ts,js}": [
-    "eslint --fix",
-    "prettier --write"
-  ],
-  "*.{json,md,yaml,yml}": [
-    "prettier --write"
-  ]
+  "*.{ts,js,json,jsonc}": ["biome check --write --no-errors-on-unmatched"],
+  "*.{md,yaml,yml}": ["biome format --write --no-errors-on-unmatched"]
 }
diff --git a/.npmrc b/.npmrc
@@ -0,0 +1,2 @@
+shamefully-hoist=false
+strict-peer-dependencies=true
diff --git a/.prettierrc b/.prettierrc
diff --git a/AGENTS.md b/AGENTS.md
@@ -196,7 +196,7 @@ Golden trajectories serve as reference implementations for regression testing.
 ### Comparing Against Golden
 
 ```typescript
-import { compareAgainstGolden } from 'agent-eval-harness';
+import { compareAgainstGolden } from '@reaatech/agent-eval-harness';
 
 const result = compareAgainstGolden(trajectory, goldenTrajectory, {
   similarityThreshold: 0.85,
@@ -251,7 +251,7 @@ judge:
 4. **Apply calibration** to future judge scores
 
 ```typescript
-import { calibrate, applyCalibration } from 'agent-eval-harness';
+import { calibrate, applyCalibration } from '@reaatech/agent-eval-harness';
 
 await calibrate({
   humanLabelsPath: 'calibration/human-labels.jsonl',
@@ -363,7 +363,7 @@ latency:
 ### Latency Monitoring
 
 ```typescript
-import { monitorLatency } from 'agent-eval-harness';
+import { monitorLatency } from '@reaatech/agent-eval-harness';
 
 const budget = {
   per_turn_p99: 5000,
@@ -405,7 +405,7 @@ tool_validation:
 ### Validation Example
 
 ```typescript
-import { validateTrajectory, validateSchema } from 'agent-eval-harness';
+import { validateTrajectory, validateSchema } from '@reaatech/agent-eval-harness';
 
 const toolSchemas = {
   send_reset_email: {

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -187,7 +187,7 @@ describe('MyEvaluator', () => {
 
 ```typescript
 import { describe, it, expect } from 'vitest';
-import { loadFromFile, evaluate } from 'agent-eval-harness';
+import { loadFromFile, evaluate } from '@reaatech/agent-eval-harness';
 
 describe('Integration: Load and Evaluate', () => {
   it('should load and evaluate trajectory', () => {

diff --git a/Dockerfile b/Dockerfile
@@ -1,28 +1,27 @@
 # Stage 1: Build
 FROM node:22-alpine AS builder
 
-WORKDIR /app
+RUN npm install -g pnpm@10
 
-# Copy package files
-COPY package.json package-lock.json ./
+WORKDIR /app
 
-# Install dependencies (full install for build)
-RUN npm ci --legacy-peer-deps && npm cache clean --force
+COPY package.json pnpm-lock.yaml ./
+RUN pnpm install --frozen-lockfile
 
-# Copy source
 COPY tsconfig.json ./
 COPY src ./src
 
-# Build
-RUN npm run build
+RUN pnpm build
 
 # Stage 2: Install production deps only
 FROM node:22-alpine AS prod-deps
 
+RUN npm install -g pnpm@10
+
 WORKDIR /app
 
-COPY package.json package-lock.json ./
-RUN npm ci --legacy-peer-deps --only=production --ignore-scripts && npm cache clean --force
+COPY package.json pnpm-lock.yaml ./
+RUN pnpm install --prod --frozen-lockfile --ignore-scripts
 
 # Stage 3: Runtime
 FROM node:22-alpine AS runtime

diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ End-to-end agent evaluation harness for full agent runs. Supports trajectory eva
 
 ```bash
 # npm
-npm install agent-eval-harness
+npm install @reaatech/agent-eval-harness
 
 # Or use without installing
-npx agent-eval-harness eval trajectories/*.jsonl
+npx @reaatech/agent-eval-harness eval trajectories/*.jsonl
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		shamefully-hoist=false
		strict-peer-dependencies=true