diff --git a/ROADMAP.md b/ROADMAP.md
index efe926c..7114565 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -193,7 +193,7 @@ Derived data pipelines, performance optimization, durable background jobs, enter
- [x] [P1] High-performer agent stack analysis: which combinations of MCPs, skills, commands, and subagents differentiate top performers
- [x] [P0] Per-tool success rate tracking with compound reliability computation (10 steps at 99% = 90.4% end-to-end)
- [x] [P0] Harness configuration fingerprinting from session telemetry (tools, context files, permissions, customizations)
-- [ ] [P1] Context quality scoring: AGENTS.md freshness, token efficiency, guide/sensor coverage
+- [x] [P1] Context quality scoring: AGENTS.md freshness, token efficiency, guide/sensor coverage
- [ ] [P1] Harness evolution timeline: before/after correlation of configuration changes with outcome changes
- [ ] [P1] Harnessability scoring per project: documentation quality, typing strength, module boundaries
- [ ] [P1] Paragon's 4-dimension evaluation: tool correctness, tool usage accuracy, task completion, task efficiency
diff --git a/frontend/src/components/maturity/__tests__/context-quality-table.test.tsx b/frontend/src/components/maturity/__tests__/context-quality-table.test.tsx
new file mode 100644
index 0000000..39f9ede
--- /dev/null
+++ b/frontend/src/components/maturity/__tests__/context-quality-table.test.tsx
@@ -0,0 +1,48 @@
+import { render, screen } from "@testing-library/react"
+
+import { ContextQualityTable } from "@/components/maturity/context-quality-table"
+
+describe("ContextQualityTable", () => {
+ it("renders an empty state", () => {
+    render(<ContextQualityTable rows={[]} />)
+
+ expect(screen.getByText("No context quality signals available yet.")).toBeInTheDocument()
+ })
+
+ it("renders context quality rows", () => {
+    render(
+      <ContextQualityTable
+        rows={[
+          {
+            repository: "acme/api",
+            session_count: 12,
+            context_quality_score: 82.8,
+            guide_coverage_score: 80,
+            guide_freshness_score: 100,
+            token_efficiency_score: 90,
+            sensor_coverage_score: 62.5,
+            cache_hit_rate: 0.42,
+            avg_input_tokens: 18500,
+            context_usage_coverage_pct: 50,
+            tool_coverage_pct: 100,
+            model_coverage_pct: 75,
+            facet_coverage_pct: 50,
+            has_claude_md: true,
+            has_agents_md: false,
+            readiness_checked_at: null,
+            top_gaps: ["Add AGENTS.md", "Complete outcome facets"],
+          },
+        ]}
+      />,
+    )
+
+ expect(screen.getByText("acme/api")).toBeInTheDocument()
+ expect(screen.getByText("12 sessions")).toBeInTheDocument()
+ expect(screen.getByText("82.8")).toBeInTheDocument()
+ expect(screen.getByText("100 fresh")).toBeInTheDocument()
+ expect(screen.getByText("42% cache, 18.5K avg input")).toBeInTheDocument()
+ expect(screen.getByText("50% context / 75% models")).toBeInTheDocument()
+ expect(screen.getByText("Add AGENTS.md")).toBeInTheDocument()
+ expect(screen.getByText("Complete outcome facets")).toBeInTheDocument()
+ })
+})
diff --git a/frontend/src/components/maturity/__tests__/maturity-summary.test.tsx b/frontend/src/components/maturity/__tests__/maturity-summary.test.tsx
index 6d3fe94..eb29338 100644
--- a/frontend/src/components/maturity/__tests__/maturity-summary.test.tsx
+++ b/frontend/src/components/maturity/__tests__/maturity-summary.test.tsx
@@ -28,6 +28,7 @@ describe("MaturitySummary", () => {
agent_team_modes: [],
customization_outcomes: [],
project_readiness: [],
+ context_quality: [],
}}
/>,
)
diff --git a/frontend/src/components/maturity/context-quality-table.tsx b/frontend/src/components/maturity/context-quality-table.tsx
new file mode 100644
index 0000000..0a58584
--- /dev/null
+++ b/frontend/src/components/maturity/context-quality-table.tsx
@@ -0,0 +1,117 @@
+import { Badge } from "@/components/ui/badge"
+import { Card, CardContent, CardHeader } from "@/components/ui/card"
+import { formatMetric, formatPercent, formatTokens } from "@/lib/utils"
+import type { ContextQualityEntry } from "@/types/api"
+
+interface ContextQualityTableProps {
+ rows: ContextQualityEntry[]
+}
+
+function formatCoverage(value: number): string {
+ return `${value.toFixed(0)}%`
+}
+
+export function ContextQualityTable({ rows }: ContextQualityTableProps) {
+ if (rows.length === 0) {
+    return (
+      <Card>
+        <CardHeader>Context Quality</CardHeader>
+        <CardContent>
+          <p className="text-sm text-muted-foreground">
+            No context quality signals available yet.
+          </p>
+        </CardContent>
+      </Card>
+    )
+ }
+
+  return (
+    <Card>
+      <CardHeader>
+        Context Quality
+        <p className="text-sm text-muted-foreground">
+          Scores project context by guidance coverage, freshness, token efficiency, and telemetry
+          sensor coverage.
+        </p>
+      </CardHeader>
+      <CardContent>
+        <div className="overflow-x-auto">
+          <table className="w-full text-sm">
+            <thead>
+              <tr className="text-left text-muted-foreground">
+                <th className="pb-2 pr-4 font-medium">Repository</th>
+                <th className="pb-2 pr-4 font-medium">Score</th>
+                <th className="pb-2 pr-4 font-medium">Guides</th>
+                <th className="pb-2 pr-4 font-medium">Tokens</th>
+                <th className="pb-2 pr-4 font-medium">Sensors</th>
+                <th className="pb-2 font-medium">Top Gaps</th>
+              </tr>
+            </thead>
+            <tbody>
+              {rows.map((row) => (
+                <tr key={row.repository} className="border-t">
+                  <td className="py-2 pr-4">
+                    <div className="flex flex-col">
+                      <span className="font-medium">{row.repository}</span>
+                      <span className="text-xs text-muted-foreground">
+                        {row.session_count} sessions
+                      </span>
+                    </div>
+                  </td>
+                  <td className="py-2 pr-4">
+                    <div className="flex flex-col">
+                      <span className="font-medium">{formatMetric(row.context_quality_score)}</span>
+                      <span className="text-xs text-muted-foreground">overall</span>
+                    </div>
+                  </td>
+                  <td className="py-2 pr-4">
+                    <div className="flex flex-col">
+                      <span>{formatMetric(row.guide_coverage_score)}</span>
+                      <span className="text-xs text-muted-foreground">
+                        {formatMetric(row.guide_freshness_score, 0)} fresh
+                      </span>
+                    </div>
+                  </td>
+                  <td className="py-2 pr-4">
+                    <div className="flex flex-col">
+                      <span>{formatMetric(row.token_efficiency_score)}</span>
+                      <span className="text-xs text-muted-foreground">
+                        {formatPercent(row.cache_hit_rate)} cache,{" "}
+                        {row.avg_input_tokens == null
+                          ? "-"
+                          : formatTokens(row.avg_input_tokens)}{" "}
+                        avg input
+                      </span>
+                    </div>
+                  </td>
+                  <td className="py-2 pr-4">
+                    <div className="flex flex-col">
+                      <span>{formatMetric(row.sensor_coverage_score)}</span>
+                      <span className="text-xs text-muted-foreground">
+                        {formatCoverage(row.context_usage_coverage_pct)} context /{" "}
+                        {formatCoverage(row.model_coverage_pct)} models
+                      </span>
+                    </div>
+                  </td>
+                  <td className="py-2">
+                    {row.top_gaps.length === 0 ? (
+                      <span className="text-xs text-muted-foreground">No major gaps</span>
+                    ) : (
+                      row.top_gaps.map((gap) => (
+                        <Badge key={gap} variant="outline" className="mr-1 mb-1">
+                          {gap}
+                        </Badge>
+                      ))
+                    )}
+                  </td>
+                </tr>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      </CardContent>
+    </Card>
+  )
+}
diff --git a/frontend/src/pages/maturity.tsx b/frontend/src/pages/maturity.tsx
index 64a9a4a..4b66807 100644
--- a/frontend/src/pages/maturity.tsx
+++ b/frontend/src/pages/maturity.tsx
@@ -18,6 +18,7 @@ import { TeamCustomizationLandscapeTable } from "@/components/maturity/team-cust
import { LeverageScoreTable } from "@/components/maturity/leverage-score-table"
import { LeverageTrendChart } from "@/components/maturity/leverage-trend-chart"
import { EffectivenessScatter } from "@/components/maturity/effectiveness-scatter"
+import { ContextQualityTable } from "@/components/maturity/context-quality-table"
import { ProjectReadinessTable } from "@/components/maturity/project-readiness-table"
import { ToolAdoptionSummary } from "@/components/tools/tool-adoption-summary"
import { ToolAdoptionChart } from "@/components/tools/tool-adoption-chart"
@@ -151,7 +152,10 @@ export function MaturityPage({ teamId, dateRange }: MaturityPageProps) {
)}
{data && activeTab === "projects" && (
-            <ProjectReadinessTable rows={data.project_readiness} />
+            <div className="space-y-6">
+              <ProjectReadinessTable rows={data.project_readiness} />
+              <ContextQualityTable rows={data.context_quality} />
+            </div>
)}
)
diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts
index 7670459..7712bcf 100644
--- a/frontend/src/types/api.ts
+++ b/frontend/src/types/api.ts
@@ -2035,6 +2035,26 @@ export interface ProjectReadinessEntry {
session_count: number
}
+export interface ContextQualityEntry {
+ repository: string
+ session_count: number
+ context_quality_score: number
+ guide_coverage_score: number
+ guide_freshness_score: number
+ token_efficiency_score: number
+ sensor_coverage_score: number
+ cache_hit_rate: number | null
+ avg_input_tokens: number | null
+ context_usage_coverage_pct: number
+ tool_coverage_pct: number
+ model_coverage_pct: number
+ facet_coverage_pct: number
+ has_claude_md: boolean
+ has_agents_md: boolean
+ readiness_checked_at: string | null
+ top_gaps: string[]
+}
+
export interface MaturityAnalyticsResponse {
tool_categories: ToolCategoryBreakdown
engineer_profiles: EngineerLeverageProfile[]
@@ -2050,6 +2070,7 @@ export interface MaturityAnalyticsResponse {
agent_team_modes: AgentTeamModeSummary[]
customization_outcomes: CustomizationOutcomeAttribution[]
project_readiness: ProjectReadinessEntry[]
+ context_quality: ContextQualityEntry[]
sessions_analyzed: number
avg_leverage_score: number
avg_effectiveness_score: number | null
diff --git a/src/primer/common/schemas.py b/src/primer/common/schemas.py
index 1e3e847..ae48667 100644
--- a/src/primer/common/schemas.py
+++ b/src/primer/common/schemas.py
@@ -2454,6 +2454,26 @@ class ProjectReadinessEntry(BaseModel):
session_count: int
+class ContextQualityEntry(BaseModel):
+ repository: str
+ session_count: int
+ context_quality_score: float
+ guide_coverage_score: float
+ guide_freshness_score: float
+ token_efficiency_score: float
+ sensor_coverage_score: float
+ cache_hit_rate: float | None = None
+ avg_input_tokens: float | None = None
+ context_usage_coverage_pct: float
+ tool_coverage_pct: float
+ model_coverage_pct: float
+ facet_coverage_pct: float
+ has_claude_md: bool
+ has_agents_md: bool
+ readiness_checked_at: datetime | None = None
+ top_gaps: list[str] = Field(default_factory=list)
+
+
class MaturityAnalyticsResponse(BaseModel):
tool_categories: ToolCategoryBreakdown
engineer_profiles: list[EngineerLeverageProfile]
@@ -2469,6 +2489,7 @@ class MaturityAnalyticsResponse(BaseModel):
agent_team_modes: list[AgentTeamModeSummary] = []
customization_outcomes: list[CustomizationOutcomeAttribution] = []
project_readiness: list[ProjectReadinessEntry]
+ context_quality: list[ContextQualityEntry] = Field(default_factory=list)
sessions_analyzed: int
avg_leverage_score: float
avg_effectiveness_score: float | None = None
diff --git a/src/primer/server/services/maturity_service.py b/src/primer/server/services/maturity_service.py
index 661211c..f19d9ee 100644
--- a/src/primer/server/services/maturity_service.py
+++ b/src/primer/server/services/maturity_service.py
@@ -29,6 +29,7 @@
from primer.common.schemas import (
AgentSkillUsage,
AgentTeamModeSummary,
+ ContextQualityEntry,
CustomizationOutcomeAttribution,
CustomizationStateFunnel,
CustomizationUsage,
@@ -1490,40 +1491,219 @@ def _team_customization_label(
),
)
- # 6. Project readiness
+ # 6. Project readiness and context quality
project_readiness: list[ProjectReadinessEntry] = []
+ context_quality: list[ContextQualityEntry] = []
if sessions_analyzed > 0:
- repo_session_counts = (
- db.query(SessionModel.repository_id, func.count(SessionModel.id))
+ repo_session_rows = (
+ db.query(
+ SessionModel.id,
+ SessionModel.repository_id,
+ SessionModel.input_tokens,
+ SessionModel.cache_read_tokens,
+ SessionModel.source_metadata,
+ )
.filter(
SessionModel.id.in_(db.query(session_id_subq.c.id)),
SessionModel.repository_id.isnot(None),
)
- .group_by(SessionModel.repository_id)
.all()
)
- repo_counts = dict(repo_session_counts)
- if repo_counts:
+ facet_session_ids = {
+ row.session_id
+ for row in (
+ db.query(SessionFacets.session_id)
+ .filter(SessionFacets.session_id.in_(db.query(session_id_subq.c.id)))
+ .all()
+ )
+ }
+ model_session_ids = {session_id for session_id, *_rest in model_rows}
+ repo_context_buckets: dict[str, dict] = {}
+ for (
+ session_id,
+ repository_id,
+ input_tokens,
+ cache_read_tokens,
+ source_metadata,
+ ) in repo_session_rows:
+ if repository_id is None:
+ continue
+ bucket = repo_context_buckets.setdefault(
+ repository_id,
+ {
+ "sessions": set(),
+ "input_tokens": 0,
+ "cache_read_tokens": 0,
+ "tool_sessions": set(),
+ "model_sessions": set(),
+ "facet_sessions": set(),
+ "context_usage_sessions": set(),
+ },
+ )
+ bucket["sessions"].add(session_id)
+ bucket["input_tokens"] += input_tokens or 0
+ bucket["cache_read_tokens"] += cache_read_tokens or 0
+ if per_session.get(session_id):
+ bucket["tool_sessions"].add(session_id)
+ if session_id in model_session_ids:
+ bucket["model_sessions"].add(session_id)
+ if session_id in facet_session_ids:
+ bucket["facet_sessions"].add(session_id)
+ if _context_signal_count(source_metadata) > 0:
+ bucket["context_usage_sessions"].add(session_id)
+
+ def _coverage_pct(count: int, total: int) -> float:
+ return round((count / total) * 100, 1) if total > 0 else 0.0
+
+ def _guide_freshness_score(checked_at: datetime | None) -> float:
+ if checked_at is None:
+ return 0.0
+ now = datetime.now(tz=checked_at.tzinfo) if checked_at.tzinfo else datetime.now()
+ age_days = max((now - checked_at).days, 0)
+ if age_days <= 14:
+ return 100.0
+ if age_days <= 30:
+ return 80.0
+ if age_days <= 90:
+ return 50.0
+ return 25.0
+
+ def _prompt_efficiency_score(avg_input_tokens: float | None) -> float:
+ if avg_input_tokens is None:
+ return 0.0
+ if avg_input_tokens <= 20_000:
+ return 1.0
+ if avg_input_tokens <= 50_000:
+ return 0.8
+ if avg_input_tokens <= 100_000:
+ return 0.5
+ return 0.25
+
+ if repo_context_buckets:
repos = (
db.query(GitRepository)
.filter(
- GitRepository.id.in_(list(repo_counts.keys())),
- GitRepository.ai_readiness_score.isnot(None),
+ GitRepository.id.in_(list(repo_context_buckets.keys())),
)
.all()
)
+ repos_by_id = {repo.id: repo for repo in repos}
for repo in repos:
- project_readiness.append(
- ProjectReadinessEntry(
+ if repo.ai_readiness_score is not None:
+ project_readiness.append(
+ ProjectReadinessEntry(
+ repository=repo.full_name,
+ has_claude_md=repo.has_claude_md or False,
+ has_agents_md=repo.has_agents_md or False,
+ has_claude_dir=repo.has_claude_dir or False,
+ ai_readiness_score=repo.ai_readiness_score or 0.0,
+ session_count=len(repo_context_buckets[repo.id]["sessions"]),
+ )
+ )
+ for repository_id, bucket in repo_context_buckets.items():
+ repo = repos_by_id.get(repository_id)
+ if repo is None:
+ continue
+ session_count = len(bucket["sessions"])
+ input_tokens = bucket["input_tokens"]
+ cache_read_tokens = bucket["cache_read_tokens"]
+ token_denominator = input_tokens + cache_read_tokens
+ cache_hit_rate = (
+ round(cache_read_tokens / token_denominator, 3)
+ if token_denominator > 0
+ else None
+ )
+ avg_input_tokens = (
+ round(input_tokens / session_count, 1)
+ if session_count > 0 and token_denominator > 0
+ else None
+ )
+
+ guide_coverage_score = (
+ repo.ai_readiness_score
+ if repo.ai_readiness_score is not None
+ else (
+ (50.0 if repo.has_claude_md else 0.0)
+ + (20.0 if repo.has_agents_md else 0.0)
+ + (30.0 if repo.has_claude_dir else 0.0)
+ )
+ )
+ guide_freshness_score = _guide_freshness_score(repo.ai_readiness_checked_at)
+ context_usage_coverage_pct = _coverage_pct(
+ len(bucket["context_usage_sessions"]),
+ session_count,
+ )
+ tool_coverage_pct = _coverage_pct(len(bucket["tool_sessions"]), session_count)
+ model_coverage_pct = _coverage_pct(len(bucket["model_sessions"]), session_count)
+ facet_coverage_pct = _coverage_pct(len(bucket["facet_sessions"]), session_count)
+ sensor_coverage_score = round(
+ (
+ context_usage_coverage_pct
+ + tool_coverage_pct
+ + model_coverage_pct
+ + facet_coverage_pct
+ )
+ / 4,
+ 1,
+ )
+ cache_score = min((cache_hit_rate or 0.0) / 0.5, 1.0)
+ token_efficiency_score = round(
+ ((cache_score * 0.6) + (_prompt_efficiency_score(avg_input_tokens) * 0.4))
+ * 100,
+ 1,
+ )
+ context_quality_score = round(
+ (guide_coverage_score * 0.30)
+ + (guide_freshness_score * 0.15)
+ + (token_efficiency_score * 0.25)
+ + (sensor_coverage_score * 0.30),
+ 1,
+ )
+ gaps: list[str] = []
+ if not repo.has_claude_md:
+ gaps.append("Add CLAUDE.md")
+ if not repo.has_agents_md:
+ gaps.append("Add AGENTS.md")
+ if guide_freshness_score < 75:
+ gaps.append("Refresh guidance scan")
+ if cache_hit_rate is None:
+ gaps.append("Add token/cache telemetry")
+ elif cache_hit_rate < 0.25:
+ gaps.append("Improve cache reuse")
+ if avg_input_tokens is not None and avg_input_tokens > 50_000:
+ gaps.append("Trim prompt/context payloads")
+ if context_usage_coverage_pct < 50:
+ gaps.append("Increase context telemetry coverage")
+ if tool_coverage_pct < 90:
+ gaps.append("Complete tool telemetry")
+ if model_coverage_pct < 90:
+ gaps.append("Complete model telemetry")
+ if facet_coverage_pct < 90:
+ gaps.append("Complete outcome facets")
+
+ context_quality.append(
+ ContextQualityEntry(
repository=repo.full_name,
+ session_count=session_count,
+ context_quality_score=context_quality_score,
+ guide_coverage_score=round(guide_coverage_score, 1),
+ guide_freshness_score=guide_freshness_score,
+ token_efficiency_score=token_efficiency_score,
+ sensor_coverage_score=sensor_coverage_score,
+ cache_hit_rate=cache_hit_rate,
+ avg_input_tokens=avg_input_tokens,
+ context_usage_coverage_pct=context_usage_coverage_pct,
+ tool_coverage_pct=tool_coverage_pct,
+ model_coverage_pct=model_coverage_pct,
+ facet_coverage_pct=facet_coverage_pct,
has_claude_md=repo.has_claude_md or False,
has_agents_md=repo.has_agents_md or False,
- has_claude_dir=repo.has_claude_dir or False,
- ai_readiness_score=repo.ai_readiness_score or 0.0,
- session_count=repo_counts.get(repo.id, 0),
+ readiness_checked_at=repo.ai_readiness_checked_at,
+ top_gaps=gaps[:4],
)
)
project_readiness.sort(key=lambda p: p.ai_readiness_score, reverse=True)
+ context_quality.sort(key=lambda row: (-row.context_quality_score, -row.session_count))
# Aggregate metrics
avg_leverage = (
@@ -1560,6 +1740,7 @@ def _team_customization_label(
agent_team_modes=agent_team_modes,
customization_outcomes=customization_outcomes,
project_readiness=project_readiness,
+ context_quality=context_quality,
sessions_analyzed=sessions_analyzed,
avg_leverage_score=round(avg_leverage, 1),
avg_effectiveness_score=(
diff --git a/tests/test_maturity.py b/tests/test_maturity.py
index 9f3a4b7..25a28c0 100644
--- a/tests/test_maturity.py
+++ b/tests/test_maturity.py
@@ -94,6 +94,7 @@ def test_maturity_empty(client, admin_headers):
assert data["daily_leverage"] == []
assert data["agent_skill_breakdown"] == []
assert data["harness_configuration_fingerprints"] == []
+ assert data["context_quality"] == []
def test_get_maturity_analytics_uses_cached_payload(monkeypatch, db_session):
@@ -634,6 +635,95 @@ def test_maturity_builds_harness_configuration_fingerprints(
assert fingerprint["avg_leverage_score"] > 0
+def test_maturity_builds_context_quality_scores(
+ client, admin_headers, seeded_maturity_data, db_session
+):
+ now = datetime.now(tz=UTC)
+ s1 = seeded_maturity_data["s1"]
+ s2 = seeded_maturity_data["s2"]
+ repo = GitRepository(
+ full_name=f"acme/context-{uuid.uuid4().hex[:8]}",
+ has_claude_md=True,
+ has_agents_md=False,
+ has_claude_dir=True,
+ ai_readiness_score=80.0,
+ ai_readiness_checked_at=now - timedelta(days=7),
+ )
+ db_session.add(repo)
+ db_session.flush()
+
+ low_repo = GitRepository(
+ full_name=f"acme/context-low-{uuid.uuid4().hex[:8]}",
+ has_claude_md=False,
+ has_agents_md=False,
+ has_claude_dir=False,
+ ai_readiness_score=10.0,
+ ai_readiness_checked_at=now - timedelta(days=120),
+ )
+ db_session.add(low_repo)
+ db_session.flush()
+
+ s1.repository_id = repo.id
+ s1.input_tokens = 1000
+ s1.cache_read_tokens = 3000
+ s1.source_metadata = {
+ "native_telemetry": {
+ "context_usage": {"reference_count": 2},
+ }
+ }
+ s2.repository_id = repo.id
+ s2.input_tokens = 3000
+ s2.cache_read_tokens = 1000
+ low_session = Session(
+ id=str(uuid.uuid4()),
+ engineer_id=seeded_maturity_data["eng2"].id,
+ repository_id=low_repo.id,
+ started_at=now - timedelta(hours=6),
+ input_tokens=120_000,
+ cache_read_tokens=0,
+ )
+ db_session.add_all(
+ [
+ low_session,
+ SessionFacets(session_id=s1.id, outcome="success"),
+ ModelUsage(
+ session_id=s1.id,
+ model_name="claude-sonnet-4-5-20250929",
+ input_tokens=1000,
+ output_tokens=500,
+ cache_read_tokens=3000,
+ cache_creation_tokens=0,
+ ),
+ ]
+ )
+ db_session.flush()
+
+ response = client.get("/api/v1/analytics/maturity", headers=admin_headers)
+
+ assert response.status_code == 200
+ data = response.json()
+ assert data["context_quality"][0]["repository"] == repo.full_name
+ assert data["context_quality"][-1]["repository"] == low_repo.full_name
+ quality = next(row for row in data["context_quality"] if row["repository"] == repo.full_name)
+ assert quality["session_count"] == 2
+ assert quality["guide_coverage_score"] == 80.0
+ assert quality["guide_freshness_score"] == 100.0
+ assert quality["cache_hit_rate"] == 0.5
+ assert quality["avg_input_tokens"] == 2000.0
+ assert quality["token_efficiency_score"] == 100.0
+ assert quality["context_usage_coverage_pct"] == 50.0
+ assert quality["tool_coverage_pct"] == 100.0
+ assert quality["model_coverage_pct"] == 50.0
+ assert quality["facet_coverage_pct"] == 50.0
+ assert quality["sensor_coverage_score"] == 62.5
+ assert quality["context_quality_score"] == 82.8
+ assert quality["top_gaps"] == [
+ "Add AGENTS.md",
+ "Complete model telemetry",
+ "Complete outcome facets",
+ ]
+
+
def test_maturity_builds_delegation_patterns(
client, admin_headers, seeded_maturity_data, db_session
):